In [1]:
import numpy as np
import pandas as pd
import json
import os
import random
import string
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

# Limit DataFrame previews to 10 rows. The fully qualified option name is
# required: the bare pattern "max_rows" also matches "styler.render.max_rows"
# on pandas >= 1.4, which makes set_option raise OptionError.
pd.set_option("display.max_rows", 10)

In [2]:
# Read the Spotify Web API credentials from the hidden `settings.env` file.
# Despite the extension, the file is parsed as JSON and must contain the keys
# SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET.

with open('settings.env') as f:
    env_vars = json.load(f)

# Export the credentials as environment variables: SpotifyClientCredentials()
# is constructed below without arguments and picks them up from there.
os.environ['SPOTIPY_CLIENT_ID'] = env_vars['SPOTIPY_CLIENT_ID']
os.environ['SPOTIPY_CLIENT_SECRET'] = env_vars['SPOTIPY_CLIENT_SECRET']

# `spotipy` and `SpotifyClientCredentials` are already imported in the first
# cell, so they are not imported a second time here.
sp = spotipy.Spotify(client_credentials_manager=SpotifyClientCredentials())

In [5]:
# Getting 8100 random songs to balance the hit/not-hit songs datasets
# Year range same as selected for hit songs: 2000-2020

n_tracks = 8100
track_id_list = []

# Each request searches for one random character at a random result offset and
# keeps the first hit. A random offset can land past the end of a query's
# results, in which case `items` is empty; such pages are skipped and the loop
# keeps going until `n_tracks` IDs have been collected (indexing [0] on an
# empty page used to raise IndexError and abort the whole run).
while len(track_id_list) < n_tracks:
    search = sp.search(q=random.choice(string.ascii_letters + string.digits) + ' year:2000-2020',
                       type='track',
                       market='US',
                       offset=random.randint(0, 1990))
    items = search['tracks']['items']
    if not items:
        continue
    track_id = items[0]['id']
    track_id_list.append(track_id)

In [9]:
# Sanity check: number of track IDs collected by the search loop.
len(track_id_list)

8100

In [6]:
# Wrap the collected IDs in a single-column frame for the clean-up steps.
df = pd.DataFrame({'track_id': track_id_list})

In [10]:
# Preview the collected track IDs.
df

Unnamed: 0,track_id
0,608a1wIsSd5KzMEqm1O7w3
1,33VcQq9PLgJhMllZtPRP4M
2,5mI9HJdkqtqWr7KsA8hIbt
3,4FDG9SHyQkxkJxGLrF1ZIp
4,21kOVEG3bDCVphKhXL8XmQ
...,...
8095,6YXG34IWbilypPwjBTOrUC
8096,08cXy6KUizaAelYXtcew3w
8097,6Hmj7SrLRbreLVfVS7mV1S
8098,30AW1wD8qK1Bqw3Z44xs3f


In [11]:
# Rows with a missing track ID -- the recorded output is empty, i.e. no NaNs.

df[df['track_id'].isna()]

Unnamed: 0,track_id


In [13]:
# Shape of the fully duplicated rows (1837 duplicates in the recorded run;
# the number changes from run to run because the search is random).

df.loc[df.duplicated()].shape

(1837, 1)

In [14]:
# Keep only the first occurrence of every track ID.
df = df.drop_duplicates()
df

Unnamed: 0,track_id
0,608a1wIsSd5KzMEqm1O7w3
1,33VcQq9PLgJhMllZtPRP4M
2,5mI9HJdkqtqWr7KsA8hIbt
3,4FDG9SHyQkxkJxGLrF1ZIp
4,21kOVEG3bDCVphKhXL8XmQ
...,...
8092,6MBPfkgkEpivlswUuTMdj0
8095,6YXG34IWbilypPwjBTOrUC
8096,08cXy6KUizaAelYXtcew3w
8098,30AW1wD8qK1Bqw3Z44xs3f


In [40]:
# Renumber the rows 0..n-1 now that the duplicates are gone.
df = df.reset_index(drop=True)

In [42]:
# Persist the first de-duplicated batch of track IDs.
# NOTE(review): 8100 collected - 1837 duplicates = 6263 rows, so the "6262" in
# the file name looks like the last index rather than the row count. The name
# is kept as-is because a later cell reads this exact path.
df.to_csv('6262_random_songs_2000-2020.csv', index=False, encoding='utf-8')

In [56]:
# Getting 4000 more random songs to balance the hit/not-hit songs datasets
# Year range same as selected for hit songs: 2000-2020

n_tracks = 4000
track_id_list = []

# Each request searches for one random character at a random result offset and
# keeps the first hit. A random offset can land past the end of a query's
# results, in which case `items` is empty; such pages are skipped and the loop
# keeps going until `n_tracks` IDs have been collected (indexing [0] on an
# empty page used to raise IndexError and abort the whole run).
while len(track_id_list) < n_tracks:
    search = sp.search(q=random.choice(string.ascii_letters + string.digits) + ' year:2000-2020',
                       type='track',
                       market='US',
                       offset=random.randint(0, 1990))
    items = search['tracks']['items']
    if not items:
        continue
    track_id = items[0]['id']
    track_id_list.append(track_id)

In [57]:
# Sanity check: number of track IDs collected by the second search loop.
len(track_id_list)

4000

In [58]:
# Wrap the second batch of IDs in a single-column frame (replaces `df`).
df = pd.DataFrame({'track_id': track_id_list})

In [59]:
# Preview the second batch of track IDs.
df

Unnamed: 0,track_id
0,4ek3pPdfvIbPxynmlisI0k
1,2aibwv5hGXSgw7Yru8IYTO
2,3LUWWox8YYykohBbHUrrxd
3,1ckLp8lCl8LipXI0ypX72m
4,3fBbfrWZUuWWrMQXFISr7N
...,...
3995,6e7hIhOLH9zvb3zP5O5gt0
3996,0CLibGiioSuyci4NSbYi9q
3997,6otiaV2fagE3s8IvP6WkwG
3998,6mifT2myhvgBlPpf8kyHT7


In [60]:
# Rows with a missing track ID -- the recorded output is empty, i.e. no NaNs.

df[df['track_id'].isna()]

Unnamed: 0,track_id


In [62]:
# Shape of the fully duplicated rows (501 duplicates in the recorded run;
# the number changes from run to run because the search is random).

df.loc[df.duplicated()].shape

(501, 1)

In [63]:
# Keep the first occurrence of every track ID and renumber the rows.
df = df.drop_duplicates().reset_index(drop=True)
df

Unnamed: 0,track_id
0,4ek3pPdfvIbPxynmlisI0k
1,2aibwv5hGXSgw7Yru8IYTO
2,3LUWWox8YYykohBbHUrrxd
3,1ckLp8lCl8LipXI0ypX72m
4,3fBbfrWZUuWWrMQXFISr7N
...,...
3494,6V81K1OcvYgHEnCA6A2Qx9
3495,4D1eVq5AUILwjg3tAe7o6M
3496,6otiaV2fagE3s8IvP6WkwG
3497,6mifT2myhvgBlPpf8kyHT7


In [64]:
# Persist the second de-duplicated batch (4000 collected - 501 duplicates).
df.to_csv('3499_random_songs_2000-2020.csv', index=False, encoding='utf-8')

In [3]:
# Reload both saved batches of track IDs and stack them into one frame with a
# fresh 0..n-1 index.
df = pd.concat(
    [pd.read_csv(csv_path)
     for csv_path in ('6262_random_songs_2000-2020.csv',
                      '3499_random_songs_2000-2020.csv')],
    ignore_index=True,
)

In [4]:
# Preview the combined track IDs from both batches.
df

Unnamed: 0,track_id
0,608a1wIsSd5KzMEqm1O7w3
1,33VcQq9PLgJhMllZtPRP4M
2,5mI9HJdkqtqWr7KsA8hIbt
3,4FDG9SHyQkxkJxGLrF1ZIp
4,21kOVEG3bDCVphKhXL8XmQ
...,...
9757,6V81K1OcvYgHEnCA6A2Qx9
9758,4D1eVq5AUILwjg3tAe7o6M
9759,6otiaV2fagE3s8IvP6WkwG
9760,6mifT2myhvgBlPpf8kyHT7


In [5]:
# Each batch was de-duplicated on its own before saving; this removes the
# track IDs that occur in both batches, then renumbers the rows.
df = df.drop_duplicates().reset_index(drop=True)
df

Unnamed: 0,track_id
0,608a1wIsSd5KzMEqm1O7w3
1,33VcQq9PLgJhMllZtPRP4M
2,5mI9HJdkqtqWr7KsA8hIbt
3,4FDG9SHyQkxkJxGLrF1ZIp
4,21kOVEG3bDCVphKhXL8XmQ
...,...
8384,3h1ghsF7s2VplSMp9pcTTr
8385,2GQEM9JuHu30sGFvRYeCxz
8386,4aceMabp5rzZYoKKXsUffr
8387,6mifT2myhvgBlPpf8kyHT7


In [6]:
# Persist the combined, de-duplicated track IDs.
# NOTE(review): the "8300" in the file name appears to be a rounded figure --
# the audio-features frame built from `df` below has 8389 rows.
df.to_csv('8300_random_songs_2000-2020.csv', index=False, encoding='utf-8')

In [7]:
# Get audio_features
#
# Request the features in batches instead of one HTTP call per track:
# `sp.audio_features` accepts a list of up to 100 track IDs and returns one
# entry per ID (None where no features are available).
# NOTE(review): relies on the batch endpoint returning entries in request
# order -- confirm against the Spotify Web API reference.
#
# Every entry is wrapped in a one-element list so that `audio[i][0]` has the
# same shape the one-call-per-track version produced.

track_ids = df['track_id'].tolist()
audio = []
for start in range(0, len(track_ids), 100):
    batch = sp.audio_features(track_ids[start:start + 100])
    audio.extend([features] for features in batch)

In [8]:
# Build `audio_df`: one row of audio features per track in `df`.

# Placeholder for tracks whose audio-features lookup returned None: every
# column is NaN, so these rows are easy to spot and can be dropped later.
empty_row = {'danceability': np.nan,
 'energy': np.nan,
 'key': np.nan,
 'loudness': np.nan,
 'mode': np.nan,
 'speechiness': np.nan,
 'acousticness': np.nan,
 'instrumentalness': np.nan,
 'liveness': np.nan,
 'valence': np.nan,
 'tempo': np.nan,
 'type': np.nan,
 'id': np.nan,
 'uri': np.nan,
 'track_href': np.nan,
 'analysis_url': np.nan,
 'duration_ms': np.nan,
 'time_signature': np.nan}

# Gather every row first and build the frame in a single call. Appending
# inside the loop copied the whole frame on each iteration, and
# DataFrame.append no longer exists in pandas >= 2.0. The resulting frame has
# a regular 0..n-1 index instead of a repeated index of 0.
rows = [
    audio[i][0] if audio[i][0] is not None else empty_row
    for i in range(len(df['track_id']))
]
audio_df = pd.DataFrame(rows)

In [11]:
# Count the missing values per column (placeholder rows are all-NaN).
audio_df.isnull().sum()

danceability      581
energy            581
key               581
loudness          581
mode              581
                 ... 
uri               581
track_href        581
analysis_url      581
duration_ms       581
time_signature    581
Length: 18, dtype: int64

In [12]:
# (rows, columns) of the audio-features frame, placeholder rows included.
audio_df.shape

(8389, 18)

In [29]:
# Inspect the rows for which no audio features were returned.
audio_df[audio_df['danceability'].isna()]

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
40,,,,,,,,,,,,,,,,,,
65,,,,,,,,,,,,,,,,,,
73,,,,,,,,,,,,,,,,,,
90,,,,,,,,,,,,,,,,,,
99,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8340,,,,,,,,,,,,,,,,,,
8366,,,,,,,,,,,,,,,,,,
8367,,,,,,,,,,,,,,,,,,
8383,,,,,,,,,,,,,,,,,,


In [31]:
# Discard every row that contains at least one missing value.
audio_df = audio_df.dropna()

In [33]:
# Renumber the remaining rows 0..n-1 and preview the result.
audio_df = audio_df.reset_index(drop=True)
audio_df

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,0.413,0.807,11.0,-3.499,0.0,0.3180,0.05360,0.000000,0.6310,0.438,149.330,audio_features,608a1wIsSd5KzMEqm1O7w3,spotify:track:608a1wIsSd5KzMEqm1O7w3,https://api.spotify.com/v1/tracks/608a1wIsSd5K...,https://api.spotify.com/v1/audio-analysis/608a...,296147.0,4.0
1,0.131,0.279,0.0,-13.504,1.0,0.0379,0.70400,0.002490,0.1210,0.175,179.252,audio_features,33VcQq9PLgJhMllZtPRP4M,spotify:track:33VcQq9PLgJhMllZtPRP4M,https://api.spotify.com/v1/tracks/33VcQq9PLgJh...,https://api.spotify.com/v1/audio-analysis/33Vc...,201080.0,4.0
2,0.373,0.249,2.0,-23.432,0.0,0.0477,0.99500,0.920000,0.1060,0.656,148.346,audio_features,5mI9HJdkqtqWr7KsA8hIbt,spotify:track:5mI9HJdkqtqWr7KsA8hIbt,https://api.spotify.com/v1/tracks/5mI9HJdkqtqW...,https://api.spotify.com/v1/audio-analysis/5mI9...,132000.0,4.0
3,0.751,0.672,1.0,-3.017,1.0,0.2450,0.00698,0.000000,0.1720,0.340,158.087,audio_features,4FDG9SHyQkxkJxGLrF1ZIp,spotify:track:4FDG9SHyQkxkJxGLrF1ZIp,https://api.spotify.com/v1/tracks/4FDG9SHyQkxk...,https://api.spotify.com/v1/audio-analysis/4FDG...,195818.0,4.0
4,0.831,0.795,1.0,-6.186,1.0,0.2640,0.22100,0.000000,0.2910,0.744,160.063,audio_features,21kOVEG3bDCVphKhXL8XmQ,spotify:track:21kOVEG3bDCVphKhXL8XmQ,https://api.spotify.com/v1/tracks/21kOVEG3bDCV...,https://api.spotify.com/v1/audio-analysis/21kO...,96064.0,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7803,0.569,0.840,1.0,-4.123,1.0,0.1180,0.02950,0.000118,0.0995,0.749,183.059,audio_features,5JWPUEov2wlX7c0jhYZpeB,spotify:track:5JWPUEov2wlX7c0jhYZpeB,https://api.spotify.com/v1/tracks/5JWPUEov2wlX...,https://api.spotify.com/v1/audio-analysis/5JWP...,249587.0,3.0
7804,0.187,0.191,2.0,-22.582,1.0,0.0360,0.98900,0.877000,0.1270,0.685,190.959,audio_features,3h1ghsF7s2VplSMp9pcTTr,spotify:track:3h1ghsF7s2VplSMp9pcTTr,https://api.spotify.com/v1/tracks/3h1ghsF7s2Vp...,https://api.spotify.com/v1/audio-analysis/3h1g...,190569.0,3.0
7805,0.867,0.477,9.0,-7.183,0.0,0.0490,0.00843,0.175000,0.1130,0.614,124.979,audio_features,2GQEM9JuHu30sGFvRYeCxz,spotify:track:2GQEM9JuHu30sGFvRYeCxz,https://api.spotify.com/v1/tracks/2GQEM9JuHu30...,https://api.spotify.com/v1/audio-analysis/2GQE...,223480.0,4.0
7806,0.419,0.532,5.0,-4.759,0.0,0.0344,0.12700,0.000000,0.0785,0.337,184.197,audio_features,4aceMabp5rzZYoKKXsUffr,spotify:track:4aceMabp5rzZYoKKXsUffr,https://api.spotify.com/v1/tracks/4aceMabp5rzZ...,https://api.spotify.com/v1/audio-analysis/4ace...,218737.0,4.0


In [34]:
# Label every random song with success = 0.0 (presumably the "not-hit"
# class of the hit/not-hit dataset -- confirm against the hit-song notebook).
audio_df = audio_df.assign(success=0.0)

In [35]:
# Preview the labelled frame.
# NOTE(review): the recorded output below shows a "sucess" column filled with
# 1.0, which does not match the preceding cell (it sets "success" to 0.0).
# The output looks stale -- re-run before relying on it.
audio_df

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature,sucess
0,0.413,0.807,11.0,-3.499,0.0,0.3180,0.05360,0.000000,0.6310,0.438,149.330,audio_features,608a1wIsSd5KzMEqm1O7w3,spotify:track:608a1wIsSd5KzMEqm1O7w3,https://api.spotify.com/v1/tracks/608a1wIsSd5K...,https://api.spotify.com/v1/audio-analysis/608a...,296147.0,4.0,1.0
1,0.131,0.279,0.0,-13.504,1.0,0.0379,0.70400,0.002490,0.1210,0.175,179.252,audio_features,33VcQq9PLgJhMllZtPRP4M,spotify:track:33VcQq9PLgJhMllZtPRP4M,https://api.spotify.com/v1/tracks/33VcQq9PLgJh...,https://api.spotify.com/v1/audio-analysis/33Vc...,201080.0,4.0,1.0
2,0.373,0.249,2.0,-23.432,0.0,0.0477,0.99500,0.920000,0.1060,0.656,148.346,audio_features,5mI9HJdkqtqWr7KsA8hIbt,spotify:track:5mI9HJdkqtqWr7KsA8hIbt,https://api.spotify.com/v1/tracks/5mI9HJdkqtqW...,https://api.spotify.com/v1/audio-analysis/5mI9...,132000.0,4.0,1.0
3,0.751,0.672,1.0,-3.017,1.0,0.2450,0.00698,0.000000,0.1720,0.340,158.087,audio_features,4FDG9SHyQkxkJxGLrF1ZIp,spotify:track:4FDG9SHyQkxkJxGLrF1ZIp,https://api.spotify.com/v1/tracks/4FDG9SHyQkxk...,https://api.spotify.com/v1/audio-analysis/4FDG...,195818.0,4.0,1.0
4,0.831,0.795,1.0,-6.186,1.0,0.2640,0.22100,0.000000,0.2910,0.744,160.063,audio_features,21kOVEG3bDCVphKhXL8XmQ,spotify:track:21kOVEG3bDCVphKhXL8XmQ,https://api.spotify.com/v1/tracks/21kOVEG3bDCV...,https://api.spotify.com/v1/audio-analysis/21kO...,96064.0,4.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7803,0.569,0.840,1.0,-4.123,1.0,0.1180,0.02950,0.000118,0.0995,0.749,183.059,audio_features,5JWPUEov2wlX7c0jhYZpeB,spotify:track:5JWPUEov2wlX7c0jhYZpeB,https://api.spotify.com/v1/tracks/5JWPUEov2wlX...,https://api.spotify.com/v1/audio-analysis/5JWP...,249587.0,3.0,1.0
7804,0.187,0.191,2.0,-22.582,1.0,0.0360,0.98900,0.877000,0.1270,0.685,190.959,audio_features,3h1ghsF7s2VplSMp9pcTTr,spotify:track:3h1ghsF7s2VplSMp9pcTTr,https://api.spotify.com/v1/tracks/3h1ghsF7s2Vp...,https://api.spotify.com/v1/audio-analysis/3h1g...,190569.0,3.0,1.0
7805,0.867,0.477,9.0,-7.183,0.0,0.0490,0.00843,0.175000,0.1130,0.614,124.979,audio_features,2GQEM9JuHu30sGFvRYeCxz,spotify:track:2GQEM9JuHu30sGFvRYeCxz,https://api.spotify.com/v1/tracks/2GQEM9JuHu30...,https://api.spotify.com/v1/audio-analysis/2GQE...,223480.0,4.0,1.0
7806,0.419,0.532,5.0,-4.759,0.0,0.0344,0.12700,0.000000,0.0785,0.337,184.197,audio_features,4aceMabp5rzZYoKKXsUffr,spotify:track:4aceMabp5rzZYoKKXsUffr,https://api.spotify.com/v1/tracks/4aceMabp5rzZ...,https://api.spotify.com/v1/audio-analysis/4ace...,218737.0,4.0,1.0


In [37]:
# Persist the first batch of audio features (8389 rows - 581 without features).
audio_df.to_csv('7808_random_songs_with_audio_features.csv', index=False, encoding='utf-8')

In [42]:
# Getting 2000 more random songs to balance the hit/not-hit songs datasets
# Year range same as selected for hit songs: 2000-2020

n_tracks = 2000
track_id_list = []

# Each request searches for one random character at a random result offset and
# keeps the first hit. A random offset can land past the end of a query's
# results, in which case `items` is empty; such pages are skipped and the loop
# keeps going until `n_tracks` IDs have been collected (indexing [0] on an
# empty page used to raise IndexError and abort the whole run).
while len(track_id_list) < n_tracks:
    search = sp.search(q=random.choice(string.ascii_letters + string.digits) + ' year:2000-2020',
                       type='track',
                       market='US',
                       offset=random.randint(0, 1990))
    items = search['tracks']['items']
    if not items:
        continue
    track_id = items[0]['id']
    track_id_list.append(track_id)

In [57]:
# Sanity check: number of track IDs collected by the third search loop.
len(track_id_list)

2000

In [44]:
# Wrap the third batch of IDs in a single-column frame (replaces `df`).
df = pd.DataFrame({'track_id': track_id_list})

In [45]:
# Shape of the fully duplicated rows (149 duplicates in the recorded run).
df.loc[df.duplicated()].shape

(149, 1)

In [46]:
# Keep the first occurrence of every track ID and renumber the rows.
df = df.drop_duplicates().reset_index(drop=True)
df

Unnamed: 0,track_id
0,2twGKYnxAYNRQ9z9F3sQ4o
1,6Kn5yzAyOw3jf4Y5a6d1Oq
2,1YlulsUKDduOmC7WxvXYPW
3,3DA4SE262k6rdEtlHteNyq
4,6ihL9TjfRjadfEePzXXyVF
...,...
1846,4TTV7EcfroSLWzXRY6gLv6
1847,3cFHCXuBZKMJd8Q4Wt8RB3
1848,0KkIkfsLEJbrcIhYsCL7L5
1849,6NtUvEqIOgO1rRkQk6F0aE


In [51]:
# Get audio_features
#
# Request the features in batches instead of one HTTP call per track:
# `sp.audio_features` accepts a list of up to 100 track IDs and returns one
# entry per ID (None where no features are available).
# NOTE(review): relies on the batch endpoint returning entries in request
# order -- confirm against the Spotify Web API reference.
#
# Every entry is wrapped in a one-element list so that `audio[i][0]` has the
# same shape the one-call-per-track version produced.

track_ids = df['track_id'].tolist()
audio = []
for start in range(0, len(track_ids), 100):
    batch = sp.audio_features(track_ids[start:start + 100])
    audio.extend([features] for features in batch)

In [60]:
# Build `audio_df`: one row of audio features per track in `df`.

# Placeholder for tracks whose audio-features lookup returned None: every
# column is NaN, so these rows are easy to spot and can be dropped later.
empty_row = {'danceability': np.nan,
 'energy': np.nan,
 'key': np.nan,
 'loudness': np.nan,
 'mode': np.nan,
 'speechiness': np.nan,
 'acousticness': np.nan,
 'instrumentalness': np.nan,
 'liveness': np.nan,
 'valence': np.nan,
 'tempo': np.nan,
 'type': np.nan,
 'id': np.nan,
 'uri': np.nan,
 'track_href': np.nan,
 'analysis_url': np.nan,
 'duration_ms': np.nan,
 'time_signature': np.nan}

# Gather every row first and build the frame in a single call. Appending
# inside the loop copied the whole frame on each iteration, and
# DataFrame.append no longer exists in pandas >= 2.0. The resulting frame has
# a regular 0..n-1 index instead of a repeated index of 0.
rows = [
    audio[i][0] if audio[i][0] is not None else empty_row
    for i in range(len(df['track_id']))
]
audio_df = pd.DataFrame(rows)

In [61]:
# (rows, columns) of the audio-features frame, placeholder rows included.
audio_df.shape

(1851, 18)

In [63]:
# Discard rows with missing values, renumber the rest and preview the result.
audio_df = audio_df.dropna().reset_index(drop=True)
audio_df

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,0.728,0.656,6.0,-5.652,1.0,0.4160,0.068800,0.000000,0.1700,0.817,155.866,audio_features,2twGKYnxAYNRQ9z9F3sQ4o,spotify:track:2twGKYnxAYNRQ9z9F3sQ4o,https://api.spotify.com/v1/tracks/2twGKYnxAYNR...,https://api.spotify.com/v1/audio-analysis/2twG...,184673.0,4.0
1,0.476,0.478,11.0,-11.766,1.0,0.0395,0.779000,0.117000,0.1030,0.489,89.060,audio_features,6Kn5yzAyOw3jf4Y5a6d1Oq,spotify:track:6Kn5yzAyOw3jf4Y5a6d1Oq,https://api.spotify.com/v1/tracks/6Kn5yzAyOw3j...,https://api.spotify.com/v1/audio-analysis/6Kn5...,259080.0,4.0
2,0.683,0.420,5.0,-6.510,1.0,0.0542,0.087500,0.000000,0.2250,0.469,128.621,audio_features,1YlulsUKDduOmC7WxvXYPW,spotify:track:1YlulsUKDduOmC7WxvXYPW,https://api.spotify.com/v1/tracks/1YlulsUKDduO...,https://api.spotify.com/v1/audio-analysis/1Ylu...,213507.0,4.0
3,0.596,0.522,9.0,-6.633,1.0,0.0358,0.373000,0.000000,0.2910,0.443,76.042,audio_features,3DA4SE262k6rdEtlHteNyq,spotify:track:3DA4SE262k6rdEtlHteNyq,https://api.spotify.com/v1/tracks/3DA4SE262k6r...,https://api.spotify.com/v1/audio-analysis/3DA4...,188320.0,4.0
4,0.718,0.691,4.0,-6.440,1.0,0.0387,0.015900,0.000000,0.0627,0.552,100.008,audio_features,6ihL9TjfRjadfEePzXXyVF,spotify:track:6ihL9TjfRjadfEePzXXyVF,https://api.spotify.com/v1/tracks/6ihL9TjfRjad...,https://api.spotify.com/v1/audio-analysis/6ihL...,213107.0,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1809,0.609,0.435,11.0,-7.861,0.0,0.2850,0.522000,0.000000,0.1180,0.552,131.995,audio_features,4TTV7EcfroSLWzXRY6gLv6,spotify:track:4TTV7EcfroSLWzXRY6gLv6,https://api.spotify.com/v1/tracks/4TTV7EcfroSL...,https://api.spotify.com/v1/audio-analysis/4TTV...,236738.0,4.0
1810,0.864,0.689,10.0,-7.273,0.0,0.2200,0.000077,0.000467,0.1110,0.412,152.946,audio_features,3cFHCXuBZKMJd8Q4Wt8RB3,spotify:track:3cFHCXuBZKMJd8Q4Wt8RB3,https://api.spotify.com/v1/tracks/3cFHCXuBZKMJ...,https://api.spotify.com/v1/audio-analysis/3cFH...,148246.0,4.0
1811,0.786,0.808,7.0,-3.702,1.0,0.0881,0.084600,0.000289,0.0822,0.608,105.029,audio_features,0KkIkfsLEJbrcIhYsCL7L5,spotify:track:0KkIkfsLEJbrcIhYsCL7L5,https://api.spotify.com/v1/tracks/0KkIkfsLEJbr...,https://api.spotify.com/v1/audio-analysis/0KkI...,301714.0,4.0
1812,0.694,0.669,8.0,-5.143,0.0,0.1150,0.079400,0.000000,0.2960,0.522,128.083,audio_features,6NtUvEqIOgO1rRkQk6F0aE,spotify:track:6NtUvEqIOgO1rRkQk6F0aE,https://api.spotify.com/v1/tracks/6NtUvEqIOgO1...,https://api.spotify.com/v1/audio-analysis/6NtU...,197720.0,4.0


In [64]:
# Label every random song with success = 0.0 (presumably the "not-hit"
# class of the hit/not-hit dataset -- confirm against the hit-song notebook).
audio_df = audio_df.assign(success=0.0)

In [65]:
# Preview the labelled frame.
# NOTE(review): the recorded output below shows success = 1.0, which does not
# match the preceding cell (it sets the column to 0.0). The output looks
# stale -- re-run before relying on it.
audio_df

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature,success
0,0.728,0.656,6.0,-5.652,1.0,0.4160,0.068800,0.000000,0.1700,0.817,155.866,audio_features,2twGKYnxAYNRQ9z9F3sQ4o,spotify:track:2twGKYnxAYNRQ9z9F3sQ4o,https://api.spotify.com/v1/tracks/2twGKYnxAYNR...,https://api.spotify.com/v1/audio-analysis/2twG...,184673.0,4.0,1.0
1,0.476,0.478,11.0,-11.766,1.0,0.0395,0.779000,0.117000,0.1030,0.489,89.060,audio_features,6Kn5yzAyOw3jf4Y5a6d1Oq,spotify:track:6Kn5yzAyOw3jf4Y5a6d1Oq,https://api.spotify.com/v1/tracks/6Kn5yzAyOw3j...,https://api.spotify.com/v1/audio-analysis/6Kn5...,259080.0,4.0,1.0
2,0.683,0.420,5.0,-6.510,1.0,0.0542,0.087500,0.000000,0.2250,0.469,128.621,audio_features,1YlulsUKDduOmC7WxvXYPW,spotify:track:1YlulsUKDduOmC7WxvXYPW,https://api.spotify.com/v1/tracks/1YlulsUKDduO...,https://api.spotify.com/v1/audio-analysis/1Ylu...,213507.0,4.0,1.0
3,0.596,0.522,9.0,-6.633,1.0,0.0358,0.373000,0.000000,0.2910,0.443,76.042,audio_features,3DA4SE262k6rdEtlHteNyq,spotify:track:3DA4SE262k6rdEtlHteNyq,https://api.spotify.com/v1/tracks/3DA4SE262k6r...,https://api.spotify.com/v1/audio-analysis/3DA4...,188320.0,4.0,1.0
4,0.718,0.691,4.0,-6.440,1.0,0.0387,0.015900,0.000000,0.0627,0.552,100.008,audio_features,6ihL9TjfRjadfEePzXXyVF,spotify:track:6ihL9TjfRjadfEePzXXyVF,https://api.spotify.com/v1/tracks/6ihL9TjfRjad...,https://api.spotify.com/v1/audio-analysis/6ihL...,213107.0,4.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1809,0.609,0.435,11.0,-7.861,0.0,0.2850,0.522000,0.000000,0.1180,0.552,131.995,audio_features,4TTV7EcfroSLWzXRY6gLv6,spotify:track:4TTV7EcfroSLWzXRY6gLv6,https://api.spotify.com/v1/tracks/4TTV7EcfroSL...,https://api.spotify.com/v1/audio-analysis/4TTV...,236738.0,4.0,1.0
1810,0.864,0.689,10.0,-7.273,0.0,0.2200,0.000077,0.000467,0.1110,0.412,152.946,audio_features,3cFHCXuBZKMJd8Q4Wt8RB3,spotify:track:3cFHCXuBZKMJd8Q4Wt8RB3,https://api.spotify.com/v1/tracks/3cFHCXuBZKMJ...,https://api.spotify.com/v1/audio-analysis/3cFH...,148246.0,4.0,1.0
1811,0.786,0.808,7.0,-3.702,1.0,0.0881,0.084600,0.000289,0.0822,0.608,105.029,audio_features,0KkIkfsLEJbrcIhYsCL7L5,spotify:track:0KkIkfsLEJbrcIhYsCL7L5,https://api.spotify.com/v1/tracks/0KkIkfsLEJbr...,https://api.spotify.com/v1/audio-analysis/0KkI...,301714.0,4.0,1.0
1812,0.694,0.669,8.0,-5.143,0.0,0.1150,0.079400,0.000000,0.2960,0.522,128.083,audio_features,6NtUvEqIOgO1rRkQk6F0aE,spotify:track:6NtUvEqIOgO1rRkQk6F0aE,https://api.spotify.com/v1/tracks/6NtUvEqIOgO1...,https://api.spotify.com/v1/audio-analysis/6NtU...,197720.0,4.0,1.0


In [66]:
# Persist the second batch of audio features.
audio_df.to_csv('1814_random_songs_with_audio_features.csv', index=False, encoding='utf-8')

In [67]:
# Concatenate the two audio-features CSV files into a single DataFrame

In [68]:
# Reload both saved audio-features files and stack them into one frame with a
# fresh 0..n-1 index.
df = pd.concat(
    [pd.read_csv(csv_path)
     for csv_path in ('7808_random_songs_with_audio_features.csv',
                      '1814_random_songs_with_audio_features.csv')],
    ignore_index=True,
)

In [72]:
# Set the label to 0.0 for every row of the combined frame, overwriting
# whatever value the `success` column had in the CSV files.
df = df.assign(success=0.0)
df

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature,success
0,0.413,0.807,11.0,-3.499,0.0,0.3180,0.053600,0.000000,0.6310,0.438,149.330,audio_features,608a1wIsSd5KzMEqm1O7w3,spotify:track:608a1wIsSd5KzMEqm1O7w3,https://api.spotify.com/v1/tracks/608a1wIsSd5K...,https://api.spotify.com/v1/audio-analysis/608a...,296147.0,4.0,0.0
1,0.131,0.279,0.0,-13.504,1.0,0.0379,0.704000,0.002490,0.1210,0.175,179.252,audio_features,33VcQq9PLgJhMllZtPRP4M,spotify:track:33VcQq9PLgJhMllZtPRP4M,https://api.spotify.com/v1/tracks/33VcQq9PLgJh...,https://api.spotify.com/v1/audio-analysis/33Vc...,201080.0,4.0,0.0
2,0.373,0.249,2.0,-23.432,0.0,0.0477,0.995000,0.920000,0.1060,0.656,148.346,audio_features,5mI9HJdkqtqWr7KsA8hIbt,spotify:track:5mI9HJdkqtqWr7KsA8hIbt,https://api.spotify.com/v1/tracks/5mI9HJdkqtqW...,https://api.spotify.com/v1/audio-analysis/5mI9...,132000.0,4.0,0.0
3,0.751,0.672,1.0,-3.017,1.0,0.2450,0.006980,0.000000,0.1720,0.340,158.087,audio_features,4FDG9SHyQkxkJxGLrF1ZIp,spotify:track:4FDG9SHyQkxkJxGLrF1ZIp,https://api.spotify.com/v1/tracks/4FDG9SHyQkxk...,https://api.spotify.com/v1/audio-analysis/4FDG...,195818.0,4.0,0.0
4,0.831,0.795,1.0,-6.186,1.0,0.2640,0.221000,0.000000,0.2910,0.744,160.063,audio_features,21kOVEG3bDCVphKhXL8XmQ,spotify:track:21kOVEG3bDCVphKhXL8XmQ,https://api.spotify.com/v1/tracks/21kOVEG3bDCV...,https://api.spotify.com/v1/audio-analysis/21kO...,96064.0,4.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9617,0.609,0.435,11.0,-7.861,0.0,0.2850,0.522000,0.000000,0.1180,0.552,131.995,audio_features,4TTV7EcfroSLWzXRY6gLv6,spotify:track:4TTV7EcfroSLWzXRY6gLv6,https://api.spotify.com/v1/tracks/4TTV7EcfroSL...,https://api.spotify.com/v1/audio-analysis/4TTV...,236738.0,4.0,0.0
9618,0.864,0.689,10.0,-7.273,0.0,0.2200,0.000077,0.000467,0.1110,0.412,152.946,audio_features,3cFHCXuBZKMJd8Q4Wt8RB3,spotify:track:3cFHCXuBZKMJd8Q4Wt8RB3,https://api.spotify.com/v1/tracks/3cFHCXuBZKMJ...,https://api.spotify.com/v1/audio-analysis/3cFH...,148246.0,4.0,0.0
9619,0.786,0.808,7.0,-3.702,1.0,0.0881,0.084600,0.000289,0.0822,0.608,105.029,audio_features,0KkIkfsLEJbrcIhYsCL7L5,spotify:track:0KkIkfsLEJbrcIhYsCL7L5,https://api.spotify.com/v1/tracks/0KkIkfsLEJbr...,https://api.spotify.com/v1/audio-analysis/0KkI...,301714.0,4.0,0.0
9620,0.694,0.669,8.0,-5.143,0.0,0.1150,0.079400,0.000000,0.2960,0.522,128.083,audio_features,6NtUvEqIOgO1rRkQk6F0aE,spotify:track:6NtUvEqIOgO1rRkQk6F0aE,https://api.spotify.com/v1/tracks/6NtUvEqIOgO1...,https://api.spotify.com/v1/audio-analysis/6NtU...,197720.0,4.0,0.0


In [76]:
# Rows whose track `id` already appeared earlier in the combined frame.
df[df['id'].duplicated()]

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature,success
7811,0.596,0.522,9.0,-6.633,1.0,0.0358,0.373000,0.000000,0.2910,0.443,76.042,audio_features,3DA4SE262k6rdEtlHteNyq,spotify:track:3DA4SE262k6rdEtlHteNyq,https://api.spotify.com/v1/tracks/3DA4SE262k6r...,https://api.spotify.com/v1/audio-analysis/3DA4...,188320.0,4.0,0.0
7812,0.718,0.691,4.0,-6.440,1.0,0.0387,0.015900,0.000000,0.0627,0.552,100.008,audio_features,6ihL9TjfRjadfEePzXXyVF,spotify:track:6ihL9TjfRjadfEePzXXyVF,https://api.spotify.com/v1/tracks/6ihL9TjfRjad...,https://api.spotify.com/v1/audio-analysis/6ihL...,213107.0,4.0,0.0
7814,0.525,0.774,9.0,-4.532,1.0,0.2410,0.240000,0.000001,0.4950,0.526,154.192,audio_features,5h0Jgt873QtgL6nJRBGfT6,spotify:track:5h0Jgt873QtgL6nJRBGfT6,https://api.spotify.com/v1/tracks/5h0Jgt873Qtg...,https://api.spotify.com/v1/audio-analysis/5h0J...,202391.0,4.0,0.0
7815,0.771,0.700,1.0,-6.460,1.0,0.2330,0.032300,0.000000,0.0911,0.417,141.065,audio_features,1TcJDKandYbyo4cCHhKzgr,spotify:track:1TcJDKandYbyo4cCHhKzgr,https://api.spotify.com/v1/tracks/1TcJDKandYby...,https://api.spotify.com/v1/audio-analysis/1TcJ...,191771.0,4.0,0.0
7816,0.722,0.758,11.0,-4.477,0.0,0.0471,0.011100,0.000000,0.3080,0.620,128.047,audio_features,7LcfRTgAVTs5pQGEQgUEzN,spotify:track:7LcfRTgAVTs5pQGEQgUEzN,https://api.spotify.com/v1/tracks/7LcfRTgAVTs5...,https://api.spotify.com/v1/audio-analysis/7Lcf...,201160.0,4.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9601,0.388,0.595,1.0,-9.799,1.0,0.0325,0.663000,0.000056,0.1780,0.596,190.043,audio_features,08cXy6KUizaAelYXtcew3w,spotify:track:08cXy6KUizaAelYXtcew3w,https://api.spotify.com/v1/tracks/08cXy6KUizaA...,https://api.spotify.com/v1/audio-analysis/08cX...,204213.0,4.0,0.0
9609,0.606,0.826,0.0,-6.040,1.0,0.0307,0.025600,0.000016,0.1170,0.322,121.040,audio_features,4rsW3WCZBGwhHfJWuHRwyT,spotify:track:4rsW3WCZBGwhHfJWuHRwyT,https://api.spotify.com/v1/tracks/4rsW3WCZBGwh...,https://api.spotify.com/v1/audio-analysis/4rsW...,210173.0,4.0,0.0
9612,0.653,0.796,7.0,-4.050,0.0,0.2450,0.240000,0.000000,0.1080,0.405,160.026,audio_features,0aQA9DP54h37OevE7hRc2a,spotify:track:0aQA9DP54h37OevE7hRc2a,https://api.spotify.com/v1/tracks/0aQA9DP54h37...,https://api.spotify.com/v1/audio-analysis/0aQA...,252712.0,4.0,0.0
9618,0.864,0.689,10.0,-7.273,0.0,0.2200,0.000077,0.000467,0.1110,0.412,152.946,audio_features,3cFHCXuBZKMJd8Q4Wt8RB3,spotify:track:3cFHCXuBZKMJd8Q4Wt8RB3,https://api.spotify.com/v1/tracks/3cFHCXuBZKMJ...,https://api.spotify.com/v1/audio-analysis/3cFH...,148246.0,4.0,0.0


In [77]:
# Keep only the first row for every track `id`.
df = df.drop_duplicates(subset=['id'])
df

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature,success
0,0.413,0.807,11.0,-3.499,0.0,0.3180,0.053600,0.000000,0.6310,0.438,149.330,audio_features,608a1wIsSd5KzMEqm1O7w3,spotify:track:608a1wIsSd5KzMEqm1O7w3,https://api.spotify.com/v1/tracks/608a1wIsSd5K...,https://api.spotify.com/v1/audio-analysis/608a...,296147.0,4.0,0.0
1,0.131,0.279,0.0,-13.504,1.0,0.0379,0.704000,0.002490,0.1210,0.175,179.252,audio_features,33VcQq9PLgJhMllZtPRP4M,spotify:track:33VcQq9PLgJhMllZtPRP4M,https://api.spotify.com/v1/tracks/33VcQq9PLgJh...,https://api.spotify.com/v1/audio-analysis/33Vc...,201080.0,4.0,0.0
2,0.373,0.249,2.0,-23.432,0.0,0.0477,0.995000,0.920000,0.1060,0.656,148.346,audio_features,5mI9HJdkqtqWr7KsA8hIbt,spotify:track:5mI9HJdkqtqWr7KsA8hIbt,https://api.spotify.com/v1/tracks/5mI9HJdkqtqW...,https://api.spotify.com/v1/audio-analysis/5mI9...,132000.0,4.0,0.0
3,0.751,0.672,1.0,-3.017,1.0,0.2450,0.006980,0.000000,0.1720,0.340,158.087,audio_features,4FDG9SHyQkxkJxGLrF1ZIp,spotify:track:4FDG9SHyQkxkJxGLrF1ZIp,https://api.spotify.com/v1/tracks/4FDG9SHyQkxk...,https://api.spotify.com/v1/audio-analysis/4FDG...,195818.0,4.0,0.0
4,0.831,0.795,1.0,-6.186,1.0,0.2640,0.221000,0.000000,0.2910,0.744,160.063,audio_features,21kOVEG3bDCVphKhXL8XmQ,spotify:track:21kOVEG3bDCVphKhXL8XmQ,https://api.spotify.com/v1/tracks/21kOVEG3bDCV...,https://api.spotify.com/v1/audio-analysis/21kO...,96064.0,4.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9615,0.604,0.950,9.0,-6.144,0.0,0.1740,0.000106,0.730000,0.3140,0.368,125.978,audio_features,6l0Xa0PJ04RQlyjJt4JDRB,spotify:track:6l0Xa0PJ04RQlyjJt4JDRB,https://api.spotify.com/v1/tracks/6l0Xa0PJ04RQ...,https://api.spotify.com/v1/audio-analysis/6l0X...,197143.0,4.0,0.0
9616,0.470,0.116,4.0,-24.948,1.0,0.0440,0.994000,0.918000,0.1430,0.877,93.801,audio_features,1qSyStOitrHUti4xyuqwil,spotify:track:1qSyStOitrHUti4xyuqwil,https://api.spotify.com/v1/tracks/1qSyStOitrHU...,https://api.spotify.com/v1/audio-analysis/1qSy...,96093.0,1.0,0.0
9617,0.609,0.435,11.0,-7.861,0.0,0.2850,0.522000,0.000000,0.1180,0.552,131.995,audio_features,4TTV7EcfroSLWzXRY6gLv6,spotify:track:4TTV7EcfroSLWzXRY6gLv6,https://api.spotify.com/v1/tracks/4TTV7EcfroSL...,https://api.spotify.com/v1/audio-analysis/4TTV...,236738.0,4.0,0.0
9619,0.786,0.808,7.0,-3.702,1.0,0.0881,0.084600,0.000289,0.0822,0.608,105.029,audio_features,0KkIkfsLEJbrcIhYsCL7L5,spotify:track:0KkIkfsLEJbrcIhYsCL7L5,https://api.spotify.com/v1/tracks/0KkIkfsLEJbr...,https://api.spotify.com/v1/audio-analysis/0KkI...,301714.0,4.0,0.0


In [78]:
# Renumber the rows 0..n-1 after removing the duplicate ids.
df = df.reset_index(drop=True)
df

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature,success
0,0.413,0.807,11.0,-3.499,0.0,0.3180,0.053600,0.000000,0.6310,0.438,149.330,audio_features,608a1wIsSd5KzMEqm1O7w3,spotify:track:608a1wIsSd5KzMEqm1O7w3,https://api.spotify.com/v1/tracks/608a1wIsSd5K...,https://api.spotify.com/v1/audio-analysis/608a...,296147.0,4.0,0.0
1,0.131,0.279,0.0,-13.504,1.0,0.0379,0.704000,0.002490,0.1210,0.175,179.252,audio_features,33VcQq9PLgJhMllZtPRP4M,spotify:track:33VcQq9PLgJhMllZtPRP4M,https://api.spotify.com/v1/tracks/33VcQq9PLgJh...,https://api.spotify.com/v1/audio-analysis/33Vc...,201080.0,4.0,0.0
2,0.373,0.249,2.0,-23.432,0.0,0.0477,0.995000,0.920000,0.1060,0.656,148.346,audio_features,5mI9HJdkqtqWr7KsA8hIbt,spotify:track:5mI9HJdkqtqWr7KsA8hIbt,https://api.spotify.com/v1/tracks/5mI9HJdkqtqW...,https://api.spotify.com/v1/audio-analysis/5mI9...,132000.0,4.0,0.0
3,0.751,0.672,1.0,-3.017,1.0,0.2450,0.006980,0.000000,0.1720,0.340,158.087,audio_features,4FDG9SHyQkxkJxGLrF1ZIp,spotify:track:4FDG9SHyQkxkJxGLrF1ZIp,https://api.spotify.com/v1/tracks/4FDG9SHyQkxk...,https://api.spotify.com/v1/audio-analysis/4FDG...,195818.0,4.0,0.0
4,0.831,0.795,1.0,-6.186,1.0,0.2640,0.221000,0.000000,0.2910,0.744,160.063,audio_features,21kOVEG3bDCVphKhXL8XmQ,spotify:track:21kOVEG3bDCVphKhXL8XmQ,https://api.spotify.com/v1/tracks/21kOVEG3bDCV...,https://api.spotify.com/v1/audio-analysis/21kO...,96064.0,4.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8794,0.604,0.950,9.0,-6.144,0.0,0.1740,0.000106,0.730000,0.3140,0.368,125.978,audio_features,6l0Xa0PJ04RQlyjJt4JDRB,spotify:track:6l0Xa0PJ04RQlyjJt4JDRB,https://api.spotify.com/v1/tracks/6l0Xa0PJ04RQ...,https://api.spotify.com/v1/audio-analysis/6l0X...,197143.0,4.0,0.0
8795,0.470,0.116,4.0,-24.948,1.0,0.0440,0.994000,0.918000,0.1430,0.877,93.801,audio_features,1qSyStOitrHUti4xyuqwil,spotify:track:1qSyStOitrHUti4xyuqwil,https://api.spotify.com/v1/tracks/1qSyStOitrHU...,https://api.spotify.com/v1/audio-analysis/1qSy...,96093.0,1.0,0.0
8796,0.609,0.435,11.0,-7.861,0.0,0.2850,0.522000,0.000000,0.1180,0.552,131.995,audio_features,4TTV7EcfroSLWzXRY6gLv6,spotify:track:4TTV7EcfroSLWzXRY6gLv6,https://api.spotify.com/v1/tracks/4TTV7EcfroSL...,https://api.spotify.com/v1/audio-analysis/4TTV...,236738.0,4.0,0.0
8797,0.786,0.808,7.0,-3.702,1.0,0.0881,0.084600,0.000289,0.0822,0.608,105.029,audio_features,0KkIkfsLEJbrcIhYsCL7L5,spotify:track:0KkIkfsLEJbrcIhYsCL7L5,https://api.spotify.com/v1/tracks/0KkIkfsLEJbr...,https://api.spotify.com/v1/audio-analysis/0KkI...,301714.0,4.0,0.0


In [79]:
# Persist the final de-duplicated set of random songs with audio features.
df.to_csv('8799_random_songs_with_audio_features.csv', index=False, encoding='utf-8')