## Finalize the Dataset
Before I jump into analysis, I need to create a final version of the dataset.

In [1]:
import pandas as pd
import numpy as np

In [2]:
# bring in the datasets
songs = pd.read_csv("../data/all_decades_songs_V3.csv", index_col=0)
librosa_features = pd.read_csv("../data/librosa_features.csv", index_col=0)
spotify_features = pd.read_csv("../data/spotify_features.csv", index_col=0)

for df in [songs, librosa_features, spotify_features]:
    print(df.shape)

(1189, 9)
(1189, 37)
(1189, 18)


All the datasets have the same number of records, now I can decide which features to keep.

In [3]:
songs.columns

Index(['artist', 'song', 'decade', 'track_id', 'preview_url', 'track_name',
       'artist_name', 'artist_id', 'genres'],
      dtype='object')

In [19]:
spotify_features['key'].value_counts()

0     159
7     131
1     124
5     112
2     111
9     109
4      89
10     86
8      81
11     78
6      62
3      47
Name: key, dtype: int64

In [13]:
# removing columns from `spotify_features`
cols = list(spotify_features.columns)

for x in ['type','uri','track_href','analysis_url']:
    cols.remove(x)

spotify_features_final  = spotify_features[cols]
spotify_features_final.head()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,id,duration_ms,time_signature
0,0.596,0.315,8,-9.175,1,0.0428,0.961,0.0,0.258,0.64,119.935,6ymkab3FTjiFzSJwhal59m,171773,4
1,0.27,0.177,0,-9.791,1,0.0298,0.922,0.0,0.104,0.237,87.373,4oP8eYnsSKJPC4VNfPB7dZ,160000,4
2,0.589,0.396,7,-13.58,1,0.237,0.705,0.0,0.108,0.979,200.533,0fVtEGoXeRhllDU9ChQAZl,159948,4
3,0.725,0.373,10,-15.925,1,0.0494,0.613,0.0202,0.118,0.846,148.367,7Jf323ttHKUnPylFWiaGl3,169000,4
4,0.752,0.443,2,-14.392,1,0.0398,0.667,2.1e-05,0.154,0.919,99.136,0lO5EKoz1Rb1pJoPoldE4D,160667,4


In [21]:
librosa_features_df = librosa_features[['track_id','zero_crossing_rate']]
librosa_features_df

Unnamed: 0,track_id,zero_crossing_rate
0,6ymkab3FTjiFzSJwhal59m,0.080147
1,4oP8eYnsSKJPC4VNfPB7dZ,0.090326
2,0fVtEGoXeRhllDU9ChQAZl,0.073879
3,7Jf323ttHKUnPylFWiaGl3,0.066668
4,0lO5EKoz1Rb1pJoPoldE4D,0.110896
...,...,...
1184,7qEHsqek33rTcFNT9PFqLf,0.075431
1185,4l0Mvzj72xxOpRrp6h8nHi,0.099590
1186,21jGcNKet2qwijlDFuPiPb,0.066331
1187,7aH5zH4TxVotW0meTNqEJj,0.091192


In [25]:
combined = pd.merge(spotify_features_final, librosa_features_df, left_on='id', right_on='track_id')
combined

KeyError: 'track_id'

In [26]:
all_data = pd.merge(songs, combined, on='track_id')
all_data

Unnamed: 0,artist,song,decade,track_id,preview_url,track_name,artist_name,artist_id,genres,danceability,...,speechiness,acousticness,instrumentalness,liveness,valence,tempo,id,duration_ms,time_signature,zero_crossing_rate
0,Gene Autry,"Rudolph, The Red-nosed Reindeer",1950,6ymkab3FTjiFzSJwhal59m,https://audio-ssl.itunes.apple.com/itunes-asse...,Rudolph The Red-Nosed Reindeer,Gene Autry,5ixB75BQR3ADoWQkcHQJTs,holiday,0.596,...,0.0428,0.961,0.000000,0.2580,0.6400,119.935,6ymkab3FTjiFzSJwhal59m,171773,4,0.080147
1,The Andrews Sisters,"I Can Dream, Can't I",1950,4oP8eYnsSKJPC4VNfPB7dZ,https://audio-ssl.itunes.apple.com/itunes-asse...,"I Can Dream, Can't I? - Single Version",The Andrews Sisters,2NCGI6dLTxLdI9XHdv7QfM,pop,0.270,...,0.0298,0.922,0.000000,0.1040,0.2370,87.373,4oP8eYnsSKJPC4VNfPB7dZ,160000,4,0.090326
2,The Ames Brothers,Rag Mop,1950,0fVtEGoXeRhllDU9ChQAZl,https://p.scdn.co/mp3-preview/ebd8099b71ecd6f1...,Rag Mop,The Ames Brothers,4oXaAEofJFedGweFqy5qiv,"['adult standards', 'deep adult standards', 'e...",0.589,...,0.2370,0.705,0.000000,0.1080,0.9790,200.533,0fVtEGoXeRhllDU9ChQAZl,159948,4,0.073879
3,Red Foley,Chattanoogie Shoe Shine Boy,1950,7Jf323ttHKUnPylFWiaGl3,https://audio-ssl.itunes.apple.com/itunes-asse...,Chattanoogie Shoe Shine Boy - 1949 Single Version,Red Foley,56tggwKsz5OqCDf1i0Str9,country,0.725,...,0.0494,0.613,0.020200,0.1180,0.8460,148.367,7Jf323ttHKUnPylFWiaGl3,169000,4,0.066668
4,Teresa Brewer,Music! Music! Music!,1950,0lO5EKoz1Rb1pJoPoldE4D,https://audio-ssl.itunes.apple.com/itunes-asse...,(Put Another Nickel In) Music! Music! Music!,Teresa Brewer,2mPL4g4v9DS55zi6QctLbP,vocal,0.752,...,0.0398,0.667,0.000021,0.1540,0.9190,99.136,0lO5EKoz1Rb1pJoPoldE4D,160667,4,0.110896
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1208,Lewis Capaldi,Someone You Loved,2010,7qEHsqek33rTcFNT9PFqLf,https://audio-ssl.itunes.apple.com/itunes-asse...,Someone You Loved,Lewis Capaldi,4GNC7GD6oZMSxPGyXy4MNB,alternative,0.501,...,0.0319,0.751,0.000000,0.1050,0.4460,109.891,7qEHsqek33rTcFNT9PFqLf,182161,4,0.075431
1209,Selena Gomez,Lose You to Love Me,2010,4l0Mvzj72xxOpRrp6h8nHi,https://audio-ssl.itunes.apple.com/itunes-asse...,Lose You To Love Me,Selena Gomez,0C8ZW7ezQVs4URX5aX7Kqx,pop,0.488,...,0.0436,0.556,0.000000,0.2100,0.0978,102.819,4l0Mvzj72xxOpRrp6h8nHi,206459,4,0.099590
1210,Post Malone,Circles,2010,21jGcNKet2qwijlDFuPiPb,https://audio-ssl.itunes.apple.com/itunes-asse...,Circles,Post Malone,246dkjvS1zLTtiykXe5h60,house,0.695,...,0.0395,0.192,0.002440,0.0863,0.5530,120.042,21jGcNKet2qwijlDFuPiPb,215280,4,0.066331
1211,The Weeknd,Heartless,2010,7aH5zH4TxVotW0meTNqEJj,https://audio-ssl.itunes.apple.com/itunes-asse...,Heartless - Vapor Wave Remix,The Weeknd,1Xyo4u8uXC1ZmMpatF05PJ,r&b/soul,0.544,...,0.0981,0.105,0.000000,0.1270,0.2000,74.965,7aH5zH4TxVotW0meTNqEJj,165354,4,0.091192


In [29]:
all_data_final = all_data[['track_id', 'decade', 'track_name', 'artist_name', 
                           'danceability', 'energy', 'key', 'loudness', 
                           'mode', 'speechiness', 'acousticness', 'instrumentalness',
                           'liveness', 'valence', 'tempo', 'zero_crossing_rate',
                           'duration_ms','time_signature','genres']]

all_data_final

Unnamed: 0,track_id,decade,track_name,artist_name,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,zero_crossing_rate,duration_ms,time_signature,genres
0,6ymkab3FTjiFzSJwhal59m,1950,Rudolph The Red-Nosed Reindeer,Gene Autry,0.596,0.315,8,-9.175,1,0.0428,0.961,0.000000,0.2580,0.6400,119.935,0.080147,171773,4,holiday
1,4oP8eYnsSKJPC4VNfPB7dZ,1950,"I Can Dream, Can't I? - Single Version",The Andrews Sisters,0.270,0.177,0,-9.791,1,0.0298,0.922,0.000000,0.1040,0.2370,87.373,0.090326,160000,4,pop
2,0fVtEGoXeRhllDU9ChQAZl,1950,Rag Mop,The Ames Brothers,0.589,0.396,7,-13.580,1,0.2370,0.705,0.000000,0.1080,0.9790,200.533,0.073879,159948,4,"['adult standards', 'deep adult standards', 'e..."
3,7Jf323ttHKUnPylFWiaGl3,1950,Chattanoogie Shoe Shine Boy - 1949 Single Version,Red Foley,0.725,0.373,10,-15.925,1,0.0494,0.613,0.020200,0.1180,0.8460,148.367,0.066668,169000,4,country
4,0lO5EKoz1Rb1pJoPoldE4D,1950,(Put Another Nickel In) Music! Music! Music!,Teresa Brewer,0.752,0.443,2,-14.392,1,0.0398,0.667,0.000021,0.1540,0.9190,99.136,0.110896,160667,4,vocal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1208,7qEHsqek33rTcFNT9PFqLf,2010,Someone You Loved,Lewis Capaldi,0.501,0.405,1,-5.679,1,0.0319,0.751,0.000000,0.1050,0.4460,109.891,0.075431,182161,4,alternative
1209,4l0Mvzj72xxOpRrp6h8nHi,2010,Lose You To Love Me,Selena Gomez,0.488,0.343,4,-8.985,1,0.0436,0.556,0.000000,0.2100,0.0978,102.819,0.099590,206459,4,pop
1210,21jGcNKet2qwijlDFuPiPb,2010,Circles,Post Malone,0.695,0.762,0,-3.497,1,0.0395,0.192,0.002440,0.0863,0.5530,120.042,0.066331,215280,4,house
1211,7aH5zH4TxVotW0meTNqEJj,2010,Heartless - Vapor Wave Remix,The Weeknd,0.544,0.729,1,-3.911,0,0.0981,0.105,0.000000,0.1270,0.2000,74.965,0.091192,165354,4,r&b/soul


In [30]:
all_data_final.to_csv('../data/final_dataset.csv')