In [25]:
import pandas as pd
import numpy as np

import shutil
import os
import time
from tqdm import tqdm
from tqdm import tqdm_notebook as tqdm_notebook

import spotipy
import spotipy.util as util

from requests.exceptions import ConnectionError 
from requests.exceptions import HTTPError
from requests.exceptions import SSLError
from urllib3.exceptions import MaxRetryError

import pretty_midi
import mido

import ast

In [2]:
# Directories

data_dir = "/Users/chrisolen/Documents/uchicago_courses/deep_learning_and_image_recognition/audio_generation/data/"
scripts_dir = "/Users/chrisolen/Documents/uchicago_courses/deep_learning_and_image_recognition/audio_generation/NADE_music_generation/"

# Midi File Local Organization
Download LMD-Matched from: https://colinraffel.com/projects/lmd/

In [53]:
'''

# Moving into a single directory

# Location to move all midi files:

destination = data_dir+"midi_files/"

# Iterating through original file system:

source = data_dir+"lmd_matched/"
level_1 = os.listdir(source)
for i in range(len(level_1)):
    if level_1[i] != ".DS_Store":
        level_2 = os.listdir(source+level_1[i]+"/")
        for j in range(len(level_2)):
            if level_2[j] != ".DS_Store":
                level_3 = os.listdir(source+level_1[i]+"/"+level_2[j]+"/")
                for k in range(len(level_3)):
                    if level_3[k] != ".DS_Store":
                        level_4 = os.listdir(source+level_1[i]+"/"+level_2[j]+"/"+level_3[k]+"/")
                        for f in range(len(level_4)):
                            if level_4[f] != ".DS_Store":
                                shutil.move(source+level_1[i]+"/"+level_2[j]+"/"+level_3[k]+"/"+level_4[f],destination)
                
                
'''        


# Song and Artist Reference Name Text File
Download "all track Echo Nest ID" here: http://millionsongdataset.com/pages/getting-dataset/

In [42]:
# Read in textfile of 'Million Song Dataset' names:

file = open(data_dir+"songlist_with_mbid.txt", 'r')
lines = np.array(file.readlines())

In [43]:
# Remove newline markers:

cleaned_lines = [lines[i].replace("\n","") for i in range(len(lines))]

# Split along "<SEP>"

split_lines = [cleaned_lines[i].split("<SEP>") for i in range(len(cleaned_lines))] 

In [44]:
# Merge into dataframe:

tracks = pd.DataFrame.from_records(split_lines)
tracks.rename(columns={0:"track_id",1:"mb_id",2:"artist",3:"song_title"},inplace=True)


In [45]:
# Removing 'feat.' from the artists field:

cleaned_artist = [tracks['artist'].iloc[i] if 'Feat.' not in tracks['artist'].iloc[i] else tracks['artist'].iloc[i].replace(tracks['artist'].iloc[i],tracks['artist'].iloc[i][tracks['artist'].iloc[i].find('Feat.'):][6:]) for i in range(len(tracks))]

cleaned_artist_2 = [cleaned_artist[i] if 'feat.' not in cleaned_artist[i] else cleaned_artist[i].replace(cleaned_artist[i],cleaned_artist[i][cleaned_artist[i].find('feat.'):][6:]) for i in range(len(cleaned_artist))]






In [46]:
tracks['artist'] = cleaned_artist_2

In [47]:
tracks.head()

Unnamed: 0,track_id,mb_id,artist,song_title
0,TROXMWJ12903CF4396,f275d9a5-3271-436b-8c32-806ec275e23c,Arnold Jarvis,Life Goes On
1,TRDDLOD128F92FF922,f26c72d3-e52c-467b-b651-679c73d8e1a7,!!!,A New Name
2,TRGCKLJ128F92FF90E,f26c72d3-e52c-467b-b651-679c73d8e1a7,!!!,All My Heroes Are Weirdos
3,TRRKQQU128F92FF947,f26c72d3-e52c-467b-b651-679c73d8e1a7,!!!,Bend Over Beethoven
4,TRZRBYS128F92FF94F,f26c72d3-e52c-467b-b651-679c73d8e1a7,!!!,Break In Case Of Anything


In [48]:
tracks.shape

(1000000, 4)

# Retaining Only Records for Which Midi Files Exist

In [49]:
unique_tracks = os.listdir(data_dir+'midi_files')
tracks = tracks[tracks['track_id'].isin(unique_tracks)]
tracks.shape

(31032, 4)

In [50]:
tracks.head()

Unnamed: 0,track_id,mb_id,artist,song_title
277,TRBQUUS12903CB2580,7b28fb58-882b-4907-856e-ef327b624b11,+ / - {Plus/Minus},Flight Data Recorder
300,TRRXTJI12903D01048,6bd4eaa7-aa1c-4138-a392-41693229c7fc,+39,My Relax
463,TRMKKFV12903CB7EA4,,...arrived at ten,Flake
522,TRNVJXQ128F14AFD63,2fc659fa-293f-4288-bff5-29447f31f628,1. Futurologischer Congress,Atem
574,TRLIEDO12903CA41B4,f37c537b-3557-4031-bfd6-ab63ced32854,10 CC,The Things We Do For Love


# Spotify API Authentication and Calls

In [756]:
# Function to authenticate using credentials:

def auth_spotify(my_username, chosen_scope, my_client_id, my_client_secret, my_redirect_uri):
    token = util.prompt_for_user_token(username=my_username,
                                       scope=chosen_scope,
                                       client_id=my_client_id,
                                       client_secret=my_client_secret,
                                       redirect_uri=my_redirect_uri)
    sp = spotipy.Spotify(auth=token, requests_timeout=180)
    return sp




In [757]:
# Define credentials:

my_username="chrisolen"
chosen_scope='user-library-read'
my_client_id='###########'
my_client_secret='###########'
my_redirect_uri='https://grahamschool.uchicago.edu/academic-programs/masters-degrees/analytics'


In [758]:
# Authenticate:

sp = auth_spotify(my_username,
               chosen_scope,
               my_client_id,
               my_client_secret,
               my_redirect_uri)

In [771]:
# Function for one round of hitting the API (for a given track):

def hit_spotify(track_name, artist_name, my_username, chosen_scope, my_client_id, my_client_secret, my_redirect_uri):
    
    sp = ''
    while sp == '':
        try:
            sp = auth_spotify(my_username,
                   chosen_scope,
                   my_client_id,
                   my_client_secret,
                   my_redirect_uri)
        except (spotipy.client.SpotifyException, ConnectionError, MaxRetryError, TimeoutError, SSLError, HTTPError):
            tqdm.write("Going to sleep for 1 minute - Errored out on initial connect attempt")
            time.sleep(60)
        
    track_result = ''
    features = ''
    while track_result == '' and features == '':
        try:
            track_result = sp.search(q=track_name+" "+artist_name,type='track')
            track_id = track_result['tracks']['items'][0]['id']
            artist_id = track_result['tracks']['items'][0]['artists'][0]['id']
            features = sp.audio_features(track_id)
            time_sig = features[0]['time_signature'] 
            tempo = features[0]['tempo'] 
            energy = features[0]['energy'] 
            loudness = features[0]['loudness'] 
            speechiness = features[0]['speechiness'] 
            acousticness = features[0]['acousticness']
            instrumentalness = features[0]['instrumentalness'] 
            genre = sp.artists([artist_id])['artists'][0]['genres']
            result = np.array([track_id, artist_id, time_sig, tempo, energy, loudness, speechiness, acousticness, instrumentalness, genre])
            return result
        except (IndexError, TypeError): # Return NoneType when there are no results
            pass
        except (spotipy.client.SpotifyException, ConnectionError, MaxRetryError, TimeoutError, SSLError, HTTPError):
            tqdm.write("Going to sleep for 1 minute - Errored out on call")
            time.sleep(60)
            sp = auth_spotify(my_username, # Reauthenticate when the token expires
                   chosen_scope,
                   my_client_id,
                   my_client_secret,
                   my_redirect_uri)
            continue
        
        

In [772]:
# API calls:

for i in tqdm_notebook(range(26585,len(tracks)), mininterval = 5.0, leave = False):
    result_array = hit_spotify(tracks.iloc[i]['song_title'], tracks.iloc[i]['artist'],my_username="chrisolen",
                              chosen_scope='user-library-read', my_client_id=my_client_id,
                              my_client_secret=my_client_secret,
                               my_redirect_uri='https://grahamschool.uchicago.edu/academic-programs/masters-degrees/analytics')
    try:
        with open(scripts_dir+"data_cleaning_scripts/track_metadata.txt","ab") as textfile:
            np.savetxt(textfile,result_array.reshape(1, result_array.shape[0]), fmt="%s", delimiter=' | ')
    except AttributeError: 
        with open(scripts_dir+"data_cleaning_scripts/track_metadata.txt","ab") as textfile:
            np.savetxt(textfile,np.array([np.NaN]), fmt="%s", delimiter=' | ')
        
        
    

HBox(children=(IntProgress(value=0, max=4447), HTML(value='')))

retrying ...1secs
retrying ...1secs
retrying ...1secs
retrying ...1secs


# Cleaning Up Spotify Metadata

In [51]:
textfile = open(scripts_dir+"data_cleaning_scripts/track_metadata.txt", 'r')
track_features = np.array(textfile.readlines())

In [52]:
track_features

array(['5aXSB4ywBWcPVBuOqxJAsZ | 4j4NbPKY5EnsEUhdNICINb | 3 | 108.021 | 0.584 | -9.032 | 0.027 | 0.00184 | 0.155 | []\n',
       '2j7kfbjTPqpaYHide0hFxi | 5Cicr7PrzTNv4bXiZntlex | 4 | 119.997 | 0.553 | -7.488 | 0.216 | 0.00514 | 0.869 | []\n',
       '1T3zWGYGHIAX36IHYHL9Ar | 49myFlNbBmL4X25pekwLqR | 4 | 106.003 | 0.437 | -12.814 | 0.0261 | 0.913 | 0.881 | []\n',
       ...,
       "0wY6UGmdUSOs38DDKMfNfA | 7E3BRXV9ZbCt5lQTCXMTia | 4 | 135.456 | 0.0292 | -23.61 | 0.0346 | 0.967 | 0.968 | ['bow pop', 'chamber pop', 'compositional ambient', 'focus', 'icelandic classical', 'neo-classical', 'neoclassical']\n",
       "4HMHfABjrAez8PucpSM6Qe | 7E3BRXV9ZbCt5lQTCXMTia | 4 | 89.852 | 0.272 | -14.941 | 0.0281 | 0.92 | 0.836 | ['bow pop', 'chamber pop', 'compositional ambient', 'focus', 'icelandic classical', 'neo-classical', 'neoclassical']\n",
       "1i9YBowsDKdycECqeloT9I | 1yZbA9817FU14OeRIJ1Dig | 4 | 125.018 | 0.87 | -11.325 | 0.0425 | 0.059 | 0.538 | ['future ambient']\n"],
      dtype='<

In [53]:
# Remove newline markers:

cleaned_features = [track_features[i].replace("\n","") for i in range(len(track_features))]

# Split along " | "

split_features = [cleaned_features[i].split(" | ") for i in range(len(cleaned_features))] 

In [54]:
# Make the 'nan' lists have the same dimensions as the others:

indexes = [i for i,x in enumerate(split_features) if x[0] == 'nan']

extended = ['nan','nan','nan','nan','nan','nan','nan','nan','nan',['nan']]

for i in range(len(indexes)):
    split_features[indexes[i]] = extended

In [55]:
# Throwing the spotify features in a dataframe:

spotify_features = pd.DataFrame(split_features, columns = ['sp_track_id', 'sp_artist_id', 'time_sig', 'tempo', 'energy', 
                                                             'loudness', 'speechiness', 'acousticness', 
                                                             'instrumentalness', 'genre'])

In [56]:
spotify_features.head()

Unnamed: 0,sp_track_id,sp_artist_id,time_sig,tempo,energy,loudness,speechiness,acousticness,instrumentalness,genre
0,5aXSB4ywBWcPVBuOqxJAsZ,4j4NbPKY5EnsEUhdNICINb,3.0,108.021,0.584,-9.032,0.027,0.00184,0.155,[]
1,2j7kfbjTPqpaYHide0hFxi,5Cicr7PrzTNv4bXiZntlex,4.0,119.997,0.553,-7.488,0.216,0.00514,0.869,[]
2,1T3zWGYGHIAX36IHYHL9Ar,49myFlNbBmL4X25pekwLqR,4.0,106.003,0.437,-12.814,0.0261,0.913,0.881,[]
3,6bsV04oYvnEEh2tmMzbuE3,6MD8yUEX0V8XJR2Xrwz0HO,4.0,136.25,0.883,-7.47,0.0513,0.0366,0.00704,['neue deutsche welle']
4,,,,,,,,,,[nan]


# Merging the Two Frames

In [57]:
# Reseting index of track dataframe to match up with features dataframe:

tracks.reset_index(inplace= True)
tracks.drop(['index'], axis = 1, inplace = True)
tracks.head()

Unnamed: 0,track_id,mb_id,artist,song_title
0,TRBQUUS12903CB2580,7b28fb58-882b-4907-856e-ef327b624b11,+ / - {Plus/Minus},Flight Data Recorder
1,TRRXTJI12903D01048,6bd4eaa7-aa1c-4138-a392-41693229c7fc,+39,My Relax
2,TRMKKFV12903CB7EA4,,...arrived at ten,Flake
3,TRNVJXQ128F14AFD63,2fc659fa-293f-4288-bff5-29447f31f628,1. Futurologischer Congress,Atem
4,TRLIEDO12903CA41B4,f37c537b-3557-4031-bfd6-ab63ced32854,10 CC,The Things We Do For Love


In [58]:
# Merging two dateframes:

track_metadata = tracks.merge(spotify_features, left_on=tracks.index, right_on=spotify_features.index)

In [None]:
track_metadata.drop(['key_0'], axis=1, inplace=True)

In [61]:
track_metadata.tail()

Unnamed: 0,track_id,mb_id,artist,song_title,sp_track_id,sp_artist_id,time_sig,tempo,energy,loudness,speechiness,acousticness,instrumentalness,genre
31027,TRJMGED128F1485763,1e0de31c-4957-4649-9aa3-7b0f1d9d2c84,Étienne Daho,Quelqu'un Qui M'ressemble (Démo),,,,,,,,,,[nan]
31028,TRXFHVQ128F1453822,1e0de31c-4957-4649-9aa3-7b0f1d9d2c84,Étienne Daho,Saudade (Live 2004),3fJymrFJ6BlODWK0QzuVlH,5aDKJuLDczfmHfRSOmHCJk,4.0,116.905,0.966,-2.431,0.0421,0.0112,0.000326,"['chanson', 'french indie pop', 'french indiet..."
31029,TRGSZLI128F4230F3A,6655955b-1c1e-4bcb-84e4-81bcd9efab30,Ólafur Arnalds,0040,0wY6UGmdUSOs38DDKMfNfA,7E3BRXV9ZbCt5lQTCXMTia,4.0,135.456,0.0292,-23.61,0.0346,0.967,0.968,"['bow pop', 'chamber pop', 'compositional ambi..."
31030,TRIAHBB128F930136B,6655955b-1c1e-4bcb-84e4-81bcd9efab30,Ólafur Arnalds,Þau hafa sloppið undan þunga myrkursins,4HMHfABjrAez8PucpSM6Qe,7E3BRXV9ZbCt5lQTCXMTia,4.0,89.852,0.272,-14.941,0.0281,0.92,0.836,"['bow pop', 'chamber pop', 'compositional ambi..."
31031,TRCFVJF128F42AAD7B,aab128f8-a013-46b3-97c7-73f0d18dd7c1,üNN,Three Pilots,1i9YBowsDKdycECqeloT9I,1yZbA9817FU14OeRIJ1Dig,4.0,125.018,0.87,-11.325,0.0425,0.059,0.538,['future ambient']


In [62]:
# Write intermediate df to csv:

track_metadata.to_csv(scripts_dir+"data_cleaning_scripts/track_metadata.csv")

# Gathering Additional Metadata from Pretty Midi

In [615]:
track_metadata = pd.read_csv(scripts_dir+"data_cleaning_scripts/track_metadata.csv")
track_metadata.drop(["Unnamed: 0"], axis = 1, inplace = True)
track_metadata = track_metadata.reindex(columns = track_metadata.columns.tolist() + ["instruments","percussion","time_signature","time_signature_changes","key_signature","mid_id"])
track_metadata.set_index('track_id', inplace=True)


In [616]:
# Function to extract time signature, key signature, and instrumentation from pretty midi metadata:

def meta_from_prettymidi(midi_data):

    instruments = {}
    percussion = {}
    time_signature = ""
    time_signature_changes = ""
    key_signature = []

    try: 
        
        for i in range(len(midi_data.instruments)): # pulling out instruments and percussion into sep dicts
            if not midi_data.instruments[i].is_drum:
                instruments.update({midi_data.instruments[i].program:(midi_data.instruments[i].name).rstrip()})
            else:
                percussion.update({midi_data.instruments[i].program:(midi_data.instruments[i].name).rstrip()})

        if len(midi_data.time_signature_changes) > 1: # pulling out time sigs if there are changes
            time_signature_changes = "yes"
            res = ["{}/{}".format(midi_data.time_signature_changes[i].numerator,midi_data.time_signature_changes[i].denominator) for i in range(len(midi_data.time_signature_changes))]
            time_signature = res
            
        elif len(midi_data.time_signature_changes) == 1: # pulling out time sigs if there no changes
            time_signature_changes = "no"
            res = "{}/{}".format(midi_data.time_signature_changes[0].numerator,midi_data.time_signature_changes[0].denominator)
            time_signature = res
        
        else:
            time_signature_changes = np.NaN
            time_signature = np.NaN
            
    except:
        pass
        
    
    keys = midi_data.get_pitch_class_histogram().argsort()[-2:][::-1]
    key_ref = ["C","C*","D","D*","E","F","F*","G","G*","A","A*","B"]    
    key_signature = [key_ref[i] for i in keys]
    

    frame = [instruments,percussion,time_signature,time_signature_changes,key_signature]

    return frame
        
        
    

In [627]:
# Iterating through file structure to apply meta_from_prettymidi and concat to track_metadata:

source = data_dir+"midi_files/"
level_1 = os.listdir(source)

 
for i in range(29743,len(level_1)):
    print(i,level_1[i])
    try: 
        
    # Some of the corrupted files error out despite measures taken below
    # Namely: TRMPFNL128F427F0BF, TRNCSKU128F4265639, TRQODEY12903CF7594
    
        if level_1[i] != ".DS_Store":
            level_2 = os.listdir(source + level_1[i]+ "/")
            if level_2[0] == ".DS_Store":
                dest = source + level_1[i] + "/" + level_2[1]
                try:
                
                # In certain cases, we can clip midi files to be able to read them in
                    
                    midi_data = pretty_midi.PrettyMIDI(dest)
                    output = meta_from_prettymidi(midi_data)    
                    track_metadata.loc[level_1[i],["instruments","percussion","time_signature",
                                       "time_signature_changes","key_signature"]] = output   
                    track_metadata.loc[level_1[i],'mid_id'] = level_2[1]

                except(OSError, KeyError):
                    mido.MidiFile(dest, clip = True).save(source + level_1[i] + "/" + "clipped")
                    midi_data = pretty_midi.PrettyMIDI(source + level_1[i] + "/" + "clipped")
                    output = meta_from_prettymidi(midi_data)    
                    track_metadata.loc[level_1[i],["instruments","percussion","time_signature",
                                       "time_signature_changes","key_signature"]] = output   
                    track_metadata.loc[level_1[i],'mid_id'] = "clipped"
                    
                # In other cases, clipping does not work    
            
                except mido.midifiles.meta.KeySignatureError as k:
                    track_metadata.loc[level_1[i],'mid_id'] = "{}".format(str(k))
                    print(str(k))
                
                except ValueError as v:
                    track_metadata.loc[level_1[i],'mid_id'] = "{}".format(str(v))
                    print(str(v))
                
                except EOFError as e:
                    track_metadata.loc[level_1[i],'mid_id'] = "{}".format(str(e))
                    print(str(e))
                    
            else:
                dest = source + level_1[i] + "/" + level_2[0]
                try:
                    midi_data = pretty_midi.PrettyMIDI(dest)
                    output = meta_from_prettymidi(midi_data)    
                    track_metadata.loc[level_1[i],["instruments","percussion","time_signature",
                                       "time_signature_changes","key_signature"]] = output   
                    track_metadata.loc[level_1[i],'mid_id'] = level_2[0] 
                
                except(OSError, KeyError):
                    mido.MidiFile(dest, clip = True).save(source + level_1[i] + "/" + "clipped")
                    midi_data = pretty_midi.PrettyMIDI(source + level_1[i] + "/" + "clipped")
                    output = meta_from_prettymidi(midi_data)    
                    track_metadata.loc[level_1[i],["instruments","percussion","time_signature",
                                       "time_signature_changes","key_signature"]] = output   
                    track_metadata.loc[level_1[i],'mid_id'] = "clipped"
                
                except mido.midifiles.meta.KeySignatureError as k:
                    track_metadata.loc[level_1[i],'mid_id'] = "{}".format(str(k))
                    print(str(k))
            
                except ValueError as v:
                    track_metadata.loc[level_1[i],'mid_id'] = "{}".format(str(v))
                    print(str(v))
                
                except EOFError as e:
                    track_metadata.loc[level_1[i],'mid_id'] = "{}".format(str(e))
                    print(str(e))
            
    except:
        track_metadata.loc[level_1[i],'mid_id'] = "unknown error"
        pass

            
            
                
   


29743 TRMDSOV128F42769A9
29744 TRKZKDB12903C96FFF
29745 TRWANYR128F9345558
29746 TRZYOGR128F42948AF
29747 TRMMNOW128F42557E3
29748 TRGTGOA128F426B995
29749 TRKHYEV128F92D6686
29750 TRYESON128F930B81D
29751 TRVLXOH128F148F6E5
29752 TRVDSNZ128F42B7146
29753 TRXAKSS12903CA97E0
29754 TRMUCFC128F4272CCD
29755 TRPSWXI128F4280E84
29756 TRKAPSC128F14580A4
29757 TRHQTVN128F92DF7D8
29758 TRJUGID128F92E0068
29759 TROCTTG128EF33FDD3
29760 TRKJZTV128F933720F
29761 TRUKMVT128F931EB85
29762 TRJBNRX12903CB2233
29763 TRUSANS12903CB2600
29764 TRZMPZW128F934B7A4
29765 TRUVRBC128F147844B
29766 TRMXKDJ128F4232BB6
29767 TRPBVTR128EF34EB9F
29768 TRLFAIO128F427215C
29769 TRHGEZA128F1489AA1
29770 TRNNQIM128F9341D3C
29771 TRDQHLC128F423FBE9
29772 TRFANUB128F428096E
29773 TRONFWX128F4267D99
29774 TRAOFBZ128F424E884
29775 TRLCNDV128F4272700
29776 TRXISDW128F1456F5E
29777 TRBMAOO128F9327ECB
29778 TRRJWLF128F9339EC7
29779 TRWDYUH128F427775A
29780 TRBGODE128E0785BC8
29781 TRNVGYG128F423C24A
29782 TRCSDPD128F42AD897


30071 TRBOMAC12903CE35E1
30072 TRESQZI12903CAA3BD
30073 TRESFEL128F423C531
30074 TRDLICY12903CB23CC
30075 TRYEONX128F92F014F
30076 TRMXHWQ128F9333A39
30077 TRFRAXF128F92F2CFB
30078 TRANPDP128F42902BA
Could not decode key with 3 flats and mode 255
30079 TRNEZSF128F92E44A4
30080 TRXXDEM128F4255F25
30081 TRAMDRT12903CA121C
30082 TRLCLET12903CE48F0
30083 TRTVVSZ128F92D8CCD
30084 TRGXBGR12903CADC7B
30085 TRHDFYF128F1459959
30086 TROPKKC128F4257BDC
30087 TRIPAYV128F930A09F
30088 TRQJXUC128E078C739
30089 TROMYDY128F146056A
30090 TRVHPDQ128F4292171
30091 TRZRTQB128F428D07D
30092 TRHPODS12903C9AB20
30093 TRZVSLU12903CB2C84
30094 TREGBFD12903CB73C3
30095 TRCEKVC128F92E79C8
30096 TRWABSM128F9303ACA
30097 TRBVQJH12903CD6B25
30098 TRXEVPY128F9309588
30099 TRNJIWN128F14A9B2D
30100 TRKGFYO128F4264578
30101 TRSTEYS128F1495B37
30102 TRXCEHB12903CEE429
30103 TRNACUI12903CA4479
30104 TRHTLCM128E0783A19
30105 TREINJX12903CACAE8
30106 TRKIZWL128EF342C5A
30107 TRLEEUP12903CE26AF
30108 TRLBPLO128E078987F
301

30397 TRVZDNA128F149180F
30398 TRWAPWL128EF367FCE
30399 TREZDXZ128E0780C30
30400 TRGVHLY12903CDC731
30401 TRHIZHZ128F4295EFA
30402 TRLVRUV12903CF1F41
30403 TRQQDFY128F92F7C6A
30404 TRAXJRP128F423505F
30405 TRECUXT128F42A2C72
30406 TRXTWRG128F4259C00
30407 TROQXTS128F9336B2A
30408 TRRVRKY128F4225302
30409 TRTQDOW12903D1268E
30410 TRHTEUK128F146E6D5
30411 TRCYORZ128F4258A29
30412 TRYGBID128F42A74CF
30413 TRCPELX128F425ABB7
30414 TRWKJLM128F9344096
30415 TRPUIEM128F1466979
30416 TRZUGPW128F93498EF
30417 TRONJDJ12903D13D28
30418 TRGPGBU128F4236089
30419 TRZXWTR128F93027B2
30420 TRCQRKU128F9336EFF
30421 TRMILMI12903CEC756
30422 TRYWCGI128F4294A99
30423 TRYNLDI12903CDB17C
30424 TRMINQZ128E0785BCB
30425 TRVKNKR128F42B098D
30426 TRNZSWB128F14836CE
30427 TRSDWDN128F4274C03
30428 TRREDXG128F42A10BC
30429 TRICSZE128F148B706
30430 TRUZIGS128E078EDF4
30431 TRIJKBS128F92F9DA8
30432 TRWXJKG128F92FB01E
30433 TROLRWJ128F933895F
30434 TRTLUDO128F930A41C
30435 TREWUHQ128F92EEA14
30436 TRQOFPH128F429220F


30725 TRMLJYS128F930EA95
30726 TRVNYSZ128E0792A2B
30727 TRYTRKZ128F92D183C
30728 TRRBLEW128F4282D31
30729 TRMNAXK128F9302C7D
30730 TROVAAA128F428CE4B
30731 TRXLTDH128F145E24F
30732 TROTVUY128F9323824
30733 TRAVQWO128F429BC6B
30734 TRPUGLX128F145C03B
30735 TRODOQS128F4255910
30736 TRSJXNY128F4285CC5
30737 TRXLRKW128EF35F199
30738 TRCGPCB128F42BA5D2
30739 TREHSIM128F424FAD7
30740 TRELDFJ128F427B9BC
30741 TRNMGEU128F930BC04
30742 TRJKULR12903CCCC4B
30743 TRGJXJB128F422A6EB
30744 TRNCSXW12903CCDB74
30745 TRIVFDC128F1468406
30746 TRNGBSC128F424508F
30747 TRSYWRD12903CE1798
30748 TRAZYXX128F4276DDF
30749 TRNMLFJ128F92D4048
30750 TRAUVUF128F148D1AA
30751 TRKLRFS128F92F2025
30752 TRPHTZP128F427698A
30753 TRLKSGE128F92EF834
30754 TRLWYMH128F424C7DA
30755 TRAWSDM12903CCAA3B
30756 TRCBHGR12903CFAAE3
30757 TRRWFDC128F92C7614
30758 TRUJONK12903C96987
30759 TRZNLRL128F427BC82
30760 TRZURCC128F4247F95
30761 TRPDRHR128F425894E
30762 TRQWKPP12903CE517E
30763 TRYABSA128F932F84D
30764 TRCTGWN128F4231039


In [628]:
# View results:

track_metadata.head()

Unnamed: 0_level_0,mb_id,artist,song_title,sp_track_id,sp_artist_id,time_sig,tempo,energy,loudness,speechiness,acousticness,instrumentalness,genre,instruments,percussion,time_signature,time_signature_changes,key_signature,mid_id
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
TRBQUUS12903CB2580,7b28fb58-882b-4907-856e-ef327b624b11,+ / - {Plus/Minus},Flight Data Recorder,5aXSB4ywBWcPVBuOqxJAsZ,4j4NbPKY5EnsEUhdNICINb,3.0,108.021,0.584,-9.032,0.027,0.00184,0.155,[],"{33: '****', 0: '****', 24: 'Acoustic Guitar(S...",{0: 'mmm - Drums'},4/4,no,"[E, A]",638e8e25bab2eefec58558869ca98d3b.mid
TRRXTJI12903D01048,6bd4eaa7-aa1c-4138-a392-41693229c7fc,+39,My Relax,2j7kfbjTPqpaYHide0hFxi,5Cicr7PrzTNv4bXiZntlex,4.0,119.997,0.553,-7.488,0.216,0.00514,0.869,[],{},{0: 'SWING10'},4/4,no,"[B, A*]",fa6a527f3ccf5a5298759ac77a8b7b67.mid
TRMKKFV12903CB7EA4,,...arrived at ten,Flake,1T3zWGYGHIAX36IHYHL9Ar,49myFlNbBmL4X25pekwLqR,4.0,106.003,0.437,-12.814,0.0261,0.913,0.881,[],"{4: 'Wurly', 34: 'Bass', 18: 'Organ', 64: 'Mel...",{0: 'Kick'},"[4/4, 2/4, 4/4]",yes,"[D*, C]",ffb6d4f9b16f21df437d2e9d035fb7f1.mid
TRNVJXQ128F14AFD63,2fc659fa-293f-4288-bff5-29447f31f628,1. Futurologischer Congress,Atem,6bsV04oYvnEEh2tmMzbuE3,6MD8yUEX0V8XJR2Xrwz0HO,4.0,136.25,0.883,-7.47,0.0513,0.0366,0.00704,['neue deutsche welle'],"{38: '', 87: '', 73: '', 81: '', 119: '', 16: ...",{0: ''},,,"[C, G*]",03d74e9c2822b250a604a281a9063ab9.mid
TRLIEDO12903CA41B4,f37c537b-3557-4031-bfd6-ab63ced32854,10 CC,The Things We Do For Love,,,,,,,,,,['nan'],"{33: 'thingswedo', 0: 'piano', 16: 'organ', 52...",{0: 'drums'},4/4,no,"[C*, G*]",d5f1fe3c7c1eb4262226a476f764e430.mid


In [629]:
# Write final to csv:

track_metadata.to_csv(scripts_dir+"data_cleaning_scripts/track_metadata_final.csv")

In [3]:
df = pd.read_csv(scripts_dir+"data_cleaning_scripts/track_metadata_final.csv")

In [4]:
df.head()

Unnamed: 0,track_id,mb_id,artist,song_title,sp_track_id,sp_artist_id,time_sig,tempo,energy,loudness,speechiness,acousticness,instrumentalness,genre,instruments,percussion,time_signature,time_signature_changes,key_signature,mid_id
0,TRBQUUS12903CB2580,7b28fb58-882b-4907-856e-ef327b624b11,+ / - {Plus/Minus},Flight Data Recorder,5aXSB4ywBWcPVBuOqxJAsZ,4j4NbPKY5EnsEUhdNICINb,3.0,108.021,0.584,-9.032,0.027,0.00184,0.155,[],"{33: '****', 0: '****', 24: 'Acoustic Guitar(S...",{0: 'mmm - Drums'},4/4,no,"['E', 'A']",638e8e25bab2eefec58558869ca98d3b.mid
1,TRRXTJI12903D01048,6bd4eaa7-aa1c-4138-a392-41693229c7fc,+39,My Relax,2j7kfbjTPqpaYHide0hFxi,5Cicr7PrzTNv4bXiZntlex,4.0,119.997,0.553,-7.488,0.216,0.00514,0.869,[],{},{0: 'SWING10'},4/4,no,"['B', 'A*']",fa6a527f3ccf5a5298759ac77a8b7b67.mid
2,TRMKKFV12903CB7EA4,,...arrived at ten,Flake,1T3zWGYGHIAX36IHYHL9Ar,49myFlNbBmL4X25pekwLqR,4.0,106.003,0.437,-12.814,0.0261,0.913,0.881,[],"{4: 'Wurly', 34: 'Bass', 18: 'Organ', 64: 'Mel...",{0: 'Kick'},"['4/4', '2/4', '4/4']",yes,"['D*', 'C']",ffb6d4f9b16f21df437d2e9d035fb7f1.mid
3,TRNVJXQ128F14AFD63,2fc659fa-293f-4288-bff5-29447f31f628,1. Futurologischer Congress,Atem,6bsV04oYvnEEh2tmMzbuE3,6MD8yUEX0V8XJR2Xrwz0HO,4.0,136.25,0.883,-7.47,0.0513,0.0366,0.00704,['neue deutsche welle'],"{38: '', 87: '', 73: '', 81: '', 119: '', 16: ...",{0: ''},,,"['C', 'G*']",03d74e9c2822b250a604a281a9063ab9.mid
4,TRLIEDO12903CA41B4,f37c537b-3557-4031-bfd6-ab63ced32854,10 CC,The Things We Do For Love,,,,,,,,,,['nan'],"{33: 'thingswedo', 0: 'piano', 16: 'organ', 52...",{0: 'drums'},4/4,no,"['C*', 'G*']",d5f1fe3c7c1eb4262226a476f764e430.mid
