### Data/Feature Collection
Notebook for collecting features from the Spotify API and Librosa.

In [51]:
import pandas as pd
import numpy as np
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import librosa
import json
import requests
from secret import *
from IPython.display import clear_output

client_credentials_manager = SpotifyClientCredentials(client_id=spotify_credentials['client_id'],
                                                      client_secret=spotify_credentials['client_secret'])

sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [49]:
all_decade_songs = pd.read_csv('../data/all_decade_songs.csv')
all_decade_songs

Unnamed: 0,artist,song,decade
0,Gene Autry,"Rudolph, The Red-nosed Reindeer",1950
1,The Andrews Sisters,"I Can Dream, Can't I",1950
2,The Ames Brothers,Rag Mop,1950
3,Red Foley,Chattanoogie Shoe Shine Boy,1950
4,Teresa Brewer,Music! Music! Music!,1950
...,...,...,...
1184,Lewis Capaldi,Someone You Loved,2010
1185,Selena Gomez,Lose You to Love Me,2010
1186,Post Malone,Circles,2010
1187,The Weeknd,Heartless,2010


Get song objects from Spotify.

In [50]:
def get_missing_url(artist,song):
    '''falls back on the iTunes API to get a 30 sec. preview of a song if Spotify
        doesn't provide one, also assigns a different genre since iTunes uses
        more traditional genres, returns track metadata'''

    artist = artist.replace(" ","+")
    song = song.replace(" ","+")

    try:
        r = requests.get(f"https://itunes.apple.com/search?term={artist}+{song}&limit=1")
        content = json.loads(r.text)
        preview = content['results'][0]["previewUrl"]
        genre = content['results'][0]["primaryGenreName"]
        return str(preview), genre.lower()

    except:
        pass
    
def search_and_extract(track_query):
    '''A function that takes in a song query and returns
    the track id and preview url for that track in a dict.'''

    track_query = str(track_query).strip().replace(" ","+")

    #uses the API to search for a track
    try:
        search = sp.search(track_query, type='track', limit=1, market='US')

        track_id = search['tracks']['items'][0]['id']
        preview_url = search['tracks']['items'][0]['preview_url']
        track_name = search['tracks']['items'][0]['name']
        artist = search['tracks']['items'][0]['artists'][0]['name']
        artist_id = search['tracks']['items'][0]['artists'][0]['id']

        search = sp.artist(artist_id)
        genre_list = search['genres']
        
        track_data = [track_id, preview_url, track_name, artist, artist_id, genre_list]

    except:
        track_query = str(track_query).split(",")
        name = track_query[0]
        search = sp.search(name, type='track', limit=1, market='US')

        track_id = search['tracks']['items'][0]['id']
        preview_url = search['tracks']['items'][0]['preview_url']
        track_name = search['tracks']['items'][0]['name']
        artist = search['tracks']['items'][0]['artists'][0]['name']
        artist_id = search['tracks']['items'][0]['artists'][0]['id']

        search = sp.artist(artist_id)
        genre_list = search['genres']


        track_data = [track_id, preview_url, track_name, artist, artist_id, genre_list]


    return track_data

In [52]:
all_decade_songs_df = all_decade_songs.copy()

all_decade_songs_df['track_id'] = np.nan
all_decade_songs_df['preview_url'] = np.nan
all_decade_songs_df['track_name'] = np.nan
all_decade_songs_df['artist_name'] = np.nan
all_decade_songs_df['artist_id'] = np.nan
all_decade_songs_df['genres'] = np.nan

error = []

for i,row in all_decade_songs_df.iterrows():
    artist = row['artist']
    song = row['song']
    
    try:
        query = f"{song} {artist}"
        track_data = search_and_extract(query)
        all_decade_songs_df.loc[i,'track_id'] = track_data[0]
        all_decade_songs_df.loc[i,'preview_url'] = track_data[1]
        all_decade_songs_df.loc[i, 'track_name'] = track_data[2]
        all_decade_songs_df.loc[i, 'artist_name'] = track_data[3]
        all_decade_songs_df.loc[i,'artist_id'] = track_data[4]
        all_decade_songs_df.loc[i,'genres'] = track_data[5]
        
    except:
        
        try:
            query = f"{song}"
            track_data = search_and_extract(query)
            all_decade_songs_df.loc[i,'track_id'] = track_data[0]
            all_decade_songs_df.loc[i,'preview_url'] = track_data[1]
            all_decade_songs_df.loc[i, 'track_name'] = track_data[2]
            all_decade_songs_df.loc[i, 'artist_name'] = track_data[3]
            all_decade_songs_df.loc[i,'artist_id'] = track_data[4]
            all_decade_songs_df.loc[i,'genres'] = track_data[5]
            
        except:
            error.append(f"{song} {artist}")
            
    clear_output(wait=True)
    print(f"{i}/{len(all_decade_songs_df)}")

1188/1189


In [53]:
error

['Rudolph, The Red-nosed Reindeer Gene Autry',
 "I Can Dream, Can't I The Andrews Sisters",
 'Rag Mop The Ames Brothers',
 'Chattanoogie Shoe Shine Boy Red Foley',
 'Music! Music! Music! Teresa Brewer',
 "If I Knew You Were Comin' I'd've Baked a Cake Eileen Barton",
 'The Third Man Theme Anton Karas',
 'Mona Lisa Nat King Cole',
 'Goodnight Irene Gordon Jenkins and The Weavers',
 'Harbor Lights Sammy Kaye',
 'The Thing Phil Harris',
 'The Tennessee Waltz Patti Page',
 'If Perry Como',
 'Be My Love Mario Lanza',
 'How High The Moon Les Paul and Mary Ford',
 'Too Young Nat King Cole',
 'Come On-a My House Rosemary Clooney',
 'Because Of You Tony Bennett',
 'Cold, Cold Heart Tony Bennett',
 "Sin (It's No Sin) Eddy Howard",
 'Cry Johnnie Ray and The Four Lads',
 'Wheel Of Fortune Kay Starr',
 'Blue Tango Leroy Anderson',
 'Here In My Heart Al Martino',
 'Delicado Percy Faith',
 "Auf Wiederseh'n Sweetheart Vera Lynn",
 'You Belong To Me Jo Stafford',
 'I Went To Your Wedding Patti Page',
 "

In [60]:
missing_idx = all_decade_songs_df[all_decade_songs_df['track_name'].isnull()==True].index
all_decade_songs_df.loc[missing_idx,'track_id'] = missing_tracks
all_decade_songs_df[all_decade_songs_df['track_name'].isnull()==True]

Unnamed: 0,artist,song,decade,track_id,preview_url,track_name,artist_name,artist_id,genres
19,Eddy Howard,Sin (It's No Sin),1950,4cyPBIYP1z47ZIdT7fgDxu,,,,,
28,Johnny Standley,It's In The Book (parts 1 & 2),1950,67HAs4WdVmQl4H8AvMDCVB,,,,,
33,Patti Page,The Doggie In The Window,1950,7bs4wDnaX0z7BTeM6lTcYA,,,,,
36,Les Paul and Mary Ford,Vaya Con Dios (May God Be With You),1950,3qhxI5f7klTVfK0iWYmKdM,,,,,
148,Joe Dowell,Wooden Heart (Muss I Denn),1960,2ELAMT68a78OGwhCgj9Kai,,,,,
234,Herman's Hermits,"I'm Henry VIII, I Am",1960,1vJeuxgfo1DnpfkjYrqBuz,,,,,
510,Barbra Streisand,Love Theme From A Star Is Born (Evergreen),1970,1nFZeR1l3FAXnkYPbrBtGV,,,,,
521,Marvin Gaye,Got to Give It Up (Part 1),1970,7ohR0qPH6f2Vuj2pUNanJG,,,,,
797,Milli Vanilli,Girl I'm Gonna Miss You,1980,2dThEJP9S344qzNhePcSzj,,,,,
903,Los Del Rio,Macarena (Bayside Boys Mix),1990,4oaj36KzXRgDg4McgcTsZK,,,,,


In [62]:
for i,row in all_decade_songs_df[all_decade_songs_df['track_name'].isnull()==True].iterrows():
    
    search = sp.track(row['track_id'])
    
    try:
        all_decade_songs_df.loc[i,'track_id'] = search['id']
        all_decade_songs_df.loc[i,'preview_url'] = search['preview_url']
        all_decade_songs_df.loc[i,'track_name'] = search['name']
        all_decade_songs_df.loc[i,'artist_name'] = search['artists'][0]['name']
        all_decade_songs_df.loc[i,'artist_id'] = search['artists'][0]['id']

        search = sp.artist(artist_id)
        all_decade_songs_df.loc[i,'genres'] = search['genres']
        
    except:
        pass

    

all_decade_songs_df.iloc[missing_idx,:]

Unnamed: 0,artist,song,decade,track_id,preview_url,track_name,artist_name,artist_id,genres
19,Eddy Howard,Sin (It's No Sin),1950,4cyPBIYP1z47ZIdT7fgDxu,,Sin (It's No) Sin,Eddy Howard,6Vb8yGTdhnyIbGS5evBxbF,
28,Johnny Standley,It's In The Book (parts 1 & 2),1950,67HAs4WdVmQl4H8AvMDCVB,https://p.scdn.co/mp3-preview/04221457f0ae3945...,Rock & Roll Must Go (It's in the Book),Johnny Standley,2joTlkHhWGH5fTEztQ6nWL,
33,Patti Page,The Doggie In The Window,1950,7bs4wDnaX0z7BTeM6lTcYA,,(How Much Is That) Doggie In The Window,Patti Page,4nZN9kln8toEzOifhWG2uF,
36,Les Paul and Mary Ford,Vaya Con Dios (May God Be With You),1950,3qhxI5f7klTVfK0iWYmKdM,,Vaya Con Dios,Les Paul,2hkZGvBotqZ7uBBUnBwmLC,
148,Joe Dowell,Wooden Heart (Muss I Denn),1960,2ELAMT68a78OGwhCgj9Kai,,Wooden Heart,Joe Dowell,1b5taabb9eKSbyzVFVtEjh,
234,Herman's Hermits,"I'm Henry VIII, I Am",1960,1vJeuxgfo1DnpfkjYrqBuz,,"Henry The VIII, I Am",Herman's Hermits,48YxSlb23RAaCd4RyHcV9V,
510,Barbra Streisand,Love Theme From A Star Is Born (Evergreen),1970,1nFZeR1l3FAXnkYPbrBtGV,https://p.scdn.co/mp3-preview/e0db9f7d1809e9fe...,"Evergreen (Love Theme from, ""A Star Is Born"")",Barbra Streisand,7jmTilWYlKOuavFfmQAcu6,
521,Marvin Gaye,Got to Give It Up (Part 1),1970,7ohR0qPH6f2Vuj2pUNanJG,,Got To Give It Up - Pt. 1,Marvin Gaye,3koiLjNrgRTNbOwViDipeA,
797,Milli Vanilli,Girl I'm Gonna Miss You,1980,2dThEJP9S344qzNhePcSzj,https://p.scdn.co/mp3-preview/51eb89c36e4f0962...,I'm Gonna Miss You,Milli Vanilli,3vRclCt9VnNhYIxFMQCxuM,
903,Los Del Rio,Macarena (Bayside Boys Mix),1990,4oaj36KzXRgDg4McgcTsZK,https://p.scdn.co/mp3-preview/1f702250ec72803b...,Macarena,Los Del Rio,2JXn03fudjyRkQ1Ye9f5rk,


In [64]:
all_decade_songs_df[all_decade_songs_df['artist'] != all_decade_songs_df['artist_name']]

Unnamed: 0,artist,song,decade,track_id,preview_url,track_name,artist_name,artist_id,genres
0,Gene Autry,"Rudolph, The Red-nosed Reindeer",1950,6ymkab3FTjiFzSJwhal59m,,Rudolph The Red-Nosed Reindeer,Ella Fitzgerald,5V0MlUE1Bft0mbLlND7FJz,
2,The Ames Brothers,Rag Mop,1950,5SOjgaven2icwH6GrRgzGL,,Rag Mop,Lionel Hampton,2PjgZkwAEk7UTin4jP6HLP,
4,Teresa Brewer,Music! Music! Music!,1950,6157eGcWcBaHZcYIfQQZjn,,"Music, Music, Music - Live At The Pershing, Ch...",Ahmad Jamal Trio,0BqALs1lInR9TTOulUADH7,
5,Eileen Barton,If I Knew You Were Comin' I'd've Baked a Cake,1950,5qhM3qBHEIFvXL8xRe3dhb,https://p.scdn.co/mp3-preview/2a67dda5b5b9681a...,"If I Knew You Were Comin', I'd've Baked a Cake",The New Yorkers,0rzpgxQRtcaGme1Yx4Rv41,
6,Anton Karas,The Third Man Theme,1950,7x4LHRPEqVaSoLZbQOiM7o,https://p.scdn.co/mp3-preview/9aa55e26cacc20cf...,The Third Man Theme,Chet Atkins and his Galloping Guitar,6jLlhBG9MmX7k8vWDcDtHe,
...,...,...,...,...,...,...,...,...,...
1177,Lady Gaga and Bradley Cooper,Shallow,2010,2VxeLyX666F8uXCJ0dZF8B,,Shallow,Lady Gaga,1HY2Jd0NmPuamShAr6KMms,
1179,Lil Nas X solo or featuring Billy Ray Cyrus2,Old Town Road,2010,2YpeDb67231RjR0MgVLzsG,https://p.scdn.co/mp3-preview/d94ec2ebe62fd52c...,Old Town Road - Remix,Lil Nas X,7jVv8c5Fj3E9VhNjxT4snq,
1181,Shawn Mendes and Camila Cabello,Señorita,2010,0TK2YIli7K1leLovkQiNik,,Señorita,Shawn Mendes,7n2wHs1TKAczGzO7Dd2rGr,
1187,The Weeknd,Heartless,2010,3FU6urUVsgXa6RBuV2PdRk,https://p.scdn.co/mp3-preview/2223210f0cf4fd77...,Heartless (feat. Morgan Wallen),Diplo,5fMUXHkw8R8eOP2RNVYEZX,


__Notes:__
There is still some cleaning to do. Very few rows have their associated genre, the `artist` and `artist_name` columns don't match, which means they are difference versions. Also, several previews are missing and will need to be tracked down.

In [65]:
all_decade_songs_df.to_csv('../data/all_decades_songs_V1.csv')