Alex Cohen <br>
BrainStation Capstone Project

Goal: Can we use Spotify data and Machine Learning to predict songs that will win Grammys based on previous winners

This workbook will showcase the process for collecting audio data from Spotify playlists conatining Record of the Year winners, nominees as well as a variety of other songs from the past 60 years

In [1]:
# Standard imports
import numpy as np
import pandas as pd

# For web scraping
import requests
from bs4 import BeautifulSoup

# For performing regex operations
import re

# For adding delays so that we don't spam requests
import time

## Installing spotipy to access Spotify API

In [2]:
pip install spotify

Note: you may need to restart the kernel to use updated packages.


IMPORTING SPOTIFY API - SPOTIPY

In [3]:
import spotipy

from spotipy.oauth2 import SpotifyClientCredentials

cid = '3005424aca5d4ca0bc006510258039ee'
secret = '0b2ff02fc80644b9a33c6dd900a3ea7d'

client_credentials_manager = SpotifyClientCredentials(client_id=cid, client_secret=secret)
sp = spotipy.Spotify(client_credentials_manager
=
client_credentials_manager)

In [4]:
pip install requests

Note: you may need to restart the kernel to use updated packages.


In [32]:
import ast
from typing import List
from os import listdir
def get_streamings(path: str = 'MyData') -> List[dict]:
    
    files = ['MyData/' + x for x in listdir(path)
             if x.split('.')[0][:-1] == 'StreamingHistory']
    
    all_streamings = []
    
    for file in files: 
        with open(file, 'r', encoding='UTF-8') as f:
            new_streamings = ast.literal_eval(f.read())
            all_streamings += [streaming for streaming 
                               in new_streamings]
    return all_streamings

In [33]:
all_streamings

NameError: name 'all_streamings' is not defined

In [5]:
import spotipy.util as util

username = '12138672436'
client_id ='3005424aca5d4ca0bc006510258039ee'
client_secret = '0b2ff02fc80644b9a33c6dd900a3ea7d'
redirect_uri = 'http://localhost:7777/callback'
scope = 'user-read-recently-played'

token = util.prompt_for_user_token(username=username, 
                                   scope=scope, 
                                   client_id=client_id,   
                                   client_secret=client_secret,     
                                   redirect_uri=redirect_uri)

Couldn't read cache at: .cache-12138672436
Couldn't read cache at: .cache-12138672436


In [6]:
print(token)

BQCU98QTkvTla1dkpjXzwy-53tR6Q8pL3QRpYBWZXtBkkPdKXtjNgYWIX2I-vwfGTRnd3rvbL7_MsKoPbTxxkiH9dYAmRtzGq_v1FGviE3ADmKRCT5IeNkwsgc8r1zNPCsDVpZKX_FA2-ijxIxDs1mPN


## Writing code to pull out track IDs from Spotify API

In [7]:
import requests

def get_id(track_name: str, token: str):
    headers = {'Accept': 'application/json', 'Content-Type': 'application/json', 'Authorization': f'Bearer ' + token}
    
    params = [('q', track_name), ('type', 'track')]
    
    try:
        response = requests.get('https://api.spotify.com/v1/search', 
                    headers = headers, params = params, timeout = 5)
        json = response.json()
        first_result = json['tracks']['items'][0]
        track_id = first_result['id']
        
        return track_id
    except:
        return None

In [8]:
dmcb_id = get_id('Dreams Money Can Buy', token)
print(dmcb_id)

1qyFlfPREPbRcS2BNszdYI


In [9]:
def get_features(track_id: str, token: str) -> dict:
    sp = spotipy.Spotify(auth=token)
    try:
        features = sp.audio_features([track_id])
        return features[0]
    except:
        return None

In [10]:
dmcb_features = get_features(dmcb_id, token)
print(dmcb_features)

{'danceability': 0.423, 'energy': 0.587, 'key': 6, 'loudness': -6.635, 'mode': 1, 'speechiness': 0.529, 'acousticness': 0.765, 'instrumentalness': 0, 'liveness': 0.114, 'valence': 0.33, 'tempo': 180.331, 'type': 'audio_features', 'id': '1qyFlfPREPbRcS2BNszdYI', 'uri': 'spotify:track:1qyFlfPREPbRcS2BNszdYI', 'track_href': 'https://api.spotify.com/v1/tracks/1qyFlfPREPbRcS2BNszdYI', 'analysis_url': 'https://api.spotify.com/v1/audio-analysis/1qyFlfPREPbRcS2BNszdYI', 'duration_ms': 253803, 'time_signature': 4}


## Getting playlist of Record of the Year Winners from my Spotify Library

In [11]:
def getTrackIDs(user, playlist_id):
    ids = []
    playlist = sp.user_playlist(user, playlist_id)
    for item in playlist['tracks']['items']:
        track = item['track']
        ids.append(track['id'])
    return ids

winner_ids = getTrackIDs('12138672436', '2BpxU8U7SC702hu66lWIUz')

In [12]:
print(len(winner_ids))

62


## Setting up data frame outlines to pull out audio features of songs from playlists

In [13]:
def getTrackFeatures(id):
  meta = sp.track(id)
  features = sp.audio_features(id)

  # meta
  name = meta['name']
  album = meta['album']['name']
  artist = meta['album']['artists'][0]['name']
  release_date = meta['album']['release_date']
  length = meta['duration_ms']
  popularity = meta['popularity']

  # features
  acousticness = features[0]['acousticness']
  danceability = features[0]['danceability']
  energy = features[0]['energy']
  instrumentalness = features[0]['instrumentalness']
  liveness = features[0]['liveness']
  loudness = features[0]['loudness']
  speechiness = features[0]['speechiness']
  tempo = features[0]['tempo']
  time_signature = features[0]['time_signature']

  track = [name, album, artist, release_date, length, popularity, danceability, acousticness, danceability, energy, instrumentalness, liveness, loudness, speechiness, tempo, time_signature]
  return track

## Putting songs from Record of the Year Winners into a Data Frame

In [15]:
# loop over track ids 
tracks = []
for i in range(len(winner_ids)):
  track = getTrackFeatures(winner_ids[i])
  tracks.append(track)

# create dataset
df_winners = pd.DataFrame(tracks, columns = ['name', 'album', 'artist', 'release_date', 'length', 'popularity', 'danceability', 'acousticness', 'danceability', 'energy', 'instrumentalness', 'liveness', 'loudness', 'speechiness', 'tempo', 'time_signature'])

In [135]:
df_winners

Unnamed: 0,name,album,artist,release_date,length,popularity,danceability,acousticness,danceability.1,energy,instrumentalness,liveness,loudness,speechiness,tempo,time_signature
0,Nel Blu Dipinto Di Blu,Nel Blu Dipinto Di Blu,Domenico Modugno,2008-04-28,219120,56,0.547,0.98200,0.547,0.0588,0.000016,0.1930,-17.062,0.0405,129.510,4
1,Mack the Knife,That's All,Bobby Darin,1959,184333,62,0.549,0.76000,0.549,0.5290,0.000000,0.2060,-12.291,0.1080,82.755,4
2,"The Theme from ""A Summer Place"" - Single Version",Percy Faith'S Greatest Hits,Percy Faith & His Orchestra,1953,144893,49,0.466,0.63100,0.466,0.3890,0.843000,0.2950,-12.825,0.0253,92.631,4
3,Moon River,The Essential Henry Mancini,Henry Mancini,1958,160693,39,0.235,0.96600,0.235,0.2640,0.074700,0.1280,-12.817,0.0312,91.656,3
4,(I Left My Heart) In San Francisco,I Left My Heart In San Francisco,Tony Bennett,1962-06-18,170960,62,0.313,0.95500,0.313,0.0780,0.000004,0.1690,-17.583,0.0332,128.065,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57,Uptown Funk (feat. Bruno Mars),Uptown Special,Mark Ronson,2015-01-12,269666,81,0.856,0.00801,0.856,0.6090,0.000082,0.0344,-7.223,0.0824,114.988,4
58,Hello,25,Adele,2016-06-24,295493,72,0.481,0.33600,0.481,0.4510,0.000000,0.0872,-6.095,0.0347,157.966,4
59,24K Magic,24K Magic,Bruno Mars,2016-11-17,225983,77,0.818,0.03400,0.818,0.8030,0.000000,0.1530,-4.282,0.0797,106.970,4
60,This Is America,This Is America,Childish Gambino,2018-05-06,225773,75,0.854,0.11700,0.854,0.4630,0.000000,0.3540,-6.159,0.1370,120.024,4


Now getting playlists of non-winners (this playlist has 1995-2020 nominees)

In [82]:
def show_tracks(results, uriArray):
    for i, item in enumerate(results['items']):
        track = item['track']
        uriArray.append(track['id'])
        
def getTrackIDs(username, playlist_id):
    trackID = []
    results = sp.user_playlist(username, playlist_id)
    tracks = results['tracks']
    show_tracks(tracks, trackID)
    while tracks['next']:
        tracks = sp.next(tracks)
        show_tracks(tracks, trackID)
        
    return trackID

# DataFrame = getTrackIDS('username', 'playlistURI')
nw_ids = getTrackIDs('Paul Gasca', '7i0HGpe6q17NS4ek2Vtg30')

In [83]:
print(len(nw_ids))

478


In [84]:
# loop over track ids 
tracks = []
for i in range(len(nw_ids)):
  track = getTrackFeatures(nw_ids[i])
  tracks.append(track)

# create dataset
df_others1 = pd.DataFrame(tracks, columns = ['name', 'album', 'artist', 'release_date', 'length', 'popularity', 'danceability', 'acousticness', 'danceability', 'energy', 'instrumentalness', 'liveness', 'loudness', 'speechiness', 'tempo', 'time_signature'])

In [85]:
df_others1.shape

(478, 16)

In [86]:
df_others1.head()

Unnamed: 0,name,album,artist,release_date,length,popularity,danceability,acousticness,danceability.1,energy,instrumentalness,liveness,loudness,speechiness,tempo,time_signature
0,I'll Make Love To You,II,Boyz II Men,1994-01-01,236773,63,0.567,0.0995,0.567,0.501,0.0,0.0986,-8.038,0.0241,142.589,3
1,He Thinks He'll Keep Her,Come On Come On,Mary Chapin Carpenter,1992-06-20,241800,31,0.599,0.603,0.599,0.741,3e-06,0.122,-5.859,0.0289,145.58,4
2,All I Wanna Do,Tuesday Night Music Club,Sheryl Crow,1993-08-03,272106,0,0.82,0.111,0.82,0.528,0.0186,0.257,-11.179,0.0321,120.091,4
3,Love Sneakin' Up On You,Longing In Their Hearts,Bonnie Raitt,1994-01-01,221626,26,0.641,0.0171,0.641,0.63,3e-06,0.0559,-10.321,0.0297,94.654,4
4,Streets of Philadelphia,PHILADELPHIA - Music From The Motion Picture,Various Artists,1993-01-04,252493,37,0.708,0.194,0.708,0.19,0.226,0.0916,-16.578,0.0326,93.669,4


Now getting playlists of non-winners (this playlist has 1970-1979 nominees)

In [87]:
# DataFrame = getTrackIDS('username', 'playlistURI')
nw_ids2 = getTrackIDs('Ludger Mayer', '7bemaSI1eSlbg78w1O8eKO')

In [88]:
print(len(nw_ids2))

35


In [89]:
# loop over track ids 
tracks = []
for i in range(len(nw_ids2)):
  track = getTrackFeatures(nw_ids2[i])
  tracks.append(track)

# create dataset
df_others2 = pd.DataFrame(tracks, columns = ['name', 'album', 'artist', 'release_date', 'length', 'popularity', 'danceability', 'acousticness', 'danceability', 'energy', 'instrumentalness', 'liveness', 'loudness', 'speechiness', 'tempo', 'time_signature'])

In [90]:
df_others2.head()

Unnamed: 0,name,album,artist,release_date,length,popularity,danceability,acousticness,danceability.1,energy,instrumentalness,liveness,loudness,speechiness,tempo,time_signature
0,Aquarius/Let The Sunshine In (The Flesh Failur...,The Age Of Aquarius,The 5th Dimension,1969,289293,58,0.293,0.446,0.293,0.74,0.00151,0.0998,-6.098,0.0399,118.23,4
1,Games People Play,Introspect (Bonus Track Version),Joe South,1968-01-01,215160,45,0.334,0.251,0.334,0.714,4.3e-05,0.257,-8.379,0.155,172.288,4
2,Is That All There Is?,Is That All There Is?,Peggy Lee,1969-01-01,262000,51,0.458,0.863,0.458,0.0899,0.0,0.137,-18.824,0.0362,109.574,4
3,"Everybody's Talkin' - From ""Midnight Cowboy""",Aerial Ballet,Harry Nilsson,1968-07-01,163586,67,0.438,0.338,0.438,0.349,5.7e-05,0.417,-15.569,0.038,124.984,4
4,Share Your Love with Me,This Girl's in Love with You,Aretha Franklin,1970-01-15,201906,37,0.435,0.528,0.435,0.311,5.6e-05,0.116,-12.342,0.0331,121.414,3


Now getting playlists of non-winners (Another playlist of nominees for song & record of the year)

In [91]:
# DataFrame = getTrackIDS('username', 'playlistURI')
nw_ids3 = getTrackIDs('jeremyheilman', '28eVcsXZ43LT7LGMQBdMHF')

In [92]:
print(len(nw_ids3))

455


In [93]:
# loop over track ids 
tracks = []
for i in range(len(nw_ids3)):
  track = getTrackFeatures(nw_ids3[i])
  tracks.append(track)

# create dataset
df_others3 = pd.DataFrame(tracks, columns = ['name', 'album', 'artist', 'release_date', 'length', 'popularity', 'danceability', 'acousticness', 'danceability', 'energy', 'instrumentalness', 'liveness', 'loudness', 'speechiness', 'tempo', 'time_signature'])

In [94]:
df_others3.head()

Unnamed: 0,name,album,artist,release_date,length,popularity,danceability,acousticness,danceability.1,energy,instrumentalness,liveness,loudness,speechiness,tempo,time_signature
0,bad guy,"WHEN WE ALL FALL ASLEEP, WHERE DO WE GO?",Billie Eilish,2019-03-29,194087,88,0.701,0.328,0.701,0.425,0.13,0.1,-10.965,0.375,135.128,4
1,Always Remember Us This Way,A Star Is Born Soundtrack (Without Dialogue),Lady Gaga,2018-10-05,210200,78,0.553,0.299,0.553,0.502,0.0,0.764,-5.972,0.0409,129.976,4
2,Bring My Flowers Now,While I'm Livin',Tanya Tucker,2019-08-23,260360,44,0.557,0.948,0.557,0.198,9e-06,0.217,-9.911,0.0461,133.823,4
3,Hard Place,Hard Place,H.E.R.,2018-11-02,271733,65,0.614,0.179,0.614,0.719,1.7e-05,0.163,-4.694,0.0955,160.075,4
4,Lover,Lover,Taylor Swift,2019-08-23,221306,79,0.359,0.492,0.359,0.543,1.6e-05,0.118,-7.582,0.0919,68.534,4


In [95]:
df_others3.shape

(455, 16)

Another playlist 2015-2020

In [96]:
nw_ids4 = getTrackIDs('Rasmus Tobiasen', '3tIEIgEUkSbFiPasg8ziem')

In [97]:
# loop over track ids 
tracks = []
for i in range(len(nw_ids4)):
  track = getTrackFeatures(nw_ids4[i])
  tracks.append(track)

# create dataset
df_others4 = pd.DataFrame(tracks, columns = ['name', 'album', 'artist', 'release_date', 'length', 'popularity', 'danceability', 'acousticness', 'danceability', 'energy', 'instrumentalness', 'liveness', 'loudness', 'speechiness', 'tempo', 'time_signature'])

In [99]:
df_others4.head()

Unnamed: 0,name,album,artist,release_date,length,popularity,danceability,acousticness,danceability.1,energy,instrumentalness,liveness,loudness,speechiness,tempo,time_signature
0,Like I Can,In The Lonely Hour (Deluxe Edition),Sam Smith,2014-01-01,167065,0,0.654,0.356,0.654,0.631,2.6e-05,0.124,-6.611,0.0395,99.94,4
1,Firestone,Firestone,Kygo,2015-06-24,273684,20,0.486,0.408,0.486,0.652,6.6e-05,0.077,-7.226,0.052,113.055,4
2,Thinking out Loud,x (Deluxe Edition),Ed Sheeran,2014-06-21,281560,83,0.781,0.474,0.781,0.445,0.0,0.184,-6.061,0.0295,78.998,4
3,Samsara (feat. Emila),Samsara (feat. Emila),Tungevaag,2015-01-21,211884,62,0.489,0.00598,0.489,0.641,0.0201,0.122,-7.011,0.032,108.901,4
4,Sugar,V (Deluxe),Maroon 5,2014-09-02,235493,0,0.744,0.0553,0.744,0.783,0.0,0.086,-7.077,0.0337,120.042,4


In [100]:
df_others4.shape

(1863, 16)

Another playlist 1980s

In [105]:
nw_ids5 = getTrackIDs('Diane Elizabeth Jansen Riber', '2uzORc0dpwNs8LdQ2IwFCa')

In [102]:
# loop over track ids 
tracks = []
for i in range(len(nw_ids5)):
  track = getTrackFeatures(nw_ids5[i])
  tracks.append(track)

# create dataset
df_others5 = pd.DataFrame(tracks, columns = ['name', 'album', 'artist', 'release_date', 'length', 'popularity', 'danceability', 'acousticness', 'danceability', 'energy', 'instrumentalness', 'liveness', 'loudness', 'speechiness', 'tempo', 'time_signature'])

In [103]:
df_others5.head()

Unnamed: 0,name,album,artist,release_date,length,popularity,danceability,acousticness,danceability.1,energy,instrumentalness,liveness,loudness,speechiness,tempo,time_signature
0,Roam,Cosmic Thing,The B-52's,1989-06-23,295133,57,0.631,0.0447,0.631,0.938,8e-06,0.3,-5.573,0.0833,134.916,4
1,Legal Tender,Whammy!,The B-52's,1983-04-20,220666,44,0.756,0.0248,0.756,0.675,0.00108,0.0714,-7.73,0.0478,148.049,4
2,Give Me Back My Man,Wild Planet,The B-52's,1980,240800,43,0.571,0.0137,0.571,0.96,0.00817,0.0591,-5.498,0.0354,162.297,4
3,Really Saying Something,Really Saying Something: The Best of Fun Boy T...,Fun Boy Three,1997-02-03,165079,18,0.725,0.617,0.725,0.939,5e-06,0.34,-7.044,0.0548,134.077,4
4,Cruel Summer,The Greatest Hits Collection (Collector Edition),Bananarama,2017-11-24,215400,54,0.697,0.343,0.697,0.828,0.0154,0.271,-8.796,0.0266,108.367,4


Another playlist 1990s

In [106]:
nw_ids6 = getTrackIDs('-_JT_-', '4vhyj0rJjdb2Nzzu8LJRsv')

In [107]:
# loop over track ids 
tracks = []
for i in range(len(nw_ids6)):
  track = getTrackFeatures(nw_ids6[i])
  tracks.append(track)

# create dataset
df_others6 = pd.DataFrame(tracks, columns = ['name', 'album', 'artist', 'release_date', 'length', 'popularity', 'danceability', 'acousticness', 'danceability', 'energy', 'instrumentalness', 'liveness', 'loudness', 'speechiness', 'tempo', 'time_signature'])

In [108]:
df_others6.head()

Unnamed: 0,name,album,artist,release_date,length,popularity,danceability,acousticness,danceability.1,energy,instrumentalness,liveness,loudness,speechiness,tempo,time_signature
0,Thunderstruck,The Razors Edge,AC/DC,1990-09-24,292880,82,0.502,0.000147,0.502,0.89,0.0117,0.217,-5.175,0.0364,133.52,4
1,All That She Wants,The Sign,Ace of Base,1993-12-24,211213,72,0.798,0.00547,0.798,0.625,8e-05,0.103,-9.689,0.0548,93.764,4
2,Beautiful Life,The Bridge,Ace of Base,1995-11-07,221186,63,0.749,0.202,0.749,0.988,0.00631,0.17,-5.266,0.0387,135.015,4
3,Don't Turn Around,The Sign,Ace of Base,1993-12-24,230186,56,0.745,0.00104,0.745,0.771,0.387,0.0574,-8.231,0.0755,95.623,4
4,The Sign,The Sign,Ace of Base,1993-12-24,191240,69,0.808,0.00928,0.808,0.786,0.0701,0.0574,-6.897,0.0447,96.987,4


Last Playlist (4K songs)

In [113]:
nw_ids7 = getTrackIDs('Susanna Ketola', '4rnleEAOdmFAbRcNCgZMpY')

In [114]:
# loop over track ids 
tracks = []
for i in range(len(nw_ids7)):
  track = getTrackFeatures(nw_ids7[i])
  tracks.append(track)

# create dataset
df_others7 = pd.DataFrame(tracks, columns = ['name', 'album', 'artist', 'release_date', 'length', 'popularity', 'danceability', 'acousticness', 'danceability', 'energy', 'instrumentalness', 'liveness', 'loudness', 'speechiness', 'tempo', 'time_signature'])

In [115]:
df_others7.head()

Unnamed: 0,name,album,artist,release_date,length,popularity,danceability,acousticness,danceability.1,energy,instrumentalness,liveness,loudness,speechiness,tempo,time_signature
0,Take Me To Church,Hozier (Deluxe),Hozier,2014-05-20,241688,0,0.566,0.634,0.566,0.664,0.0,0.116,-5.303,0.0464,128.945,4
1,Cooler Than Me - Single Mix,31 Minutes to Takeoff,Mike Posner,2010-08-09,213293,72,0.768,0.179,0.768,0.82,0.0,0.704,-4.63,0.0475,129.969,4
2,See You Again (feat. Kali Uchis),Flower Boy,"Tyler, The Creator",2017-07-21,180386,79,0.558,0.371,0.558,0.559,7e-06,0.109,-9.222,0.0959,78.558,4
3,Pompeii,Bad Blood,Bastille,2013-01-01,214147,71,0.679,0.0755,0.679,0.715,0.0,0.271,-6.383,0.0407,127.435,4
4,Hips Don't Lie (feat. Wyclef Jean),"Oral Fixation, Vol. 2 (Expanded Edition)",Shakira,2005-11-28,218093,83,0.778,0.284,0.778,0.824,0.0,0.405,-5.892,0.0712,100.024,4


Now going to merge all of the dataframes BESIDES the winners one

In [116]:
frames = [df_others1, df_others2, df_others3, df_others4, df_others5, df_others6, df_others7]

df_all_others = pd.concat(frames)

In [117]:
df_all_others.shape

(8396, 16)

In [118]:
# Check info of this data frame

df_all_others.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 8396 entries, 0 to 4777
Data columns (total 16 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   name              8396 non-null   object 
 1   album             8396 non-null   object 
 2   artist            8396 non-null   object 
 3   release_date      8396 non-null   object 
 4   length            8396 non-null   int64  
 5   popularity        8396 non-null   int64  
 6   danceability      8396 non-null   float64
 7   acousticness      8396 non-null   float64
 8   danceability      8396 non-null   float64
 9   energy            8396 non-null   float64
 10  instrumentalness  8396 non-null   float64
 11  liveness          8396 non-null   float64
 12  loudness          8396 non-null   float64
 13  speechiness       8396 non-null   float64
 14  tempo             8396 non-null   float64
 15  time_signature    8396 non-null   int64  
dtypes: float64(9), int64(3), object(4)
memory 

In [121]:
# dropping any duplicated rows

df_all_others.drop_duplicates()

Unnamed: 0,name,album,artist,release_date,length,popularity,danceability,acousticness,danceability.1,energy,instrumentalness,liveness,loudness,speechiness,tempo,time_signature
0,I'll Make Love To You,II,Boyz II Men,1994-01-01,236773,63,0.567,0.099500,0.567,0.501,0.000000,0.0986,-8.038,0.0241,142.589,3
1,He Thinks He'll Keep Her,Come On Come On,Mary Chapin Carpenter,1992-06-20,241800,31,0.599,0.603000,0.599,0.741,0.000003,0.1220,-5.859,0.0289,145.580,4
2,All I Wanna Do,Tuesday Night Music Club,Sheryl Crow,1993-08-03,272106,0,0.820,0.111000,0.820,0.528,0.018600,0.2570,-11.179,0.0321,120.091,4
3,Love Sneakin' Up On You,Longing In Their Hearts,Bonnie Raitt,1994-01-01,221626,26,0.641,0.017100,0.641,0.630,0.000003,0.0559,-10.321,0.0297,94.654,4
4,Streets of Philadelphia,PHILADELPHIA - Music From The Motion Picture,Various Artists,1993-01-04,252493,37,0.708,0.194000,0.708,0.190,0.226000,0.0916,-16.578,0.0326,93.669,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4773,Devil In A New Dress,My Beautiful Dark Twisted Fantasy,Kanye West,2010-11-22,351946,69,0.435,0.017500,0.435,0.760,0.000000,0.1580,-4.935,0.0721,80.082,4
4774,Nights,Blonde,Frank Ocean,2016-08-20,307151,78,0.466,0.420000,0.466,0.548,0.000001,0.1130,-9.362,0.1180,89.815,4
4775,Pound Cake / Paris Morton Music 2,Nothing Was The Same (Deluxe),Drake,2013-01-01,433800,65,0.521,0.138000,0.521,0.762,0.000011,0.1140,-6.746,0.5290,164.090,4
4776,Scared Of Love,Goodbye & Good Riddance,Juice WRLD,2018-12-10,170538,74,0.642,0.032000,0.642,0.562,0.000000,0.1680,-4.869,0.0812,158.001,4


Going to get more songs to help get this dataframe over 8,000 rows

In [122]:
nw_ids8 = getTrackIDs('Russ Laney', '5WAM4qc14blQMfJtyvh3Yq')

In [123]:
# loop over track ids 
tracks = []
for i in range(len(nw_ids8)):
  track = getTrackFeatures(nw_ids8[i])
  tracks.append(track)

# create dataset
df_others8 = pd.DataFrame(tracks, columns = ['name', 'album', 'artist', 'release_date', 'length', 'popularity', 'danceability', 'acousticness', 'danceability', 'energy', 'instrumentalness', 'liveness', 'loudness', 'speechiness', 'tempo', 'time_signature'])

In [124]:
# Concattenating the data frames

frames = [df_others1, df_others2, df_others3, df_others4, df_others5, df_others6, df_others7, df_others8]

df_all_others = pd.concat(frames)

In [126]:
df_all_others.shape

(8995, 16)

In [125]:
df_all_others.drop_duplicates()

Unnamed: 0,name,album,artist,release_date,length,popularity,danceability,acousticness,danceability.1,energy,instrumentalness,liveness,loudness,speechiness,tempo,time_signature
0,I'll Make Love To You,II,Boyz II Men,1994-01-01,236773,63,0.567,0.0995,0.567,0.501,0.000000,0.0986,-8.038,0.0241,142.589,3
1,He Thinks He'll Keep Her,Come On Come On,Mary Chapin Carpenter,1992-06-20,241800,31,0.599,0.6030,0.599,0.741,0.000003,0.1220,-5.859,0.0289,145.580,4
2,All I Wanna Do,Tuesday Night Music Club,Sheryl Crow,1993-08-03,272106,0,0.820,0.1110,0.820,0.528,0.018600,0.2570,-11.179,0.0321,120.091,4
3,Love Sneakin' Up On You,Longing In Their Hearts,Bonnie Raitt,1994-01-01,221626,26,0.641,0.0171,0.641,0.630,0.000003,0.0559,-10.321,0.0297,94.654,4
4,Streets of Philadelphia,PHILADELPHIA - Music From The Motion Picture,Various Artists,1993-01-04,252493,37,0.708,0.1940,0.708,0.190,0.226000,0.0916,-16.578,0.0326,93.669,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
594,Goodbye Girl,Goodbye Girl,David Gates,1978,168866,60,0.467,0.6050,0.467,0.255,0.000310,0.2730,-15.963,0.0372,120.174,4
595,My Cherie Amour,My Cherie Amour,Stevie Wonder,1969-08-01,173306,0,0.570,0.2820,0.570,0.523,0.000003,0.2480,-11.052,0.0277,101.137,4
596,How Deep Is Your Love (2007 Remastered Saturda...,Saturday Night Fever [The Original Movie Sound...,Bee Gees,1978-01-05,245200,0,0.630,0.0999,0.630,0.357,0.000000,0.1280,-9.392,0.0260,104.965,4
597,Get Together,Get Together: The Essential Youngbloods,The Youngbloods,2002-05-06,276466,44,0.533,0.3510,0.533,0.580,0.002780,0.0865,-9.265,0.0256,103.127,4


In [127]:
df_non_winners = df_all_others.drop_duplicates()

In [128]:
df_non_winners.shape

(8432, 16)

In [129]:
df_non_winners.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 8432 entries, 0 to 598
Data columns (total 16 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   name              8432 non-null   object 
 1   album             8432 non-null   object 
 2   artist            8432 non-null   object 
 3   release_date      8432 non-null   object 
 4   length            8432 non-null   int64  
 5   popularity        8432 non-null   int64  
 6   danceability      8432 non-null   float64
 7   acousticness      8432 non-null   float64
 8   danceability      8432 non-null   float64
 9   energy            8432 non-null   float64
 10  instrumentalness  8432 non-null   float64
 11  liveness          8432 non-null   float64
 12  loudness          8432 non-null   float64
 13  speechiness       8432 non-null   float64
 14  tempo             8432 non-null   float64
 15  time_signature    8432 non-null   int64  
dtypes: float64(9), int64(3), object(4)
memory u

## Merging the Winners with the Non-Winners data frame

In [130]:
frames2 = [df_winners, df_non_winners]

df_all = pd.concat(frames2)

In [131]:
df_all.head()

Unnamed: 0,name,album,artist,release_date,length,popularity,danceability,acousticness,danceability.1,energy,instrumentalness,liveness,loudness,speechiness,tempo,time_signature
0,Nel Blu Dipinto Di Blu,Nel Blu Dipinto Di Blu,Domenico Modugno,2008-04-28,219120,56,0.547,0.982,0.547,0.0588,1.6e-05,0.193,-17.062,0.0405,129.51,4
1,Mack the Knife,That's All,Bobby Darin,1959,184333,62,0.549,0.76,0.549,0.529,0.0,0.206,-12.291,0.108,82.755,4
2,"The Theme from ""A Summer Place"" - Single Version",Percy Faith'S Greatest Hits,Percy Faith & His Orchestra,1953,144893,49,0.466,0.631,0.466,0.389,0.843,0.295,-12.825,0.0253,92.631,4
3,Moon River,The Essential Henry Mancini,Henry Mancini,1958,160693,39,0.235,0.966,0.235,0.264,0.0747,0.128,-12.817,0.0312,91.656,3
4,(I Left My Heart) In San Francisco,I Left My Heart In San Francisco,Tony Bennett,1962-06-18,170960,62,0.313,0.955,0.313,0.078,4e-06,0.169,-17.583,0.0332,128.065,4


In [132]:
df_all.shape

(8494, 16)

In [133]:
# Dropping any duplicates to create the final data frame

df_master = df_all.drop_duplicates()

In [134]:
df_master.shape

(8439, 16)

In [146]:
# Adding a new column to show Record of the Year winners
# All tracks in index 0 - 61 are winners

df_master['record_of_the_year'] = 1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_master['record_of_the_year'] = 1


In [152]:
df_master.head(63)

Unnamed: 0,name,album,artist,release_date,length,popularity,danceability,acousticness,danceability.1,energy,instrumentalness,liveness,loudness,speechiness,tempo,time_signature,record_of_the_year
0,Nel Blu Dipinto Di Blu,Nel Blu Dipinto Di Blu,Domenico Modugno,2008-04-28,219120,56,0.547,0.9820,0.547,0.0588,0.000016,0.1930,-17.062,0.0405,129.510,4,1
1,Mack the Knife,That's All,Bobby Darin,1959,184333,62,0.549,0.7600,0.549,0.5290,0.000000,0.2060,-12.291,0.1080,82.755,4,1
2,"The Theme from ""A Summer Place"" - Single Version",Percy Faith'S Greatest Hits,Percy Faith & His Orchestra,1953,144893,49,0.466,0.6310,0.466,0.3890,0.843000,0.2950,-12.825,0.0253,92.631,4,1
3,Moon River,The Essential Henry Mancini,Henry Mancini,1958,160693,39,0.235,0.9660,0.235,0.2640,0.074700,0.1280,-12.817,0.0312,91.656,3,1
4,(I Left My Heart) In San Francisco,I Left My Heart In San Francisco,Tony Bennett,1962-06-18,170960,62,0.313,0.9550,0.313,0.0780,0.000004,0.1690,-17.583,0.0332,128.065,4,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
58,Hello,25,Adele,2016-06-24,295493,72,0.481,0.3360,0.481,0.4510,0.000000,0.0872,-6.095,0.0347,157.966,4,1
59,24K Magic,24K Magic,Bruno Mars,2016-11-17,225983,77,0.818,0.0340,0.818,0.8030,0.000000,0.1530,-4.282,0.0797,106.970,4,1
60,This Is America,This Is America,Childish Gambino,2018-05-06,225773,75,0.854,0.1170,0.854,0.4630,0.000000,0.3540,-6.159,0.1370,120.024,4,1
61,bad guy,"WHEN WE ALL FALL ASLEEP, WHERE DO WE GO?",Billie Eilish,2019-03-29,194087,88,0.701,0.3280,0.701,0.4250,0.130000,0.1000,-10.965,0.3750,135.128,4,1


In [154]:
df_master.iloc[62:8439, :]['record_of_the_year'] = 0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_master.iloc[62:8439, :]['record_of_the_year'] = 0


In [155]:
df_master

Unnamed: 0,name,album,artist,release_date,length,popularity,danceability,acousticness,danceability.1,energy,instrumentalness,liveness,loudness,speechiness,tempo,time_signature,record_of_the_year
0,Nel Blu Dipinto Di Blu,Nel Blu Dipinto Di Blu,Domenico Modugno,2008-04-28,219120,56,0.547,0.9820,0.547,0.0588,0.000016,0.1930,-17.062,0.0405,129.510,4,1
1,Mack the Knife,That's All,Bobby Darin,1959,184333,62,0.549,0.7600,0.549,0.5290,0.000000,0.2060,-12.291,0.1080,82.755,4,1
2,"The Theme from ""A Summer Place"" - Single Version",Percy Faith'S Greatest Hits,Percy Faith & His Orchestra,1953,144893,49,0.466,0.6310,0.466,0.3890,0.843000,0.2950,-12.825,0.0253,92.631,4,1
3,Moon River,The Essential Henry Mancini,Henry Mancini,1958,160693,39,0.235,0.9660,0.235,0.2640,0.074700,0.1280,-12.817,0.0312,91.656,3,1
4,(I Left My Heart) In San Francisco,I Left My Heart In San Francisco,Tony Bennett,1962-06-18,170960,62,0.313,0.9550,0.313,0.0780,0.000004,0.1690,-17.583,0.0332,128.065,4,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
594,Goodbye Girl,Goodbye Girl,David Gates,1978,168866,60,0.467,0.6050,0.467,0.2550,0.000310,0.2730,-15.963,0.0372,120.174,4,0
595,My Cherie Amour,My Cherie Amour,Stevie Wonder,1969-08-01,173306,0,0.570,0.2820,0.570,0.5230,0.000003,0.2480,-11.052,0.0277,101.137,4,0
596,How Deep Is Your Love (2007 Remastered Saturda...,Saturday Night Fever [The Original Movie Sound...,Bee Gees,1978-01-05,245200,0,0.630,0.0999,0.630,0.3570,0.000000,0.1280,-9.392,0.0260,104.965,4,0
597,Get Together,Get Together: The Essential Youngbloods,The Youngbloods,2002-05-06,276466,44,0.533,0.3510,0.533,0.5800,0.002780,0.0865,-9.265,0.0256,103.127,4,0


## Saving the data frame to a .csv

In [165]:
df_master.to_csv('Record_of_the_Year.csv')