In [1]:
import os
import glob
import pandas as pd
import numpy as np
import datetime
import matplotlib.pyplot as plt
import seaborn as sb
sb.set()  # apply Seaborn's default style to the graphics drawn in this notebook

In [2]:
# Load the full song dataset and clean it in a single chain:
#   1. read the CSV, forcing the song_name column to str,
#   2. drop the identifier/URL columns and the leftover 'Unnamed: 0' column,
#   3. renumber the rows 0..n-1.
# Rows with a missing song_name are kept (no dropna is applied).
allsongs = (
    pd.read_csv('datasets/allsongs.csv', dtype={'song_name': str})
    .drop(columns=['type', 'id', 'uri', 'track_href', 'analysis_url', 'Unnamed: 0'])
    .reset_index(drop=True)
)

# Preview the first rows of the cleaned frame.
allsongs.head()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,genre,song_name,title
0,0.831,0.814,2,-7.364,1,0.42,0.0598,0.0134,0.0556,0.389,156.985,124539,4,Dark Trap,Mercury: Retrograde,
1,0.719,0.493,8,-7.23,1,0.0794,0.401,0.0,0.118,0.124,115.08,224427,4,Dark Trap,Pathology,
2,0.85,0.893,5,-4.783,1,0.0623,0.0138,4e-06,0.372,0.0391,218.05,98821,4,Dark Trap,Symbiote,
3,0.476,0.781,0,-4.71,1,0.103,0.0237,0.0,0.114,0.175,186.948,123661,3,Dark Trap,ProductOfDrugs (Prod. The Virus and Antidote),
4,0.798,0.624,2,-7.668,1,0.293,0.217,0.0,0.166,0.591,147.988,123298,4,Dark Trap,Venom,


In [3]:
# Combining datasets of top songs.
# Every CSV under datasets/topsongs/ is named after its decade (the file stem
# must parse as an int, e.g. "2010.csv"); that value is attached to each row
# as the `Decade` column before the per-decade frames are concatenated.
#
# sorted(): glob returns paths in arbitrary order, so sorting keeps the row
# order (and therefore the index) of `topsongs` the same on every run.
all_files = sorted(glob.glob("datasets/topsongs/*.csv"))

# Deliberately not named `list`: that would shadow the built-in list() for
# every cell executed afterwards.
decade_frames = []

for filename in all_files:
    # Take the decade from the file stem. basename/splitext is independent of
    # the path separator and of any other '.' or "s/" in the path, unlike
    # splitting the raw path string.
    decade = int(os.path.splitext(os.path.basename(filename))[0])

    df = pd.read_csv(filename, index_col=None, header=0).assign(Decade=decade)
    decade_frames.append(df)

# ignore_index=True gives one continuous 0..n-1 index across all decades.
topsongs = pd.concat(decade_frames, axis=0, ignore_index=True)

# NOTE(review): the recorded output shows a title rendered as "SeÃ±orita",
# which looks like an encoding problem in the source CSV - confirm and fix at
# the data level if so.

# Example to get all songs from 2010s
topsongs.loc[topsongs['Decade'] == 2010]

Unnamed: 0,Number,title,artist,top genre,year,bpm,nrgy,dnce,dB,live,val,dur,acous,spch,pop,Decade
567,1,bad guy,Billie Eilish,electropop,2019,135,43,70,-11,10,56,194,33,38,94,2010
568,2,7 rings,Ariana Grande,dance pop,2019,140,32,78,-11,9,33,179,59,33,90,2010
569,3,Old Town Road - Remix,Lil Nas X,country rap,2019,136,62,88,-6,11,64,157,5,10,89,2010
570,4,SeÃ±orita,Shawn Mendes,canadian pop,2019,117,55,76,-6,8,75,191,4,3,88,2010
571,5,rockstar (feat. 21 Savage),Post Malone,dfw rap,2018,160,52,59,-6,13,13,218,12,7,88,2010
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
662,96,Stereo Hearts (feat. Adam Levine),Gym Class Heroes,dance pop,2011,90,80,65,-3,27,80,211,3,10,56,2010
663,97,Dynamite,Taio Cruz,dance pop,2011,120,79,76,-4,3,87,203,0,8,56,2010
664,98,Break Your Heart,Taio Cruz,dance pop,2011,122,89,67,-5,24,68,185,0,3,54,2010
665,99,Summer Air,ItaloBrothers,dance pop,2017,102,75,76,-4,8,49,184,6,7,43,2010


In [4]:
# Step towards combining allsongs and topsongs into one dataframe:
# show the rows of topsongs whose `Number` is 1.
topsongs.loc[topsongs['Number'].eq(1)]

Unnamed: 0,Number,title,artist,top genre,year,bpm,nrgy,dnce,dB,live,val,dur,acous,spch,pop,Decade
0,1,Put Your Head On My Shoulder,Paul Anka,adult standards,2000,116,34,55,-9,10,47,155,75,3,72,1950
73,1,Here Comes The Sun - Remastered 2009,The Beatles,british invasion,1969,129,54,56,-10,18,39,186,3,3,82,1960
170,1,Bohemian Rhapsody - 2011 Mix,Queen,glam rock,1975,71,40,41,-10,30,22,354,27,5,84,1970
274,1,Livin' On A Prayer,Bon Jovi,glam metal,1986,123,89,53,-4,29,80,249,8,3,83,1980
379,1,No Scrubs,TLC,atl hip hop,1999,93,68,74,-4,7,59,214,3,10,79,1990
467,1,In the End,Linkin Park,alternative metal,2000,105,86,56,-6,21,40,217,1,6,83,2000
567,1,bad guy,Billie Eilish,electropop,2019,135,43,70,-11,10,56,194,33,38,94,2010


In [5]:
# Lower-cased song names from allsongs as a NumPy array, used for
# case-insensitive title lookups. Rows without a song_name stay NaN.
allsongsarr = (
    allsongs.loc[:, 'song_name']
    .str.lower()
    .to_numpy()
)
print(allsongsarr)

['mercury: retrograde' 'pathology' 'symbiote' ... nan nan nan]


In [6]:
# Song names not found: print every topsongs row whose lower-cased title has
# no exact match among the lower-cased allsongs names, and tally those rows
# in `count`.

# Build the lookup set once. `title in allsongsarr` on a NumPy array rescans
# the whole array for every row; a set gives constant-time membership.
# Only real strings are kept, so the NaN placeholders for missing song names
# can never count as a match.
known_titles = {name for name in allsongsarr if isinstance(name, str)}

# .str.lower() leaves a missing title as NaN instead of raising the way
# calling .lower() on a float would; such a row is reported as not found.
titles_lower = topsongs["title"].str.lower()
not_found = topsongs.loc[~titles_lower.isin(known_titles), ["title", "Decade"]]

count = len(not_found)

# Same "<title> <decade>" line per unmatched song, in topsongs row order.
for title, decade in not_found.itertuples(index=False, name=None):
    print(title, decade)

Put Your Head On My Shoulder 1950
Whatever Will Be Will Be (Que Sera Sera) (with Frank DeVol & His Orchestra) - Single Version 1950
Everybody Loves Somebody 1950
Take Good Care Of My Baby - 1990 Remastered 1950
A Teenager In Love 1950
She's Got You - Single Version 1950
Puppy Love - Remix 1950
Travelin' Man - Remastered 1950
Mr. Blue 1950
Itsy Bitsy Teenie Weenie Yellow Polka Dot Bikini 1950
Lonesome Town - 1990 Digital Remaster 1950
16 Candles 1950
If I Give My Heart to You (with The Mellomen) - 78rpm Version 1950
Sh-Boom - Single Version 1950
Sh-Boom (Life Could Be A Dream) 1950
It's All In The Game 1950
You Mean Everything to Me 1950
April Love 1950
Surf City - Remastered 1990/Stereo Remix 1950
I've Grown Accustomed To Her Face - Remastered/1998 1950
Rubber Ball 1950
Chicago - Remastered 1950
But Not For Me 1950
In The Still Of The Night 1950
Only Because 1950
Che La Luna 1950
My Happiness 1950
Brown Eyed Handsome Man 1950
Poetry In Motion 1950
To The Aisle 1950
Tulips From Amsterda

Brother Louie 1980
Running Up That Hill (A Deal With God) 1980
End Of The Line 1980
Every Breath You Take 1980
Up Where We Belong - From "An Officer And A Gentleman" 1980
Woman in Love 1980
Cheri Cheri Lady 1980
Higher Love - Single Version 1980
That's What Friends Are For 1980
Slave To Love 1980
Now And Forever 1980
Big in Japan 1980
Valerie 1980
Blue Monday '88 1980
Nothing's Gonna Change My Love For You 1980
Sledgehammer 1980
Rhythm Of The Night 1980
Jessie's Girl 1980
Sunshine Reggae 1980
Wonderful Life 1980
The Riddle 1980
Papa Don't Preach 1980
Didn't We Almost Have It All 1980
All Right 1980
Biggest Part of Me 1980
Save a Prayer - 2009 Remaster 1980
Baby Come To Me 1980
Fame - Rerecorded 1980
Hard Habit to Break - 2006 Remaster 1980
Rock You Like a Hurricane 1980
When the Children Cry 1980
Is This Love - 2017 Remaster 1980
I Knew You Were Waiting (For Me) 1980
You Keep Me Hangin On 1980
Dancing with Tears in My Eyes 1980
In The Army Now 1980
Here I Go Again - 2017 Remaster 1980


In [7]:
# Number of topsongs titles with no case-insensitive match in allsongs
# (tallied by the lookup loop in the previous cell).
print(count)

490


In [8]:
# https://www.kaggle.com/akiboy96/spotify-dataset