### Cleaning and Processing Data

In [23]:
# Importing required libraries
import pandas as pd
import os
import datetime
import math
import numpy as np

# Reading CSV files
# Both paths are relative to the notebook's working directory.
data = 'data/data.csv'
genres = 'data/data_w_genres.csv'

# `mus_data` is the main table; `genres_data` supplies the 'artists' -> 'genres'
# lookup that is merged into it further down.
mus_data, genres_data = (pd.read_csv(csv_path) for csv_path in (data, genres))

In [24]:
# Cleaning genres column's str values
# The list-like strings carry quotes, brackets and parentheses; strip them so that
# only the comma-separated names remain. `regex` is passed explicitly because the
# default of Series.str.replace changed from True to False in pandas 2.0, which
# would silently turn these character classes into literal (never-matching) text.
genres_data['genres'] = genres_data['genres'].str.replace(r"""["'()\[\]]""", '', regex=True)
genres_data['artists'] = genres_data['artists'].str.replace(r"""["'()*\[\]]""", '', regex=True)
# '$' is swapped for 'S' as a plain character, not as the end-of-string anchor.
genres_data['artists'] = genres_data['artists'].str.replace('$', 'S', regex=False)

In [25]:
# Keeping only selected columns for genres
# Only the merge key ('artists') and the payload ('genres') are needed downstream.
genres_data = genres_data.loc[:, ['artists', 'genres']]

In [26]:
# Cleaning main data column's str values
# Apply the same clean-up as for the genres table so that the 'artists' values of
# both frames stay comparable for the merge. `regex` is explicit because the
# default of Series.str.replace changed from True to False in pandas 2.0.
mus_data['artists'] = mus_data['artists'].str.replace(r"""["'()*\[\]]""", '', regex=True)
# A bare "$" is the end-of-string anchor when treated as a regex; force a literal
# replacement so '$' characters become 'S' and nothing is appended to the names.
mus_data['artists'] = mus_data['artists'].str.replace('$', 'S', regex=False)

In [27]:
# Creating decades column to sort data by 10 years intervals
# Round each year down to the start of its decade (e.g. 1937 -> 1930.0);
# the result is still floating point at this stage.
mus_data['decade'] = np.floor(mus_data['year'].div(10)).mul(10)

In [28]:
# Casting the decade column to the built-in integer type
# (platform integer, not pandas' nullable "Int64"); the floor/multiply step
# above leaves the values as floats.
mus_data = mus_data.assign(decade=mus_data['decade'].astype(int))

In [29]:
# Adding additional artist_song column to get accurate song counts per decade
# The "<artists> - <name>" string is the key used later to drop duplicate tracks.
mus_data = mus_data.assign(
    artist_song=lambda tracks: tracks['artists'] + ' - ' + tracks['name']
)

In [30]:
# Merging dataframes - genres data with main data
# Left join: every track row is kept; tracks whose 'artists' string has no exact
# match in the genres table end up with a missing 'genres' value.
merged_data = pd.merge(mus_data, genres_data, how='left', on='artists')

In [31]:
# Replacing NaN values with empty values after merging
# Assign the filled column back instead of calling fillna(..., inplace=True) on
# the selected column: the in-place form is a chained assignment that warns in
# pandas 2.x and does not modify `merged_data` under Copy-on-Write.
merged_data['genres'] = merged_data['genres'].fillna('')

In [32]:
# Sorting data by duration in order to drop non-music items
# Longest tracks come first; this order also decides which row survives the
# later drop_duplicates(keep="first") step.
merged_data = merged_data.sort_values(by='duration_ms', ascending=False)

In [33]:
# Cleaning merged data, dropping audio books
# A row is removed when it lasts 1,000,000 ms or longer, or when its 'artists'
# value is exactly one of the entries listed below.
audiobook_artists = [
    'Georgette Heyer, Irina Salkow',
    'Трумен Капоте',
    'Seweryn Goszczyński',
    'H.P. Lovecraft',
    'Эрих Мария Ремарк',
    'Эрнест Хемингуэй',
]
too_long = merged_data['duration_ms'] >= 1000000
is_audiobook = merged_data['artists'].isin(audiobook_artists)
# Single in-place drop by index label covering both conditions.
merged_data.drop(merged_data.loc[too_long | is_audiobook].index, inplace=True)

In [34]:
# Reduce release_date to its calendar year.
# Values that cannot be parsed as dates are coerced to NaT and therefore end up
# as missing years.
release_dates = pd.to_datetime(merged_data['release_date'], errors='coerce')
merged_data['release_date'] = release_dates.dt.year

In [35]:
# Keep one row per "artist - song" key (the first one in the current row order)
# and renumber the index from 0.
merged_data = (
    merged_data
    .drop_duplicates(subset=['artist_song'], keep='first')
    .reset_index(drop=True)
)

In [37]:
# Rearranging and renaming columns
# Select the columns by their source name (this fixes the order) and only then
# rename them. Assigning a bare list to `.columns` relabels by position without
# moving any data, and 'decade', 'artist_song' and 'genres' were appended at the
# END of the frame - so a positional relabel would attach the wrong names.
# A missing source column raises KeyError here instead of being mislabelled.
# NOTE(review): 'danceability' and 'energy' are assumed source names - confirm
# against data/data.csv.
column_names = {
    'decade': 'Decade',
    'year': 'Year',
    'artists': 'Artist',
    'name': 'Song',
    'genres': 'Genre',
    'duration_ms': 'Duration_ms',
    'artist_song': 'ArtistSong',
    'release_date': 'Release_Date',
    'valence': 'Valence',
    'acousticness': 'Acousticness',
    'danceability': 'Danceability',
    'energy': 'Energy',
    'explicit': 'Explicit',
    'instrumentalness': 'Instrumentalness',
    'key': 'Key',
    'liveness': 'Liveness',
    'loudness': 'Loudness',
    'mode': 'Mode',
    'popularity': 'Popularity',
    'speechiness': 'Speechiness',
    'tempo': 'Tempo',
    'id': 'ID',
}
merged_data = merged_data[list(column_names)].rename(columns=column_names)
merged_data.shape

(154093, 22)

In [38]:
# Saving master clean data to csv file
# The row index is not written, so the file contains only the data columns.
clean_csv_path = 'data/clean_data_all.csv'
merged_data.to_csv(clean_csv_path, index=False, encoding='utf-8')

### Dividing data into decades to find out top-10 artists

In [60]:
# 1920s decade

mus_1920s = merged_data[merged_data['Decade'] == 1920]
# All 1920s tracks ordered from most to least popular. The name `top10songs20s`
# is reassigned by the 2020s cell further down, so the 1920s ranking is also kept
# under a name that cannot be overwritten.
top10songs1920s = mus_1920s.sort_values(['Popularity'], ascending=False)
top10songs20s = top10songs1920s  # legacy name, kept for existing references
top10songs20s.head(5)

Unnamed: 0,Decade,Release Year,Artist/Band,Song,Genre,Duration_ms,ArtistSong,Release Date,Valence,Acousticness,...,Explicit,Instrumentalness,Key,Liveness,Loudness,Mode,Popularity,Speechiness,Tempo,ID
806,1920,1926,Fats Waller,Ain't Misbehavin',"adult standards, bebop, dixieland, harlem rena...",237773,Fats Waller - Ain't Misbehavin',1926,0.35,0.821,...,0,0.00193,0,0.19,-16.918,0,49,0.0575,98.358,3BFRqZFLSrqtQr6cjHbAxU
1207,1920,1928,Blind Willie Johnson,"Dark Was the Night, Cold Was the Ground","acoustic blues, appalachian folk, blues, count...",198560,"Blind Willie Johnson - Dark Was the Night, Col...",1928,0.2,0.968,...,0,0.0453,9,0.0976,-23.179,1,47,0.044,116.117,2P9nh9pTK96dE0b6NBbTSs
807,1920,1926,Fats Waller,Two Sleepy People,"adult standards, bebop, dixieland, harlem rena...",183560,Fats Waller - Two Sleepy People,1926,0.54,0.725,...,0,6.5e-05,5,0.169,-18.882,1,45,0.048,87.221,0eInL1bLAFdHJDL04R0OCP
222,1920,1923,Bessie Smith,Nobody Knows You When You're Down and Out,"blues, harlem renaissance, jazz blues, traditi...",177133,Bessie Smith - Nobody Knows You When You're Do...,1923,0.211,0.996,...,0,0.00293,4,0.183,-12.033,1,42,0.0535,89.822,2wAfHM7Whz67VFbdanhZlk
1208,1920,1928,Blind Willie Johnson,It's Nobody's Fault but Mine,"acoustic blues, appalachian folk, blues, count...",188533,Blind Willie Johnson - It's Nobody's Fault but...,1928,0.483,0.98,...,0,7.4e-05,3,0.11,-14.01,1,42,0.039,83.901,0EzykG4pYlmmGTihakWGRL


In [61]:
# Ten artists with the most tracks in the 1920s.
# The artist column of the cleaned frame is named 'Artist' (see the renaming
# cell); 'Artist/Band' does not exist and raised KeyError.
a20 = mus_1920s['Artist'].value_counts().sort_values(ascending=False).head(10)
# Left-merging the ten names with the decade frame keeps only their tracks.
# `top10artists20s` is reassigned by the 2020s cell further down, so the result
# is also stored under a collision-free name.
top10artists1920s = pd.DataFrame({'Artist': a20.index}).merge(mus_1920s, on='Artist', how='left')
top10artists20s = top10artists1920s  # legacy name, kept for existing references
top10artists20s['Artist'].value_counts().nlargest(10)

Francisco Canaro                      666
Ignacio Corsini                       405
Francisco Canaro, Charlo              239
Frédéric Chopin, Vladimir Horowitz     82
Fats Waller                            61
Ted Weems & His Orchestra              47
Louis Armstrong & His Hot Five         47
George Olsen                           46
Frédéric Chopin, Arthur Rubinstein     45
Leroy Carr                             40
Name: Artist/Band, dtype: int64

In [17]:
# 1930s decade

# Slice of the cleaned frame holding only tracks whose decade is 1930
mus_1930s = merged_data.loc[merged_data['Decade'].eq(1930)]
# Every 1930s track, most popular first (the preview below shows the top five)
top10songs30s = mus_1930s.sort_values(by=['Popularity'], ascending=False)
top10songs30s.head(5)

Unnamed: 0,decade,year,artists,name,genres,duration_ms,artist_song,release_date,valence,acousticness,...,explicit,instrumentalness,key,liveness,loudness,mode,popularity,speechiness,tempo,id
1606,1930,1930,Joe Quartz,Monster Faladoré,,285962,Joe Quartz - Monster Faladoré,1930,0.377,0.811,...,0,0.0097,6,0.143,-10.494,0,55,0.0641,95.234,2zFXOImEoSnLb5dnPAvVNI
3006,1930,1937,Robert Johnson,Cross Road Blues,"acoustic blues, blues, blues rock, delta blues...",149560,Robert Johnson - Cross Road Blues,1937,0.638,0.93,...,0,0.000261,9,0.102,-11.959,1,54,0.0525,97.159,1TrGdXSgiBm8W68D2K1COG
3407,1930,1939,Glenn Miller,Moonlight Serenade,"adult standards, big band, easy listening, jaz...",202760,Glenn Miller - Moonlight Serenade,1939,0.147,0.862,...,0,0.0998,3,0.146,-11.857,1,54,0.0265,77.873,3ziJFd6JeioC8Xfct0UXpJ
2208,1930,1933,Art Tatum,Tea for Two,"bebop, big band, contemporary post-bop, cool j...",193867,Art Tatum - Tea for Two,1933,0.525,0.991,...,0,0.864,1,0.14,-13.02,1,53,0.0347,166.973,0Otf1ZfYNIjhqFIuJk0fsy
3551,1930,1939,Glenn Miller,In the Mood - Live,"adult standards, big band, easy listening, jaz...",209573,Glenn Miller - In the Mood - Live,1939,0.592,0.824,...,0,0.917,8,0.372,-9.72,1,52,0.0659,175.204,54h1RKrrFJDsNOfhwmqu9o


In [18]:
# Ten artists with the most tracks in the 1930s.
# The artist column of the cleaned frame is named 'Artist' (see the renaming
# cell); the lower-case 'artists' no longer exists and raised KeyError.
a30 = mus_1930s['Artist'].value_counts().sort_values(ascending=False).head(10)
# Left-merging the ten names with the decade frame keeps only their tracks.
top10artists30s = pd.DataFrame({'Artist': a30.index}).merge(mus_1930s, on='Artist', how='left')
top10artists30s['Artist'].value_counts().nlargest(10)

Francisco Canaro, Charlo                210
Sinclair Lewis, Frank Arnold            204
Francisco Canaro                        197
Francisco Canaro, Ernesto Fama          194
Francisco Canaro, Roberto Maida         180
Ignacio Corsini                         138
Umm Kulthum                             115
Franz Joseph Haydn, Pro Arte Quartet    113
Lead Belly                              112
Ernst H. Gombrich, Christoph Waltz      109
Name: artists, dtype: int64

In [19]:
# 1940s decade

# The cleaned frame uses the title-case names 'Decade' and 'Popularity' (see the
# renaming cell); the lower-case names no longer exist and raised KeyError.
mus_1940s = merged_data[merged_data['Decade'] == 1940]
# All 1940s tracks ordered from most to least popular
top10songs40s = mus_1940s.sort_values(['Popularity'], ascending=False)
top10songs40s.head(5)

Unnamed: 0,decade,year,artists,name,genres,duration_ms,artist_song,release_date,valence,acousticness,...,explicit,instrumentalness,key,liveness,loudness,mode,popularity,speechiness,tempo,id
4607,1940,1945,Bing Crosby,It's Beginning To Look Like Christmas,"adult standards, easy listening",166000,Bing Crosby - It's Beginning To Look Like Chri...,1945,0.576,0.77,...,0,0.0,0,0.136,-15.018,0,71,0.0496,96.94,44mYhOVgerj2qPjkGDVA6n
4406,1940,1944,Judy Garland,Have Yourself A Merry Little Christmas,"adult standards, hollywood, lounge, torch song...",162333,Judy Garland - Have Yourself A Merry Little Ch...,1944,0.107,0.97,...,0,0.00458,6,0.26,-14.82,1,70,0.0393,180.887,030mot3ZKR3oskfMsqDB2R
5006,1940,1947,Gene Autry,Here Comes Santa Claus (Right Down Santa Claus...,"adult standards, cowboy western, oklahoma coun...",150267,Gene Autry - Here Comes Santa Claus (Right Dow...,1947,0.976,0.799,...,0,0.0,1,0.275,-11.99,1,70,0.0358,96.628,25leEEaz1gIpp7o21Fqyjo
4606,1940,1945,"Bing Crosby, The Andrews Sisters",Mele Kalikimaka (Merry Christmas),,175333,"Bing Crosby, The Andrews Sisters - Mele Kaliki...",1945,0.62,0.646,...,0,0.0,3,0.284,-12.431,1,69,0.0935,202.481,04vLj9QUXoKdRlsp3gkURo
5407,1940,1949,"Erik Satie, Philippe Entremont",Gymnopédie No. 1,,205867,"Erik Satie, Philippe Entremont - Gymnopédie No. 1",1949,0.354,0.994,...,0,0.937,7,0.0941,-36.856,1,67,0.119,72.765,5NGtFXVpXSvwunEIGeviY3


In [20]:
# Ten artists with the most tracks in the 1940s.
# The artist column of the cleaned frame is named 'Artist' (see the renaming
# cell); the lower-case 'artists' no longer exists and raised KeyError.
a40 = mus_1940s['Artist'].value_counts().sort_values(ascending=False).head(10)
# Left-merging the ten names with the decade frame keeps only their tracks.
top10artists40s = pd.DataFrame({'Artist': a40.index}).merge(mus_1940s, on='Artist', how='left')
top10artists40s['Artist'].value_counts().nlargest(10)

Orchestra Studio 7                              221
Lead Belly                                      153
Frédéric Chopin, Arthur Rubinstein              120
Shamshad Begum                                  118
Giuseppe Verdi, Arturo Toscanini                107
Lata Mangeshkar                                  95
Amirbai Karnataki                                94
Francisco Canaro, Carlos Roldán                  75
Igor Stravinsky, Columbia Symphony Orchestra     73
Geeta Dutt                                       70
Name: artists, dtype: int64

In [21]:
# 1950s decade

# The cleaned frame uses the title-case names 'Decade' and 'Popularity' (see the
# renaming cell); the lower-case names no longer exist and raised KeyError.
mus_1950s = merged_data[merged_data['Decade'] == 1950]
# All 1950s tracks ordered from most to least popular
top10songs50s = mus_1950s.sort_values(['Popularity'], ascending=False)
top10songs50s.head(5)

Unnamed: 0,decade,year,artists,name,genres,duration_ms,artist_song,release_date,valence,acousticness,...,explicit,instrumentalness,key,liveness,loudness,mode,popularity,speechiness,tempo,id
7406,1950,1959,Dean Martin,Let It Snow! Let It Snow! Let It Snow!,"adult standards, easy listening, lounge, vocal...",117147,Dean Martin - Let It Snow! Let It Snow! Let It...,1959,0.701,0.912,...,0,2e-06,1,0.175,-14.014,1,81,0.0351,134.009,2uFaJJtFpPDc5Pa95XzTvg
7409,1950,1959,Chuck Berry,Johnny B. Goode,"blues rock, classic rock, rock, rock-and-roll,...",161560,Chuck Berry - Johnny B. Goode,1959,0.969,0.741,...,0,6.1e-05,10,0.307,-9.129,1,75,0.0743,167.983,2QfiRTz5Yc8DdShCxG1tB2
7006,1950,1957,Elvis Presley,Blue Christmas,"rock-and-roll, rockabilly",129173,Elvis Presley - Blue Christmas,1957,0.58,0.9,...,0,0.00104,4,0.145,-16.661,1,74,0.0425,94.518,3QiAAp20rPC3dcAtKtMaqQ
7007,1950,1957,"Ella Fitzgerald, Louis Armstrong",Dream A Little Dream Of Me - Single Version,,185160,"Ella Fitzgerald, Louis Armstrong - Dream A Lit...",1957,0.394,0.913,...,0,0.0,0,0.191,-17.042,1,72,0.101,76.497,78MI7mu1LV1k4IA2HzKmHe
7206,1950,1958,Frank Sinatra,Come Fly With Me - Remastered,"adult standards, easy listening, lounge",199093,Frank Sinatra - Come Fly With Me - Remastered,1958,0.493,0.845,...,0,0.0,6,0.165,-11.376,1,70,0.042,67.008,4hHbeIIKO5Y5uLyIEbY9Gn


In [22]:
# Ten artists with the most tracks in the 1950s.
# The artist column of the cleaned frame is named 'Artist' (see the renaming
# cell); the lower-case 'artists' no longer exists and raised KeyError.
a50 = mus_1950s['Artist'].value_counts().sort_values(ascending=False).head(10)
# Left-merging the ten names with the decade frame keeps only their tracks.
top10artists50s = pd.DataFrame({'Artist': a50.index}).merge(mus_1950s, on='Artist', how='left')
top10artists50s['Artist'].value_counts().nlargest(10)

Ella Fitzgerald                       243
Dean Martin                           235
Miles Davis                           205
Lata Mangeshkar                       202
Unspecified                           189
Frank Sinatra                         179
Oscar Peterson                        158
Johann Sebastian Bach, Glenn Gould    148
Billie Holiday                        141
Duke Ellington                        112
Name: artists, dtype: int64

In [23]:
# 1960s decade

# The cleaned frame uses the title-case names 'Decade' and 'Popularity' (see the
# renaming cell); the lower-case names no longer exist and raised KeyError.
mus_1960s = merged_data[merged_data['Decade'] == 1960]
# All 1960s tracks ordered from most to least popular
top10songs60s = mus_1960s.sort_values(['Popularity'], ascending=False)
top10songs60s.head(5)

Unnamed: 0,decade,year,artists,name,genres,duration_ms,artist_song,release_date,valence,acousticness,...,explicit,instrumentalness,key,liveness,loudness,mode,popularity,speechiness,tempo,id
8406,1960,1964,Brenda Lee,Rockin' Around The Christmas Tree,"adult standards, brill building pop",126267,Brenda Lee - Rockin' Around The Christmas Tree,1964,0.898,0.614,...,0,0.0,8,0.505,-8.749,1,85,0.0502,67.196,2EjXfH91m7f8HiJN1yQg97
8206,1960,1963,Andy Williams,It's the Most Wonderful Time of the Year,"adult standards, brill building pop, easy list...",151933,Andy Williams - It's the Most Wonderful Time o...,1963,0.776,0.766,...,0,0.0,7,0.117,-8.435,1,83,0.0369,201.629,5hslUAKq9I9CG2bAulFkHN
9406,1960,1969,Creedence Clearwater Revival,Fortunate Son,"album rock, classic rock, country rock, rock, ...",140773,Creedence Clearwater Revival - Fortunate Son,1969,0.663,0.201,...,0,0.00806,0,0.152,-7.516,1,81,0.0374,132.77,4BP3uh0hFLFRb5cjsgLqDh
9007,1960,1967,"Marvin Gaye, Tammi Terrell",Ain't No Mountain High Enough,,151667,"Marvin Gaye, Tammi Terrell - Ain't No Mountain...",1967,0.8,0.43,...,0,0.0,7,0.184,-10.87,1,80,0.032,129.991,7tqhbajSfrz2F7E1Z75ASX
8207,1960,1963,The Ronettes,Sleigh Ride,"brill building pop, classic girl group, motown...",181267,The Ronettes - Sleigh Ride,1963,0.853,0.403,...,0,2e-06,2,0.316,-7.013,1,80,0.0287,91.751,5ASM6Qjiav2xPe7gRkQMsQ


In [24]:
# Ten artists with the most tracks in the 1960s.
# The artist column of the cleaned frame is named 'Artist' (see the renaming
# cell); the lower-case 'artists' no longer exists and raised KeyError.
a60 = mus_1960s['Artist'].value_counts().sort_values(ascending=False).head(10)
# Left-merging the ten names with the decade frame keeps only their tracks.
top10artists60s = pd.DataFrame({'Artist': a60.index}).merge(mus_1960s, on='Artist', how='left')
top10artists60s['Artist'].value_counts().nlargest(10)

The Beach Boys        356
Frank Sinatra         299
Bob Dylan             252
Elvis Presley         236
The Beatles           220
Johnny Cash           205
The Rolling Stones    171
Nina Simone           160
Sam Cooke             147
The Kinks             147
Name: artists, dtype: int64

In [25]:
# 1970s decade

# The cleaned frame uses the title-case names 'Decade' and 'Popularity' (see the
# renaming cell); the lower-case names no longer exist and raised KeyError.
mus_1970s = merged_data[merged_data['Decade'] == 1970]
# All 1970s tracks ordered from most to least popular
top10songs70s = mus_1970s.sort_values(['Popularity'], ascending=False)
top10songs70s.head(5)

Unnamed: 0,decade,year,artists,name,genres,duration_ms,artist_song,release_date,valence,acousticness,...,explicit,instrumentalness,key,liveness,loudness,mode,popularity,speechiness,tempo,id
11406,1970,1979,AC/DC,Highway to Hell,"album rock, australian rock, hard rock, rock",208400,AC/DC - Highway to Hell,1979,0.423,0.061,...,0,0.00158,6,0.156,-4.793,0,84,0.133,115.728,2zYzyRzz6pRmhPzyfMEC8s
10806,1970,1976,Eagles,Hotel California - 2013 Remaster,"album rock, classic rock, country rock, folk r...",391376,Eagles - Hotel California - 2013 Remaster,1976,0.609,0.00574,...,0,0.000494,2,0.0575,-9.484,1,83,0.027,147.125,40riOy7x9W7GXjyGp4pjAv
10006,1970,1972,Elton John,"Rocket Man (I Think It's Going To Be A Long, L...","glam rock, mellow gold, piano rock, soft rock",281613,Elton John - Rocket Man (I Think It's Going To...,1972,0.341,0.432,...,0,6e-06,10,0.0925,-9.119,1,82,0.0286,136.571,3gdewACMIVMEWVbyb8O9sY
10406,1970,1974,Lynyrd Skynyrd,Sweet Home Alabama,"album rock, blues rock, classic rock, country ...",283800,Lynyrd Skynyrd - Sweet Home Alabama,1974,0.886,0.181,...,0,0.000331,7,0.0863,-12.145,1,82,0.0255,97.798,7e89621JPkKaeDSTQ3avtg
11008,1970,1977,Electric Light Orchestra,Mr. Blue Sky,"album rock, art rock, beatlesque, bow pop, cla...",303373,Electric Light Orchestra - Mr. Blue Sky,1977,0.477,0.652,...,0,4e-06,10,0.248,-10.054,1,81,0.0328,177.784,2RlgNHKcydI9sayD2Df2xp


In [26]:
# Ten artists with the most tracks in the 1970s.
# The artist column of the cleaned frame is named 'Artist' (see the renaming
# cell); the lower-case 'artists' no longer exists and raised KeyError.
a70 = mus_1970s['Artist'].value_counts().sort_values(ascending=False).head(10)
# Left-merging the ten names with the decade frame keeps only their tracks.
top10artists70s = pd.DataFrame({'Artist': a70.index}).merge(mus_1970s, on='Artist', how='left')
top10artists70s['Artist'].value_counts().nlargest(10)

Queen                       181
Fleetwood Mac               173
Bob Dylan                   168
Led Zeppelin                154
Bob Marley & The Wailers    148
Elton John                  136
The Rolling Stones          122
Elvis Presley               113
David Bowie                 113
Marvin Gaye                 111
Name: artists, dtype: int64

In [27]:
# 1980s decade

# The cleaned frame uses the title-case names 'Decade' and 'Popularity' (see the
# renaming cell); the lower-case names no longer exist and raised KeyError.
mus_1980s = merged_data[merged_data['Decade'] == 1980]
# All 1980s tracks ordered from most to least popular
top10songs80s = mus_1980s.sort_values(['Popularity'], ascending=False)
top10songs80s.head(5)

Unnamed: 0,decade,year,artists,name,genres,duration_ms,artist_song,release_date,valence,acousticness,...,explicit,instrumentalness,key,liveness,loudness,mode,popularity,speechiness,tempo,id
11606,1980,1980,AC/DC,Back In Black,"album rock, australian rock, hard rock, rock",255493,AC/DC - Back In Black,1980,0.763,0.011,...,0,0.00965,9,0.0828,-5.678,1,84,0.047,188.386,08mG3Y1vljYA6bvDt4Wqkj
12208,1980,1983,The Police,Every Breath You Take,"album rock, art rock, classic rock, dance rock...",253920,The Police - Every Breath You Take,1983,0.74,0.543,...,0,0.00294,1,0.0714,-9.796,1,84,0.0348,117.401,1JSTJqkT5qHq8MDJnJbRE1
12006,1980,1982,TOTO,Africa,"album rock, classic rock, mellow gold, rock, s...",295893,TOTO - Africa,1982,0.732,0.257,...,0,8e-05,9,0.0481,-18.064,1,83,0.0323,92.718,2374M0fQpWi3dLnB54qaLX
13006,1980,1987,Whitney Houston,I Wanna Dance with Somebody (Who Loves Me),"dance pop, pop, urban contemporary",291293,Whitney Houston - I Wanna Dance with Somebody ...,1987,0.867,0.207,...,0,0.000307,1,0.0888,-8.824,1,82,0.0453,118.818,2tUBqZG2AbRi7Q0BIrVrEj
12406,1980,1984,Bryan Adams,Summer Of '69,"album rock, canadian pop, canadian singer-song...",216053,Bryan Adams - Summer Of '69,1984,0.774,0.0183,...,0,0.0,2,0.0732,-6.205,1,82,0.0386,139.131,0GONea6G2XdnHWjNZd6zt3


In [28]:
# Ten artists with the most tracks in the 1980s.
# The artist column of the cleaned frame is named 'Artist' (see the renaming
# cell); the lower-case 'artists' no longer exists and raised KeyError.
a80 = mus_1980s['Artist'].value_counts().sort_values(ascending=False).head(10)
# Left-merging the ten names with the decade frame keeps only their tracks.
top10artists80s = pd.DataFrame({'Artist': a80.index}).merge(mus_1980s, on='Artist', how='left')
top10artists80s['Artist'].value_counts().nlargest(10)

Queen                115
U2                   111
Talking Heads        109
The Smiths           104
Bruce Springsteen    100
Depeche Mode          96
Metallica             89
The Cure              87
Prince                86
R.E.M.                84
Name: artists, dtype: int64

In [29]:
# 1990s decade

# The cleaned frame uses the title-case names 'Decade' and 'Popularity' (see the
# renaming cell); the lower-case names no longer exist and raised KeyError.
mus_1990s = merged_data[merged_data['Decade'] == 1990]
# All 1990s tracks ordered from most to least popular
top10songs90s = mus_1990s.sort_values(['Popularity'], ascending=False)
top10songs90s.head(5)

Unnamed: 0,decade,year,artists,name,genres,duration_ms,artist_song,release_date,valence,acousticness,...,explicit,instrumentalness,key,liveness,loudness,mode,popularity,speechiness,tempo,id
14406,1990,1994,Mariah Carey,All I Want for Christmas Is You,"dance pop, pop, r&b, urban contemporary",241107,Mariah Carey - All I Want for Christmas Is You,1994,0.35,0.164,...,0,0.0,7,0.0708,-7.463,1,88,0.0384,150.273,0bYg9bo50gSsH3LtXe2SQn
13606,1990,1990,AC/DC,Thunderstruck,"album rock, australian rock, hard rock, rock",292880,AC/DC - Thunderstruck,1990,0.259,0.000147,...,0,0.0117,4,0.217,-5.175,1,83,0.0364,133.52,57bgtoPSgt236HzfBOd8kj
13812,1990,1991,R.E.M.,Losing My Religion,"alternative rock, classic rock, permanent wave...",268427,R.E.M. - Losing My Religion,1991,0.803,0.179,...,0,1e-06,9,0.0987,-5.051,0,82,0.0295,125.639,31AOj9sFz2gM0O3hMARRBx
14206,1990,1993,Radiohead,Creep,"alternative rock, art rock, melancholia, oxfor...",238640,Radiohead - Creep,1993,0.104,0.0102,...,1,0.000141,7,0.129,-9.935,1,82,0.0369,91.841,6b2oQwSGFkzsMtQruIWm2p
15410,1990,1999,Red Hot Chili Peppers,Californication,"alternative rock, funk metal, funk rock, perma...",329733,Red Hot Chili Peppers - Californication,1999,0.328,0.0021,...,0,0.00165,9,0.127,-2.788,0,81,0.027,96.483,48UPSzbZjgc449aqz8bxox


In [30]:
# Ten artists with the most tracks in the 1990s.
# The artist column of the cleaned frame is named 'Artist' (see the renaming
# cell); the lower-case 'artists' no longer exists and raised KeyError.
a90 = mus_1990s['Artist'].value_counts().sort_values(ascending=False).head(10)
# Left-merging the ten names with the decade frame keeps only their tracks.
top10artists90s = pd.DataFrame({'Artist': a90.index}).merge(mus_1990s, on='Artist', how='left')
top10artists90s['Artist'].value_counts().nlargest(10)

Metallica                89
Joan Sebastian           85
Sublime                  82
Green Day                78
The Smashing Pumpkins    68
2Pac                     68
Mariah Carey             66
Los Tigres Del Norte     65
Luis Miguel              65
Alice In Chains          65
Name: artists, dtype: int64

In [31]:
# 2000s decade

# The cleaned frame uses the title-case names 'Decade' and 'Popularity' (see the
# renaming cell); the lower-case names no longer exist and raised KeyError.
mus_2000s = merged_data[merged_data['Decade'] == 2000]
# All 2000s tracks ordered from most to least popular
top10songs00s = mus_2000s.sort_values(['Popularity'], ascending=False)
top10songs00s.head(5)

Unnamed: 0,decade,year,artists,name,genres,duration_ms,artist_song,release_date,valence,acousticness,...,explicit,instrumentalness,key,liveness,loudness,mode,popularity,speechiness,tempo,id
15608,2000,2000,Linkin Park,In the End,"alternative metal, nu metal, post-grunge, rap ...",216880,Linkin Park - In the End,2000,0.4,0.00958,...,0,0.0,3,0.209,-5.87,0,84,0.0584,105.143,60a0Rd6pjrkxjPbaKzXjfq
17207,2000,2008,Jason Mraz,I'm Yours,"acoustic pop, neo mellow, pop, pop rock",242187,Jason Mraz - I'm Yours,2008,0.718,0.595,...,0,0.0,11,0.105,-8.322,1,83,0.0468,150.953,1EzrEOXmMH3G43AXT1y7pA
16611,2000,2005,"Shakira, Wyclef Jean",Hips Don't Lie (feat. Wyclef Jean),,218093,"Shakira, Wyclef Jean - Hips Don't Lie (feat. W...",2005,0.756,0.284,...,0,0.0,10,0.405,-5.892,0,83,0.0712,100.024,3ZFTkvIE7kyPt6Nu3PEa7V
16806,2000,2006,Wham!,Last Christmas - Remastered,"dance pop, dance rock, disco, europop, new rom...",267160,Wham! - Last Christmas - Remastered,2006,0.861,0.262,...,0,0.0,11,0.221,-7.964,0,83,0.0282,106.856,6wn2nmFn3wDuiMldRiuRuL
16206,2000,2003,Linkin Park,Numb,"alternative metal, nu metal, post-grunge, rap ...",185587,Linkin Park - Numb,2003,0.243,0.0046,...,0,0.0,9,0.639,-4.153,1,82,0.0381,110.018,2nLtzopw4rPReszdYBJU6h


In [32]:
# Ten artists with the most tracks in the 2000s.
# The artist column of the cleaned frame is named 'Artist' (see the renaming
# cell); the lower-case 'artists' no longer exists and raised KeyError.
b00 = mus_2000s['Artist'].value_counts().sort_values(ascending=False).head(10)
# Left-merging the ten names with the decade frame keeps only their tracks.
top10artists00s = pd.DataFrame({'Artist': b00.index}).merge(mus_2000s, on='Artist', how='left')
top10artists00s['Artist'].value_counts().nlargest(10)

John Mayer               69
Eminem                   68
John Williams            61
Jack Johnson             58
System Of A Down         55
Fall Out Boy             55
Radiohead                51
Red Hot Chili Peppers    50
Jay Chou                 48
Breaking Benjamin        47
Name: artists, dtype: int64

In [33]:
# 2010s decade

# The cleaned frame uses the title-case names 'Decade' and 'Popularity' (see the
# renaming cell); the lower-case names no longer exist and raised KeyError.
mus_2010s = merged_data[merged_data['Decade'] == 2010]
# All 2010s tracks ordered from most to least popular
top10songs10s = mus_2010s.sort_values(['Popularity'], ascending=False)
top10songs10s.head(5)

Unnamed: 0,decade,year,artists,name,genres,duration_ms,artist_song,release_date,valence,acousticness,...,explicit,instrumentalness,key,liveness,loudness,mode,popularity,speechiness,tempo,id
19407,2010,2019,Harry Styles,Watermelon Sugar,"pop, post-teen pop",174000,Harry Styles - Watermelon Sugar,2019,0.557,0.122,...,0,0.0,0,0.335,-4.209,1,94,0.0465,95.39,6UelLqGlWMcVH1E5c4H7lY
19426,2010,2019,"Topic, A7S",Breaking Me,,166794,"Topic, A7S - Breaking Me",2019,0.664,0.223,...,0,0.0,8,0.129,-5.652,0,92,0.218,122.031,3H7ihDc1dqLriiWXwsc2po
19406,2010,2019,Clairo,Sofia,"bedroom pop, boston indie",188387,Clairo - Sofia,2019,0.641,0.598,...,0,0.00372,0,0.231,-9.805,1,90,0.039,112.997,7B3z0ySL9Rr0XvZEAjWZzM
19210,2010,2018,"Billie Eilish, Khalid",lovely (with Khalid),,200186,"Billie Eilish, Khalid - lovely (with Khalid)",2018,0.12,0.934,...,0,0.0,4,0.095,-10.109,0,89,0.0333,115.284,0u2P5u6lvoDfwTYjAADbn4
19417,2010,2019,Harry Styles,Golden,"pop, post-teen pop",208907,Harry Styles - Golden,2019,0.254,0.21,...,0,0.000131,4,0.131,-5.257,0,89,0.0557,139.863,45S5WTQEGOB1VHr1Q4FuPl


In [34]:
# Ten artists with the most tracks in the 2010s.
# The artist column of the cleaned frame is named 'Artist' (see the renaming
# cell); the lower-case 'artists' no longer exists and raised KeyError.
b10 = mus_2010s['Artist'].value_counts().sort_values(ascending=False).head(10)
# Left-merging the ten names with the decade frame keeps only their tracks.
top10artists10s = pd.DataFrame({'Artist': b10.index}).merge(mus_2010s, on='Artist', how='left')
top10artists10s['Artist'].value_counts().nlargest(10)

One Direction    95
BTS              93
Drake            92
Taylor Swift     91
Mac Miller       88
Lana Del Rey     66
The Weeknd       63
Ariana Grande    57
J. Cole          55
SuicideBoyS      49
Name: artists, dtype: int64

In [35]:
# 2020s decade

# The cleaned frame uses the title-case names 'Decade' and 'Popularity' (see the
# renaming cell); the lower-case names no longer exist and raised KeyError.
mus_2020s = merged_data[merged_data['Decade'] == 2020]
# All 2020s tracks ordered from most to least popular, stored under an
# unambiguous name.
top10songs2020s = mus_2020s.sort_values(['Popularity'], ascending=False)
# NOTE: `top10songs20s` was first assigned in the 1920s cell; rebinding it here
# replaces that frame. The legacy binding is kept so existing references to the
# 2020s ranking keep working - prefer `top10songs2020s` in new code.
top10songs20s = top10songs2020s
top10songs2020s.head(5)

Unnamed: 0,decade,year,artists,name,genres,duration_ms,artist_song,release_date,valence,acousticness,...,explicit,instrumentalness,key,liveness,loudness,mode,popularity,speechiness,tempo,id
19611,2020,2020,"Bad Bunny, Jhay Cortez",Dakiti,,205090,"Bad Bunny, Jhay Cortez - Dakiti",2020,0.145,0.401,...,1,5.2e-05,4,0.113,-10.059,0,100,0.0544,109.928,47EiUVwUp4C9fGccaPuUCS
19606,2020,2020,"24kGoldn, iann dior",Mood (feat. iann dior),,140526,"24kGoldn, iann dior - Mood (feat. iann dior)",2020,0.756,0.221,...,1,0.0,7,0.272,-3.558,0,99,0.0369,90.989,3tjFYV6RSFtuktYl3ZtYcq
19610,2020,2020,Ariana Grande,positions,"pop, post-teen pop",172325,Ariana Grande - positions,2020,0.682,0.468,...,1,0.0,0,0.0931,-4.771,1,96,0.0878,144.015,35mvY5S1H3J2QZyna3TFe0
19616,2020,2020,The Weeknd,Blinding Lights,"canadian contemporary r&b, canadian pop, pop",200040,The Weeknd - Blinding Lights,2020,0.334,0.00146,...,0,9.5e-05,1,0.0897,-5.934,1,96,0.0598,171.005,0VjIjW4GlUZAMYd2vXMi3b
19608,2020,2020,"Cardi B, Megan Thee Stallion",WAP (feat. Megan Thee Stallion),,187541,"Cardi B, Megan Thee Stallion - WAP (feat. Mega...",2020,0.357,0.0194,...,1,0.0,1,0.0824,-7.509,1,96,0.375,133.073,4Oun2ylbjFKMPTiaSbbCih


In [36]:
# Ten artists with the most tracks in the 2020s.
# The artist column of the cleaned frame is named 'Artist' (see the renaming
# cell); the lower-case 'artists' no longer exists and raised KeyError.
b20 = mus_2020s['Artist'].value_counts().sort_values(ascending=False).head(10)
# Left-merging the ten names with the decade frame keeps only their tracks.
top10artists2020s = pd.DataFrame({'Artist': b20.index}).merge(mus_2020s, on='Artist', how='left')
# NOTE: `top10artists20s` was first assigned in the 1920s cell; rebinding it here
# replaces that frame. The legacy binding is kept so existing references to the
# 2020s result keep working - prefer `top10artists2020s` in new code.
top10artists20s = top10artists2020s
top10artists2020s['Artist'].value_counts().nlargest(10)

YoungBoy Never Broke Again    32
Future, Lil Uzi Vert          23
BTS                           22
Taylor Swift                  16
Juice WRLD                    16
The Weeknd                    15
Lil Uzi Vert                  15
The Kid LAROI                 14
Chris Stapleton               14
J Balvin                      13
Name: artists, dtype: int64

In [37]:
## All Decades Data Together

# NOTE: this cell is disabled. If it is re-enabled, be aware that
# DataFrame.append was removed in pandas 2.0; pd.concat([mus_1920s, ...]) is the
# supported way to stack the per-decade frames.
# all_mus = mus_1920s.append([mus_1930s, mus_1940s, mus_1950s, mus_1960s, mus_1970s, mus_1980s, mus_1990s, mus_2000s, mus_2010s, mus_2020s])
# all_mus.to_csv('data/clean_data_decade.csv', encoding='utf-8', index=False)

In [None]:
# all_mus.shape