# Import Dependencies 

In [1]:
from scipy.stats import linregress
import matplotlib.pyplot as plt
from scipy import stats
import pandas as pd
import numpy as np
import requests
import time
import json

## Load the Data

In [2]:
# read the saved data:
music_df = pd.read_csv("../Output_Data/all_data_genre.csv", encoding = "Latin-1")
print(len(music_df))
music_df.head()

817


Unnamed: 0,track_name,artist(s)_name,artist_count,released_year,released_month,released_day,in_spotify_playlists,in_spotify_charts,streams,in_apple_playlists,...,mode,danceability_%,valence_%,energy_%,acousticness_%,instrumentalness_%,liveness_%,speechiness_%,Deezer Album ID,Deezer Genre
0,Seven (feat. Latto) (Explicit Ver.),"Latto, Jung Kook",2,2023,7,14,553,147,141381703,43,...,Major,80,89,83,31,0,8,4,463574485,Asian Music
1,LALA,Myke Towers,1,2023,3,23,1474,48,133716286,48,...,Major,71,61,74,7,0,10,4,410083687,Rap/Hip Hop
2,vampire,Olivia Rodrigo,1,2023,6,30,1397,113,140003974,94,...,Major,51,32,53,17,0,31,6,484372295,Pop
3,Cruel Summer,Taylor Swift,1,2019,8,23,7858,100,800840817,116,...,Major,55,58,72,11,0,11,15,108447472,Pop
4,WHERE SHE GOES,Bad Bunny,1,2023,5,18,3133,50,303236322,84,...,Minor,65,23,80,14,63,11,6,442984025,Latin Music


## Remove Duplicated Rows and Unwanted Columns

In [3]:
# Using Pandas remove duplicated Rows
unduplicated_music_df = music_df.drop_duplicates(subset=["track_name", "artist(s)_name",
                                                         "released_year", "released_month", 
                                                         "Deezer Album ID"], keep=False,\
                                                         inplace=False)
len(unduplicated_music_df)

813

In [4]:
# Remove Unwanted Columns:
final_music_df = unduplicated_music_df.drop(columns=["in_shazam_charts", "released_day",
                                                      "mode", "danceability_%", "valence_%",
                                                      "energy_%", "acousticness_%",
                                                      "instrumentalness_%", "liveness_%",
                                                      "speechiness_%", "Deezer Album ID",
                                                      "in_spotify_charts", "in_apple_charts",
                                                      "in_deezer_charts"])
# final_music_df.columns
final_music_df.head(1)

Unnamed: 0,track_name,artist(s)_name,artist_count,released_year,released_month,in_spotify_playlists,streams,in_apple_playlists,in_deezer_playlists,bpm,key,Deezer Genre
0,Seven (feat. Latto) (Explicit Ver.),"Latto, Jung Kook",2,2023,7,553,141381703,43,45,125,B,Asian Music


### Rename Columns Then Remove Empty Cells

In [5]:
# First, using pandas Rename the columns
final_music_df = final_music_df.rename(columns={"track_name": "Track Name", "artist(s)_name": "Artist(s) Name",
                                                "artist_count": "Artist Count",
                                                "released_year": "Released Year", "released_month": "Released Month", 
                                                "in_spotify_playlists": "In Spotify Playlists",
                                                "streams": "Spotify Streams",
                                                "in_apple_playlists": "In Apple Playlists", 
                                                "in_deezer_playlists": "In Deezer Playlists",
                                                "bpm": "BPM", "key": "Key"})

# Now remove all empty cells - NOTE: most empty cells are in the "Deezer Genre" column
cleaned_final_music_df = final_music_df.dropna()

print(len(cleaned_final_music_df))
cleaned_final_music_df.head(3)

695


Unnamed: 0,Track Name,Artist(s) Name,Artist Count,Released Year,Released Month,In Spotify Playlists,Spotify Streams,In Apple Playlists,In Deezer Playlists,BPM,Key,Deezer Genre
0,Seven (feat. Latto) (Explicit Ver.),"Latto, Jung Kook",2,2023,7,553,141381703,43,45,125,B,Asian Music
1,LALA,Myke Towers,1,2023,3,1474,133716286,48,58,92,C#,Rap/Hip Hop
2,vampire,Olivia Rodrigo,1,2023,6,1397,140003974,94,91,138,F,Pop


In [6]:
# The Track Name ""Love Grows (Where My Rosemary Goes)"" has an error in the 
# "Spotify Streams" Column so will remove it using drop by index.

# drop by index
cleaned_final_music_df = cleaned_final_music_df.drop([478])
len(cleaned_final_music_df)

694

In [7]:
print(cleaned_final_music_df[["Spotify Streams", "In Deezer Playlists"]].dtypes)

print("-------------------------------------------")

# Convert the values in the "Spotify Streams" AND "In Deezer Playlists" Columns to integer
# First convert the Spotify Streams Column
cleaned_final_music_df["Spotify Streams"] = cleaned_final_music_df["Spotify Streams"].astype("int64")

# For the "Deezer Playlist" column we have to remove the "," then convert the string to an interger.
cleaned_final_music_df["In Deezer Playlists"] = cleaned_final_music_df["In Deezer Playlists"].str.replace(",", "")
cleaned_final_music_df["In Deezer Playlists"] = cleaned_final_music_df["In Deezer Playlists"].astype(int) 

print(cleaned_final_music_df[["Spotify Streams", "In Deezer Playlists"]].dtypes)

Spotify Streams        object
In Deezer Playlists    object
dtype: object
-------------------------------------------
Spotify Streams        int64
In Deezer Playlists    int32
dtype: object


In [8]:
# export to csv:
cleaned_final_music_df.to_csv("../Output_Data/cleaned_song_data.csv", encoding = "Latin-1", index=False, header=True)

<h1><center>2023 Top Songs Released in 2023 - 2022 - and Prior</center></h1>

In [9]:
# Define a function that will highlight the background of a specified column.
def highlighting(inp_var, color='lightblue'):
    return [f'background-color: {color}' for i in inp_var]

## Top Songs Released in 2023: ____________________________________________________________________________

In [10]:
# Songs that were released in 2023:
artists_23_df = cleaned_final_music_df[cleaned_final_music_df["Artist Count"] == 1]
songs_23_df = artists_23_df[artists_23_df["Released Year"] == 2023] 
#                         and cleaned_final_music_df[cleaned_final_music_df["Artist Count"] == 1]
#--------------------------------------------------------------------------------------------------------------------------
spotify_top_songs_all = []

### `Top Songs in 2023 Spotify`

In [11]:
# Top 3 songs in '23 based on Spotify streams

# Sort the "streams" values in DESCENDING order.
top_23_streams_df = songs_23_df.sort_values(by="Spotify Streams", ascending=False) 

# Format the Streams Column for better readability.
top_23_streams_df["Spotify Streams"] = top_23_streams_df["Spotify Streams"].map("{:,}".format)

# Display the Specified Columns and Apply the highlight format:
top_23_streams_df = top_23_streams_df[["Track Name", "Artist(s) Name", "Released Year", \
                  "Released Month", "Spotify Streams", "BPM", "Key", "Deezer Genre"]].\
                  head(3).style.apply(highlighting, axis=0, subset=["Spotify Streams"])
top_23_streams_df

Unnamed: 0,Track Name,Artist(s) Name,Released Year,Released Month,Spotify Streams,BPM,Key,Deezer Genre
14,Cupid - Twin Ver.,Fifty Fifty,2023,2,496795686,120,B,Asian Music
115,OMG,NewJeans,2023,1,430977451,127,A,Asian Music
24,Last Night,Morgan Wallen,2023,1,429829812,204,F#,Country


In [12]:
# Top songs in '23 based on Spotify playlists

# sort the "in_spotify_playlists" in DESCENDING order.
top_23_Spotify_playlists = songs_23_df.sort_values(by="In Spotify Playlists", ascending=False) 

# Format the Column for better readability.
top_23_Spotify_playlists["In Spotify Playlists"] = top_23_Spotify_playlists["In Spotify Playlists"]\
                                                                                    .map("{:,}".format)

top_23_Spotify_playlists[["Track Name", "Artist(s) Name", "Released Year", \
                  "Released Month", "In Spotify Playlists", "BPM", "Key", "Deezer Genre"]].head(3)\
                  .style.apply(highlighting, axis=0, subset=["In Spotify Playlists"])


Unnamed: 0,Track Name,Artist(s) Name,Released Year,Released Month,In Spotify Playlists,BPM,Key,Deezer Genre
12,Daylight,David Kushner,2023,4,3528,130,D,Pop
4,WHERE SHE GOES,Bad Bunny,2023,5,3133,144,A,Latin Music
25,Dance The Night (From Barbie The Album),Dua Lipa,2023,5,2988,110,B,Pop


### `Top Songs in 2023 Apple`

In [13]:
# Top songs in 23 based on Apple playlists

# sort the "in_apple_playlists" in DESCENDING order.
top_23_Apple_playlists = songs_23_df.sort_values(by="In Apple Playlists", ascending=False) 

top_23_Apple_playlists[["Track Name", "Artist(s) Name", "Released Year", \
                      "Released Month", "In Apple Playlists", "BPM", "Key", "Deezer Genre"]].head(3)\
                      .style.apply(highlighting, axis=0, subset=["In Apple Playlists"])

Unnamed: 0,Track Name,Artist(s) Name,Released Year,Released Month,In Apple Playlists,BPM,Key,Deezer Genre
183,Eyes Closed,Ed Sheeran,2023,3,116,107,D,Pop
332,LLYLM,ROSALï¿½,2023,1,105,170,F#,Pop
2,vampire,Olivia Rodrigo,2023,6,94,138,F,Pop


### `Top Songs in 2023 Deezer`

In [14]:
# Top songs in 23 based on Deezer playlists

# sort the "in_deezer_playlists" in DESCING order.
top_23_Deezer_playlists = songs_23_df.sort_values(by="In Deezer Playlists", ascending=False)

top_23_Deezer_playlists[["Track Name", "Artist(s) Name", "Released Year", \
                      "Released Month", "In Deezer Playlists", "BPM", "Key", "Deezer Genre"]].head(3)\
                      .style.apply(highlighting, axis=0, subset=["In Deezer Playlists"])

Unnamed: 0,Track Name,Artist(s) Name,Released Year,Released Month,In Deezer Playlists,BPM,Key,Deezer Genre
12,Daylight,David Kushner,2023,4,182,130,D,Pop
102,Tattoo,Loreen,2023,2,145,150,D#,Pop
25,Dance The Night (From Barbie The Album),Dua Lipa,2023,5,143,110,B,Pop


## Top Songs Released in 2022: ____________________________________________________________________________

In [15]:
# Songs that were released in 2022:
artists_22_df = cleaned_final_music_df[cleaned_final_music_df["Artist Count"] == 1]
songs_22_df = artists_22_df[artists_22_df["Released Year"] == 2022]

### `Top Songs in 2022 Spotify`

In [16]:
# Top songs in '22

# Sort the "streams" values in DESCENDING order.
top_22_streams_df = songs_22_df.sort_values(by="Spotify Streams", ascending=False) 

# Format the Streams Column for better readability.
top_22_streams_df["Spotify Streams"] = top_22_streams_df["Spotify Streams"].map("{:,}".format)

# Display the Specified Columns and Apply the highlight format:
top_22_streams_df[["Track Name", "Artist(s) Name", "Released Year", \
                  "Released Month", "Spotify Streams", "BPM", "Key", "Deezer Genre"]].\
                  head(3).style.apply(highlighting, axis=0, subset=["Spotify Streams"])

Unnamed: 0,Track Name,Artist(s) Name,Released Year,Released Month,Spotify Streams,BPM,Key,Deezer Genre
161,Titi Me Preguntï¿,Bad Bunny,2022,5,1264310836,107,F,Latin Music
48,La Bachata,Manuel Turizo,2022,5,1214083358,125,G,Rap/Hip Hop
13,Kill Bill,SZA,2022,12,1163093654,89,G#,R&B


In [17]:
# Top songs in '22 based on Spotify playlists

# sort the "in_spotify_playlists" in DESCENDING order.
top_22_Spotify_playlists = songs_22_df.sort_values(by="In Spotify Playlists", ascending=False)

# Format the Column for better readability.
top_22_Spotify_playlists["In Spotify Playlists"] = top_22_Spotify_playlists["In Spotify Playlists"]\
                                                                                         .map("{:,}".format)

top_22_Spotify_playlists[["Track Name", "Artist(s) Name", "Released Year", \
                          "Released Month", "In Spotify Playlists", "BPM", "Key", "Deezer Genre"]].\
                          head(3).style.apply(highlighting, axis=0, subset=["In Spotify Playlists"])

Unnamed: 0,Track Name,Artist(s) Name,Released Year,Released Month,In Spotify Playlists,BPM,Key,Deezer Genre
737,BREAK MY SOUL,Beyoncï¿,2022,6,9724,115,C#,Pop
29,Anti-Hero,Taylor Swift,2022,10,9082,97,E,Pop
161,Titi Me Preguntï¿,Bad Bunny,2022,5,9037,107,F,Latin Music


### `Top Songs in 2022 Apple`

In [18]:
# Top songs in '22 based on Apple playlists

# sort the "in_apple_playlists" in DESCENDING order.
top_22_Apple_playlists = songs_22_df.sort_values(by="In Apple Playlists", ascending=False) 

top_22_Apple_playlists[["Track Name", "Artist(s) Name", "Released Year", \
                        "Released Month", "In Apple Playlists", "BPM", "Key", "Deezer Genre"]].\
                        head(3).style.apply(highlighting, axis=0, subset=["In Apple Playlists"])

Unnamed: 0,Track Name,Artist(s) Name,Released Year,Released Month,In Apple Playlists,BPM,Key,Deezer Genre
29,Anti-Hero,Taylor Swift,2022,10,242,97,E,Pop
647,About Damn Time,Lizzo,2022,4,242,109,A#,Pop
737,BREAK MY SOUL,Beyoncï¿,2022,6,222,115,C#,Pop


### `Top Songs in 2022 Deezer`

In [19]:
# Top songs in '22 based on Deezer playlists

# sort the "in_deezer_playlists" in DESCENDING order.
top_22_Deezer_playlists = songs_22_df.sort_values(by="In Deezer Playlists", ascending=False) 

top_22_Deezer_playlists[["Track Name", "Artist(s) Name", "Released Year", \
                        "Released Month", "In Deezer Playlists", "BPM", "Key", "Deezer Genre"]].\
                        head(3).style.apply(highlighting, axis=0, subset=["In Deezer Playlists"])

Unnamed: 0,Track Name,Artist(s) Name,Released Year,Released Month,In Deezer Playlists,BPM,Key,Deezer Genre
243,DESPECHï¿,ROSALï¿½,2022,7,422,130,G,Pop
199,CUFF IT,Beyoncï¿,2022,7,330,115,G,Pop
785,THE LONELIEST,Mï¿½ï¿½ne,2022,10,328,130,D,Rock


## Top Songs Released Prior 2022: ____________________________________________________________________________

In [20]:
# all songs before 2022:
artists_prior_df = cleaned_final_music_df[cleaned_final_music_df["Artist Count"] == 1]
music_prior_df = artists_prior_df[artists_prior_df["Released Year"] < 2022]

### `Top Songs Prior 2022 Spotify`

In [21]:
# Sort the "streams" values in DESCENDING order.
top_prior_streams = music_prior_df.sort_values(by="Spotify Streams", ascending=False) 

# Format the Column for better readability.
top_prior_streams["Spotify Streams"] = top_prior_streams["Spotify Streams"].map("{:,}".format)
                                                                                                
# Display the Specified Columns and Apply the highlight format:
top_prior_streams[["Track Name", "Artist(s) Name", "Released Year", \
                "Released Month", "Spotify Streams", "BPM", "Key", "Deezer Genre"]].\
                head(3).style.apply(highlighting, axis=0, subset=["Spotify Streams"])

Unnamed: 0,Track Name,Artist(s) Name,Released Year,Released Month,Spotify Streams,BPM,Key,Deezer Genre
151,Shape of You,Ed Sheeran,2017,1,3562543890,96,C#,Pop
122,Believer,Imagine Dragons,2017,1,2594040133,125,A#,Alternative
120,Perfect,Ed Sheeran,2017,1,2559529074,95,G#,Pop


In [22]:
# Top songs PRIOR '22 based on Spotify playlists

# sort the "in_spotify_playlists" in DESCENDING order.
top_prior_Spotify_playlists = music_prior_df.sort_values(by="In Spotify Playlists", ascending=False) 

# Format the Column for better readability.
top_prior_Spotify_playlists["In Spotify Playlists"] = top_prior_Spotify_playlists\
                                                        ["In Spotify Playlists"].map("{:,}".format)

top_prior_Spotify_playlists[["Track Name", "Artist(s) Name", "Released Year", \
                            "Released Month", "In Spotify Playlists", "BPM", "Key", "Deezer Genre"]].\
                            head(3).style.apply(highlighting, axis=0, subset=["In Spotify Playlists"])

Unnamed: 0,Track Name,Artist(s) Name,Released Year,Released Month,In Spotify Playlists,BPM,Key,Deezer Genre
525,Mr. Brightside,The Killers,2003,9,51979,148,C#,Rock
606,Wake Me Up - Radio Edit,Avicii,2013,1,50887,124,D,Electro
520,Smells Like Teen Spirit - Remastered 2021,Nirvana,1991,9,49991,117,C#,Rock


### `Top Songs Prior 2022 Apple`

In [23]:
# Top songs PRIOR '22 based on Apple playlists

# sort the "in_apple_playlists" in DESCENDING order.
top_prior_Apple_playlists = music_prior_df.sort_values(by="In Apple Playlists", ascending=False) 

top_prior_Apple_playlists[["Track Name", "Artist(s) Name", "Released Year", \
                           "Released Month", "In Apple Playlists", "BPM", "Key", "Deezer Genre"]].\
                           head(3).style.apply(highlighting, axis=0, subset=["In Apple Playlists"])

Unnamed: 0,Track Name,Artist(s) Name,Released Year,Released Month,In Apple Playlists,BPM,Key,Deezer Genre
352,Don't Start Now,Dua Lipa,2019,10,532,124,B,Pop
576,Thinking Out Loud,Ed Sheeran,2014,1,363,79,D,Pop
149,Shake It Off,Taylor Swift,2014,1,328,160,G,Singer & Songwriter


### `Top Songs Prior 2022 Deezer`

In [24]:
# Top songs PRIOR '22 based on Deezer playlists

# sort the "in_deezer_playlists" in DESCENDING order.
top_prior_Deezer_playlists = music_prior_df.sort_values(by="In Deezer Playlists", ascending=False) 

# Format the Column for better readability.
top_prior_Deezer_playlists["In Deezer Playlists"] = top_prior_Deezer_playlists\
                                                        ["In Deezer Playlists"].map("{:,}".format)

top_prior_Deezer_playlists[["Track Name", "Artist(s) Name", "Released Year", \
                            "Released Month", "In Deezer Playlists", "BPM", "Key", "Deezer Genre"]].\
                            head(3).style.apply(highlighting, axis=0, subset=["In Deezer Playlists"])

Unnamed: 0,Track Name,Artist(s) Name,Released Year,Released Month,In Deezer Playlists,BPM,Key,Deezer Genre
520,Smells Like Teen Spirit - Remastered 2021,Nirvana,1991,9,12367,117,C#,Rock
777,The Scientist,Coldplay,2002,8,7827,146,F,Rock
288,Numb,Linkin Park,2003,3,7341,110,A,Dance


<h1><center>---------- TOP 3 SONGS IN ALL YEARS ---------- </center></h1>

In [25]:
# --------------------------------------------------------------------------------------------------------
# Sort all values for the columns of interest in DESCENDING order
# SPOTIFY ------------------------------------------------------------------------------------------------
top_streams_df = cleaned_final_music_df.sort_values(by="Spotify Streams", ascending=False) 
top_Spotify_playlists = cleaned_final_music_df.sort_values(by="In Spotify Playlists", ascending=False) 

# APPLE ---------------------------------------------------------------------------------------------------
top_Apple_playlists = cleaned_final_music_df.sort_values(by="In Apple Playlists", ascending=False)

# DEEZER --------------------------------------------------------------------------------------------------
top_Deezer_playlists = cleaned_final_music_df.sort_values(by="In Deezer Playlists", ascending=False) 
# --------------------------------------------------------------------------------------------------------

## Group by Artist

In [26]:
# # songs_23_df.groupby("Artist(s) Name")
# for artist in songs_23_df["Artist(s) Name"]:
#     if songs_23_df["Artist Count"]
artists_23 = songs_23_df["Artist(s) Name"].value_counts()
print(len(artists_23))
# artists_23.head(9)   # 51 artists >= 2

51


In [27]:
artists_22 = songs_22_df["Artist(s) Name"].value_counts()
print(len(artists_22))
# artists_22.head(25)   # 110 artists >= 2

110


In [28]:
artists_prior = music_prior_df["Artist(s) Name"].value_counts()
print(len(artists_prior))
# artists_prior.head(30)      # 132 artists >= 2

132
