## Starter Code to Import Libraries and Load the Songs Data

In [1]:
import pandas as pd
import requests
import json

from api_keys import rapidAPI_key

In [2]:
# Read the raw Spotify 2023 export (decoded as Latin-1), report the row
# count, and preview the first three rows.
music_df = pd.read_csv(
    "../Resources/spotify-2023.csv",
    encoding='latin-1',
)
print(music_df.shape[0])
music_df.head(n=3)

953


Unnamed: 0,track_name,artist(s)_name,artist_count,released_year,released_month,released_day,in_spotify_playlists,in_spotify_charts,streams,in_apple_playlists,...,bpm,key,mode,danceability_%,valence_%,energy_%,acousticness_%,instrumentalness_%,liveness_%,speechiness_%
0,Seven (feat. Latto) (Explicit Ver.),"Latto, Jung Kook",2,2023,7,14,553,147,141381703,43,...,125,B,Major,80,89,83,31,0,8,4
1,LALA,Myke Towers,1,2023,3,23,1474,48,133716286,48,...,92,C#,Major,71,61,74,7,0,10,4
2,vampire,Olivia Rodrigo,1,2023,6,30,1397,113,140003974,94,...,138,F,Major,51,32,53,17,0,31,6


In [3]:
# Initial cleaning: discard every row that has at least one missing value.
# The surviving rows keep their original index labels (no reset here).
music_df = music_df.dropna(axis=0, how="any")
print(music_df.shape[0])
music_df.head(n=3)

817


Unnamed: 0,track_name,artist(s)_name,artist_count,released_year,released_month,released_day,in_spotify_playlists,in_spotify_charts,streams,in_apple_playlists,...,bpm,key,mode,danceability_%,valence_%,energy_%,acousticness_%,instrumentalness_%,liveness_%,speechiness_%
0,Seven (feat. Latto) (Explicit Ver.),"Latto, Jung Kook",2,2023,7,14,553,147,141381703,43,...,125,B,Major,80,89,83,31,0,8,4
1,LALA,Myke Towers,1,2023,3,23,1474,48,133716286,48,...,92,C#,Major,71,61,74,7,0,10,4
2,vampire,Olivia Rodrigo,1,2023,6,30,1397,113,140003974,94,...,138,F,Major,51,32,53,17,0,31,6


### Adding the Deezer Album ID So We Can Look Up the Genres

In [4]:
# Look up every track on Deezer (through RapidAPI) and record the album ID of
# the top search hit. The album ID is what the album endpoint needs in order
# to return a genre.
headers = {
    "X-RapidAPI-Key": rapidAPI_key,
    "X-RapidAPI-Host": "deezerdevs-deezer.p.rapidapi.com"
}

url = "https://deezerdevs-deezer.p.rapidapi.com/search"

# Seconds to wait for the API before giving up on one track. Without a timeout
# a single stalled connection would hang the whole loop.
request_timeout = 10

# One entry per row of music_df, in row order; 0 marks "no album found".
d_album_id = []

# NOTE(review): the search query is the track title only, so tracks with a
# common title may match a different artist's release -- consider adding the
# artist to the query and verifying the matches.
for title in music_df['track_name']:
    album_id = 0

    try:
        response = requests.get(
            url,
            headers=headers,
            params={"q": title},
            timeout=request_timeout,
        )

        if response.status_code == 200:
            # "data" holds the list of search hits; it may be missing or empty.
            hits = response.json().get("data") or []
            if hits:
                album_id = int(hits[0]["album"]["id"])
    except (
        requests.exceptions.RequestException,  # connection error, timeout, ...
        ValueError,                            # body is not JSON / id is not numeric
        KeyError, TypeError, AttributeError,   # response has an unexpected shape
    ):
        # A failure on one track must not discard the IDs collected so far;
        # fall back to the same placeholder used for "no result".
        album_id = 0

    d_album_id.append(album_id)


In [5]:
# Store the collected album IDs (one per row, in row order) as a new column.
music_df["Deezer Album ID"] = d_album_id

# Show the last rows to confirm the IDs reach the end of the DataFrame.
music_df.tail(n=3)

Unnamed: 0,track_name,artist(s)_name,artist_count,released_year,released_month,released_day,in_spotify_playlists,in_spotify_charts,streams,in_apple_playlists,...,key,mode,danceability_%,valence_%,energy_%,acousticness_%,instrumentalness_%,liveness_%,speechiness_%,Deezer Album ID
950,A Veces (feat. Feid),"Feid, Paulo Londra",2,2022,11,3,573,0,73513683,2,...,C#,Major,80,81,67,4,0,8,6,382808197
951,En La De Ella,"Feid, Sech, Jhayco",3,2022,10,20,1320,0,133895612,29,...,C#,Major,82,67,77,8,0,12,5,367159307
952,Alone,Burna Boy,1,2022,11,4,782,2,96007391,27,...,E,Minor,61,32,67,15,0,11,5,373539777


In [6]:
# Save the DataFrame with the album IDs to CSV, without the index column.
music_df.to_csv(
    "../Resources/album_id.csv",
    encoding="Latin-1",
    index=False,
    header=True,
)

In [7]:
# Load the saved album-ID data back from disk, report the row count, and
# preview the first three rows.
music_df = pd.read_csv(
    "../Resources/album_id.csv",
    encoding="Latin-1",
)
print(music_df.shape[0])
music_df.head(n=3)

817


Unnamed: 0,track_name,artist(s)_name,artist_count,released_year,released_month,released_day,in_spotify_playlists,in_spotify_charts,streams,in_apple_playlists,...,key,mode,danceability_%,valence_%,energy_%,acousticness_%,instrumentalness_%,liveness_%,speechiness_%,Deezer Album ID
0,Seven (feat. Latto) (Explicit Ver.),"Latto, Jung Kook",2,2023,7,14,553,147,141381703,43,...,B,Major,80,89,83,31,0,8,4,463574485
1,LALA,Myke Towers,1,2023,3,23,1474,48,133716286,48,...,C#,Major,71,61,74,7,0,10,4,410083687
2,vampire,Olivia Rodrigo,1,2023,6,30,1397,113,140003974,94,...,F,Major,51,32,53,17,0,31,6,484372295


### Extracting The Genres

In [8]:
# Ask the Deezer album endpoint (through RapidAPI) for each row's album and
# record the name of the first genre listed for it.
headers = {
    "X-RapidAPI-Key": rapidAPI_key,
    "X-RapidAPI-Host": "deezerdevs-deezer.p.rapidapi.com"
}

# Seconds to wait for the API before giving up on one album. Without a timeout
# a single stalled connection would hang the whole loop.
request_timeout = 10

# One genre name (or None when no genre could be obtained) per row of
# music_df, in row order.
d_genre = []

# Genres already fetched, keyed by album ID, so an album shared by several
# tracks is requested only once.
genre_by_album = {}

for deezer_id in music_df['Deezer Album ID']:
    # 0 is the placeholder stored when the track search found no album, so
    # there is nothing to look up.
    if deezer_id == 0:
        d_genre.append(None)
        continue

    if deezer_id in genre_by_album:
        d_genre.append(genre_by_album[deezer_id])
        continue

    url = "https://deezerdevs-deezer.p.rapidapi.com/album/" + str(deezer_id)
    genre_find = None

    try:
        response = requests.get(url, headers=headers, timeout=request_timeout)

        if response.status_code == 200:
            id_response = response.json()
            try:
                # Take the name of the first genre listed for the album.
                genre_find = id_response["genres"]["data"][0]["name"]
            except (KeyError, IndexError, TypeError):
                # The response carries no usable genre list.
                genre_find = None
            # Only answers from a successful request are cached, so a
            # transient failure can still be retried for a later row.
            genre_by_album[deezer_id] = genre_find
    except (requests.exceptions.RequestException, ValueError):
        # Connection error, timeout, or a body that is not JSON: record
        # "unknown" for this row and keep the genres collected so far.
        genre_find = None

    d_genre.append(genre_find)

In [9]:
# Store the collected genres (one per row, in row order) as a new column.
music_df["Deezer Genre"] = d_genre

# Report the row count and show the last rows to confirm the genres reach
# the end of the DataFrame.
print(music_df.shape[0])
music_df.tail(n=3)

817


Unnamed: 0,track_name,artist(s)_name,artist_count,released_year,released_month,released_day,in_spotify_playlists,in_spotify_charts,streams,in_apple_playlists,...,mode,danceability_%,valence_%,energy_%,acousticness_%,instrumentalness_%,liveness_%,speechiness_%,Deezer Album ID,Deezer Genre
814,A Veces (feat. Feid),"Feid, Paulo Londra",2,2022,11,3,573,0,73513683,2,...,Major,80,81,67,4,0,8,6,382808197,Pop
815,En La De Ella,"Feid, Sech, Jhayco",3,2022,10,20,1320,0,133895612,29,...,Major,82,67,77,8,0,12,5,367159307,Rap/Hip Hop
816,Alone,Burna Boy,1,2022,11,4,782,2,96007391,27,...,Minor,61,32,67,15,0,11,5,373539777,Films/Games


In [10]:
# Save the DataFrame with album IDs and genres to CSV, without the index column.
music_df.to_csv(
    "../Resources/all_data_genre.csv",
    encoding="Latin-1",
    index=False,
    header=True,
)

In [11]:
# Load the saved genre-enriched data back from disk and preview the first rows.
final_music_df = pd.read_csv(
    "../Resources/all_data_genre.csv",
    encoding="Latin-1",
)
final_music_df.head()

Unnamed: 0,track_name,artist(s)_name,artist_count,released_year,released_month,released_day,in_spotify_playlists,in_spotify_charts,streams,in_apple_playlists,...,mode,danceability_%,valence_%,energy_%,acousticness_%,instrumentalness_%,liveness_%,speechiness_%,Deezer Album ID,Deezer Genre
0,Seven (feat. Latto) (Explicit Ver.),"Latto, Jung Kook",2,2023,7,14,553,147,141381703,43,...,Major,80,89,83,31,0,8,4,463574485,Asian Music
1,LALA,Myke Towers,1,2023,3,23,1474,48,133716286,48,...,Major,71,61,74,7,0,10,4,410083687,Rap/Hip Hop
2,vampire,Olivia Rodrigo,1,2023,6,30,1397,113,140003974,94,...,Major,51,32,53,17,0,31,6,484372295,Pop
3,Cruel Summer,Taylor Swift,1,2019,8,23,7858,100,800840817,116,...,Major,55,58,72,11,0,11,15,108447472,Pop
4,WHERE SHE GOES,Bad Bunny,1,2023,5,18,3133,50,303236322,84,...,Minor,65,23,80,14,63,11,6,442984025,Latin Music
