In [1]:
# install required library
%pip install pandas numpy spotipy lxml
import pandas as pd
import numpy as np

Note: you may need to restart the kernel to use updated packages.


In [2]:
# Read the cleaned song dataset and preview its first rows.
dataset_path = 'cleaned_data.csv'
df = pd.read_csv(dataset_path)
df.head()

Unnamed: 0,Ekspresi,Gendre,Artist,Lagu,Tahun
0,Bersemangat,Pop,Tv girl,lover rock,2014
1,Bersemangat,Rap,Eminem,Godzilla,2020
2,Sedih,Pop,JKT48,Aitikata,2018
3,Bersemangat,Rock,the sigit,all the time,2006
4,Netral,Indie,Idfitaf,Takut,2021


In [3]:
import spotipy
from spotipy.oauth2 import SpotifyOAuth
from dotenv import load_dotenv
from pathlib import Path
import os

load_dotenv(Path('../.env'))


# auth purpose: SpotifyOAuth below is built without explicit credentials, so
# the project's own .env names are copied onto the SPOTIPY_* variables that
# spotipy falls back to.
spotipy_env_sources = {
    "SPOTIPY_CLIENT_ID": "CLIENT_ID",
    "SPOTIPY_CLIENT_SECRET": "CLIENT_SECRET",
    "SPOTIPY_REDIRECT_URI": "REDIRECT_URI",
}

# os.environ only accepts strings: assigning the None that os.getenv returns
# for an unset variable raises a bare "str expected, not NoneType" TypeError,
# so report the missing names explicitly instead.
missing_vars = [
    source for source in spotipy_env_sources.values() if os.getenv(source) is None
]
if missing_vars:
    raise RuntimeError(
        "Missing " + ", ".join(missing_vars) + " - define them in ../.env "
        "(or the environment) before running this cell"
    )

for target, source in spotipy_env_sources.items():
    os.environ[target] = os.getenv(source)


auth_manager = SpotifyOAuth(scope="playlist-modify-public",
                            open_browser=False)

# Open the generated link and approve access. Spotify then redirects to the
# REDIRECT_URI with a `code` query parameter: copy that value into the
# ACCESS_TOKEN entry of your .env file. Despite the variable name, it is the
# authorization code that is later exchanged for the real token.
auth_manager.get_authorize_url()

'https://accounts.spotify.com/authorize?client_id=5dd698be60cb4dc7a4c8c37f6d76f9f8&response_type=code&redirect_uri=http%3A%2F%2Flocalhost%3A8080&scope=playlist-modify-public'

In [4]:
# auth purposes. make sure the value copied from the redirect URL is stored
# as ACCESS_TOKEN in your .env file before running this block
# override=True is required here: ../.env was already loaded earlier in the
# notebook, and by default load_dotenv keeps variables that are already set,
# so an ACCESS_TOKEN edited since then would otherwise be ignored.
load_dotenv(Path('../.env'), override=True)
access_token = os.getenv("ACCESS_TOKEN")
# exchanges the authorization code for a token held by the auth manager
auth_manager.get_access_token(access_token, as_dict=False)

spotify = spotipy.Spotify(auth_manager=auth_manager)

# sanity check: fetch the profile of the authenticated user
user_dict = spotify.current_user()
print(user_dict)

{'display_name': 'fahmi', 'external_urls': {'spotify': 'https://open.spotify.com/user/nj235rfuc9chpq9fpyl3fteig'}, 'href': 'https://api.spotify.com/v1/users/nj235rfuc9chpq9fpyl3fteig', 'id': 'nj235rfuc9chpq9fpyl3fteig', 'images': [], 'type': 'user', 'uri': 'spotify:user:nj235rfuc9chpq9fpyl3fteig', 'followers': {'href': None, 'total': 0}}


In [5]:
# Look up every song of the dataset on Spotify and attach the matching link.
def _find_track_uri(song):
    """Return the value to store in ``spotify_uri`` for one dataset record.

    The first search hit's URI when there is one, NaN when the search returns
    no track, or "<http status> - <message>" when the Spotify client raises.
    """
    query = f"track:{song['Lagu']} artist:{song['Artist']}"
    try:
        hits = spotify.search(query, type="track", limit=1)["tracks"]["items"]
    except spotipy.client.SpotifyException as err:
        return str(err.http_status) + " - " + err.msg
    if len(hits) > 0:
        return hits[0]["uri"]
    return np.nan


list_df = df.to_dict(orient='records')
for record in list_df:
    record["spotify_uri"] = _find_track_uri(record)

# rebuild the frame from the enriched records
df = pd.DataFrame(list_df)

# preview data
df.head()

Unnamed: 0,Ekspresi,Gendre,Artist,Lagu,Tahun,spotify_uri
0,Bersemangat,Pop,Tv girl,lover rock,2014,spotify:track:6dBUzqjtbnIa1TwYbyw5CM
1,Bersemangat,Rap,Eminem,Godzilla,2020,spotify:track:7FIWs0pqAYbP91WWM0vlTQ
2,Sedih,Pop,JKT48,Aitikata,2018,
3,Bersemangat,Rock,the sigit,all the time,2006,spotify:track:60zomg1pGnaSrapCzO6aPG
4,Netral,Indie,Idfitaf,Takut,2021,


In [6]:
# clear null data
df_clean = df.dropna()

# Keep only rows that hold a real track URI. The search cell stores failed
# lookups as "<status> - <message>" text in this column, and matching only
# "Not found" would let every other failure (e.g. a rate limit or an expired
# token) be saved as if it were a link.
# astype(str) keeps the .str accessor usable even if no string value is left.
has_track_uri = df_clean['spotify_uri'].astype(str).str.startswith("spotify:track:")
df_clean = df_clean[has_track_uri]

# save the result into csv format
df_clean.to_csv('music_link.csv', index=False)

# preview last, so the notebook actually renders it as the cell output
df_clean.head()