# Spotify Scraping Project

## Setup Code

In [2]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import requests
import pandas as pd
import numpy as np
import time 
import re

In [3]:
client_id = ''
client_secret = ''

client_credentials_manager = SpotifyClientCredentials(client_id, client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [5]:
input_data = pd.read_csv("273_kArtists.csv")

## Retrieving IDs for 'album', single', 'appears_on', 'compilation'

### Retrieving Album Names and Album IDs from Artists

In [7]:
all_artist_names = []
all_artist_ids = []
all_album_names = []
all_album_uris = []

# Retrieve Album ID using the Album Names
for index in input_data.index:
    artist_name = input_data["Artist"][index]
    artist_id = input_data["Spotify_Id"][index]

    try:
        spotify_artist_id = "spotify:artist:" + str(artist_id)
        results = sp.artist_albums(spotify_artist_id, album_type='album', country='US')
        albums = results['items']

        while results['next']:
            results = sp.next(results)
            albums.extend(results['items'])

        # unique_album_names = set()
        for album in albums:
            all_artist_names.append(artist_name)
            all_artist_ids.append(artist_id)
            all_album_names.append(album['name'])
            all_album_uris.append(album['uri'])

    except:
        print("error with finding " + artist_name)


In [8]:
# Adding resulting artist name, artist id, album name, album id to a new dataframe
temp_data = {'Artist': all_artist_names,
            'Artist_Id': all_artist_ids,
            'Album_Name': all_album_names,
            'Album_Id': all_album_uris}

artist_id_album_id = pd.DataFrame(temp_data)

In [9]:
artist_id_album_id.head(10)

Unnamed: 0,Artist,Artist_Id,Album_Name,Album_Id
0,H.O.T.,5JrfgZAgqAMywJpLpJM0eS,FOREVER 2001 LIVE CONCERT,spotify:album:697cPaD568S5Zt4bgo4cQf
1,H.O.T.,5JrfgZAgqAMywJpLpJM0eS,99 LIVE IN SEOUL (Live),spotify:album:64IbiOh32NqTuHxy5wsiBG
2,H.O.T.,5JrfgZAgqAMywJpLpJM0eS,Outside Castle -The 5th Album,spotify:album:0qqE8lGVSRDqePeWpKy1Fg
3,H.O.T.,5JrfgZAgqAMywJpLpJM0eS,Age Of Peace (Original Motion Picture Soundtrack),spotify:album:73PXsDhLv6RV01AyVN8Ymx
4,H.O.T.,5JrfgZAgqAMywJpLpJM0eS,I Yah! - The 4th Album,spotify:album:2hTRBrQt64fYVfZl9viZh3
5,H.O.T.,5JrfgZAgqAMywJpLpJM0eS,Resurrection - The 3rd Album,spotify:album:0yJ02boB03UsjObzoXJ5O4
6,H.O.T.,5JrfgZAgqAMywJpLpJM0eS,Wolf And Sheep,spotify:album:2RWrsv0kIKreXvm7IrMeYN
7,H.O.T.,5JrfgZAgqAMywJpLpJM0eS,We Hate All Kinds of Violence... - The 1st Album,spotify:album:23Xh3YpgzRuJ2DAW5X7MsP
8,SECHSKIES,6uRyNreOHUvWPNGnKfIo27,ANOTHER LIGHT,spotify:album:5f2LzIFxljQF4FH7w3rXcQ
9,SECHSKIES,6uRyNreOHUvWPNGnKfIo27,THE 20TH ANNIVERSARY,spotify:album:7AKNe7vxI0Z6CCyy7zaZ26


In [10]:
artist_id_album_id.to_csv("artist_id_album_id.csv")

### Retrieving Single and Single IDs from Artists

In [11]:
all_artist_names = []
all_artist_ids = []
all_single_names = []
all_single_uris = []

# Retrieve Single "Album" ID using the Album Names

for index in input_data.index:
    artist_name = input_data["Artist"][index]
    artist_id = input_data["Spotify_Id"][index]

    try:
        spotify_artist_id = "spotify:artist:" + str(artist_id)
        results = sp.artist_albums(spotify_artist_id, album_type='single', country='US')
        singles = results['items']

        while results['next']:
            results = sp.next(results)
            singles.extend(results['items'])

        for single in singles:
            all_artist_names.append(artist_name)
            all_artist_ids.append(artist_id)
            all_single_names.append(single['name'])
            all_single_uris.append(single['uri'])

    except:
        print("error with finding " + artist_name)

In [12]:
temp_data = {'Artist': all_artist_names,
            'Artist_Id': all_artist_ids,
            'Single_Album_Name': all_single_names,
            'Single_Album_Id': all_single_uris}

artist_id_single_album_id = pd.DataFrame(temp_data)

In [13]:
artist_id_single_album_id.head(10)

Unnamed: 0,Artist,Artist_Id,Single_Album_Name,Single_Album_Id
0,SECHSKIES,6uRyNreOHUvWPNGnKfIo27,DON'T LOOK BACK,spotify:album:63fxv7o113Zji3oLeXgswm
1,SECHSKIES,6uRyNreOHUvWPNGnKfIo27,ALL FOR YOU,spotify:album:7vqIRL00YfTtuO0sSAK3Uv
2,SECHSKIES,6uRyNreOHUvWPNGnKfIo27,THREE WORDS,spotify:album:6pN2wvuzGHURmVNXbpRaei
3,S.E.S.,61HUG80Xma4rnXsqfZkzeM,Love (story) - SM STATION,spotify:album:3j2XirA7CqIqkwuQfO7wkR
4,S.E.S.,61HUG80Xma4rnXsqfZkzeM,Remixed,spotify:album:4jBlaJL3oauZCX9VeseBJC
5,Fin.K.L,2aRLyjYp7WPr4EkjkI1gvS,"FIN.K.L SINGLE ""Like the song remains""",spotify:album:1Qwyg4hhC6gv8x42kq9Qru
6,Fin.K.L,2aRLyjYp7WPr4EkjkI1gvS,Fin.K.L Digital Album,spotify:album:08u61FSaRTXKe6CD7RePvp
7,Shinhwa,0jVvkFPa6YbFXQ3Qmhita0,SHINHWA TWENTY SPECIAL ALBUM 'HEART',spotify:album:1F4ew4S5UvxCTg28U2J1cR
8,Shinhwa,0jVvkFPa6YbFXQ3Qmhita0,SHINHWA TWENTY GIFT SINGLE ‘All Your Dreams’,spotify:album:2txBzMBsQDnw0kT6rFpQRB
9,Shinhwa,0jVvkFPa6YbFXQ3Qmhita0,UNCHANGING PT. 1,spotify:album:5TrZNDuG4byRTlcMsX6OaS


In [14]:
artist_id_single_album_id.to_csv("artist_id_single_album_id.csv")

### Retrieving 'Appears On' and 'Appears On' ID from Artist ID

In [15]:
all_artist_names = []
all_artist_ids = []
all_appears_on_names = []
all_appears_on_uris = []

# Retrieve Album ID using the Album Names

for index in input_data.index:
    artist_name = input_data["Artist"][index]
    artist_id = input_data["Spotify_Id"][index]

    try:
        spotify_artist_id = "spotify:artist:" + str(artist_id)
        results = sp.artist_albums(spotify_artist_id, album_type='appears_on', country='US')
        appears_on = results['items']

        while results['next']:
            results = sp.next(results)
            appears_on.extend(results['items'])

        for appearance in appears_on:
            all_artist_names.append(artist_name)
            all_artist_ids.append(artist_id)
            all_appears_on_names.append(appearance['name'])
            all_appears_on_uris.append(appearance['uri'])
    
    except:
        print("error with finding " + artist_name)

In [16]:
temp_data = {'Artist': all_artist_names,
            'Artist_Id': all_artist_ids,
            'Appears_On_Name': all_appears_on_names,
            'Appears_On_Id': all_appears_on_uris}

artist_id_appears_on_id = pd.DataFrame(temp_data)

In [17]:
artist_id_appears_on_id.head(10)

Unnamed: 0,Artist,Artist_Id,Appears_On_Name,Appears_On_Id
0,H.O.T.,5JrfgZAgqAMywJpLpJM0eS,Winter Vacation in SMTOWN.com,spotify:album:5aBuzpJ2UNoPwQhee47UQk
1,H.O.T.,5JrfgZAgqAMywJpLpJM0eS,Christmas in SMTOWN.com,spotify:album:7ccOMxO0n9DjSfLBHhWMDT
2,SECHSKIES,6uRyNreOHUvWPNGnKfIo27,90's trendy K-pop music Best 20 Vol. 15,spotify:album:2lW9DhcA1StfL54mw5pgC2
3,S.E.S.,61HUG80Xma4rnXsqfZkzeM,Superhero (My Knight) Remix EP,spotify:album:5v6Jrf7O7k99sPkMTU2Noo
4,S.E.S.,61HUG80Xma4rnXsqfZkzeM,2002 SUMMER VACATION in SMTOWN.COM,spotify:album:59KnIxsURaQ1wabZZpyf67
5,S.E.S.,61HUG80Xma4rnXsqfZkzeM,Winter Vacation in SMTOWN.com - Angel Eyes,spotify:album:3nHNulZ1pybkNcM5NeErQA
6,S.E.S.,61HUG80Xma4rnXsqfZkzeM,Winter Vacation in SMTOWN.com,spotify:album:5aBuzpJ2UNoPwQhee47UQk
7,S.E.S.,61HUG80Xma4rnXsqfZkzeM,Christmas in SMTOWN.com,spotify:album:7ccOMxO0n9DjSfLBHhWMDT
8,Fin.K.L,2aRLyjYp7WPr4EkjkI1gvS,韓劇情緣,spotify:album:2SgMnH1w8G47RWCAt18wwY
9,Fin.K.L,2aRLyjYp7WPr4EkjkI1gvS,All About Eve (MBC DRAMA) OST,spotify:album:7LDnsb1ATqmSM3NYqefvfL


In [18]:
artist_id_appears_on_id.to_csv("artist_id_appears_on_id.csv")

### Retrieving Compilation and Compilation ID from Artist ID

In [19]:
all_artist_names = []
all_artist_ids = []
all_compilation_names = []
all_compilation_uris = []

# Retrieve Album ID using the Album Names

for index in input_data.index:
    artist_name = input_data["Artist"][index]
    artist_id = input_data["Spotify_Id"][index]

    try:
        spotify_artist_id = "spotify:artist:" + str(artist_id)
        results = sp.artist_albums(spotify_artist_id, album_type='compilation', country='US')
        compilations = results['items']

        while results['next']:
            results = sp.next(results)
            compilations.extend(results['items'])

        for compiled in compilations:
            all_artist_names.append(artist_name)
            all_artist_ids.append(artist_id)
            all_compilation_names.append(compiled['name'])
            all_compilation_uris.append(compiled['uri'])

    except:
        print("error with finding " + artist_name)

In [20]:
temp_data = {'Artist': all_artist_names,
            'Artist_Id': all_artist_ids,
            'Compilation_Name': all_compilation_names,
            'Compilation_Id': all_compilation_uris}

artist_id_compilation_id = pd.DataFrame(temp_data)

In [21]:
artist_id_compilation_id.head(10)

Unnamed: 0,Artist,Artist_Id,Compilation_Name,Compilation_Id
0,Click-B,2kOGSFThgEzPEjL4fFB25w,The Best Of Click-B,spotify:album:7v8ud1pc0Y9xvPGFtVJYbO
1,BoA,4muJrGMndyYWqZtfk8OWy4,BEST OF SOUL,spotify:album:0gxS6h33HGI5jEAgcMsTWA
2,BIGBANG,4Kxlr1PRlDKEB0ekOCyHgX,THE BEST OF BIGBANG 2006-2014,spotify:album:1JwbdXLKJVffgSW95WcTUp
3,2PM,5iRPbkcPmqAFFwDUj6ywVS,THE BEST OF 2PM in Japan 2011-2016,spotify:album:5rEgHD2JSK9RRxyKtXsImD
4,BTS,3Nrfpe0tUJi4K4DXYWgMUX,"BTS, THE BEST",spotify:album:6bN241dPMC54Ac6zV1B8hl
5,Stray Kids,2dIgFjalVxs4ThymZ67YCE,SKZ2020,spotify:album:4hk05fFLNquJc8w5htXZDi
6,4Minute,6cdC1cwqh3eJAXaxXJt2jv,Best of 4Minute,spotify:album:502uBQOdyp6A3En53ZJFCo
7,G-Dragon,30b9WulBM8sFuBo17nNq9c,COUP D'ETAT [+ ONE OF A KIND & HEARTBREAKER],spotify:album:1mVPbaMZGsPh5gzhvO6BBX
8,TAEYANG,6udveWUgX4vu75FF0DTrXV,RISE [+ SOLAR & HOT],spotify:album:2Z80u1PO03Jk4eUccBUR90
9,BIBI,6UbmqUEgjLA6jAcXwbM1Z9,Gringas,spotify:album:37IFkjRyzSbBWubkkHD6RL


In [22]:
artist_id_compilation_id.to_csv("artist_id_compilation_id.csv")

## Locating Tracks and Attaching to Albums, Singles, Appears On, and Compilation

### Locating and Attaching for Albums

In [23]:
artist_album_data = pd.read_csv("artist_id_album_id.csv", index_col=[0])

In [24]:
artist_album_data.head(10)

Unnamed: 0,Artist,Artist_Id,Album_Name,Album_Id
0,H.O.T.,5JrfgZAgqAMywJpLpJM0eS,FOREVER 2001 LIVE CONCERT,spotify:album:697cPaD568S5Zt4bgo4cQf
1,H.O.T.,5JrfgZAgqAMywJpLpJM0eS,99 LIVE IN SEOUL (Live),spotify:album:64IbiOh32NqTuHxy5wsiBG
2,H.O.T.,5JrfgZAgqAMywJpLpJM0eS,Outside Castle -The 5th Album,spotify:album:0qqE8lGVSRDqePeWpKy1Fg
3,H.O.T.,5JrfgZAgqAMywJpLpJM0eS,Age Of Peace (Original Motion Picture Soundtrack),spotify:album:73PXsDhLv6RV01AyVN8Ymx
4,H.O.T.,5JrfgZAgqAMywJpLpJM0eS,I Yah! - The 4th Album,spotify:album:2hTRBrQt64fYVfZl9viZh3
5,H.O.T.,5JrfgZAgqAMywJpLpJM0eS,Resurrection - The 3rd Album,spotify:album:0yJ02boB03UsjObzoXJ5O4
6,H.O.T.,5JrfgZAgqAMywJpLpJM0eS,Wolf And Sheep,spotify:album:2RWrsv0kIKreXvm7IrMeYN
7,H.O.T.,5JrfgZAgqAMywJpLpJM0eS,We Hate All Kinds of Violence... - The 1st Album,spotify:album:23Xh3YpgzRuJ2DAW5X7MsP
8,SECHSKIES,6uRyNreOHUvWPNGnKfIo27,ANOTHER LIGHT,spotify:album:5f2LzIFxljQF4FH7w3rXcQ
9,SECHSKIES,6uRyNreOHUvWPNGnKfIo27,THE 20TH ANNIVERSARY,spotify:album:7AKNe7vxI0Z6CCyy7zaZ26


In [25]:
list_of_track_df = []

for ind in artist_album_data.index:
    album_id = artist_album_data["Album_Id"][ind]
    tracks = sp.album_tracks(album_id = album_id)
    
    try:
        for track in tracks['items']:
            # Retrieve each track's Spotify qualities
            track_name = track['name']
            track_uri = track['uri']
            track_features = sp.audio_features(track_uri)
            track_features_df = pd.DataFrame(track_features)
            track_features_df['Track_Title'] = track_name

            # Combine track and artist information with the features dataframe
            temp_df = pd.concat([artist_album_data.loc[ind:ind], 
                                track_features_df.set_index(artist_album_data.loc[ind:ind].index)], axis = 1)
            list_of_track_df.append(temp_df)

    except:
        print("Ran into an error - rerun please!")
        print(album_id)

In [26]:
temp_df = pd.concat(list_of_track_df, ignore_index=True)

In [27]:
temp_df.head(10)

Unnamed: 0,Artist,Artist_Id,Album_Name,Album_Id,danceability,energy,key,loudness,mode,speechiness,...,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature,Track_Title,0
0,H.O.T.,5JrfgZAgqAMywJpLpJM0eS,FOREVER 2001 LIVE CONCERT,spotify:album:697cPaD568S5Zt4bgo4cQf,0.209,0.963,6.0,-12.9,0.0,0.133,...,131.864,audio_features,6KOn7RweqUBSZWoKKkHFRb,spotify:track:6KOn7RweqUBSZWoKKkHFRb,https://api.spotify.com/v1/tracks/6KOn7RweqUBS...,https://api.spotify.com/v1/audio-analysis/6KOn...,90333.0,4.0,Opening - Live,
1,H.O.T.,5JrfgZAgqAMywJpLpJM0eS,FOREVER 2001 LIVE CONCERT,spotify:album:697cPaD568S5Zt4bgo4cQf,0.397,0.901,4.0,-6.97,1.0,0.285,...,171.944,audio_features,1khANf67sdS8kUkjmF218J,spotify:track:1khANf67sdS8kUkjmF218J,https://api.spotify.com/v1/tracks/1khANf67sdS8...,https://api.spotify.com/v1/audio-analysis/1khA...,278333.0,4.0,I yah! - Live,
2,H.O.T.,5JrfgZAgqAMywJpLpJM0eS,FOREVER 2001 LIVE CONCERT,spotify:album:697cPaD568S5Zt4bgo4cQf,0.542,0.913,1.0,-7.763,1.0,0.241,...,97.051,audio_features,4XkT2bHdbHMPukCpj7mXPJ,spotify:track:4XkT2bHdbHMPukCpj7mXPJ,https://api.spotify.com/v1/tracks/4XkT2bHdbHMP...,https://api.spotify.com/v1/audio-analysis/4XkT...,320533.0,4.0,Git It Up! / 전사의 후예 / You Got Gun - Live,
3,H.O.T.,5JrfgZAgqAMywJpLpJM0eS,FOREVER 2001 LIVE CONCERT,spotify:album:697cPaD568S5Zt4bgo4cQf,0.436,0.666,10.0,-14.143,0.0,0.632,...,107.013,audio_features,65a4TY00ChiawzHVH4xcDO,spotify:track:65a4TY00ChiawzHVH4xcDO,https://api.spotify.com/v1/tracks/65a4TY00Chia...,https://api.spotify.com/v1/audio-analysis/65a4...,284427.0,5.0,Opening Ment - Live,
4,H.O.T.,5JrfgZAgqAMywJpLpJM0eS,FOREVER 2001 LIVE CONCERT,spotify:album:697cPaD568S5Zt4bgo4cQf,0.366,0.778,7.0,-9.239,0.0,0.0714,...,99.995,audio_features,3vYZaL9lMZ1uKURbcEq4Jz,spotify:track:3vYZaL9lMZ1uKURbcEq4Jz,https://api.spotify.com/v1/tracks/3vYZaL9lMZ1u...,https://api.spotify.com/v1/audio-analysis/3vYZ...,278667.0,4.0,It's Been Raining Since You Left Me / Delight ...,
5,H.O.T.,5JrfgZAgqAMywJpLpJM0eS,FOREVER 2001 LIVE CONCERT,spotify:album:697cPaD568S5Zt4bgo4cQf,0.667,0.914,1.0,-8.073,1.0,0.0873,...,110.037,audio_features,7H9Y6xJsNQLaPLw3COdgo5,spotify:track:7H9Y6xJsNQLaPLw3COdgo5,https://api.spotify.com/v1/tracks/7H9Y6xJsNQLa...,https://api.spotify.com/v1/audio-analysis/7H9Y...,227067.0,4.0,N.B.K.(Natural Born Killer) - Live,
6,H.O.T.,5JrfgZAgqAMywJpLpJM0eS,FOREVER 2001 LIVE CONCERT,spotify:album:697cPaD568S5Zt4bgo4cQf,0.702,0.768,10.0,-8.454,0.0,0.177,...,96.979,audio_features,4xnlCh34wSnw2JbRRYWqBH,spotify:track:4xnlCh34wSnw2JbRRYWqBH,https://api.spotify.com/v1/tracks/4xnlCh34wSnw...,https://api.spotify.com/v1/audio-analysis/4xnl...,266173.0,4.0,Time Will Tell - Live,
7,H.O.T.,5JrfgZAgqAMywJpLpJM0eS,FOREVER 2001 LIVE CONCERT,spotify:album:697cPaD568S5Zt4bgo4cQf,0.464,0.918,1.0,-7.74,0.0,0.0738,...,177.882,audio_features,2lyftOm9r45ja4MC7nXWXc,spotify:track:2lyftOm9r45ja4MC7nXWXc,https://api.spotify.com/v1/tracks/2lyftOm9r45j...,https://api.spotify.com/v1/audio-analysis/2lyf...,244160.0,4.0,Livin' La Vida Loca (Tony Special) - Live,
8,H.O.T.,5JrfgZAgqAMywJpLpJM0eS,FOREVER 2001 LIVE CONCERT,spotify:album:697cPaD568S5Zt4bgo4cQf,0.38,0.524,3.0,-9.068,1.0,0.0388,...,76.602,audio_features,0r3yOoI5592LzWuBGIntBH,spotify:track:0r3yOoI5592LzWuBGIntBH,https://api.spotify.com/v1/tracks/0r3yOoI5592L...,https://api.spotify.com/v1/audio-analysis/0r3y...,167507.0,4.0,Only You (KANGTA Solo) - Live,
9,H.O.T.,5JrfgZAgqAMywJpLpJM0eS,FOREVER 2001 LIVE CONCERT,spotify:album:697cPaD568S5Zt4bgo4cQf,0.463,0.421,0.0,-8.005,1.0,0.0247,...,86.608,audio_features,7hvqkt1HOjO1VQiTsBQsPz,spotify:track:7hvqkt1HOjO1VQiTsBQsPz,https://api.spotify.com/v1/tracks/7hvqkt1HOjO1...,https://api.spotify.com/v1/audio-analysis/7hvq...,270333.0,4.0,Right Here Waiting (KANGTA Solo) - Live,


In [28]:
temp_df.to_csv("v1_album_to_song.csv")

### Locating and Attaching "Singles" and "Single Tracks"

In [29]:
singles_input_data = pd.read_csv("artist_id_single_album_id.csv")

In [30]:
singles_input_data.head(10)

Unnamed: 0.1,Unnamed: 0,Artist,Artist_Id,Single_Album_Name,Single_Album_Id
0,0,SECHSKIES,6uRyNreOHUvWPNGnKfIo27,DON'T LOOK BACK,spotify:album:63fxv7o113Zji3oLeXgswm
1,1,SECHSKIES,6uRyNreOHUvWPNGnKfIo27,ALL FOR YOU,spotify:album:7vqIRL00YfTtuO0sSAK3Uv
2,2,SECHSKIES,6uRyNreOHUvWPNGnKfIo27,THREE WORDS,spotify:album:6pN2wvuzGHURmVNXbpRaei
3,3,S.E.S.,61HUG80Xma4rnXsqfZkzeM,Love (story) - SM STATION,spotify:album:3j2XirA7CqIqkwuQfO7wkR
4,4,S.E.S.,61HUG80Xma4rnXsqfZkzeM,Remixed,spotify:album:4jBlaJL3oauZCX9VeseBJC
5,5,Fin.K.L,2aRLyjYp7WPr4EkjkI1gvS,"FIN.K.L SINGLE ""Like the song remains""",spotify:album:1Qwyg4hhC6gv8x42kq9Qru
6,6,Fin.K.L,2aRLyjYp7WPr4EkjkI1gvS,Fin.K.L Digital Album,spotify:album:08u61FSaRTXKe6CD7RePvp
7,7,Shinhwa,0jVvkFPa6YbFXQ3Qmhita0,SHINHWA TWENTY SPECIAL ALBUM 'HEART',spotify:album:1F4ew4S5UvxCTg28U2J1cR
8,8,Shinhwa,0jVvkFPa6YbFXQ3Qmhita0,SHINHWA TWENTY GIFT SINGLE ‘All Your Dreams’,spotify:album:2txBzMBsQDnw0kT6rFpQRB
9,9,Shinhwa,0jVvkFPa6YbFXQ3Qmhita0,UNCHANGING PT. 1,spotify:album:5TrZNDuG4byRTlcMsX6OaS


In [42]:
list_of_single_track_df = []

for ind in singles_input_data.index:
    single_album_id = singles_input_data["Single_Album_Id"][ind]
    tracks = sp.album_tracks(album_id = single_album_id)

    try:
        for track in tracks['items']:
            # Retrieve each track's Spotify qualities
            track_name = track['name']
            track_uri = track['uri']
            track_features = sp.audio_features(track_uri)
            track_features_df = pd.DataFrame(track_features)
            track_features_df['Track_Title'] = track_name

            # Combine track and artist information with the features dataframe
            temp_df = pd.concat([singles_input_data.loc[ind:ind], 
                                track_features_df.set_index(singles_input_data.loc[ind:ind].index)], axis = 1)
            list_of_single_track_df.append(temp_df)

    except:
        print("Ran into an error - rerun please!")
        print(single_album_id)

In [43]:
temp_df = pd.concat(list_of_single_track_df, ignore_index=True)

In [44]:
temp_df.head(10)

Unnamed: 0.1,Unnamed: 0,Artist,Artist_Id,Single_Album_Name,Single_Album_Id,danceability,energy,key,loudness,mode,...,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature,Track_Title,0
0,0,SECHSKIES,6uRyNreOHUvWPNGnKfIo27,DON'T LOOK BACK,spotify:album:63fxv7o113Zji3oLeXgswm,0.535,0.484,0.0,-8.209,1.0,...,124.038,audio_features,3szqGZtiiS8hmIPcT9qBgh,spotify:track:3szqGZtiiS8hmIPcT9qBgh,https://api.spotify.com/v1/tracks/3szqGZtiiS8h...,https://api.spotify.com/v1/audio-analysis/3szq...,249263.0,4.0,DON’T LOOK BACK,
1,1,SECHSKIES,6uRyNreOHUvWPNGnKfIo27,ALL FOR YOU,spotify:album:7vqIRL00YfTtuO0sSAK3Uv,0.622,0.756,4.0,-4.838,0.0,...,167.902,audio_features,4jB0ikdBeXiIIP97TPXIPt,spotify:track:4jB0ikdBeXiIIP97TPXIPt,https://api.spotify.com/v1/tracks/4jB0ikdBeXiI...,https://api.spotify.com/v1/audio-analysis/4jB0...,242977.0,4.0,ALL FOR YOU,
2,1,SECHSKIES,6uRyNreOHUvWPNGnKfIo27,ALL FOR YOU,spotify:album:7vqIRL00YfTtuO0sSAK3Uv,0.676,0.593,11.0,-5.368,1.0,...,140.011,audio_features,5d0AZT2CWAH4mApoiXlTDa,spotify:track:5d0AZT2CWAH4mApoiXlTDa,https://api.spotify.com/v1/tracks/5d0AZT2CWAH4...,https://api.spotify.com/v1/audio-analysis/5d0A...,243714.0,4.0,DREAM,
3,1,SECHSKIES,6uRyNreOHUvWPNGnKfIo27,ALL FOR YOU,spotify:album:7vqIRL00YfTtuO0sSAK3Uv,0.793,0.799,7.0,-3.051,0.0,...,122.989,audio_features,2FHTNle5MybRe4k0GfjHFq,spotify:track:2FHTNle5MybRe4k0GfjHFq,https://api.spotify.com/v1/tracks/2FHTNle5MybR...,https://api.spotify.com/v1/audio-analysis/2FHT...,221587.0,4.0,MEANINGLESS,
4,1,SECHSKIES,6uRyNreOHUvWPNGnKfIo27,ALL FOR YOU,spotify:album:7vqIRL00YfTtuO0sSAK3Uv,0.702,0.702,5.0,-4.21,0.0,...,130.045,audio_features,0pO0z7133jfdG6MkGA5Mym,spotify:track:0pO0z7133jfdG6MkGA5Mym,https://api.spotify.com/v1/tracks/0pO0z7133jfd...,https://api.spotify.com/v1/audio-analysis/0pO0...,246303.0,4.0,ROUND & ROUND,
5,1,SECHSKIES,6uRyNreOHUvWPNGnKfIo27,ALL FOR YOU,spotify:album:7vqIRL00YfTtuO0sSAK3Uv,0.751,0.735,0.0,-3.24,0.0,...,125.95,audio_features,7yTryQD1n1PGjrqKTxh4na,spotify:track:7yTryQD1n1PGjrqKTxh4na,https://api.spotify.com/v1/tracks/7yTryQD1n1PG...,https://api.spotify.com/v1/audio-analysis/7yTr...,221595.0,4.0,WALKING IN THE SKY,
6,2,SECHSKIES,6uRyNreOHUvWPNGnKfIo27,THREE WORDS,spotify:album:6pN2wvuzGHURmVNXbpRaei,0.65,0.616,0.0,-4.69,1.0,...,147.991,audio_features,3xxqDGYqGNTIi5H8KJHDtC,spotify:track:3xxqDGYqGNTIi5H8KJHDtC,https://api.spotify.com/v1/tracks/3xxqDGYqGNTI...,https://api.spotify.com/v1/audio-analysis/3xxq...,243243.0,4.0,THREE WORDS,
7,3,S.E.S.,61HUG80Xma4rnXsqfZkzeM,Love (story) - SM STATION,spotify:album:3j2XirA7CqIqkwuQfO7wkR,0.714,0.931,5.0,-2.093,1.0,...,103.01,audio_features,7jdRhISt05YmJAKD0eGEeX,spotify:track:7jdRhISt05YmJAKD0eGEeX,https://api.spotify.com/v1/tracks/7jdRhISt05Ym...,https://api.spotify.com/v1/audio-analysis/7jdR...,239093.0,4.0,Love [story],
8,3,S.E.S.,61HUG80Xma4rnXsqfZkzeM,Love (story) - SM STATION,spotify:album:3j2XirA7CqIqkwuQfO7wkR,0.756,0.896,7.0,-4.253,1.0,...,103.009,audio_features,0jT6xrrvWpeVgnk3d8X1lr,spotify:track:0jT6xrrvWpeVgnk3d8X1lr,https://api.spotify.com/v1/tracks/0jT6xrrvWpeV...,https://api.spotify.com/v1/audio-analysis/0jT6...,235579.0,4.0,Love [story] - Instrumental,
9,4,S.E.S.,61HUG80Xma4rnXsqfZkzeM,Remixed,spotify:album:4jBlaJL3oauZCX9VeseBJC,0.751,0.923,0.0,-4.242,1.0,...,113.004,audio_features,6p9nbwXxgbLnsUOM5XCcGV,spotify:track:6p9nbwXxgbLnsUOM5XCcGV,https://api.spotify.com/v1/tracks/6p9nbwXxgbLn...,https://api.spotify.com/v1/audio-analysis/6p9n...,217133.0,4.0,달리기 - J-Bait Disco Mix,


In [45]:
temp_df.to_csv("v1_single_album_to_song.csv")

### Locating and Attaching "Appears On"

In [46]:
appears_on_input_data = pd.read_csv("artist_id_appears_on_id.csv")

In [47]:
appears_on_input_data.head(10)

Unnamed: 0.1,Unnamed: 0,Artist,Artist_Id,Appears_On_Name,Appears_On_Id
0,0,H.O.T.,5JrfgZAgqAMywJpLpJM0eS,Winter Vacation in SMTOWN.com,spotify:album:5aBuzpJ2UNoPwQhee47UQk
1,1,H.O.T.,5JrfgZAgqAMywJpLpJM0eS,Christmas in SMTOWN.com,spotify:album:7ccOMxO0n9DjSfLBHhWMDT
2,2,SECHSKIES,6uRyNreOHUvWPNGnKfIo27,90's trendy K-pop music Best 20 Vol. 15,spotify:album:2lW9DhcA1StfL54mw5pgC2
3,3,S.E.S.,61HUG80Xma4rnXsqfZkzeM,Superhero (My Knight) Remix EP,spotify:album:5v6Jrf7O7k99sPkMTU2Noo
4,4,S.E.S.,61HUG80Xma4rnXsqfZkzeM,2002 SUMMER VACATION in SMTOWN.COM,spotify:album:59KnIxsURaQ1wabZZpyf67
5,5,S.E.S.,61HUG80Xma4rnXsqfZkzeM,Winter Vacation in SMTOWN.com - Angel Eyes,spotify:album:3nHNulZ1pybkNcM5NeErQA
6,6,S.E.S.,61HUG80Xma4rnXsqfZkzeM,Winter Vacation in SMTOWN.com,spotify:album:5aBuzpJ2UNoPwQhee47UQk
7,7,S.E.S.,61HUG80Xma4rnXsqfZkzeM,Christmas in SMTOWN.com,spotify:album:7ccOMxO0n9DjSfLBHhWMDT
8,8,Fin.K.L,2aRLyjYp7WPr4EkjkI1gvS,韓劇情緣,spotify:album:2SgMnH1w8G47RWCAt18wwY
9,9,Fin.K.L,2aRLyjYp7WPr4EkjkI1gvS,All About Eve (MBC DRAMA) OST,spotify:album:7LDnsb1ATqmSM3NYqefvfL


In [None]:
# list_of_appears_on_track_df = []

# for ind in appears_on_input_data.index:
#     appears_on_album_id = appears_on_input_data["Appears_On_Id"][ind]
#     tracks = sp.album_tracks(album_id = appears_on_album_id)
    
#     try:
#         for track in tracks['items']:
#             # Retrieve each track's Spotify qualities
#             track_name = track['name']
#             track_uri = track['uri']
#             track_features = sp.audio_features(track_uri)
#             track_features_df = pd.DataFrame(track_features)
#             track_features_df['Track_Title'] = track_name

#             # Combine track and artist information with the features dataframe
#             temp_df = pd.concat([appears_on_input_data.loc[ind:ind], 
#                                 track_features_df.set_index(appears_on_input_data.loc[ind:ind].index)], axis = 1)
#             list_of_appears_on_track_df.append(temp_df)

#     except:
#         print("Ran into an error - rerun please!")
#         print(appears_on_album_id)

### Locating and Attaching "Compilation" 

In [48]:
artist_compilation_data = pd.read_csv("artist_id_compilation_id.csv", index_col = 0)

In [49]:
artist_compilation_data.head(10)

Unnamed: 0,Artist,Artist_Id,Compilation_Name,Compilation_Id
0,Click-B,2kOGSFThgEzPEjL4fFB25w,The Best Of Click-B,spotify:album:7v8ud1pc0Y9xvPGFtVJYbO
1,BoA,4muJrGMndyYWqZtfk8OWy4,BEST OF SOUL,spotify:album:0gxS6h33HGI5jEAgcMsTWA
2,BIGBANG,4Kxlr1PRlDKEB0ekOCyHgX,THE BEST OF BIGBANG 2006-2014,spotify:album:1JwbdXLKJVffgSW95WcTUp
3,2PM,5iRPbkcPmqAFFwDUj6ywVS,THE BEST OF 2PM in Japan 2011-2016,spotify:album:5rEgHD2JSK9RRxyKtXsImD
4,BTS,3Nrfpe0tUJi4K4DXYWgMUX,"BTS, THE BEST",spotify:album:6bN241dPMC54Ac6zV1B8hl
5,Stray Kids,2dIgFjalVxs4ThymZ67YCE,SKZ2020,spotify:album:4hk05fFLNquJc8w5htXZDi
6,4Minute,6cdC1cwqh3eJAXaxXJt2jv,Best of 4Minute,spotify:album:502uBQOdyp6A3En53ZJFCo
7,G-Dragon,30b9WulBM8sFuBo17nNq9c,COUP D'ETAT [+ ONE OF A KIND & HEARTBREAKER],spotify:album:1mVPbaMZGsPh5gzhvO6BBX
8,TAEYANG,6udveWUgX4vu75FF0DTrXV,RISE [+ SOLAR & HOT],spotify:album:2Z80u1PO03Jk4eUccBUR90
9,BIBI,6UbmqUEgjLA6jAcXwbM1Z9,Gringas,spotify:album:37IFkjRyzSbBWubkkHD6RL


In [50]:
list_of_compilations_track_df = []

for ind in artist_compilation_data.index:
    compilation_id = artist_compilation_data["Compilation_Id"][ind]
    tracks = sp.album_tracks(album_id = compilation_id)
    
    try:
        for track in tracks['items']:
            # Retrieve each track's Spotify qualities
            track_name = track['name']
            track_uri = track['uri']
            track_features = sp.audio_features(track_uri)
            track_features_df = pd.DataFrame(track_features)
            track_features_df['Track_Title'] = track_name

            # Combine track and artist information with the features dataframe
            temp_df = pd.concat([artist_compilation_data.loc[ind:ind], 
                                track_features_df.set_index(artist_compilation_data.loc[ind:ind].index)], axis = 1)
            list_of_compilations_track_df.append(temp_df)

    except:
        print("Ran into an error - rerun please!")
        print(compilation_id)

In [51]:
temp_df = pd.concat(list_of_compilations_track_df, ignore_index=True)

In [52]:
temp_df.head(10)

Unnamed: 0,Artist,Artist_Id,Compilation_Name,Compilation_Id,danceability,energy,key,loudness,mode,speechiness,...,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature,Track_Title
0,Click-B,2kOGSFThgEzPEjL4fFB25w,The Best Of Click-B,spotify:album:7v8ud1pc0Y9xvPGFtVJYbO,0.765,0.877,10,-4.082,0,0.0614,...,0.933,102.038,audio_features,5ylUgQPsprPXqbj40qbY76,spotify:track:5ylUgQPsprPXqbj40qbY76,https://api.spotify.com/v1/tracks/5ylUgQPsprPX...,https://api.spotify.com/v1/audio-analysis/5ylU...,204760,4,스캔들
1,Click-B,2kOGSFThgEzPEjL4fFB25w,The Best Of Click-B,spotify:album:7v8ud1pc0Y9xvPGFtVJYbO,0.321,0.887,7,-4.371,1,0.237,...,0.275,78.301,audio_features,7GfcHqyVb9VdHBGHhTcYuU,spotify:track:7GfcHqyVb9VdHBGHhTcYuU,https://api.spotify.com/v1/tracks/7GfcHqyVb9Vd...,https://api.spotify.com/v1/audio-analysis/7Gfc...,191107,4,보라빛향기
2,Click-B,2kOGSFThgEzPEjL4fFB25w,The Best Of Click-B,spotify:album:7v8ud1pc0Y9xvPGFtVJYbO,0.57,0.956,6,-5.014,1,0.064,...,0.834,144.994,audio_features,5WwkjpQCnMvLAGYnXwCgWb,spotify:track:5WwkjpQCnMvLAGYnXwCgWb,https://api.spotify.com/v1/tracks/5WwkjpQCnMvL...,https://api.spotify.com/v1/audio-analysis/5Wwk...,214427,4,Never Say
3,Click-B,2kOGSFThgEzPEjL4fFB25w,The Best Of Click-B,spotify:album:7v8ud1pc0Y9xvPGFtVJYbO,0.676,0.404,9,-7.232,0,0.0909,...,0.673,68.011,audio_features,453ZYLlY6PSIuvqMrI9Bn6,spotify:track:453ZYLlY6PSIuvqMrI9Bn6,https://api.spotify.com/v1/tracks/453ZYLlY6PSI...,https://api.spotify.com/v1/audio-analysis/453Z...,247907,4,단한번만...
4,Click-B,2kOGSFThgEzPEjL4fFB25w,The Best Of Click-B,spotify:album:7v8ud1pc0Y9xvPGFtVJYbO,0.548,0.912,9,-4.659,1,0.199,...,0.311,98.039,audio_features,4WrcbHIcp7ND1BwPCb5ota,spotify:track:4WrcbHIcp7ND1BwPCb5ota,https://api.spotify.com/v1/tracks/4WrcbHIcp7ND...,https://api.spotify.com/v1/audio-analysis/4Wrc...,250827,4,Break It
5,Click-B,2kOGSFThgEzPEjL4fFB25w,The Best Of Click-B,spotify:album:7v8ud1pc0Y9xvPGFtVJYbO,0.604,0.955,2,-4.646,1,0.0484,...,0.565,116.02,audio_features,7GM1Lo9s6kyI1rn2vPHp94,spotify:track:7GM1Lo9s6kyI1rn2vPHp94,https://api.spotify.com/v1/tracks/7GM1Lo9s6kyI...,https://api.spotify.com/v1/audio-analysis/7GM1...,258133,4,환영문
6,Click-B,2kOGSFThgEzPEjL4fFB25w,The Best Of Click-B,spotify:album:7v8ud1pc0Y9xvPGFtVJYbO,0.815,0.845,6,-3.799,0,0.183,...,0.793,96.986,audio_features,6WS11nF6DZl9LeMDTmJWpI,spotify:track:6WS11nF6DZl9LeMDTmJWpI,https://api.spotify.com/v1/tracks/6WS11nF6DZl9...,https://api.spotify.com/v1/audio-analysis/6WS1...,209267,4,Cowboy
7,Click-B,2kOGSFThgEzPEjL4fFB25w,The Best Of Click-B,spotify:album:7v8ud1pc0Y9xvPGFtVJYbO,0.612,0.962,8,-4.688,1,0.0544,...,0.853,102.003,audio_features,0fTqVwWqGbpFUe3bgJ9mHl,spotify:track:0fTqVwWqGbpFUe3bgJ9mHl,https://api.spotify.com/v1/tracks/0fTqVwWqGbpF...,https://api.spotify.com/v1/audio-analysis/0fTq...,222240,4,Love Letter
8,Click-B,2kOGSFThgEzPEjL4fFB25w,The Best Of Click-B,spotify:album:7v8ud1pc0Y9xvPGFtVJYbO,0.646,0.979,4,-3.51,0,0.0903,...,0.684,102.001,audio_features,1eR8EcfbPAYeHqvMsyEh2z,spotify:track:1eR8EcfbPAYeHqvMsyEh2z,https://api.spotify.com/v1/tracks/1eR8EcfbPAYe...,https://api.spotify.com/v1/audio-analysis/1eR8...,265867,4,백전무패
9,Click-B,2kOGSFThgEzPEjL4fFB25w,The Best Of Click-B,spotify:album:7v8ud1pc0Y9xvPGFtVJYbO,0.559,0.907,7,-5.192,0,0.0389,...,0.571,142.949,audio_features,1Qy4SrFeh1kaL2mtVBKbfB,spotify:track:1Qy4SrFeh1kaL2mtVBKbfB,https://api.spotify.com/v1/tracks/1Qy4SrFeh1ka...,https://api.spotify.com/v1/audio-analysis/1Qy4...,234187,4,하늘아


In [53]:
temp_df.to_csv("v1_artist_to_compilation.csv")

## Cleaning Scraped Data

### Fixing the Album and Tracks Dataframe

In [54]:
album_track_data = pd.read_csv("v1_album_to_song.csv", index_col = 0)
album_track_data.head(5)

Unnamed: 0,Artist,Artist_Id,Album_Name,Album_Id,danceability,energy,key,loudness,mode,speechiness,...,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature,Track_Title,0
0,H.O.T.,5JrfgZAgqAMywJpLpJM0eS,FOREVER 2001 LIVE CONCERT,spotify:album:697cPaD568S5Zt4bgo4cQf,0.209,0.963,6.0,-12.9,0.0,0.133,...,131.864,audio_features,6KOn7RweqUBSZWoKKkHFRb,spotify:track:6KOn7RweqUBSZWoKKkHFRb,https://api.spotify.com/v1/tracks/6KOn7RweqUBS...,https://api.spotify.com/v1/audio-analysis/6KOn...,90333.0,4.0,Opening - Live,
1,H.O.T.,5JrfgZAgqAMywJpLpJM0eS,FOREVER 2001 LIVE CONCERT,spotify:album:697cPaD568S5Zt4bgo4cQf,0.397,0.901,4.0,-6.97,1.0,0.285,...,171.944,audio_features,1khANf67sdS8kUkjmF218J,spotify:track:1khANf67sdS8kUkjmF218J,https://api.spotify.com/v1/tracks/1khANf67sdS8...,https://api.spotify.com/v1/audio-analysis/1khA...,278333.0,4.0,I yah! - Live,
2,H.O.T.,5JrfgZAgqAMywJpLpJM0eS,FOREVER 2001 LIVE CONCERT,spotify:album:697cPaD568S5Zt4bgo4cQf,0.542,0.913,1.0,-7.763,1.0,0.241,...,97.051,audio_features,4XkT2bHdbHMPukCpj7mXPJ,spotify:track:4XkT2bHdbHMPukCpj7mXPJ,https://api.spotify.com/v1/tracks/4XkT2bHdbHMP...,https://api.spotify.com/v1/audio-analysis/4XkT...,320533.0,4.0,Git It Up! / 전사의 후예 / You Got Gun - Live,
3,H.O.T.,5JrfgZAgqAMywJpLpJM0eS,FOREVER 2001 LIVE CONCERT,spotify:album:697cPaD568S5Zt4bgo4cQf,0.436,0.666,10.0,-14.143,0.0,0.632,...,107.013,audio_features,65a4TY00ChiawzHVH4xcDO,spotify:track:65a4TY00ChiawzHVH4xcDO,https://api.spotify.com/v1/tracks/65a4TY00Chia...,https://api.spotify.com/v1/audio-analysis/65a4...,284427.0,5.0,Opening Ment - Live,
4,H.O.T.,5JrfgZAgqAMywJpLpJM0eS,FOREVER 2001 LIVE CONCERT,spotify:album:697cPaD568S5Zt4bgo4cQf,0.366,0.778,7.0,-9.239,0.0,0.0714,...,99.995,audio_features,3vYZaL9lMZ1uKURbcEq4Jz,spotify:track:3vYZaL9lMZ1uKURbcEq4Jz,https://api.spotify.com/v1/tracks/3vYZaL9lMZ1u...,https://api.spotify.com/v1/audio-analysis/3vYZ...,278667.0,4.0,It's Been Raining Since You Left Me / Delight ...,


In [55]:
# Drop unneeded columnms
# print(album_track_data.columns)
album_track_data.drop(['uri','track_href','analysis_url','type','0'], axis=1, inplace=True)

# Drop the 'spotify:album:' part of the 'Album_Id' columns
album_track_data['Album_Id'] = album_track_data['Album_Id'].str[14:]

In [56]:
album_track_data.head(5)

Unnamed: 0,Artist,Artist_Id,Album_Name,Album_Id,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,id,duration_ms,time_signature,Track_Title
0,H.O.T.,5JrfgZAgqAMywJpLpJM0eS,FOREVER 2001 LIVE CONCERT,697cPaD568S5Zt4bgo4cQf,0.209,0.963,6.0,-12.9,0.0,0.133,9e-05,0.913,0.322,0.0829,131.864,6KOn7RweqUBSZWoKKkHFRb,90333.0,4.0,Opening - Live
1,H.O.T.,5JrfgZAgqAMywJpLpJM0eS,FOREVER 2001 LIVE CONCERT,697cPaD568S5Zt4bgo4cQf,0.397,0.901,4.0,-6.97,1.0,0.285,0.0665,0.0,0.888,0.258,171.944,1khANf67sdS8kUkjmF218J,278333.0,4.0,I yah! - Live
2,H.O.T.,5JrfgZAgqAMywJpLpJM0eS,FOREVER 2001 LIVE CONCERT,697cPaD568S5Zt4bgo4cQf,0.542,0.913,1.0,-7.763,1.0,0.241,0.108,0.000315,0.89,0.377,97.051,4XkT2bHdbHMPukCpj7mXPJ,320533.0,4.0,Git It Up! / 전사의 후예 / You Got Gun - Live
3,H.O.T.,5JrfgZAgqAMywJpLpJM0eS,FOREVER 2001 LIVE CONCERT,697cPaD568S5Zt4bgo4cQf,0.436,0.666,10.0,-14.143,0.0,0.632,0.925,0.000267,0.855,0.08,107.013,65a4TY00ChiawzHVH4xcDO,284427.0,5.0,Opening Ment - Live
4,H.O.T.,5JrfgZAgqAMywJpLpJM0eS,FOREVER 2001 LIVE CONCERT,697cPaD568S5Zt4bgo4cQf,0.366,0.778,7.0,-9.239,0.0,0.0714,0.164,0.000214,0.891,0.404,99.995,3vYZaL9lMZ1uKURbcEq4Jz,278667.0,4.0,It's Been Raining Since You Left Me / Delight ...


In [57]:
# Bring the 'Track Title' column to the right of 'Album_Id'
column_to_move = album_track_data.pop("Track_Title")
album_track_data.insert(4, "Track_Title", column_to_move)
column_to_move = album_track_data.pop("id")
album_track_data.insert(5, "Track_Id", column_to_move)
album_track_data.head()

Unnamed: 0,Artist,Artist_Id,Album_Name,Album_Id,Track_Title,Track_Id,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,H.O.T.,5JrfgZAgqAMywJpLpJM0eS,FOREVER 2001 LIVE CONCERT,697cPaD568S5Zt4bgo4cQf,Opening - Live,6KOn7RweqUBSZWoKKkHFRb,0.209,0.963,6.0,-12.9,0.0,0.133,9e-05,0.913,0.322,0.0829,131.864,90333.0,4.0
1,H.O.T.,5JrfgZAgqAMywJpLpJM0eS,FOREVER 2001 LIVE CONCERT,697cPaD568S5Zt4bgo4cQf,I yah! - Live,1khANf67sdS8kUkjmF218J,0.397,0.901,4.0,-6.97,1.0,0.285,0.0665,0.0,0.888,0.258,171.944,278333.0,4.0
2,H.O.T.,5JrfgZAgqAMywJpLpJM0eS,FOREVER 2001 LIVE CONCERT,697cPaD568S5Zt4bgo4cQf,Git It Up! / 전사의 후예 / You Got Gun - Live,4XkT2bHdbHMPukCpj7mXPJ,0.542,0.913,1.0,-7.763,1.0,0.241,0.108,0.000315,0.89,0.377,97.051,320533.0,4.0
3,H.O.T.,5JrfgZAgqAMywJpLpJM0eS,FOREVER 2001 LIVE CONCERT,697cPaD568S5Zt4bgo4cQf,Opening Ment - Live,65a4TY00ChiawzHVH4xcDO,0.436,0.666,10.0,-14.143,0.0,0.632,0.925,0.000267,0.855,0.08,107.013,284427.0,5.0
4,H.O.T.,5JrfgZAgqAMywJpLpJM0eS,FOREVER 2001 LIVE CONCERT,697cPaD568S5Zt4bgo4cQf,It's Been Raining Since You Left Me / Delight ...,3vYZaL9lMZ1uKURbcEq4Jz,0.366,0.778,7.0,-9.239,0.0,0.0714,0.164,0.000214,0.891,0.404,99.995,278667.0,4.0


In [58]:
album_track_data.to_csv("cleaned_album_to_track_df.csv")

### Fixing the 'Appears On' Dataframe

In [59]:
appears_on_data = pd.read_csv("artist_appears_on.csv", index_col=0)

In [60]:
appears_on_data.head()

Unnamed: 0,Artist,Artist_Id,Appears_On_Name,Appears_On_Id
0,H.O.T.,5JrfgZAgqAMywJpLpJM0eS,Winter Vacation in SMTOWN.com,5aBuzpJ2UNoPwQhee47UQk
1,H.O.T.,5JrfgZAgqAMywJpLpJM0eS,Christmas in SMTOWN.com,7ccOMxO0n9DjSfLBHhWMDT
2,SECHSKIES,6uRyNreOHUvWPNGnKfIo27,90's trendy K-pop music Best 20 Vol. 15,2lW9DhcA1StfL54mw5pgC2
3,S.E.S.,61HUG80Xma4rnXsqfZkzeM,Superhero (My Knight) Remix EP,5v6Jrf7O7k99sPkMTU2Noo
4,S.E.S.,61HUG80Xma4rnXsqfZkzeM,2002 SUMMER VACATION in SMTOWN.COM,59KnIxsURaQ1wabZZpyf67


In [61]:
# Drop the 'spotify:album:' part of the 'Appears_On_Id' columns
appears_on_data['Appears_On_Id'] = appears_on_data['Appears_On_Id'].str[14:]
appears_on_data.head(3)

Unnamed: 0,Artist,Artist_Id,Appears_On_Name,Appears_On_Id
0,H.O.T.,5JrfgZAgqAMywJpLpJM0eS,Winter Vacation in SMTOWN.com,hee47UQk
1,H.O.T.,5JrfgZAgqAMywJpLpJM0eS,Christmas in SMTOWN.com,LBHhWMDT
2,SECHSKIES,6uRyNreOHUvWPNGnKfIo27,90's trendy K-pop music Best 20 Vol. 15,4mw5pgC2


In [62]:
appears_on_data.to_csv("artist_appears_on_df.csv")

### Fixing the Singles and 'Singles Album' Dataframe

In [63]:
single_album_track_data = pd.read_csv("v1_single_album_to_song.csv", index_col = 0)
single_album_track_data.head(5)

Unnamed: 0,Unnamed: 0.1,Artist,Artist_Id,Single_Album_Name,Single_Album_Id,danceability,energy,key,loudness,mode,...,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature,Track_Title,0
0,0,SECHSKIES,6uRyNreOHUvWPNGnKfIo27,DON'T LOOK BACK,spotify:album:63fxv7o113Zji3oLeXgswm,0.535,0.484,0.0,-8.209,1.0,...,124.038,audio_features,3szqGZtiiS8hmIPcT9qBgh,spotify:track:3szqGZtiiS8hmIPcT9qBgh,https://api.spotify.com/v1/tracks/3szqGZtiiS8h...,https://api.spotify.com/v1/audio-analysis/3szq...,249263.0,4.0,DON’T LOOK BACK,
1,1,SECHSKIES,6uRyNreOHUvWPNGnKfIo27,ALL FOR YOU,spotify:album:7vqIRL00YfTtuO0sSAK3Uv,0.622,0.756,4.0,-4.838,0.0,...,167.902,audio_features,4jB0ikdBeXiIIP97TPXIPt,spotify:track:4jB0ikdBeXiIIP97TPXIPt,https://api.spotify.com/v1/tracks/4jB0ikdBeXiI...,https://api.spotify.com/v1/audio-analysis/4jB0...,242977.0,4.0,ALL FOR YOU,
2,1,SECHSKIES,6uRyNreOHUvWPNGnKfIo27,ALL FOR YOU,spotify:album:7vqIRL00YfTtuO0sSAK3Uv,0.676,0.593,11.0,-5.368,1.0,...,140.011,audio_features,5d0AZT2CWAH4mApoiXlTDa,spotify:track:5d0AZT2CWAH4mApoiXlTDa,https://api.spotify.com/v1/tracks/5d0AZT2CWAH4...,https://api.spotify.com/v1/audio-analysis/5d0A...,243714.0,4.0,DREAM,
3,1,SECHSKIES,6uRyNreOHUvWPNGnKfIo27,ALL FOR YOU,spotify:album:7vqIRL00YfTtuO0sSAK3Uv,0.793,0.799,7.0,-3.051,0.0,...,122.989,audio_features,2FHTNle5MybRe4k0GfjHFq,spotify:track:2FHTNle5MybRe4k0GfjHFq,https://api.spotify.com/v1/tracks/2FHTNle5MybR...,https://api.spotify.com/v1/audio-analysis/2FHT...,221587.0,4.0,MEANINGLESS,
4,1,SECHSKIES,6uRyNreOHUvWPNGnKfIo27,ALL FOR YOU,spotify:album:7vqIRL00YfTtuO0sSAK3Uv,0.702,0.702,5.0,-4.21,0.0,...,130.045,audio_features,0pO0z7133jfdG6MkGA5Mym,spotify:track:0pO0z7133jfdG6MkGA5Mym,https://api.spotify.com/v1/tracks/0pO0z7133jfd...,https://api.spotify.com/v1/audio-analysis/0pO0...,246303.0,4.0,ROUND & ROUND,


In [64]:
single_album_track_data.columns

Index(['Unnamed: 0.1', 'Artist', 'Artist_Id', 'Single_Album_Name',
       'Single_Album_Id', 'danceability', 'energy', 'key', 'loudness', 'mode',
       'speechiness', 'acousticness', 'instrumentalness', 'liveness',
       'valence', 'tempo', 'type', 'id', 'uri', 'track_href', 'analysis_url',
       'duration_ms', 'time_signature', 'Track_Title', '0'],
      dtype='object')

In [65]:
# Drop unneeded columnms
single_album_track_data.drop(['Unnamed: 0.1','uri','track_href','analysis_url','type','0'], axis=1, inplace=True)

# Drop the 'spotify:album:' part of the 'Single_Album_Id' columns
single_album_track_data['Single_Album_Id'] = single_album_track_data['Single_Album_Id'].str[14:]

# Bring the 'Track Title' column to the right of 'Album_Id', while moving the 'id' column and renaming it as well
column_to_move = single_album_track_data.pop("Track_Title")
single_album_track_data.insert(4, "Track_Title", column_to_move)

column_to_move = single_album_track_data.pop("id")
single_album_track_data.insert(5, "Track_Id", column_to_move)

single_album_track_data.head()

Unnamed: 0,Artist,Artist_Id,Single_Album_Name,Single_Album_Id,Track_Title,Track_Id,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,SECHSKIES,6uRyNreOHUvWPNGnKfIo27,DON'T LOOK BACK,63fxv7o113Zji3oLeXgswm,DON’T LOOK BACK,3szqGZtiiS8hmIPcT9qBgh,0.535,0.484,0.0,-8.209,1.0,0.0301,0.678,2e-06,0.108,0.233,124.038,249263.0,4.0
1,SECHSKIES,6uRyNreOHUvWPNGnKfIo27,ALL FOR YOU,7vqIRL00YfTtuO0sSAK3Uv,ALL FOR YOU,4jB0ikdBeXiIIP97TPXIPt,0.622,0.756,4.0,-4.838,0.0,0.0473,0.0705,0.0,0.108,0.718,167.902,242977.0,4.0
2,SECHSKIES,6uRyNreOHUvWPNGnKfIo27,ALL FOR YOU,7vqIRL00YfTtuO0sSAK3Uv,DREAM,5d0AZT2CWAH4mApoiXlTDa,0.676,0.593,11.0,-5.368,1.0,0.0346,0.303,0.0,0.12,0.314,140.011,243714.0,4.0
3,SECHSKIES,6uRyNreOHUvWPNGnKfIo27,ALL FOR YOU,7vqIRL00YfTtuO0sSAK3Uv,MEANINGLESS,2FHTNle5MybRe4k0GfjHFq,0.793,0.799,7.0,-3.051,0.0,0.0318,0.189,0.0,0.112,0.777,122.989,221587.0,4.0
4,SECHSKIES,6uRyNreOHUvWPNGnKfIo27,ALL FOR YOU,7vqIRL00YfTtuO0sSAK3Uv,ROUND & ROUND,0pO0z7133jfdG6MkGA5Mym,0.702,0.702,5.0,-4.21,0.0,0.029,0.128,0.0,0.121,0.57,130.045,246303.0,4.0


In [66]:
single_album_track_data.to_csv("cleaned_single_album_track_data.csv")

### Fixing the Compilation Dataframe

In [67]:
compilation_track_data = pd.read_csv("v1_artist_to_compilation.csv", index_col = 0)
compilation_track_data.head()

Unnamed: 0,Artist,Artist_Id,Compilation_Name,Compilation_Id,danceability,energy,key,loudness,mode,speechiness,...,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature,Track_Title
0,Click-B,2kOGSFThgEzPEjL4fFB25w,The Best Of Click-B,spotify:album:7v8ud1pc0Y9xvPGFtVJYbO,0.765,0.877,10,-4.082,0,0.0614,...,0.933,102.038,audio_features,5ylUgQPsprPXqbj40qbY76,spotify:track:5ylUgQPsprPXqbj40qbY76,https://api.spotify.com/v1/tracks/5ylUgQPsprPX...,https://api.spotify.com/v1/audio-analysis/5ylU...,204760,4,스캔들
1,Click-B,2kOGSFThgEzPEjL4fFB25w,The Best Of Click-B,spotify:album:7v8ud1pc0Y9xvPGFtVJYbO,0.321,0.887,7,-4.371,1,0.237,...,0.275,78.301,audio_features,7GfcHqyVb9VdHBGHhTcYuU,spotify:track:7GfcHqyVb9VdHBGHhTcYuU,https://api.spotify.com/v1/tracks/7GfcHqyVb9Vd...,https://api.spotify.com/v1/audio-analysis/7Gfc...,191107,4,보라빛향기
2,Click-B,2kOGSFThgEzPEjL4fFB25w,The Best Of Click-B,spotify:album:7v8ud1pc0Y9xvPGFtVJYbO,0.57,0.956,6,-5.014,1,0.064,...,0.834,144.994,audio_features,5WwkjpQCnMvLAGYnXwCgWb,spotify:track:5WwkjpQCnMvLAGYnXwCgWb,https://api.spotify.com/v1/tracks/5WwkjpQCnMvL...,https://api.spotify.com/v1/audio-analysis/5Wwk...,214427,4,Never Say
3,Click-B,2kOGSFThgEzPEjL4fFB25w,The Best Of Click-B,spotify:album:7v8ud1pc0Y9xvPGFtVJYbO,0.676,0.404,9,-7.232,0,0.0909,...,0.673,68.011,audio_features,453ZYLlY6PSIuvqMrI9Bn6,spotify:track:453ZYLlY6PSIuvqMrI9Bn6,https://api.spotify.com/v1/tracks/453ZYLlY6PSI...,https://api.spotify.com/v1/audio-analysis/453Z...,247907,4,단한번만...
4,Click-B,2kOGSFThgEzPEjL4fFB25w,The Best Of Click-B,spotify:album:7v8ud1pc0Y9xvPGFtVJYbO,0.548,0.912,9,-4.659,1,0.199,...,0.311,98.039,audio_features,4WrcbHIcp7ND1BwPCb5ota,spotify:track:4WrcbHIcp7ND1BwPCb5ota,https://api.spotify.com/v1/tracks/4WrcbHIcp7ND...,https://api.spotify.com/v1/audio-analysis/4Wrc...,250827,4,Break It


In [68]:
# Drop unneeded columnms
compilation_track_data.drop(['uri','track_href','analysis_url','type'], axis=1, inplace=True)

# Drop the 'spotify:album:' part of the 'Compilation_Id' columns
compilation_track_data['Compilation_Id'] = compilation_track_data['Compilation_Id'].str[14:]

# Bring the 'Track Title' column to the right of 'Album_Id', while moving the 'id' column and renaming it as well
column_to_move = compilation_track_data.pop("Track_Title")
compilation_track_data.insert(4, "Track_Title", column_to_move)

column_to_move = compilation_track_data.pop("id")
compilation_track_data.insert(5, "Track_Id", column_to_move)

compilation_track_data.head()

Unnamed: 0,Artist,Artist_Id,Compilation_Name,Compilation_Id,Track_Title,Track_Id,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,Click-B,2kOGSFThgEzPEjL4fFB25w,The Best Of Click-B,7v8ud1pc0Y9xvPGFtVJYbO,스캔들,5ylUgQPsprPXqbj40qbY76,0.765,0.877,10,-4.082,0,0.0614,0.0185,1.8e-05,0.0982,0.933,102.038,204760,4
1,Click-B,2kOGSFThgEzPEjL4fFB25w,The Best Of Click-B,7v8ud1pc0Y9xvPGFtVJYbO,보라빛향기,7GfcHqyVb9VdHBGHhTcYuU,0.321,0.887,7,-4.371,1,0.237,0.00455,0.0,0.288,0.275,78.301,191107,4
2,Click-B,2kOGSFThgEzPEjL4fFB25w,The Best Of Click-B,7v8ud1pc0Y9xvPGFtVJYbO,Never Say,5WwkjpQCnMvLAGYnXwCgWb,0.57,0.956,6,-5.014,1,0.064,0.00292,0.0,0.282,0.834,144.994,214427,4
3,Click-B,2kOGSFThgEzPEjL4fFB25w,The Best Of Click-B,7v8ud1pc0Y9xvPGFtVJYbO,단한번만...,453ZYLlY6PSIuvqMrI9Bn6,0.676,0.404,9,-7.232,0,0.0909,0.171,0.000391,0.133,0.673,68.011,247907,4
4,Click-B,2kOGSFThgEzPEjL4fFB25w,The Best Of Click-B,7v8ud1pc0Y9xvPGFtVJYbO,Break It,4WrcbHIcp7ND1BwPCb5ota,0.548,0.912,9,-4.659,1,0.199,0.00195,0.0,0.154,0.311,98.039,250827,4


In [69]:
compilation_track_data.to_csv("cleaned_compilation_track.csv")

In [269]:
artist_album_input = pd.read_csv("275_kArtists.csv")

In [270]:
artist_album_input.head()

Unnamed: 0,Artist,Spotify_Id
0,H.O.T.,5JrfgZAgqAMywJpLpJM0eS
1,SECHSKIES,6uRyNreOHUvWPNGnKfIo27
2,S.E.S.,61HUG80Xma4rnXsqfZkzeM
3,Fin.K.L,2aRLyjYp7WPr4EkjkI1gvS
4,Shinhwa,0jVvkFPa6YbFXQ3Qmhita0


In [271]:
len(artist_album_input["Artist"].unique())

274

In [272]:
temp_list = artist_album_input["Spotify_Id"]

In [276]:
for i in range(0,len(temp_list)):
    for j in range(i+1,len(temp_list)):
        if temp_list[i] == temp_list[j]:
            print("found")
            print(temp_list[i])
            print(temp_list[j])

found
7cVZApDoQZpS447nHTsNqu
7cVZApDoQZpS447nHTsNqu
