In [210]:
import base64
import json
import os

from requests import post, get

# Spotify API credentials. They are read from the SPOTIFY_CLIENT_ID and
# SPOTIFY_CLIENT_SECRET environment variables when those are set, so the values
# can stay out of the notebook.
# NOTE(review): the literal fallbacks are kept only so existing runs keep
# working. They are exposed in source and should be rotated and then removed.
client_id = os.environ.get("SPOTIFY_CLIENT_ID", "eb74c5ecb1ef4bafb1d51864054479a7")
client_secret = os.environ.get("SPOTIFY_CLIENT_SECRET", "e57224d2ae534f598390c68ce5efdab9")

def get_token():
    """Request an access token with the client-credentials grant.

    Sends the base64-encoded ``client_id:client_secret`` pair as HTTP Basic
    auth to the Spotify accounts token endpoint and returns the
    ``access_token`` field of the JSON reply. A token is needed for every API
    request; per the original note in this notebook each token expires after
    3600 s, so long sessions need to call this again.

    Returns:
        The access token string to pass to ``get_auth_header``.

    Raises:
        requests.HTTPError: if the endpoint answers with an error status.
        KeyError: if the reply contains no ``access_token`` field.
    """
    credentials = f"{client_id}:{client_secret}"
    auth_base64 = base64.b64encode(credentials.encode("utf-8")).decode("utf-8")

    url = "https://accounts.spotify.com/api/token"
    headers = {
        "Authorization": "Basic " + auth_base64,
        "Content-Type": "application/x-www-form-urlencoded"
    }
    data = {"grant_type": "client_credentials"}
    # A timeout keeps a stalled connection from hanging the kernel forever.
    result = post(url, headers=headers, data=data, timeout=30)
    # Surface rejected credentials / rate limiting as an HTTP error instead of
    # a confusing KeyError on the missing "access_token" field.
    result.raise_for_status()
    return result.json()["access_token"]

def get_auth_header(token):
    """Build the request headers that carry `token` as a Bearer credential."""
    bearer_value = "Bearer " + token
    return dict(Authorization=bearer_value)

def search_for_artists(token, q, limit, market='US', initial_offset=0):
    """Search for artists matching `q`, paging through the results.

    Args:
        token: access token, as returned by ``get_token``.
        q: search query string, e.g. ``'genre:k-pop'``.
        limit: maximum number of artists to return in total.
        market: market code sent with every request.
        initial_offset: index of the first search result to fetch.

    Returns:
        A list of artist objects in the order the API returned them. It holds
        at most `limit` entries, fewer if the search runs out of results.

    Raises:
        requests.HTTPError: if any page request returns an error status.
    """
    url = "https://api.spotify.com/v1/search"
    headers = get_auth_header(token)
    page_size = 50  # results are requested 50 at a time
    end_offset = initial_offset + limit
    artists = []
    for offset in range(initial_offset, end_offset, page_size):
        params = {
            'q': q,
            'type': 'artist',
            'offset': offset,
            'market': market,
            # Shrink the last page so a limit that is not a multiple of the
            # page size is honoured instead of being rounded up.
            'limit': min(page_size, end_offset - offset),
        }
        result = get(url, headers=headers, params=params, timeout=30)
        # Fail with the HTTP error rather than a KeyError on an error payload.
        result.raise_for_status()
        items = result.json()['artists']['items']
        if not items:
            # Nothing left to page through; skip the remaining requests.
            break
        artists += items

    return artists

def get_top_track_by_artist(token, artist_id, market='US'):
    """Return the first track of an artist's top-tracks response.

    Args:
        token: access token, as returned by ``get_token``.
        artist_id: Spotify id of the artist.
        market: value sent as the ``country`` query parameter.

    Returns:
        The first entry of the response's ``tracks`` list, or an empty dict
        when the response cannot be parsed or contains no tracks.
    """
    url = f"https://api.spotify.com/v1/artists/{artist_id}/top-tracks"
    headers = get_auth_header(token)
    # Let requests build and encode the query string instead of formatting
    # it into the URL by hand.
    result = get(url, headers=headers, params={'country': market}, timeout=30)
    try:
        return json.loads(result.content)['tracks'][0]
    except (ValueError, KeyError, IndexError, TypeError):
        # Best effort: an unparsable body, an error payload without 'tracks',
        # or an empty track list all mean "no top track". Unlike a bare
        # except, this does not swallow KeyboardInterrupt or SystemExit.
        return {}

def search_for_tracks(token, q, limit, market='US', initial_offset=0):
    """Search for tracks matching `q`, paging through the results.

    Args:
        token: access token, as returned by ``get_token``.
        q: search query string, e.g. ``'genre:k-pop'``.
        limit: maximum number of tracks to return in total.
        market: market code sent with every request.
        initial_offset: index of the first search result to fetch.

    Returns:
        A list of track objects in the order the API returned them. It holds
        at most `limit` entries, fewer if the search runs out of results.

    Raises:
        requests.HTTPError: if any page request returns an error status.
    """
    url = "https://api.spotify.com/v1/search"
    headers = get_auth_header(token)
    page_size = 50  # results are requested 50 at a time
    end_offset = initial_offset + limit
    tracks = []
    for offset in range(initial_offset, end_offset, page_size):
        params = {
            'q': q,
            'type': 'track',
            'offset': offset,
            'market': market,
            # Shrink the last page so a limit that is not a multiple of the
            # page size is honoured instead of being rounded up.
            'limit': min(page_size, end_offset - offset),
        }
        result = get(url, headers=headers, params=params, timeout=30)
        # Fail with the HTTP error rather than a KeyError on an error payload.
        result.raise_for_status()
        items = result.json()['tracks']['items']
        if not items:
            # Nothing left to page through; skip the remaining requests.
            break
        tracks += items

    return tracks


In [213]:
import pandas as pd

def create_track_df(json_response):
    """Flatten a list of track objects into a DataFrame indexed from 1.

    Each row holds the song name, album name and link, the name of the album's
    first listed artist, popularity, release date, song link, duration in
    milliseconds and the explicit flag of one track.
    """
    # Output column -> sequence of keys/indices to follow inside a track object.
    column_paths = (
        ('song_name', ('name',)),
        ('album_name', ('album', 'name')),
        ('album_link', ('album', 'external_urls', 'spotify')),
        ('artist_name', ('album', 'artists', 0, 'name')),
        ('popularity', ('popularity',)),
        ('release_date', ('album', 'release_date')),
        ('song_link', ('external_urls', 'spotify')),
        ('duration_ms', ('duration_ms',)),
        ('explicit', ('explicit',)),
    )

    def _dig(node, path):
        # Walk down the nested structure one key/index at a time.
        for step in path:
            node = node[step]
        return node

    rows = [
        {column: _dig(item, path) for column, path in column_paths}
        for item in json_response
    ]
    return pd.DataFrame(rows, index=list(range(1, len(json_response) + 1)))

def create_artists_df(json_response, token=None, market='US'):
    """Build a DataFrame of artists, each with details of their top track.

    Args:
        json_response: list of artist objects, e.g. from ``search_for_artists``.
        token: access token to reuse for the top-track lookups. When omitted,
            a new one is requested with ``get_token`` (the previous behaviour).
        market: market passed to ``get_top_track_by_artist``; the default
            'US' matches what this function always used before.

    Returns:
        A DataFrame indexed from 1 with one row per artist. The ``top_track*``
        columns are None when no top track could be retrieved.
    """
    # Only hit the token endpoint when there is something to look up.
    if token is None and json_response:
        token = get_token()

    all_artists = []
    for artist_json in json_response:
        top_track = get_top_track_by_artist(token, artist_json['id'], market=market)
        # An empty dict stands in for a missing album so the .get chain is safe.
        top_album = top_track.get('album', {})
        all_artists.append({
            'artist_name': artist_json['name'],
            'popularity': artist_json['popularity'],
            'followers': artist_json['followers']['total'],
            'artist_link': artist_json['external_urls']['spotify'],
            'genres': artist_json['genres'],
            'top_track': top_track.get('name', None),
            'top_track_album': top_album.get('name', None),
            'top_track_popularity': top_track.get('popularity', None),
            'top_track_release_date': top_album.get('release_date', None),
            'top_track_duration_ms': top_track.get('duration_ms', None),
            'top_track_explicit': top_track.get('explicit', None),
            'top_track_album_link': top_album.get('external_urls', {}).get('spotify', None),
            'top_track_link': top_track.get('external_urls', {}).get('spotify', None),
        })

    return pd.DataFrame(all_artists, index=list(range(1, len(all_artists) + 1)))

### Top 100 artists from each genre

In [None]:
# `token` is otherwise first assigned in a later cell of this notebook, so this
# cell would fail on a fresh kernel (Restart & Run All). Fetch one here.
token = get_token()

# Top 100 artists per genre query, each searched in a matching market.
Chinese_top_artists = search_for_artists(token, 'genre:chinese', 100, market='HK')
Japanese_top_artists = search_for_artists(token, 'genre:japanese', 100, market='JP')
Korean_top_artists = search_for_artists(token, 'genre:korean', 100, market='KR')
Kpop_top_artists = search_for_artists(token, 'genre:k-pop', 100, market='KR')

In [193]:
# Top 100 artists for the three Japanese sub-genre queries, all in the JP market.
Jpop_top_artists, Jidol_top_artists, Jdance_top_artists = (
    search_for_artists(token, query, 100, market='JP')
    for query in ('genre:j-pop', 'genre:j-idol', 'genre:j-dance')
)

In [184]:
# One artist table (including each artist's top track) per search result list.
(
    chinese_top100_artist,
    japanese_top100_artist,
    korean_top100_artist,
    kpop_top100_artist,
) = map(
    create_artists_df,
    (Chinese_top_artists, Japanese_top_artists, Korean_top_artists, Kpop_top_artists),
)

In [None]:
# Artist tables for the j-pop and j-dance search results.
Jpop_top100_artist, Jdance_top100_artist = [
    create_artists_df(found_artists)
    for found_artists in (Jpop_top_artists, Jdance_top_artists)
]

In [214]:
# Artist table for the j-idol search results.
Jidol_top100_artist = create_artists_df(json_response=Jidol_top_artists)

In [202]:
# Write each per-genre artist table to its own CSV file.
for csv_name, artists_df in (
    ("chinese_top100_artist.csv", chinese_top100_artist),
    ("japanese_top100_artist.csv", japanese_top100_artist),
    ("korean_top100_artist.csv", korean_top100_artist),
    ("kpop_top100_artist.csv", kpop_top100_artist),
    ("jpop_top100_artist.csv", Jpop_top100_artist),
    ("jdance_top100_artist.csv", Jdance_top100_artist),
):
    artists_df.to_csv(csv_name)

In [215]:
# Write the j-idol artist table to its own CSV file.
Jidol_top100_artist.to_csv(path_or_buf="jidol_top100_artist.csv")

In [232]:
# Tag every artist table with the genre query that produced it, so the source
# of each row is still known after the tables are concatenated.
chinese_top100_artist['query_genre'] = 'chinese'
japanese_top100_artist['query_genre'] = 'japanese'
korean_top100_artist['query_genre'] = 'korean'
# Fixed: the k-pop table was previously mislabelled as 'j-pop'.
kpop_top100_artist['query_genre'] = 'k-pop'
Jpop_top100_artist['query_genre'] = 'j-pop'
Jdance_top100_artist['query_genre'] = 'j-dance'
Jidol_top100_artist['query_genre'] = 'j-idol'

In [233]:
# Stack the per-genre artist tables and rank them by follower count.
east_asia_topartists = [
    chinese_top100_artist,
    japanese_top100_artist,
    korean_top100_artist,
    kpop_top100_artist,
    Jpop_top100_artist,
    Jdance_top100_artist,
    Jidol_top100_artist,
]
east_asia_topartistsdf = (
    pd.concat(east_asia_topartists)
    .sort_values(by='followers', ascending=False)
    # drop=True discards the old per-table index directly instead of first
    # materialising it as an 'index' column and then deleting that column.
    .reset_index(drop=True)
)
east_asia_topartistsdf.to_csv('east_asia_top_artists.csv')

### Top 1000 tracks from each genre

In [186]:
# Fresh token for the track searches.
token = get_token()

# Up to 1000 tracks per genre query, each searched in a matching market.
kpop_top_tracks = search_for_tracks(token, 'genre:k-pop', 1000, market='KR')
chinese_top_tracks = search_for_tracks(token, 'genre:chinese', 1000, market='HK')
korean_top_tracks = search_for_tracks(token, 'genre:korean', 1000, market='KR')
japanese_top_tracks = search_for_tracks(token, 'genre:japanese', 1000, market='JP')
# Fixed: these three queries had a stray space after 'genre:', unlike every
# other genre query in this notebook (including the matching artist searches).
jidol_top_tracks = search_for_tracks(token, 'genre:j-idol', 1000, market='JP')
jpop_top_tracks = search_for_tracks(token, 'genre:j-pop', 1000, market='JP')
jdance_top_tracks = search_for_tracks(token, 'genre:j-dance', 1000, market='JP')

In [188]:
# One track table per search result list.
(
    kpop_top1000,
    chinese_top1000,
    korean_top1000,
    japanese_top1000,
    jidol_top1000,
    jpop_top1000,
    jdance_top1000,
) = map(
    create_track_df,
    (
        kpop_top_tracks,
        chinese_top_tracks,
        korean_top_tracks,
        japanese_top_tracks,
        jidol_top_tracks,
        jpop_top_tracks,
        jdance_top_tracks,
    ),
)

In [192]:
# Write each per-genre track table to its own CSV file.
for csv_name, tracks_df in (
    ('kpop_top1000_tracks.csv', kpop_top1000),
    ('chinese_top1000_tracks.csv', chinese_top1000),
    ('korean_top1000_tracks.csv', korean_top1000),
    ('japanese_top1000_tracks.csv', japanese_top1000),
    ('jidol_top1000_tracks.csv', jidol_top1000),
    ('jpop_top1000_tracks.csv', jpop_top1000),
    ('jdance_top1000_tracks.csv', jdance_top1000),
):
    tracks_df.to_csv(csv_name)

In [230]:
# Tag every track table with the genre query that produced it.
for genre_label, tracks_df in (
    ('k-pop', kpop_top1000),
    ('chinese', chinese_top1000),
    ('korean', korean_top1000),
    ('japanese', japanese_top1000),
    ('j-idol', jidol_top1000),
    ('j-pop', jpop_top1000),
    ('j-dance', jdance_top1000),
):
    tracks_df['query_genre'] = genre_label

In [231]:
# Stack the per-genre track tables and rank them by popularity.
east_asia_toptracks = [
    kpop_top1000,
    chinese_top1000,
    korean_top1000,
    japanese_top1000,
    jidol_top1000,
    jpop_top1000,
    jdance_top1000,
]
east_asia_toptracksdf = (
    pd.concat(east_asia_toptracks)
    .sort_values(by='popularity', ascending=False)
    # drop=True discards the old per-table index directly instead of first
    # materialising it as an 'index' column and then deleting that column.
    .reset_index(drop=True)
)
east_asia_toptracksdf.to_csv('east_asia_top_tracks.csv')