# Get Top 10 Artists and Top 5 Genres per Emotion (Artists and Genres data)

In [3]:
import pandas as pd
import numpy as np
import json

## 1. Top 10 Artists per emotion

In [4]:
#Retrieve data
df = pd.read_csv('data/tracks_final.csv')

#Retrieve emotions.
#Missing labels are dropped: `df['emotions'] == NaN` never matches any row, so a
#NaN label would only add an empty entry to the json files.
emotions = df['emotions'].dropna().unique()

#Number of artists kept per emotion
TOP_N_ARTISTS = 10

#One track_id per artist: the first one found in the full dataset (it is NOT
#restricted to the emotion being processed).
#This lookup does not depend on the emotion, so it is built once here instead of
#merging against every track of the dataset inside the loop.
artist_track = df[['artist', 'track_id']].drop_duplicates(subset=['artist'], keep='first')

artists_dict = {}
for emotion in emotions :

    #Get sub dataframe of emotion
    emotion_tracks = df[df['emotions'] == emotion]

    #Group by artist, count number of tracks, sort from highest to lowest and
    #retrieve the top artists.
    #groupby returns the artists sorted by name and the stable sort keeps that order
    #between artists with the same number of tracks, so the ranking (and who makes
    #the cut) is well defined instead of depending on the sort implementation.
    #The left merge preserves the order of the sorted rows.
    top_artists = (
        emotion_tracks
        .groupby(['artist']).size().reset_index(name='size')
        .sort_values(by=['size'], ascending=False, kind='stable')
        .head(TOP_N_ARTISTS)
        .merge(artist_track, on='artist', how='left')
        .rename(columns={'artist': 'name'})
    )

    #Add rank (based on number of tracks), starting at 1
    top_artists['rank'] = np.arange(1, len(top_artists) + 1)

    #Add to dictionary: one record (name, size, track_id, rank) per artist
    artists_dict[emotion] = top_artists.to_dict('records')

#Store dictionary in json file
with open('data/top_artists.json', 'w') as fp:
    json.dump(artists_dict, fp)

## 2. Top 5 Genres per emotion

In [6]:
#Number of genres kept per emotion
TOP_N_GENRES = 5

#Seeded generator for the shuffle below. It is created in this cell so that
#re-running the cell (or the whole notebook) always writes the same json file;
#an unseeded sample() produced a different row order on every run.
#A single generator is shared by all emotions so that each emotion still gets
#its own permutation.
SHUFFLE_SEED = 42
shuffle_rng = np.random.RandomState(SHUFFLE_SEED)

genres_dict = {}
for emotion in emotions :
    #Count the tracks of each genre for this emotion and keep the most frequent ones.
    #The stable sort keeps genres with the same count in the name order returned
    #by groupby, so the selection does not depend on the sort implementation.
    top_genres = (
        df[df['emotions'] == emotion]
        .groupby(['genre']).size().reset_index(name='counts')
        .sort_values(by=['counts'], ascending=False, kind='stable')
        .head(TOP_N_GENRES)
    )
    #Sort randomly rows
    #NOTE(review): presumably so that the stored order does not reveal the ranking - confirm
    top_genres = top_genres.sample(frac=1, random_state=shuffle_rng)

    #Add to dictionary: one record (genre, counts) per genre
    genres_dict[emotion] = top_genres.to_dict('records')

#Store dictionary in json file
with open('data/top_genres.json', 'w') as fp:
    json.dump(genres_dict, fp)