# Import Libraries

In [49]:
import numpy as np
import pandas as pd
from time import time
import pickle

# Packages to perform dimensionality reduction
import sklearn.datasets
import sklearn.decomposition
import sklearn.manifold

# Packages for plotting
import matplotlib.pyplot as plt
import mpl_toolkits.mplot3d
from matplotlib import offsetbox
import seaborn as sns
rc={'lines.linewidth': 2, 'axes.labelsize': 14, 'axes.titlesize': 14}
sns.set(rc=rc)

import bebi103

# Make Matplotlib plots appear inline
%matplotlib inline

import bokeh
from bokeh.plotting import output_notebook,show
bokeh.io.output_notebook()

# Pickle in Data

In [4]:
# Load the factorized feature table from its pickle file.
# NOTE(review): this is a hardcoded absolute local path, and its project directory
# ("TikTok_Hit_Predictor") differs from the one used when loading df_agg
# ("TikTok_Song_Predictor") — confirm which directory is correct.
# Only unpickle files you trust: pickle.load can execute arbitrary code.
path = r"C:\Users\Andrew\Documents\Metis\TikTok_Hit_Predictor\Pickle\supervised_factorized.pkl"

# Context manager guarantees the file handle is closed once the data is loaded
with open(path, 'rb') as pkl_file:
    df_factorized = pickle.load(pkl_file)
df_factorized.head(2)

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,year,spotify_artists,success
0,0.88,0.501,2.0,-6.774,1.0,0.062,0.0494,0.0695,0.436,0.459,120.038,2020.0,0,1.0
1,0.935,0.454,1.0,-7.509,1.0,0.375,0.0194,0.0,0.0824,0.357,133.073,2018.0,1,1.0


### Pickle in columns with more detail

In [7]:
# Load the detailed (aggregated) table from its pickle file.
# NOTE(review): this is a hardcoded absolute local path, and its project directory
# ("TikTok_Song_Predictor") differs from the one used when loading df_factorized
# ("TikTok_Hit_Predictor") — confirm which directory is correct.
# Only unpickle files you trust: pickle.load can execute arbitrary code.
path = r"C:\Users\Andrew\Documents\Metis\TikTok_Song_Predictor\Pickle\df_agg.pkl"

# Context manager guarantees the file handle is closed once the data is loaded
with open(path, 'rb') as pkl_file:
    df_agg = pickle.load(pkl_file)
df_agg.head(2)

Unnamed: 0,level_0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,...,TikTok Link,Release Date,Position Change,spotify_uri,audio_analysis,feature_analysis,success,year,top_albums,top_artists
0,0,0.88,0.501,2.0,-6.774,1.0,0.062,0.0494,0.0695,0.436,...,https://www.tiktok.com/music/All-TikTok-Mashup...,2020-08-17,23.0,5TpvLkESnw1g9wDz52efeO,"{'meta': {'analyzer_version': '4.0.0', 'platfo...","{'danceability': 0.88, 'energy': 0.501, 'key':...",1,2020.0,Other,Other
1,162,0.935,0.454,1.0,-7.509,1.0,0.375,0.0194,0.0,0.0824,...,https://www.tiktok.com/music/WAP-Megan-Thee-St...,2018-03-22,15.0,4Oun2ylbjFKMPTiaSbbCih,"{'meta': {'analyzer_version': '4.0.0', 'platfo...","{'danceability': 0.935, 'energy': 0.454, 'key'...",1,2018.0,Other,Cardi B


In [8]:
# (rows, columns) of the detailed frame
df_agg.shape

(7675, 45)

In [9]:
# (rows, columns) of the factorized frame, to compare its row count with df_agg
df_factorized.shape

(7675, 14)

Confirmed both dataframes have the same number of rows (7675); they differ only in their number of columns (45 vs. 14).

In [1]:
# Reference (bebi103 t-SNE tutorial): http://bebi103.caltech.edu.s3-website-us-east-1.amazonaws.com/2016/tutorials/aux8_tsne.html

# Add columns with further descriptive detail

In [10]:
# List the columns available in the detailed frame
df_agg.columns

Index(['level_0', 'danceability', 'energy', 'key', 'loudness', 'mode',
       'speechiness', 'acousticness', 'instrumentalness', 'liveness',
       'valence', 'tempo', 'type', 'id', 'uri', 'track_href', 'analysis_url',
       'duration_ms', 'time_signature', 'ISRC', 'index', 'Playlist Add Date',
       'Record Label', 'UPC', 'Album Name', 'name', 'Peak Position',
       'Peak Date', 'Rank', 'Historical Positions', 'Time on Chart',
       'Artist(s)', 'tiktok_track_id', 'Velocity', 'Historical Posts',
       'TikTok Link', 'Release Date', 'Position Change', 'spotify_uri',
       'audio_analysis', 'feature_analysis', 'success', 'year', 'top_albums',
       'top_artists'],
      dtype='object')

In [11]:
# Preview the artist column
df_agg['Artist(s)']

0                JVKE 🌩
1               Cardi B
2             Tik Toker
3              Ir Sais 
4             Sada Baby
             ...       
7670             2KBABY
7671        A.R. Rahman
7672        Andra Gogan
7673    Black Eyed Peas
7674               すとぷり
Name: Artist(s), Length: 7675, dtype: object

In [41]:
# Descriptive (non-feature) columns to carry alongside the numeric features
aux_cols = ['Artist(s)', 'name', 'TikTok Link', 'success']

# Take those columns from the detailed frame and attach them to df_factorized.
# The existing `success` column of df_factorized is replaced by df_agg's.
df_descriptive = df_agg.loc[:, aux_cols]
df_factorized[aux_cols] = df_descriptive

# Perform TSNE

In [68]:
# Start the wall-clock timer for the embedding
t0 = time()

# Fit a 2-D t-SNE embedding on everything except the descriptive columns.
# NOTE(review): the features are passed in unscaled — confirm this is intended,
# since columns such as tempo and year span far larger ranges than the 0-1 features.
tsne = sklearn.manifold.TSNE(n_components=2, random_state=0)
feature_matrix = df_factorized.drop(columns=aux_cols)
embed_factorized = tsne.fit_transform(feature_matrix)

# Elapsed seconds, then report
t_tsne = time() - t0
print('t-SNE took %.2fs.' % t_tsne)

t-SNE took 20.50s.


In [69]:
# Inspect the array returned by t-SNE
embed_factorized

array([[-37.17972   ,  -0.40873644],
       [-35.964893  ,  -0.21188673],
       [-34.55721   ,  -0.2965801 ],
       ...,
       [ 89.75295   , -15.177577  ],
       [ 40.443222  , -71.884186  ],
       [ 87.629684  , -14.315768  ]], dtype=float32)

In [70]:
# Wrap the embedding in a DataFrame that shares df_factorized's index.
# The column assignment below aligns rows on index labels, so reusing the
# source index keeps rows matched even if df_factorized does not carry a
# default 0..n-1 index (a mismatch would otherwise silently produce NaNs).
df_tsne = pd.DataFrame(embed_factorized,
                       columns=['tsne1', 'tsne2'],
                       index=df_factorized.index)
# Add the meta-data
df_tsne[aux_cols] = df_factorized[aux_cols]
df_tsne.head()

Unnamed: 0,tsne1,tsne2,Artist(s),name,TikTok Link,success
0,-37.179722,-0.408736,JVKE 🌩,All TikTok Mashup (JVKE - Upside Down),https://www.tiktok.com/music/All-TikTok-Mashup...,1
1,-35.964893,-0.211887,Cardi B,WAP（feat. Megan Thee Stallion）,https://www.tiktok.com/music/WAP-Megan-Thee-St...,1
2,-34.557209,-0.29658,Tik Toker,Young Thug - Relationship (feat. Future),https://www.tiktok.com/music/Young-Thug-Relati...,1
3,-37.839973,-0.711288,Ir Sais,Dream Girl,https://www.tiktok.com/music/Dream-Girl-680796...,1
4,-36.435543,-0.327984,Sada Baby,Whole Lotta Choppas,https://www.tiktok.com/music/Whole-Lotta-Chopp...,1


In [71]:
# Audio-feature columns to copy next to the embedding coordinates
features = [
    'danceability', 'energy', 'key', 'loudness',
    'mode', 'speechiness', 'acousticness', 'instrumentalness',
    'liveness', 'valence', 'tempo', 'year',
]

# Attach them to the t-SNE frame
df_tsne[features] = df_factorized.loc[:, features]

In [72]:
# Check that the feature columns were attached
df_tsne.head()

Unnamed: 0,tsne1,tsne2,Artist(s),name,TikTok Link,success,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,year
0,-37.179722,-0.408736,JVKE 🌩,All TikTok Mashup (JVKE - Upside Down),https://www.tiktok.com/music/All-TikTok-Mashup...,1,0.88,0.501,2.0,-6.774,1.0,0.062,0.0494,0.0695,0.436,0.459,120.038,2020.0
1,-35.964893,-0.211887,Cardi B,WAP（feat. Megan Thee Stallion）,https://www.tiktok.com/music/WAP-Megan-Thee-St...,1,0.935,0.454,1.0,-7.509,1.0,0.375,0.0194,0.0,0.0824,0.357,133.073,2018.0
2,-34.557209,-0.29658,Tik Toker,Young Thug - Relationship (feat. Future),https://www.tiktok.com/music/Young-Thug-Relati...,1,0.842,0.597,2.0,-6.336,1.0,0.0627,0.00252,0.0,0.124,0.298,145.992,2018.0
3,-37.839973,-0.711288,Ir Sais,Dream Girl,https://www.tiktok.com/music/Dream-Girl-680796...,1,0.884,0.546,2.0,-6.279,0.0,0.117,0.269,8e-06,0.064,0.49,113.236,2020.0
4,-36.435543,-0.327984,Sada Baby,Whole Lotta Choppas,https://www.tiktok.com/music/Whole-Lotta-Chopp...,1,0.926,0.762,1.0,-1.887,1.0,0.205,0.000718,0.0,0.117,0.277,127.931,2020.0


In [73]:
def cat_convert(x):
    """Map a success flag to its display label.

    Returns "hit" when ``x`` equals 1 and "not a hit" for any other value.
    """
    return "hit" if x == 1 else "not a hit"

In [74]:
# Build the labels from the untouched numeric flag in df_factorized rather than
# from df_tsne's own column: re-running this cell on already-converted strings
# would otherwise relabel every row as "not a hit".
df_tsne['success'] = df_factorized['success'].apply(cat_convert)

In [83]:
# Display the frame after relabelling `success`
df_tsne

Unnamed: 0,tsne1,tsne2,Artist(s),name,TikTok Link,success,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,year
0,-37.179722,-0.408736,JVKE 🌩,All TikTok Mashup (JVKE - Upside Down),https://www.tiktok.com/music/All-TikTok-Mashup...,hit,0.880,0.5010,2.0,-6.774,1.0,0.0620,0.049400,0.069500,0.4360,0.4590,120.038,2020.0
1,-35.964893,-0.211887,Cardi B,WAP（feat. Megan Thee Stallion）,https://www.tiktok.com/music/WAP-Megan-Thee-St...,hit,0.935,0.4540,1.0,-7.509,1.0,0.3750,0.019400,0.000000,0.0824,0.3570,133.073,2018.0
2,-34.557209,-0.296580,Tik Toker,Young Thug - Relationship (feat. Future),https://www.tiktok.com/music/Young-Thug-Relati...,hit,0.842,0.5970,2.0,-6.336,1.0,0.0627,0.002520,0.000000,0.1240,0.2980,145.992,2018.0
3,-37.839973,-0.711288,Ir Sais,Dream Girl,https://www.tiktok.com/music/Dream-Girl-680796...,hit,0.884,0.5460,2.0,-6.279,0.0,0.1170,0.269000,0.000008,0.0640,0.4900,113.236,2020.0
4,-36.435543,-0.327984,Sada Baby,Whole Lotta Choppas,https://www.tiktok.com/music/Whole-Lotta-Chopp...,hit,0.926,0.7620,1.0,-1.887,1.0,0.2050,0.000718,0.000000,0.1170,0.2770,127.931,2020.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7670,91.636543,-15.384322,2KBABY,Old Streets,https://www.tiktok.com/music/Old-Streets-68169...,not a hit,0.626,0.4150,2.0,-12.073,1.0,0.6030,0.559000,0.000000,0.0931,0.5940,99.609,0.0
7671,92.036888,-15.517015,A.R. Rahman,Vande Mataram,https://www.tiktok.com/music/Vande-Mataram-671...,not a hit,0.672,0.0304,2.0,-26.380,1.0,0.0381,0.288000,0.865000,0.0676,0.0682,90.037,0.0
7672,89.752953,-15.177577,Andra Gogan,In locul meu,https://www.tiktok.com/music/In-locul-meu-6921...,not a hit,0.879,0.6420,0.0,-6.775,1.0,0.0570,0.096800,0.000002,0.1300,0.7420,129.938,0.0
7673,40.443222,-71.884186,Black Eyed Peas,Pump It,https://www.tiktok.com/music/Pump-It-679139548...,not a hit,0.649,0.9310,1.0,-3.150,0.0,0.1810,0.009300,0.000000,0.7510,0.7440,153.645,0.0


# Create plot

In [82]:
# What pops up on hover?
# Each label is paired with the column of the same name. Column names that
# contain spaces or special characters must be wrapped in braces, hence
# @{Artist(s)}.
tooltips = [('Artist(s)', '@{Artist(s)}'),
            ('name', '@name'),
            ('danceability', '@danceability'),
            ('energy', '@energy'),
            ('key', '@key'),
            ('loudness', '@loudness'),
            ('mode', '@mode'),
            ('speechiness', '@speechiness'),
            ('acousticness', '@acousticness'),
            ('instrumentalness', '@instrumentalness'),
            ('liveness', '@liveness'),
            ('valence', '@valence'),
            ('tempo', '@tempo'),
            ('year', '@year')
           ]

# Make the hover tool
hover = bokeh.models.HoverTool(tooltips=tooltips)

# Create figure
p = bokeh.plotting.figure(plot_width=650, plot_height=450, 
                          x_axis_label='t-SNE axis 1',
                          y_axis_label='t-SNE axis 2')

# Add the hover tool
p.add_tools(hover)

# Define colors in a dictionary to access them with
# the key from the pandas groupby function.
keys = df_tsne.success.dropna().unique()
color_dict = {k: bebi103.image.rgb_frac_to_hex(sns.color_palette()[i]) 
                      for i, k in enumerate(sorted(keys))}

# One glyph renderer per success label so each gets its own color and legend entry
for key, group in df_tsne.groupby('success'):
    # Specify data source
    source = bokeh.models.ColumnDataSource(group)
    
    # Populate glyphs
    p.circle(x='tsne1', y='tsne2', size=7, alpha=0.5, source=source,
                color=color_dict[key], legend_label=key)

p.legend.background_fill_alpha = 0.25
p.legend.location = 'bottom_right'
# Blanched almonds are the best kind of almonds
p.legend.background_fill_color = 'blanchedalmond'
bokeh.io.show(p)