In [2]:
import pandas as pd
import numpy as np
from plotly.subplots import make_subplots as p_sub
import plotly.express as px
import plotly.graph_objects as go
import json
from artists_88rising import artists

In [3]:
# Import JSON data into dataframe

# Solo tracks
solo_df = pd.read_json('./Datasets/solo_tracks_88rising.json')
solo_df.head()

Unnamed: 0,song name,album,artist,featured,release_date,length(ms),popularity,acousticness,danceability,energy,liveness,tempo,valence,num_artists
0,Animal Farm,Animal Farm,BIBI,[BIBI],2022-09-27,203945,69,0.0178,0.617,0.57,0.55,107.971,0.375,1
1,KAZINO,KAZINO,BIBI,[BIBI],2020-04-29,185946,65,0.139,0.625,0.686,0.11,133.041,0.263,1
2,BAD SAD AND MAD,Life is a Biâ¦,BIBI,[BIBI],2021-04-28,154988,64,0.545,0.746,0.558,0.0904,90.013,0.638,1
3,"Very, Slowly",Twenty-Five Twenty-One OST Part 3,BIBI,[BIBI],2022-02-20,214851,60,0.678,0.528,0.423,0.167,135.917,0.249,1
4,MotoSpeed 24,Sweet Sorrow of Mother,BIBI,[BIBI],2022-10-24,133904,56,0.319,0.61,0.597,0.136,95.004,0.295,1


In [4]:
# Tracks with collaborations from 88 Rising artists
collabs_88_df = pd.read_json('./Datasets/collab_88rising.json')
collabs_88_df.head()

Unnamed: 0,song name,album,artist,featured,release_date,length(ms),popularity,acousticness,danceability,energy,liveness,tempo,valence,num_artists
0,The Weekend,The Weekend,88rising,"[88rising, BIBI]",2021-10-14,167661,69,0.062,0.784,0.521,0.0995,101.491,0.817,2
1,Best Lover,Head In The Clouds Forever,88rising,"[88rising, BIBI]",2022-04-16,152910,66,0.268,0.74,0.663,0.121,82.004,0.519,2
2,Crazy Like You (feat. BIBI),"Bare&Rare, Pt. 1",CHUNG HA,"[CHUNG HA, BIBI]",2022-07-11,175480,50,0.0957,0.536,0.639,0.209,170.94,0.502,2
3,froyo (feat. Warren Hue),Head In The Clouds Forever,88rising,"[88rising, BIBI, Rich Brian]",2022-04-16,309121,45,0.213,0.508,0.688,0.157,145.089,0.335,3
4,These Nights,These Nights,88rising,"[88rising, Rich Brian]",2019-10-03,223006,41,0.000317,0.656,0.673,0.28,83.018,0.494,2


In [5]:
# Collaborations with artists outside of 88 Rising
collab_out_df = pd.read_json('./Datasets/collab_outside_artists.json')
collab_out_df.head()

Unnamed: 0,song name,album,artist,featured,release_date,length(ms),popularity,acousticness,danceability,energy,liveness,tempo,valence,num_artists
0,LAW (Prod. Czaer),Street Man Fighter Original Vol.3 (Mission by ...,Various Artists,"[Yoon Mirae, BIBI]",2022-09-06,189373,71,0.00857,0.756,0.9,0.237,128.054,0.55,2
1,Never Gonna Come Down,Shang-Chi and The Legend of The Ten Rings: The...,88rising,"[Mark Tuan, BIBI]",2021-09-03,203800,51,0.656,0.777,0.689,0.062,150.036,0.873,2
2,AUTOMATIC,AUTOMATIC,Various Artists,"[Chancellor, Babylon, twlv]",2020-10-14,268293,51,0.133,0.72,0.703,0.121,99.999,0.492,3
3,Eleven (feat. BIBI),Blueline,twlv,"[twlv, BIBI]",2019-03-15,196520,46,0.145,0.502,0.576,0.17,89.897,0.205,2
4,Hanryang,Hanryang,Min Kyunghoon,"[Min Kyunghoon, KIM HEECHUL, BIBI]",2020-12-19,202866,49,0.0605,0.698,0.729,0.0918,140.083,0.306,3


## Collaboration Trends
Explore the collaborations data to see if there is any effect on popularity. This will include tracks with collaborations with artists both part of 88 Rising and outside of 88 Rising
- Does the number of artists in the collaboration effect song popularity?
- Which artist collaborations are the most popular?


In [6]:
# Combine 88 Rising solo, collabs and outside collabs data
all_df = pd.concat([collabs_88_df, collab_out_df, solo_df], axis=0)
all_df.head()

Unnamed: 0,song name,album,artist,featured,release_date,length(ms),popularity,acousticness,danceability,energy,liveness,tempo,valence,num_artists
0,The Weekend,The Weekend,88rising,"[88rising, BIBI]",2021-10-14,167661,69,0.062,0.784,0.521,0.0995,101.491,0.817,2
1,Best Lover,Head In The Clouds Forever,88rising,"[88rising, BIBI]",2022-04-16,152910,66,0.268,0.74,0.663,0.121,82.004,0.519,2
2,Crazy Like You (feat. BIBI),"Bare&Rare, Pt. 1",CHUNG HA,"[CHUNG HA, BIBI]",2022-07-11,175480,50,0.0957,0.536,0.639,0.209,170.94,0.502,2
3,froyo (feat. Warren Hue),Head In The Clouds Forever,88rising,"[88rising, BIBI, Rich Brian]",2022-04-16,309121,45,0.213,0.508,0.688,0.157,145.089,0.335,3
4,These Nights,These Nights,88rising,"[88rising, Rich Brian]",2019-10-03,223006,41,0.000317,0.656,0.673,0.28,83.018,0.494,2


In [330]:
print(len(all_df), len(collab_out_df), len(collabs_88_df), len(solo_df))

451 164 36 251


all_df consists of all data on 88rising. Need to add a column that indicates the amount of artists in each track.

In [7]:
all_df['num_artists'] = all_df['featured'].apply(lambda x: len(x))
all_df.head()

Unnamed: 0,song name,album,artist,featured,release_date,length(ms),popularity,acousticness,danceability,energy,liveness,tempo,valence,num_artists
0,The Weekend,The Weekend,88rising,"[88rising, BIBI]",2021-10-14,167661,69,0.062,0.784,0.521,0.0995,101.491,0.817,2
1,Best Lover,Head In The Clouds Forever,88rising,"[88rising, BIBI]",2022-04-16,152910,66,0.268,0.74,0.663,0.121,82.004,0.519,2
2,Crazy Like You (feat. BIBI),"Bare&Rare, Pt. 1",CHUNG HA,"[CHUNG HA, BIBI]",2022-07-11,175480,50,0.0957,0.536,0.639,0.209,170.94,0.502,2
3,froyo (feat. Warren Hue),Head In The Clouds Forever,88rising,"[88rising, BIBI, Rich Brian]",2022-04-16,309121,45,0.213,0.508,0.688,0.157,145.089,0.335,3
4,These Nights,These Nights,88rising,"[88rising, Rich Brian]",2019-10-03,223006,41,0.000317,0.656,0.673,0.28,83.018,0.494,2


In [332]:
all_df['num_artists'].unique()

array([2, 3, 1], dtype=int64)

Create a column that specifies if the main artist is an 88rising artist. This value will be boolean (true or false).

In [8]:
lowercase_artists = [x.lower() for x in artists]
lowercase_artists.append('chung ha')
lowercase_artists.append('88rising')
lowercase_artists.append('mili')
lowercase_artists

['atarashii gakko!',
 'bibi',
 'chungha',
 'dumbfoundead',
 'guapdad 4000',
 'higher brothers',
 'jackson wang',
 'joji',
 'keith ape',
 'lexie liu',
 'milli',
 'niki',
 'rich brian',
 'seori',
 'stephanie poetri',
 'warren hue',
 'ylona garcia',
 'chung ha',
 '88rising',
 'mili']

In [9]:
in_88 = []
for index, row in all_df.iterrows():
    if row['artist'].lower() in lowercase_artists or row['artist'].lower() == '88rising' or row['artist'].lower() =='chung ha': # Chung Ha is mispelled in tracks csv
        in_88.append(True)
    else:
        in_88.append(False)
        
in_88[0:10]

[True, True, True, True, True, True, True, True, True, True]

In [10]:
all_df['in_88rising'] = in_88
all_df.head()

Unnamed: 0,song name,album,artist,featured,release_date,length(ms),popularity,acousticness,danceability,energy,liveness,tempo,valence,num_artists,in_88rising
0,The Weekend,The Weekend,88rising,"[88rising, BIBI]",2021-10-14,167661,69,0.062,0.784,0.521,0.0995,101.491,0.817,2,True
1,Best Lover,Head In The Clouds Forever,88rising,"[88rising, BIBI]",2022-04-16,152910,66,0.268,0.74,0.663,0.121,82.004,0.519,2,True
2,Crazy Like You (feat. BIBI),"Bare&Rare, Pt. 1",CHUNG HA,"[CHUNG HA, BIBI]",2022-07-11,175480,50,0.0957,0.536,0.639,0.209,170.94,0.502,2,True
3,froyo (feat. Warren Hue),Head In The Clouds Forever,88rising,"[88rising, BIBI, Rich Brian]",2022-04-16,309121,45,0.213,0.508,0.688,0.157,145.089,0.335,3,True
4,These Nights,These Nights,88rising,"[88rising, Rich Brian]",2019-10-03,223006,41,0.000317,0.656,0.673,0.28,83.018,0.494,2,True


In [343]:
all_df.loc[all_df['in_88rising']==False]

Unnamed: 0,song name,album,artist,featured,release_date,length(ms),popularity,acousticness,danceability,energy,liveness,tempo,valence,num_artists,in_88rising
0,LAW (Prod. Czaer),Street Man Fighter Original Vol.3 (Mission by ...,Various Artists,"[Yoon Mirae, BIBI]",2022-09-06,189373,71,0.00857,0.756,0.900,0.2370,128.054,0.550,2,False
2,AUTOMATIC,AUTOMATIC,Various Artists,"[Chancellor, Babylon, twlv]",2020-10-14,268293,51,0.13300,0.720,0.703,0.1210,99.999,0.492,3,False
3,Eleven (feat. BIBI),Blueline,twlv,"[twlv, BIBI]",2019-03-15,196520,46,0.14500,0.502,0.576,0.1700,89.897,0.205,2,False
4,Hanryang,Hanryang,Min Kyunghoon,"[Min Kyunghoon, KIM HEECHUL, BIBI]",2020-12-19,202866,49,0.06050,0.698,0.729,0.0918,140.083,0.306,3,False
5,She Said (with BIBI),with HER,Crush,"[Crush, BIBI]",2020-10-20,208786,46,0.31300,0.685,0.655,0.1020,142.087,0.574,2,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
143,COMING HOME (feat. NIKI),LETâS JUST SAY THE WORLD ENDED A WEEK FROM N...,HONNE,"[HONNE, NIKI]",2021-10-22,210019,58,0.17000,0.826,0.329,0.0927,106.935,0.558,2,False
148,Forever (feat. NIKI),Chameleon,End of the World,"[End of the World, NIKI]",2020-11-27,176186,34,0.25800,0.582,0.762,0.3640,81.009,0.372,2,False
152,edamame (feat. Rich Brian),edamame (feat. Rich Brian),bbno$,"[bbno$, Rich Brian]",2021-07-24,133706,75,0.02300,0.815,0.848,0.0265,106.032,0.684,2,False
159,18,18,Various Artists,"[Kris Wu, Rich Brian, Trippie Redd]",2018-01-16,254507,46,0.18300,0.632,0.629,0.3440,179.882,0.207,3,False


Duplicate tracks to represent for each featured artist

In [344]:
# Duplicate rows based on the amount of featured artists and fill in the featured_artist column
dup_all_df = all_df.copy()
dup_all_df.insert(4, 'featured_artist', "")
for index, row in all_df.iterrows():
    for a in row['featured']:
        add_index = max(dup_all_df.index)+1
        dup_all_df.loc[add_index] = row
        dup_all_df.loc[dup_all_df.index == add_index, 'featured_artist'] = a
dup_all_df.head()

Unnamed: 0,song name,album,artist,featured,featured_artist,release_date,length(ms),popularity,acousticness,danceability,energy,liveness,tempo,valence,num_artists,in_88rising
0,The Weekend,The Weekend,88rising,"[88rising, BIBI]",,2021-10-14,167661,69,0.062,0.784,0.521,0.0995,101.491,0.817,2,True
1,Best Lover,Head In The Clouds Forever,88rising,"[88rising, BIBI]",,2022-04-16,152910,66,0.268,0.74,0.663,0.121,82.004,0.519,2,True
2,Crazy Like You (feat. BIBI),"Bare&Rare, Pt. 1",CHUNG HA,"[CHUNG HA, BIBI]",,2022-07-11,175480,50,0.0957,0.536,0.639,0.209,170.94,0.502,2,True
3,froyo (feat. Warren Hue),Head In The Clouds Forever,88rising,"[88rising, BIBI, Rich Brian]",,2022-04-16,309121,45,0.213,0.508,0.688,0.157,145.089,0.335,3,True
4,These Nights,These Nights,88rising,"[88rising, Rich Brian]",,2019-10-03,223006,41,0.000317,0.656,0.673,0.28,83.018,0.494,2,True


In [345]:
# Drop all rows where featured_artist is missing
no_na_all_df = dup_all_df.drop(dup_all_df.loc[dup_all_df['featured_artist']==""].index)


In [348]:
print(len(no_na_all_df), len(dup_all_df))

717 1168


Save this new dataset as a JSON

In [370]:
all_dict = no_na_all_df.to_dict("records")
# Save dictionary into a JSON
with open("./Datasets/all_collabs.json", "w") as outfile:
    json.dump(all_dict, outfile, indent=4)

### Number of Artists per song and track popularity
A bar plot of the number of artists featured in a song and the song popularity. (Note: 88rising will be considered as one artist)

In [32]:
# Group data by number of artists and taking the average of the popularity and audio features
num_art_grp = all_df.groupby("num_artists")
num_art_grp=num_art_grp.mean()
num_art_grp.head()

Unnamed: 0_level_0,length(ms),popularity,acousticness,danceability,energy,liveness,tempo,valence
num_artists,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,192300.314741,44.816733,0.385745,0.644884,0.583283,0.157935,122.583339,0.452216
2,191548.149254,43.395522,0.290008,0.702612,0.609897,0.1703,120.191045,0.499397
3,211788.090909,44.060606,0.274708,0.726061,0.655682,0.152873,119.776152,0.446202


In [37]:
num_art_grp.index

Int64Index([1, 2, 3], dtype='int64', name='num_artists')

In [68]:
x_axis = num_art_grp.index
fig = px.bar(num_art_grp, x=x_axis, y='popularity',
             hover_data={'popularity':':.2f', 'length(ms)':':.2f', 'acousticness':':.2f', 'danceability':':.2f', 'energy':':.2f', 'liveness':':.2f', 'tempo':':.2f', 'valence':':.2f'},
             labels={'popularity':'Average Popularity', 'num_artists':"Number of Artists"}, title='Average Track Popularity and the Number of Artists Featured')
fig.update_xaxes(type='category')
fig.show()


The popularity rates are equal for different number of artists featured. Therefore, the number of featured artists does not effect popularity rates.

Is there any relationship between the number of artists and the audio features?
Audio Features:
- length(ms)
- acousticness
- danceability
- energy
- liveness (likelihood that the track was performed live)
- tempo (beats per minute)
- valence (the measure of positivity in tracks; how happy, cheerful, or euphoric the track is)

In [48]:
audio_feat = num_art_grp.columns
audio_feat

Index(['length(ms)', 'popularity', 'acousticness', 'danceability', 'energy',
       'liveness', 'tempo', 'valence'],
      dtype='object')

In [97]:
low_audio_feat = num_art_grp[['acousticness', 'danceability', 'energy',
       'liveness', 'valence']]

In [108]:
# Scatter plot of number of artists and the audio features
fig = px.line(low_audio_feat, x=x_axis, y=low_audio_feat.columns, markers=True,
    labels={'num_artists':'Number of Artists', 'value': 'Avg Feature Value', 'variable': 'Audio Feature'},
    title = 'Number of Featured Artists and the Average Audio Feature Values')
fig.update_xaxes(type='category')

fig.show()

In [127]:
# Interactive Chart of Audio Features

fig = go.Figure()

for column in low_audio_feat.columns.to_list():
    fig.add_trace(
        go.Scatter(
            x = low_audio_feat.index,
            y = low_audio_feat[column],
            name = column
        )
    )
    
fig.update_layout(
    updatemenus=[go.layout.Updatemenu(
        active=0,
        buttons=list(
            [dict(label = 'All',
                  method = 'update',
                  args = [{'visible': [True, True, True, True, True]},
                          {'title': 'All',
                           'showlegend':True}]),
             dict(label = 'Acousticness',
                  method = 'update',
                  args = [{'visible': [True, False, False, False, False]}, # the index of True aligns with the indices of plot traces
                          {'title': 'Acousticness',
                           'showlegend':True}]),
             dict(label = 'Danceability',
                  method = 'update',
                  args = [{'visible': [False, True, False, False, False]},
                          {'title': 'Danceability',
                           'showlegend':True}]),
             dict(label = 'Energy',
                  method = 'update',
                  args = [{'visible': [False, False, True, False, False]},
                          {'title': 'Energy',
                           'showlegend':True}]),
             dict(label = 'Liveness',
                  method = 'update',
                  args = [{'visible': [False, False, False, True, False]},
                          {'title': 'Liveness',
                           'showlegend':True}]),
            dict(label = 'Valence',
                  method = 'update',
                  args = [{'visible': [False, False, False, False, True]}, # the index of True aligns with the indices of plot traces
                          {'title': 'Valence',
                           'showlegend':True}]),
            ])
        )
    ])

fig.show()

In [132]:
# Scatter plot of number of artists and the Length feature
fig = px.line(num_art_grp, x=x_axis, y='length(ms)', markers=True,
    title = 'Number of Featured Artists and the Average Track Length')
fig.update_xaxes(type='category')

fig.show()

In [134]:
# Scatter plot of number of artists and the Tempo feature
fig = px.line(num_art_grp, x=x_axis, y='tempo', markers=True,
    title = 'Number of Featured Artists and the Average Tempo')
fig.update_xaxes(type='category')

fig.show()

Findings:
Tracks are longer when there are more artists.
Acoustic levels go down as the number of featured artists increased.
Danceability increases as the number of artists increased.
Energy level increases as the number of artists increased.
Highest average liveness is with two artists.
The tempo in beats per minute decreases as the number of artists increase.
Most happy songs (highest average valence) when there are two artists.

### What are popular artists to collab with?
Which 88rising artists have the most number of collaborations? We will be looking into artists with the most tracks that has features.

In [12]:
# Combine 88rising collabs and outside collabs data
all_collabs_df = pd.concat([collabs_88_df, collab_out_df], axis=0)
all_collabs_df = all_collabs_df.reset_index(drop = True)
all_collabs_df.head()

Unnamed: 0,song name,album,artist,featured,release_date,length(ms),popularity,acousticness,danceability,energy,liveness,tempo,valence,num_artists
0,The Weekend,The Weekend,88rising,"[88rising, BIBI]",2021-10-14,167661,69,0.062,0.784,0.521,0.0995,101.491,0.817,2
1,Best Lover,Head In The Clouds Forever,88rising,"[88rising, BIBI]",2022-04-16,152910,66,0.268,0.74,0.663,0.121,82.004,0.519,2
2,Crazy Like You (feat. BIBI),"Bare&Rare, Pt. 1",CHUNG HA,"[CHUNG HA, BIBI]",2022-07-11,175480,50,0.0957,0.536,0.639,0.209,170.94,0.502,2
3,froyo (feat. Warren Hue),Head In The Clouds Forever,88rising,"[88rising, BIBI, Rich Brian]",2022-04-16,309121,45,0.213,0.508,0.688,0.157,145.089,0.335,3
4,These Nights,These Nights,88rising,"[88rising, Rich Brian]",2019-10-03,223006,41,0.000317,0.656,0.673,0.28,83.018,0.494,2


In [5]:
all_collabs_grp = all_collabs_df.groupby('artist')
all_collabs_grp = all_collabs_grp.mean()
all_collabs_grp.head()

Unnamed: 0_level_0,length(ms),popularity,acousticness,danceability,energy,liveness,tempo,valence
artist,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
88rising,203475.860465,50.372093,0.275442,0.690698,0.616326,0.156477,117.029953,0.420535
Afgan,178798.0,46.0,0.256,0.723,0.458,0.0879,113.954,0.357
Al Rocco,179885.0,37.0,0.494,0.882,0.613,0.294,134.013,0.72
BIBI,190177.666667,41.333333,0.179,0.647,0.677,0.139667,118.993333,0.420667
Babylon,214782.0,22.0,0.129,0.769,0.895,0.196,104.948,0.631


In [141]:
all_collabs_df['artist'].unique()

array(['88rising', 'CHUNG HA', 'Rich Brian', 'Higher Brothers',
       'Jackson Wang', 'NIKI', 'Various Artists', 'twlv', 'Min Kyunghoon',
       'Crush', 'Way Ched', 'Lolo ZouaÃ¯', 'ZICO', 'BIBI', 'J.Y. Park',
       'Drunken Tiger', 'Yoon Mirae', 'nafla', 'Christopher', 'Paul Kim',
       'Grizzly', 'YESUNG', 'CHANGMO', 'Babylon', 'Dreamville',
       'Thundercat', 'Guapdad 4000', 'Jace', 'Kembe X', 'Travis Thompson',
       'Quadeca', 'Moslikely', 'Eric Bellinger', 'Lou Phelps',
       'Marc E. Bassy', "Twelve'len", 'JP Saxe', 'Too $hort',
       "Deante' Hitchcock", 'Buddy', 'Lou From Paradise', 'Jay Worthy',
       'Shawn Wasabi', 'lil ricefield', 'The Cool Kids', 'KnowKnow',
       'Masiwei', 'Psy.P', 'Melo', 'Bohan Phoenix', 'Rain', 'Afgan',
       'JAY B', 'CrazyBoy', 'Sammi Cheng', 'VaVa', 'Al Rocco', 'Joji',
       'rei brown', 'Bonobo', 'shamana', 'SahBabii', 'Ryan Hemsworth',
       'Mili', 'Binary Haze Interactive', 'Promise of wizard', 'HONNE',
       'End of the World', 

Some main artists are outside of 88rising and some tracks have more than one 88rising artist. Need to create a duplicate row for each 8rising artist featured and make the featured column contain only one artist name.

In [107]:
lowercase_artists = [x.lower() for x in artists]
lowercase_artists.append('chung ha')
lowercase_artists.append('88rising')
lowercase_artists

['atarashii gakko!',
 'bibi',
 'chungha',
 'dumbfoundead',
 'guapdad 4000',
 'higher brothers',
 'jackson wang',
 'joji',
 'keith ape',
 'lexie liu',
 'milli',
 'niki',
 'rich brian',
 'seori',
 'stephanie poetri',
 'warren hue',
 'ylona garcia',
 'chung ha',
 '88rising']

In [108]:
len(all_collabs_df)

200

In [109]:
# Duplicate rows based on the amount of featured artists and fill in the featured_artist column
dup_all_collabs = all_collabs_df.copy()
dup_all_collabs.insert(4, 'featured_artist', "")
for index, row in all_collabs_df.iterrows():
    for a in row['featured']:
        add_index = max(dup_all_collabs.index)+1
        dup_all_collabs.loc[add_index] = row
        dup_all_collabs.loc[dup_all_collabs.index == add_index, 'featured_artist'] = a
dup_all_collabs.head()

Unnamed: 0,song name,album,artist,featured,featured_artist,release_date,length(ms),popularity,acousticness,danceability,energy,liveness,tempo,valence
0,The Weekend,The Weekend,88rising,"[88rising, BIBI]",,2021-10-14,167661,69,0.062,0.784,0.521,0.0995,101.491,0.817
1,Best Lover,Head In The Clouds Forever,88rising,"[88rising, BIBI]",,2022-04-16,152910,66,0.268,0.74,0.663,0.121,82.004,0.519
2,Crazy Like You (feat. BIBI),"Bare&Rare, Pt. 1",CHUNG HA,"[CHUNG HA, BIBI]",,2022-07-11,175480,50,0.0957,0.536,0.639,0.209,170.94,0.502
3,froyo (feat. Warren Hue),Head In The Clouds Forever,88rising,"[88rising, BIBI, Rich Brian]",,2022-04-16,309121,45,0.213,0.508,0.688,0.157,145.089,0.335
4,These Nights,These Nights,88rising,"[88rising, Rich Brian]",,2019-10-03,223006,41,0.000317,0.656,0.673,0.28,83.018,0.494


In [110]:
# Drop all rows where featured_artist is missing
no_na_all_collabs = dup_all_collabs.drop(dup_all_collabs.loc[dup_all_collabs['featured_artist']==""].index)

In [111]:
no_na_all_collabs.head()

Unnamed: 0,song name,album,artist,featured,featured_artist,release_date,length(ms),popularity,acousticness,danceability,energy,liveness,tempo,valence
200,The Weekend,The Weekend,88rising,"[88rising, BIBI]",88rising,2021-10-14,167661,69,0.062,0.784,0.521,0.0995,101.491,0.817
201,The Weekend,The Weekend,88rising,"[88rising, BIBI]",BIBI,2021-10-14,167661,69,0.062,0.784,0.521,0.0995,101.491,0.817
202,Best Lover,Head In The Clouds Forever,88rising,"[88rising, BIBI]",88rising,2022-04-16,152910,66,0.268,0.74,0.663,0.121,82.004,0.519
203,Best Lover,Head In The Clouds Forever,88rising,"[88rising, BIBI]",BIBI,2022-04-16,152910,66,0.268,0.74,0.663,0.121,82.004,0.519
204,Crazy Like You (feat. BIBI),"Bare&Rare, Pt. 1",CHUNG HA,"[CHUNG HA, BIBI]",CHUNG HA,2022-07-11,175480,50,0.0957,0.536,0.639,0.209,170.94,0.502


In [112]:
len(no_na_all_collabs)

466

In [113]:
# Filter data to only include featured_artist within 88rising
no_na_all_collabs['featured_artist'] = no_na_all_collabs['featured_artist'].apply(lambda x: x.lower())


In [114]:
featured_88_only = no_na_all_collabs.loc[no_na_all_collabs['featured_artist'].isin(lowercase_artists)]
featured_88_only.head()

Unnamed: 0,song name,album,artist,featured,featured_artist,release_date,length(ms),popularity,acousticness,danceability,energy,liveness,tempo,valence
200,The Weekend,The Weekend,88rising,"[88rising, BIBI]",88rising,2021-10-14,167661,69,0.062,0.784,0.521,0.0995,101.491,0.817
201,The Weekend,The Weekend,88rising,"[88rising, BIBI]",bibi,2021-10-14,167661,69,0.062,0.784,0.521,0.0995,101.491,0.817
202,Best Lover,Head In The Clouds Forever,88rising,"[88rising, BIBI]",88rising,2022-04-16,152910,66,0.268,0.74,0.663,0.121,82.004,0.519
203,Best Lover,Head In The Clouds Forever,88rising,"[88rising, BIBI]",bibi,2022-04-16,152910,66,0.268,0.74,0.663,0.121,82.004,0.519
204,Crazy Like You (feat. BIBI),"Bare&Rare, Pt. 1",CHUNG HA,"[CHUNG HA, BIBI]",chung ha,2022-07-11,175480,50,0.0957,0.536,0.639,0.209,170.94,0.502


In [124]:
feat_88_freq = featured_88_only['featured_artist'].value_counts().to_frame()
feat_88_freq.reset_index(inplace=True)
feat_88_freq = feat_88_freq.rename(columns={'index':'artist','featured_artist':'num_tracks'})
feat_88_freq

Unnamed: 0,artist,num_tracks
0,88rising,39
1,higher brothers,38
2,guapdad 4000,34
3,rich brian,27
4,niki,22
5,bibi,20
6,chung ha,19
7,joji,18
8,jackson wang,17
9,stephanie poetri,4


In [139]:
# Gather averages data and append to feat_88_freq
feat_88_grp = featured_88_only.groupby('featured_artist')
feat_88_grp=feat_88_grp.mean()
feat_88_grp.reset_index(inplace=True)
feat_88_grp

Unnamed: 0,featured_artist,length(ms),popularity,acousticness,danceability,energy,liveness,tempo,valence
0,88rising,203253.487179,50.717949,0.345534,0.679846,0.566179,0.164531,112.996077,0.413515
1,bibi,192938.5,46.3,0.177166,0.68565,0.6836,0.156275,122.0455,0.49515
2,chung ha,191575.947368,43.842105,0.258632,0.695947,0.756684,0.191553,112.999947,0.615574
3,guapdad 4000,186777.441176,36.235294,0.208565,0.762559,0.651676,0.173947,116.458588,0.606353
4,higher brothers,203793.394737,34.368421,0.217011,0.785947,0.692842,0.169882,127.504368,0.472989
5,jackson wang,189336.941176,48.0,0.248247,0.705706,0.614412,0.165965,126.517353,0.452882
6,joji,183839.055556,58.277778,0.348849,0.670167,0.540778,0.17245,117.252444,0.366856
7,keith ape,215048.5,50.5,0.074428,0.7175,0.667,0.0723,150.9095,0.3755
8,milli,193251.0,54.0,0.0465,0.652,0.47,0.229,100.052,0.32
9,niki,190699.136364,51.863636,0.412759,0.672091,0.508045,0.142836,111.744045,0.397614


In [140]:
feat_88_df = pd.concat([feat_88_freq,feat_88_grp], axis='columns')
feat_88_df.drop(feat_88_df.columns[2], axis=1, inplace=True)
feat_88_df

Unnamed: 0,artist,num_tracks,length(ms),popularity,acousticness,danceability,energy,liveness,tempo,valence
0,88rising,39,203253.487179,50.717949,0.345534,0.679846,0.566179,0.164531,112.996077,0.413515
1,higher brothers,38,192938.5,46.3,0.177166,0.68565,0.6836,0.156275,122.0455,0.49515
2,guapdad 4000,34,191575.947368,43.842105,0.258632,0.695947,0.756684,0.191553,112.999947,0.615574
3,rich brian,27,186777.441176,36.235294,0.208565,0.762559,0.651676,0.173947,116.458588,0.606353
4,niki,22,203793.394737,34.368421,0.217011,0.785947,0.692842,0.169882,127.504368,0.472989
5,bibi,20,189336.941176,48.0,0.248247,0.705706,0.614412,0.165965,126.517353,0.452882
6,chung ha,19,183839.055556,58.277778,0.348849,0.670167,0.540778,0.17245,117.252444,0.366856
7,joji,18,215048.5,50.5,0.074428,0.7175,0.667,0.0723,150.9095,0.3755
8,jackson wang,17,193251.0,54.0,0.0465,0.652,0.47,0.229,100.052,0.32
9,stephanie poetri,4,190699.136364,51.863636,0.412759,0.672091,0.508045,0.142836,111.744045,0.397614


### Popular 88rising Artists for Collaborations

In [141]:
x_axis = feat_88_df['artist']
fig = px.bar(feat_88_df, x=x_axis, y='num_tracks',
             hover_data={'num_tracks':'Total Tracks','popularity':':.2f', 'length(ms)':':.2f', 'acousticness':':.2f', 'danceability':':.2f', 'energy':':.2f', 'liveness':':.2f', 'tempo':':.2f', 'valence':':.2f'},
             labels={'popularity':'Average Popularity'}, title='88rising Artist Total Collaborations')
fig.update_xaxes(type='category')
fig.show()

In [145]:
trace1 = go.Bar(
    x=x_axis,
    y=feat_88_df['num_tracks'],
    name="Total Collaborated Tracks",
    marker=dict(
        color='rgb(34,163,192)'
               )
)
trace2 = go.Scatter(
    x=x_axis,
    y=feat_88_df['popularity'],
    name='Average Popularity Rating',
    yaxis='y2'

)

fig = p_sub(specs=[[{"secondary_y": True}]])
fig.add_trace(trace1)
fig.add_trace(trace2,secondary_y=True)
fig['layout'].update(height = 600, width = 800, title = '88rising Artist Collaborations and Popularity',xaxis=dict(
      tickangle=-90
    ))
fig.show()

It is interesting to see that some artists that I thought would be popular had less popularity such as Niki and Rich Brian.
Let's see if there is a relationship between artist total collaborations and their popularity rating.

In [148]:
fig = px.scatter(feat_88_df, x="num_tracks", y="popularity", trendline="ols")
fig.show()
px.get_trendline_results(fig).px_fit_results.iloc[0].summary()



kurtosistest only valid for n>=20 ... continuing anyway, n=13



0,1,2,3
Dep. Variable:,y,R-squared:,0.118
Model:,OLS,Adj. R-squared:,0.037
Method:,Least Squares,F-statistic:,1.467
Date:,"Sun, 06 Nov 2022",Prob (F-statistic):,0.251
Time:,14:00:49,Log-Likelihood:,-41.82
No. Observations:,13,AIC:,87.64
Df Residuals:,11,BIC:,88.77
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,51.3209,3.216,15.956,0.000,44.242,58.400
x1,-0.1711,0.141,-1.211,0.251,-0.482,0.140

0,1,2,3
Omnibus:,2.64,Durbin-Watson:,1.05
Prob(Omnibus):,0.267,Jarque-Bera (JB):,1.064
Skew:,-0.692,Prob(JB):,0.587
Kurtosis:,3.214,Cond. No.,40.3


There is no relationship between the number of collaborations and the artist's popularity rating as the R-squared is 11.8%. This indicates that 11.8% of the variance in the data can be explained by this linear regression model.

#### Audio Feature Analysis per Artist
What kinds of songs does each artist perform in their collaboration?

In [163]:
audio_88_df = featured_88_only.iloc[:, 4:]
col = ['featured_artist', 'release_date', 'length(ms)',
       'acousticness', 'danceability', 'energy', 'liveness', 'tempo',
       'valence', 'popularity']
audio_88_df=audio_88_df[col]
print(len(audio_88_df))
audio_88_df.head()

244


Unnamed: 0,featured_artist,release_date,length(ms),acousticness,danceability,energy,liveness,tempo,valence,popularity
200,88rising,2021-10-14,167661,0.062,0.784,0.521,0.0995,101.491,0.817,69
201,bibi,2021-10-14,167661,0.062,0.784,0.521,0.0995,101.491,0.817,69
202,88rising,2022-04-16,152910,0.268,0.74,0.663,0.121,82.004,0.519,66
203,bibi,2022-04-16,152910,0.268,0.74,0.663,0.121,82.004,0.519,66
204,chung ha,2022-07-11,175480,0.0957,0.536,0.639,0.209,170.94,0.502,50


In [181]:
audio_88_df['featured_artist'].unique()

array(['88rising', 'bibi', 'chung ha', 'rich brian', 'guapdad 4000',
       'warren hue', 'jackson wang', 'higher brothers', 'niki', 'joji',
       'keith ape', 'stephanie poetri', 'milli'], dtype=object)

In [188]:
len(audio_88_df['featured_artist'].unique())

13

In [234]:
unique_artists = list(audio_88_df['featured_artist'].unique())
col_list = audio_88_df.columns[2:9].to_list()

In [235]:
col_list

['length(ms)',
 'acousticness',
 'danceability',
 'energy',
 'liveness',
 'tempo',
 'valence']

In [236]:
print(len(unique_artists), len(col_list))


13 7


In [266]:
# Function to calculate row and column value
import math
def rowValue(column, col_list):
    if col_list.index(column) < math.ceil(len(col_list)/2):
        return 1
    else: return 2

def colValue(column, col_list):
    if col_list.index(column) < math.ceil(len(col_list)/2):
        return col_list.index(column)+1
    else: 
        return math.ceil(col_list.index(column)/2)

In [267]:
print(rowValue('valence', col_list))
print(colValue('valence', col_list))

2
3


In [260]:
jackson_test_df = audio_88_df.loc[audio_88_df['featured_artist'] == 'jackson wang', ['featured_artist', 'popularity',]+col_list]

In [284]:
for a in unique_artists[0:2]:
    print(a)

88rising
bibi


In [289]:
fig = go.Figure()

for a in unique_artists[0:2]:
        fig = p_sub(rows=2, cols=4, start_cell="bottom-left")
        df = audio_88_df.loc[audio_88_df['featured_artist'] == a]
        fig.add_trace(
                                go.Scatter(
                                x = df['valence'],
                                y = df['popularity'],
                                name = a,
                                mode='markers'
                                ), row=1, col=1
                        )
        fig.add_trace(
                                go.Scatter(
                                x = df['liveness'],
                                y = df['popularity'],
                                name = a,
                                mode='markers'
                                ), row=1, col=2
                        )
        fig.add_trace(
                                go.Scatter(
                                x = df['length(ms)'],
                                y = df['popularity'],
                                name = a,
                                mode='markers'
                                ), row=1, col=3
                        )
        fig.add_trace(
                                go.Scatter(
                                x = df['acousticness'],
                                y = df['popularity'],
                                name = a,
                                mode='markers'
                                ), row=1, col=4
                        )
        fig.add_trace(
                                go.Scatter(
                                x = df['tempo'],
                                y = df['popularity'],
                                name = a,
                                mode='markers'
                                ), row=2, col=1
                        )
        fig.add_trace(
                                go.Scatter(
                                x = df['danceability'],
                                y = df['popularity'],
                                name = a,
                                mode='markers'
                                ), row=2, col=2
                        )
        fig.add_trace(
                                go.Scatter(
                                x = df['energy'],
                                y = df['popularity'],
                                name = a,
                                mode='markers'
                                ), row=2, col=3
                        )
fig.update_layout(
    updatemenus=[go.layout.Updatemenu(
        active=0,
        buttons=list(
            [dict(label = '88rising',
                  method = 'relayout',
                  args = [{'visibility': [True, False, False]}, {'title': '88rising',
                           'showlegend':False}]),
             dict(label = 'bibi',
                  method = 'relayout',
                  args = [{'visibility': [False, True, False]}, {'title': 'bibi',
                           'showlegend':False}]),
            #  dict(label = 'chung ha',
            #       method = 'relayout',
            #       args = [{'visibility': [False, False, True]}, {'title': 'chung ha',
            #                'showlegend':False}]),
#              dict(label = 'rich brian',
#                   method = 'update',
#                   args = [{'title': 'rich brian',
#                            'showlegend':False}]),
#              dict(label = 'guapdad 4000',
#                   method = 'update',
#                   args = [{'title': 'guapdad 4000',
#                            'showlegend':False}]),
#              dict(label = 'warren hue',
#                   method = 'update',
#                   args = [{'title': 'warren hue',
#                            'showlegend':False}]),
#              dict(label = 'jackson wang',
#                   method = 'update',
#                   args = [{'title': 'jackson wang',
#                            'showlegend':False}]),
#              dict(label = 'higher brothers',
#                   method = 'update',
#                   args = [{'title': 'higher brothers',
#                            'showlegend':False}]),
#              dict(label = 'niki',
#                   method = 'update',
#                   args = [{'title': 'niki',
#                            'showlegend':False}]),
#              dict(label = 'joji',
#                   method = 'update',
#                   args = [{'title': 'joji',
#                            'showlegend':False}]),
#              dict(label = 'keith ape',
#                   method = 'update',
#                   args = [{'title': 'keith ape',
#                            'showlegend':False}]),
#              dict(label = 'stephanie poetri',
#                   method = 'update',
#                   args = [{'title': 'stephanie poetri',
#                            'showlegend':False}]),
#              dict(label = 'milli',
#                   method = 'update',
#                   args = [{'title': 'milli',
#                            'showlegend':False}]),
            ])
        )
    ])

fig.show()

### Top 10 Songs with 88rising Collaborations


In [15]:
# Top Collaborated Tracks
data_sorted = all_collabs_df[['song name', 'album', 'artist', 'featured', 'popularity']].sort_values(by='popularity', ascending=False)
data_sorted = data_sorted.reset_index(drop=True)
data_sorted.head(10)

Unnamed: 0,song name,album,artist,featured,popularity
0,edamame (feat. Rich Brian),edamame (feat. Rich Brian),bbno$,"[bbno$, Rich Brian]",75
1,Daylight,Daylight,Joji,"[Joji, Diplo]",71
2,LAW (Prod. Czaer),Street Man Fighter Original Vol.3 (Mission by ...,Various Artists,"[Yoon Mirae, BIBI]",71
3,La La Lost You - Acoustic Version,NIKI Acoustic Sessions: Head In The Clouds II,NIKI,"[NIKI, 88rising]",70
4,CAN'T GET OVER YOU (feat. Clams Casino),BALLADS 1,Joji,"[Joji, Clams Casino]",70
5,Midsummer Madness,Head In The Clouds,88rising,"[88rising, Joji, Rich Brian]",69
6,The Weekend,The Weekend,88rising,"[88rising, BIBI]",69
7,Afterthought,Nectar,Joji,"[Joji, BENEE]",68
8,Run It (feat. Rick Ross & Rich Brian),Shang-Chi and The Legend of The Ten Rings: The...,88rising,"[DJ Snake, Rick Ross, Rich Brian]",68
9,"Costa Rica (with Bas & JID feat. Guapdad 4000,...",Revenge Of The Dreamers III,Dreamville,"[Dreamville, Bas, JID]",67


In [292]:
len(all_collabs_df['artist'].unique())

71

In [16]:
top_ten = data_sorted[0:10]
top_ten

Unnamed: 0,song name,album,artist,featured,popularity
0,edamame (feat. Rich Brian),edamame (feat. Rich Brian),bbno$,"[bbno$, Rich Brian]",75
1,Daylight,Daylight,Joji,"[Joji, Diplo]",71
2,LAW (Prod. Czaer),Street Man Fighter Original Vol.3 (Mission by ...,Various Artists,"[Yoon Mirae, BIBI]",71
3,La La Lost You - Acoustic Version,NIKI Acoustic Sessions: Head In The Clouds II,NIKI,"[NIKI, 88rising]",70
4,CAN'T GET OVER YOU (feat. Clams Casino),BALLADS 1,Joji,"[Joji, Clams Casino]",70
5,Midsummer Madness,Head In The Clouds,88rising,"[88rising, Joji, Rich Brian]",69
6,The Weekend,The Weekend,88rising,"[88rising, BIBI]",69
7,Afterthought,Nectar,Joji,"[Joji, BENEE]",68
8,Run It (feat. Rick Ross & Rich Brian),Shang-Chi and The Legend of The Ten Rings: The...,88rising,"[DJ Snake, Rick Ross, Rich Brian]",68
9,"Costa Rica (with Bas & JID feat. Guapdad 4000,...",Revenge Of The Dreamers III,Dreamville,"[Dreamville, Bas, JID]",67


In [17]:
# Convert table to HTML
html = top_ten.to_html()
  
# write html to file
text_file = open("top_ten_table.html", "w")
text_file.write(html)
text_file.close()