## Import Libraries



In [41]:
#import libraries
from cm_api import get_api_token, get_track_metadata, requests, get_chart_data, get_tiktok_chart_data, get_artist_id
import pandas as pd
import re
from cm_config import token
import pickle

## Refresh Token for ChartMetric API

In [2]:
# Read the Chartmetric refresh token from the `token` object imported from
# cm_config, so the secret itself is not written into this notebook.

REFRESH_TOKEN = token['refresh_token']

## Get api_token

In [47]:
# Exchange the refresh token for an API token via cm_api.get_api_token.
# `api_token` is passed as the first argument to the data-fetching calls below.
api_token = get_api_token(REFRESH_TOKEN)


## Get Shazam Chart Data

In [None]:
# Fetch Shazam chart data through cm_api.get_chart_data.
# NOTE(review): '28795304' is presumably the Chartmetric track ID being studied
# and '2019-08-04' the start date of the query -- confirm against get_chart_data.
shazam_charts = get_chart_data(api_token, '28795304', 'shazam', '2019-08-04')

In [None]:
shazam_charts

In [None]:
# Flatten every Shazam chart entry into a tuple holding only the fields kept
# for analysis. Only the first element of `release_dates` is retained.
roses_bucket = [
    (
        entry['id'],
        entry['rank'],
        entry['added_at'],
        entry['code2'],
        entry['city'],
        entry['pre_rank'],
        entry['peak_rank'],
        entry['peak_date'],
        entry['release_dates'][0],
    )
    for entry in shazam_charts
]
    

    

In [None]:
# Create a pandas DataFrame of Shazam chart positions, one row per parsed entry.
# Column order mirrors the field order of the tuples in `roses_bucket`.
shazam_columns = [
    'shazam_id', 'rank', 'added_at', 'code2', 'city',
    'pre_rank', 'peak_rank', 'peak_date', 'release_date',
]
df = pd.DataFrame(roses_bucket, columns=shazam_columns)

## Data Cleaning

In [None]:
# Strip surrounding whitespace from the `code2` column.
# The vectorised `.str` accessor leaves missing values as NaN, whereas calling
# `x.strip()` on each element raises as soon as one value is not a string.
df['code2'] = df['code2'].str.strip()

In [None]:
df.info()

In [None]:
# Convert the date columns to datetime.
# No explicit `format` is passed. Timestamps shown elsewhere in this notebook
# are ISO-8601 strings (e.g. '2020-04-26T00:00:00.000Z'), which the pattern
# '%Y/%m/%d' does not describe; that pattern only parsed through lenient
# legacy behaviour, and pandas versions that apply `format` strictly reject it.
# Letting pandas infer the layout handles both shapes.
# NOTE(review): confirm the Shazam chart dates use the same ISO layout.
for date_column in ['added_at', 'peak_date', 'release_date']:
    df[date_column] = pd.to_datetime(df[date_column])

In [None]:
df

## Save Dataframe to CSV file

In [None]:
# Persist the cleaned Shazam chart frame to disk as CSV (the row index is
# written as the first column, which is why it is read back with index_col=0).
df.to_csv('datasets/roses_shazam_chart.csv')

In [None]:
# Re-open the CSV written by the previous cell. The path must include the
# '.csv' extension to match the file name used by `to_csv`.
# index_col=0 restores the saved row index instead of adding a new one.
df = pd.read_csv('datasets/roses_shazam_chart.csv', index_col=0)
df

In [None]:
# Save the Shazam chart frame as JSON; this file is read back into `df_json`
# further down.
df.to_json('datasets/roses_shazam_chart.json')

In [None]:
# Save the same frame as an Excel workbook.
# NOTE(review): writing .xlsx needs an Excel writer engine (e.g. openpyxl)
# installed in the kernel environment -- confirm it is available.
df.to_excel('datasets/roses_shazam_chart.xlsx')

In [None]:
# Load the JSON export back into a DataFrame.
# By default read_json only auto-parses columns whose label looks date-like
# (e.g. ends in '_at'), so 'added_at' would become datetime while 'peak_date'
# and 'release_date' would not. Listing all three keeps their dtypes consistent.
df_json = pd.read_json(
    'datasets/roses_shazam_chart.json',
    convert_dates=['added_at', 'peak_date', 'release_date'],
)
df_json

In [None]:
# Keep only the chart rows whose country code is 'US'.
is_us_row = df_json['code2'].eq('US')
US_market = df_json.loc[is_us_row]

In [None]:
US_market['added_at'].min()

In [None]:
df_json.iloc[538]

In [None]:
df_json['added_at'].sort_values(ascending=False)

In [None]:
US_market

## Get iTunes Top Chart Data

In [None]:
# Fetch iTunes top-chart data through the same get_chart_data helper used for Shazam.
# NOTE(review): same '28795304' identifier as the Shazam request but a different
# date ('2019-10-05' vs '2019-08-04') -- confirm the date difference is intentional.
itunes_top = get_chart_data(api_token, '28795304', 'itunes_top', '2019-10-05')

In [None]:
# Flatten every iTunes chart entry into a tuple of the fields kept for analysis.
# Only the first element of `code2s` and of `release_dates` is retained.
roses_Itunes_bucket = [
    (
        entry['id'],
        entry['code2s'][0],
        entry['code2'],
        entry['release_dates'][0],
        entry['rank'],
        entry['added_at'],
        entry['pre_rank'],
        entry['peak_rank'],
        entry['peak_date'],
    )
    for entry in itunes_top
]
   
    

## Clean iTunes Data

In [None]:
# Turn the parsed iTunes tuples into a DataFrame.
# Column order mirrors the field order of the tuples in `roses_Itunes_bucket`.
itunes_columns = [
    'id', 'code2s', 'code2', 'release date', 'rank',
    'added_at', 'pre_rank', 'peak_rank', 'peak_date',
]
itunes_df = pd.DataFrame(roses_Itunes_bucket, columns=itunes_columns)

In [None]:


# Convert the date columns to datetime.
# No explicit `format` is passed. Timestamps shown elsewhere in this notebook
# are ISO-8601 strings (e.g. '2020-04-26T00:00:00.000Z'), which the pattern
# '%Y/%m/%d' does not describe; pandas versions that apply `format` strictly
# reject it. Letting pandas infer the layout handles both shapes.
# NOTE(review): confirm the iTunes chart dates use the same ISO layout.
for date_column in ['release date', 'added_at', 'peak_date']:
    itunes_df[date_column] = pd.to_datetime(itunes_df[date_column])

## Save iTunes Data to CSV File

In [None]:
# Persist the cleaned iTunes chart frame to disk as CSV (row index included).
itunes_df.to_csv('datasets/roses_itunes_chart.csv')

## Retrieve TikTok Chart Data

In [None]:
# Chart dates to download; one CSV is written per date.
date_range = ['2020-08-08', '2020-08-15']

# Output columns, in the same order as the fields of each parsed track tuple.
tiktok_columns = ['track_name', 'artist_name', 'cm_id', 'label', 'release_date', 'rank',
                  'weekly_posts', 'add_date', 'velocity', 'pre_rank', 'peak_rank', 'peak_date', 'time_on_chart',
                  'rank_week_start', 'weekly_posts_start', 'week_start_date',
                  'rank_week_end', 'weekly_posts_end', 'week_end_date']
# Columns holding timestamps that are converted to datetime before saving.
tiktok_date_columns = ['release_date', 'add_date', 'peak_date', 'week_start_date', 'week_end_date']

for date in date_range:
    # for each date grab the weekly TikTok track chart
    tt_chart = get_tiktok_chart_data(api_token, 'tracks', date, 'weekly')

    # Parse each charted track into one tuple. Only the first artist name, label
    # and release date are kept; the first and last `rankStats` entries supply
    # the week-start and week-end figures.
    data_bucket = [
        (
            track['name'], track['tiktok_artist_names'][0], track['cm_track'], track['album_label'][0],
            track['release_dates'][0], track['rank'], track['weekly_posts'], track['added_at'],
            track['velocity'], track['pre_rank'], track['peak_rank'], track['peak_date'],
            track['time_on_chart'],
            track['rankStats'][0]['rank'], track['rankStats'][0]['weekly_posts'], track['rankStats'][0]['timestp'],
            track['rankStats'][-1]['rank'], track['rankStats'][-1]['weekly_posts'], track['rankStats'][-1]['timestp'],
        )
        for track in tt_chart
    ]

    # NOTE(review): this rebinds the notebook-wide name `df`, which the Shazam
    # cells above also use; the name is kept so later cells see the same state.
    df = pd.DataFrame(data_bucket, columns=tiktok_columns)

    # Convert date columns to actual datetimes. No `format` is passed: the saved
    # CSV output shows timezone-aware values, which '%Y/%m/%d' cannot describe,
    # and pandas versions that apply `format` strictly reject that pattern.
    for column in tiktok_date_columns:
        df[column] = pd.to_datetime(df[column])

    # save each week's dataframe as csv, named after its chart date
    df.to_csv('datasets/tiktokweekly_{}.csv'.format(date))

In [None]:
import os
import re

# Load every saved weekly TikTok chart CSV into its own DataFrame, restoring the
# saved row index with index_col=0. Names follow the pattern ttwk_<YYMMDD>.
# NOTE(review): there is no file for 2020-07-11 between 07-04 and 07-18 --
# confirm whether that week is intentionally absent.
(
    ttwk_200502, ttwk_200509, ttwk_200516, ttwk_200523, ttwk_200530,
    ttwk_200606, ttwk_200613, ttwk_200620, ttwk_200627,
    ttwk_200704, ttwk_200718, ttwk_200725,
    ttwk_200801, ttwk_200808, ttwk_200815,
) = (
    pd.read_csv('datasets/tiktokweekly_{}.csv'.format(week), index_col=0)
    for week in (
        '2020-05-02', '2020-05-09', '2020-05-16', '2020-05-23', '2020-05-30',
        '2020-06-06', '2020-06-13', '2020-06-20', '2020-06-27',
        '2020-07-04', '2020-07-18', '2020-07-25',
        '2020-08-01', '2020-08-08', '2020-08-15',
    )
)

In [None]:
# Stack the weekly frames, oldest first, into one historic frame. Each frame
# keeps its own row labels, so index values repeat across weeks.
weekly_frames = [
    ttwk_200502, ttwk_200509, ttwk_200516, ttwk_200523, ttwk_200530,
    ttwk_200606, ttwk_200613, ttwk_200620, ttwk_200627,
    ttwk_200704, ttwk_200718, ttwk_200725,
    ttwk_200801, ttwk_200808, ttwk_200815,
]
master_df = pd.concat(weekly_frames)
        

In [None]:
# Save the combined weekly chart frame to CSV so it can be reloaded below
# without rebuilding it from the per-week files.

master_df.to_csv('datasets/historic_ttwk.csv')

In [11]:
# Reload the combined weekly chart data from disk; index_col=0 restores the
# saved row index. The bare `master_df` on the last line displays the frame.
master_df = pd.read_csv('datasets/historic_ttwk.csv', index_col=0)
master_df

Unnamed: 0,track_name,artist_name,cm_id,label,release_date,rank,weekly_posts,add_date,velocity,pre_rank,peak_rank,peak_date,time_on_chart,rank_week_start,weekly_posts_start,week_start_date,rank_week_end,weekly_posts_end,week_end_date
0,Savage,Megan Thee Stallion,28677612.0,300 Entertainment,2020-03-06 00:00:00+00:00,1,1428140,2020-05-02 00:00:00+00:00,0.142857,1.0,1,2020-05-11 00:00:00+00:00,125,2,2062314,2020-04-25 00:00:00+00:00,1,1428140,2020-05-02 00:00:00+00:00
1,Laxed (Siren Beat),Jawsh 685,29203381.0,Jawsh 685,2020-04-24 00:00:00+00:00,2,876269,2020-05-02 00:00:00+00:00,,2.0,1,2020-05-09 00:00:00+00:00,88,4,1170310,2020-04-28 00:00:00+00:00,2,876269,2020-05-02 00:00:00+00:00
2,Let's Do It Again,J Boog,15953367.0,Washhouse Hawaii,2011-09-27 00:00:00+00:00,3,667071,2020-05-02 00:00:00+00:00,,3.0,3,2020-05-10 00:00:00+00:00,74,5,725338,2020-04-29 00:00:00+00:00,3,667071,2020-05-02 00:00:00+00:00
3,Bagaikan Langit（cover）,_ucil👑,,,,4,528711,2020-05-02 00:00:00+00:00,0.000000,4.0,1,2020-02-12 00:00:00+00:00,118,4,804732,2020-04-25 00:00:00+00:00,4,528711,2020-05-02 00:00:00+00:00
4,Toosie Slide,Drake,28904803.0,OVO,2020-04-03 00:00:00+00:00,5,501313,2020-05-02 00:00:00+00:00,0.000000,5.0,4,2020-05-05 00:00:00+00:00,106,5,785850,2020-04-25 00:00:00+00:00,5,501313,2020-05-02 00:00:00+00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,Kolors,Monte Booker & Smino,13755632.0,Soulection,2016-06-29 00:00:00+00:00,96,100000,2020-08-15 00:00:00+00:00,-10.142857,39.0,1,2020-07-06 00:00:00+00:00,63,25,300000,2020-08-08 00:00:00+00:00,96,100000,2020-08-15 00:00:00+00:00
96,High Fashion (feat. Mustard),Roddy Ricch,27599219.0,Atlantic Records,2019-12-06 00:00:00+00:00,97,100000,2020-08-15 00:00:00+00:00,,80.0,52,2020-08-10 00:00:00+00:00,88,54,100000,2020-08-09 00:00:00+00:00,97,100000,2020-08-15 00:00:00+00:00
97,I'm Just a Kid,Simple Plan,15073090.0,Rhino Atlantic,2018-04-06 00:00:00+00:00,98,100000,2020-08-15 00:00:00+00:00,,93.0,7,2020-04-29 00:00:00+00:00,142,58,100000,2020-08-09 00:00:00+00:00,98,100000,2020-08-15 00:00:00+00:00
98,Party Girl,StaySolidRocky,29155495.0,Columbia,2020-04-21 00:00:00+00:00,99,100000,2020-08-15 00:00:00+00:00,-9.285714,31.0,5,2020-05-25 00:00:00+00:00,85,34,200000,2020-08-08 00:00:00+00:00,99,100000,2020-08-15 00:00:00+00:00


In [14]:
# Count distinct track names and distinct artist-name strings in the chart history.
# NOTE(review): counts are by name, so different songs sharing a title (or one
# artist credited under several spellings) are not separated -- confirm this is acceptable.
unique_track_count = master_df['track_name'].nunique()
unique_artist_count = master_df['artist_name'].nunique()

print("Number of Unique Tracks: ", unique_track_count)
print("Number of Unique Artists: ", unique_artist_count)

Number of Unique Tracks:  375
Number of Unique Artists:  351


## Add CM ID for each artist

### Open Up Master Dataframe of Weekly TikTok Chart Data (05-02-20 through 08-15-20)

In [6]:
# Load the combined weekly chart data from disk (same read as earlier in the
# notebook), presumably so this section can start from the saved CSV -- TODO confirm.
master_df = pd.read_csv('datasets/historic_ttwk.csv', index_col=0)
master_df

Unnamed: 0,track_name,artist_name,cm_id,label,release_date,rank,weekly_posts,add_date,velocity,pre_rank,peak_rank,peak_date,time_on_chart,rank_week_start,weekly_posts_start,week_start_date,rank_week_end,weekly_posts_end,week_end_date
0,Savage,Megan Thee Stallion,28677612.0,300 Entertainment,2020-03-06 00:00:00+00:00,1,1428140,2020-05-02 00:00:00+00:00,0.142857,1.0,1,2020-05-11 00:00:00+00:00,125,2,2062314,2020-04-25 00:00:00+00:00,1,1428140,2020-05-02 00:00:00+00:00
1,Laxed (Siren Beat),Jawsh 685,29203381.0,Jawsh 685,2020-04-24 00:00:00+00:00,2,876269,2020-05-02 00:00:00+00:00,,2.0,1,2020-05-09 00:00:00+00:00,88,4,1170310,2020-04-28 00:00:00+00:00,2,876269,2020-05-02 00:00:00+00:00
2,Let's Do It Again,J Boog,15953367.0,Washhouse Hawaii,2011-09-27 00:00:00+00:00,3,667071,2020-05-02 00:00:00+00:00,,3.0,3,2020-05-10 00:00:00+00:00,74,5,725338,2020-04-29 00:00:00+00:00,3,667071,2020-05-02 00:00:00+00:00
3,Bagaikan Langit（cover）,_ucil👑,,,,4,528711,2020-05-02 00:00:00+00:00,0.000000,4.0,1,2020-02-12 00:00:00+00:00,118,4,804732,2020-04-25 00:00:00+00:00,4,528711,2020-05-02 00:00:00+00:00
4,Toosie Slide,Drake,28904803.0,OVO,2020-04-03 00:00:00+00:00,5,501313,2020-05-02 00:00:00+00:00,0.000000,5.0,4,2020-05-05 00:00:00+00:00,106,5,785850,2020-04-25 00:00:00+00:00,5,501313,2020-05-02 00:00:00+00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,Kolors,Monte Booker & Smino,13755632.0,Soulection,2016-06-29 00:00:00+00:00,96,100000,2020-08-15 00:00:00+00:00,-10.142857,39.0,1,2020-07-06 00:00:00+00:00,63,25,300000,2020-08-08 00:00:00+00:00,96,100000,2020-08-15 00:00:00+00:00
96,High Fashion (feat. Mustard),Roddy Ricch,27599219.0,Atlantic Records,2019-12-06 00:00:00+00:00,97,100000,2020-08-15 00:00:00+00:00,,80.0,52,2020-08-10 00:00:00+00:00,88,54,100000,2020-08-09 00:00:00+00:00,97,100000,2020-08-15 00:00:00+00:00
97,I'm Just a Kid,Simple Plan,15073090.0,Rhino Atlantic,2018-04-06 00:00:00+00:00,98,100000,2020-08-15 00:00:00+00:00,,93.0,7,2020-04-29 00:00:00+00:00,142,58,100000,2020-08-09 00:00:00+00:00,98,100000,2020-08-15 00:00:00+00:00
98,Party Girl,StaySolidRocky,29155495.0,Columbia,2020-04-21 00:00:00+00:00,99,100000,2020-08-15 00:00:00+00:00,-9.285714,31.0,5,2020-05-25 00:00:00+00:00,85,34,200000,2020-08-08 00:00:00+00:00,99,100000,2020-08-15 00:00:00+00:00


### Isolate Artists with Top 10 Ranked Tracks

In [7]:
# Collect the distinct artist names that appear on any chart row with a rank
# below 11 (i.e. a top-10 position) anywhere in the saved chart history.

in_top10 = master_df['rank'].lt(11)
top10_rankings = master_df.loc[in_top10]
top10_ttartists = top10_rankings['artist_name'].unique().tolist()
top10_ttartists

['Megan Thee Stallion',
 'Jawsh 685',
 'J Boog',
 '_ucil👑',
 'Drake',
 'Pia Mia',
 'Simple Plan',
 'rapidsongs',
 'calebjaxin',
 'Tony Kakkar',
 'HRVY',
 'Meduza & Becky Hill & Goodboys',
 'Iqballl🍁',
 'goalsounds',
 'Conkarah',
 'tiktok_australia',
 'Ir Sais',
 'Engelwood',
 'flighthouse',
 'Puri',
 'StaySolidRocky',
 'BMW KENNY',
 'Johnny Orlando & Mackenzie Ziegler',
 'tyler_warwick',
 'bebiisan',
 'iamtiagz',
 'Melanie_Martinez',
 'Monte Booker & Smino',
 'lil darkie',
 'Ricky Desktop',
 'DaBaby, Roddy Ricch',
 'Kbfr',
 'Rod Wave',
 'Dixie D’Amelio',
 'ROSALÍA & Travis Scott',
 'Enrique Iglesias',
 'Saweetie',
 'BLACKPINK',
 'YFN Lucci',
 'Tainy & J Balvin',
 'Cochise',
 'Gafur & JONY',
 'Pop Smoke',
 'RAKHIM',
 'Claire Rosinkranz']

### Create Dictionary of Artist Chartmetric IDs

In [8]:
# Build a dictionary mapping each top-10 artist name to its Chartmetric artist
# ID so that social media data can be retrieved for each artist.

# One API lookup per artist, in the same order as `top10_ttartists`.
cm_artistIDs = [get_artist_id(api_token, artist, 'artists') for artist in top10_ttartists]

# Pair names with IDs by position. zip gives the same name -> ID mapping as
# popping IDs off the front of the list one key at a time, without emptying
# `cm_artistIDs` or rescanning it for every artist.
# NOTE(review): the saved output shows both None and the string 'None' for
# artists without a match; downstream code has to handle both spellings.
cm_artistIDs_dict = dict(zip(top10_ttartists, cm_artistIDs))


### Pickle Dictionary Object

In [36]:
# Pickle the artist-ID dictionary so it can be reloaded later without spending
# more API calls.
# The `with` block closes the file even if pickle.dump raises.
with open("cm_artistID_dictionary.pickle", "wb") as file_to_write:
    pickle.dump(cm_artistIDs_dict, file_to_write)

### Open Pickled Dictionary

In [42]:
# Reload the pickled artist-name -> Chartmetric-ID dictionary.
# The `with` block closes the file even if loading fails.
# CAUTION: pickle.load can execute arbitrary code; only load the file written
# by this notebook, never a pickle from an untrusted source.
# Note that `cm_artistIDs` now refers to the dictionary, not the list of IDs
# built earlier.
with open("cm_artistID_dictionary.pickle", "rb") as file_input:
    cm_artistIDs = pickle.load(file_input)

In [43]:
cm_artistIDs

{'Megan Thee Stallion': 788785,
 'Jawsh 685': 3654834,
 'J Boog': 3979,
 '_ucil👑': None,
 'Drake': 3380,
 'Pia Mia': 4779,
 'Simple Plan': 1232,
 'rapidsongs': 'None',
 'calebjaxin': None,
 'Tony Kakkar': 29892,
 'HRVY': 710665,
 'Meduza & Becky Hill & Goodboys': 'None',
 'Iqballl🍁': 'None',
 'goalsounds': None,
 'Conkarah': 213607,
 'tiktok_australia': 'None',
 'Ir Sais': 207934,
 'Engelwood': 468920,
 'flighthouse': 3630481,
 'Puri': 240524,
 'StaySolidRocky': 3647326,
 'BMW KENNY': 3674302,
 'Johnny Orlando & Mackenzie Ziegler': 'None',
 'tyler_warwick': 'None',
 'bebiisan': None,
 'iamtiagz': 'None',
 'Melanie_Martinez': None,
 'Monte Booker & Smino': 'None',
 'lil darkie': 1450494,
 'Ricky Desktop': 3667592,
 'DaBaby, Roddy Ricch': 'None',
 'Kbfr': 1629644,
 'Rod Wave': 1009958,
 'Dixie D’Amelio': 3747209,
 'ROSALÍA & Travis Scott': 'None',
 'Enrique Iglesias': 531,
 'Saweetie': 808784,
 'BLACKPINK': 206548,
 'YFN Lucci': 5314,
 'Tainy & J Balvin': 'None',
 'Cochise': 1093056,
 'G

## Get Fan Metrics for Top 10 Ranked Artists

In [53]:
from cm_api import get_fan_metrics

In [54]:
# Example call: Spotify follower history for Chartmetric artist 3654834
# (listed as 'Jawsh 685' in the artist-ID dictionary output above).
# NOTE(review): '2019-09-02' is presumably the start date of the requested
# window -- confirm against cm_api.get_fan_metrics.
get_fan_metrics(api_token, 3654834, 'spotify', '2019-09-02', 'followers')

{'link': 'https://open.spotify.com/artist/56mfhUDKa1vec6rSLZV5Eg',
 'followers': [{'value': 45,
   'timestp': '2020-04-26T00:00:00.000Z',
   'diff': None},
  {'value': 441, 'timestp': '2020-04-28T00:00:00.000Z', 'diff': 396},
  {'value': 1557, 'timestp': '2020-05-06T00:00:00.000Z', 'diff': 1116},
  {'value': 1557, 'timestp': '2020-05-09T00:00:00.000Z', 'diff': 0},
  {'value': 2403, 'timestp': '2020-05-13T00:00:00.000Z', 'diff': 846},
  {'value': 2403, 'timestp': '2020-05-14T00:00:00.000Z', 'diff': 0},
  {'value': 2403, 'timestp': '2020-05-15T00:00:00.000Z', 'diff': 0},
  {'value': 3244, 'timestp': '2020-05-16T00:00:00.000Z', 'diff': 841},
  {'value': 3244, 'timestp': '2020-05-17T00:00:00.000Z', 'diff': 0},
  {'value': 3244, 'timestp': '2020-05-18T00:00:00.000Z', 'diff': 0},
  {'value': 3734, 'timestp': '2020-05-19T00:00:00.000Z', 'diff': 490},
  {'value': 3244, 'timestp': '2020-05-20T00:00:00.000Z', 'diff': -490},
  {'value': 4017, 'timestp': '2020-05-21T00:00:00.000Z', 'diff': 773},
 