In [18]:
import pandas as pd
import numpy as np
from TikTokApi import TikTokApi
from tiktok_data_cleaner import get_trending_data, hashtag_cleaner, data_cleaner, to_csv

In [19]:
# outputs tiktok dictionary of trending videos
# input: number of videos
# output: tiktok dictionary
def get_trending_data(n = 10):
    """Fetch trending videos through TikTokApi.

    Input:  n -- forwarded to ``TikTokApi.trending`` as ``count``.
    Output: whatever ``TikTokApi.trending`` returns; ``data_cleaner`` in this
            file iterates it as a sequence of per-video dictionaries.
    """
    from TikTokApi import TikTokApi
    from TikTokApi.browser import set_async

    client = TikTokApi()
    # NOTE(review): set_async() is called after the client is created and
    # before the request -- presumably required to run inside a notebook's
    # event loop; confirm against the TikTokApi documentation.
    set_async()
    return client.trending(count=n)

# extracts hashtags from video description and outputs a list of hashtags
# input: string
# output: list
def hashtag_cleaner(x):
    """Extract hashtag names from a video description.

    A hashtag is the run of characters that follows a ``#`` up to, but not
    including, the next delimiter: any character in ``spec_char``, any
    whitespace character (space, newline, tab, ...), or the end of the string.

    Input:  x -- the description string.
    Output: list of hashtag names without the leading ``#``, in order of
            appearance. A ``#`` that is not followed by at least one
            non-delimiter character (``"# 1"``, ``"##"``, a trailing ``#``)
            contributes nothing instead of an empty string.
    """
    spec_char = frozenset([
        "[", "]", "{", "}", "%",
        "^", "*", "+", "=", "\\",
        "|", "~", "<", ">", "€",
        "£", "¥", "•", ".", ",",
        "!", "'", '"', "-", "/",
        ":", ";", "(", ")", "$",
        "&", "#", " ",
    ])
    hashtags = []
    end = len(x)
    for start, val in enumerate(x):
        if val != "#":
            continue
        stop = start + 1
        # Descriptions can contain newlines/tabs, so every whitespace
        # character terminates a tag, not only the literal space.
        while stop < end and x[stop] not in spec_char and not x[stop].isspace():
            stop += 1
        ht = x[start + 1:stop]
        # Skip a bare '#': recording "" would add a meaningless hashtag.
        if ht:
            hashtags.append(ht)
    return hashtags

# compiles data to a pandas dataframe
# input: tiktok dictionary
# output: cleaned dataframe
def data_cleaner(data):
    """Flatten TikTok video records into a pandas DataFrame.

    Input:  data -- iterable of per-video dictionaries; each must provide the
            keys read below ('id', 'createTime', 'desc', and the nested
            'author', 'music', 'video', 'stats' and 'authorStats' entries).
    Output: DataFrame with one row per video and one column per extracted
            field, in the order the fields are listed here.
    """
    import pandas as pd

    def to_row(tiktok):
        # One flat record per video; hashtags are parsed out of the caption.
        return {
            'id': tiktok['id'],
            'create_time': tiktok['createTime'],
            'user_name': tiktok['author']['uniqueId'],
            'hashtags': hashtag_cleaner(tiktok['desc']),
            'song': tiktok['music']['title'],
            'song_id': tiktok['music']['id'],
            'video_length': tiktok['video']['duration'],
            'n_likes': tiktok['stats']['diggCount'],
            'n_shares': tiktok['stats']['shareCount'],
            'n_comments': tiktok['stats']['commentCount'],
            'n_plays': tiktok['stats']['playCount'],
            'n_followers': tiktok['authorStats']['followerCount'],
            'n_total_likes': tiktok['authorStats']['heart'],
            'n_total_vids': tiktok['authorStats']['videoCount'],
        }

    return pd.DataFrame([to_row(tiktok) for tiktok in data])

# saves dataframe to csv file
# input: dataframe, name of file (without the ".csv" extension, which is appended)
def to_csv(df, name = "trending"):
    """Write ``df`` to ``<name>.csv`` without the index column.

    Input:  df   -- object exposing ``to_csv`` (a pandas DataFrame here).
            name -- file name without extension; ".csv" is appended.
    Output: None; the file is written relative to the working directory.
    """
    import pandas as pd  # kept from the original; not otherwise used here
    df.to_csv(name + ".csv", index=False)


def main():
    """Fetch 10 trending videos, flatten them and save them as trending.csv."""
    trending_df = data_cleaner(get_trending_data(n = 10))
    # Default file name of to_csv is used, so the output is "trending.csv".
    to_csv(trending_df)


# Script-style entry point: run main() when this code executes as the top-level module.
# NOTE(review): inside a notebook kernel __name__ is normally "__main__", so executing
# this cell would also run main() (fetch 10 trending videos and write trending.csv) --
# confirm that is intended.
if __name__ == "__main__":
    main()


In [20]:
# Request 200 trending videos, flatten them into a dataframe and save the
# snapshot as trending_11_7.csv.
tiktok_dict = get_trending_data(n = 200)
dataframe = data_cleaner(tiktok_dict)
to_csv(dataframe, name = 'trending_11_7')

In [21]:
# Read the saved snapshot back from disk.
load = pd.read_csv("trending_11_7.csv")

In [22]:
# Display the reloaded snapshot.
load

Unnamed: 0,id,create_time,user_name,hashtags,song,song_id,video_length,n_likes,n_shares,n_comments,n_plays,n_followers,n_total_likes,n_total_vids
0,6869080912882175237,1599332535,addisonre,[],WAP（feat. Megan Thee Stallion）,6858456364713282309,10,6100000,63800,53500,50200000,67900000,4200000000,1261
1,6884687827032984838,1602966340,jasonderulo,[],Amityville Horror - Scary Halloween Sound Effects,6780285433650612226,17,1200000,16500,27700,8300000,39200000,868900000,575
2,6890358893302549765,1604286733,amyywoahh,[],original sound,6890358955176921862,32,3700000,10800,29600,19500000,6500000,273000000,708
3,6870989331562040582,1599776880,charlidamelio,"['duet', 'charlirunsondunkin', 'ad']",The Charli,6868312589710363398,25,9000000,33800,60900,52100000,97300000,7600000000,1641
4,6885250012909948166,1603097524,realwoahvicky,[],original sound,6885250553350294277,14,3100000,67600,83500,20400000,5100000,141300000,1352
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,6859890572371430661,1597192744,addisonre,[],im outside in a amg L.Dre remix,6829772829056174854,9,3400000,30500,57200,24000000,67900000,4200000000,1261
196,6860644975424638213,1597368392,addisonre,[],Money trees,6824763991999433477,15,3600000,43800,39900,27600000,67900000,4200000000,1261
197,6862969125791206662,1597909533,charlidamelio,[],Crisis,6837844116592659206,15,5300000,51100,47000,45300000,97300000,7600000000,1641
198,6860865069425773829,1597419636,charlidamelio,[],original sound,6811544728685923078,15,5900000,88800,114400,41300000,97300000,7600000000,1641


In [23]:
# Drop rows that repeat a video id (pandas keeps the first occurrence by default).
dupl_rem = load.drop_duplicates('id')

In [24]:
# Save the de-duplicated rows under the same name, i.e. this overwrites the
# trending_11_7.csv snapshot written earlier in the notebook.
to_csv(dupl_rem, name = 'trending_11_7')

In [25]:
# Reload trending_11_7.csv, rebinding `load` to the file's current contents.
load = pd.read_csv("trending_11_7.csv")

In [26]:
# Display the reloaded dataframe.
load

Unnamed: 0,id,create_time,user_name,hashtags,song,song_id,video_length,n_likes,n_shares,n_comments,n_plays,n_followers,n_total_likes,n_total_vids
0,6869080912882175237,1599332535,addisonre,[],WAP（feat. Megan Thee Stallion）,6858456364713282309,10,6100000,63800,53500,50200000,67900000,4200000000,1261
1,6884687827032984838,1602966340,jasonderulo,[],Amityville Horror - Scary Halloween Sound Effects,6780285433650612226,17,1200000,16500,27700,8300000,39200000,868900000,575
2,6890358893302549765,1604286733,amyywoahh,[],original sound,6890358955176921862,32,3700000,10800,29600,19500000,6500000,273000000,708
3,6870989331562040582,1599776880,charlidamelio,"['duet', 'charlirunsondunkin', 'ad']",The Charli,6868312589710363398,25,9000000,33800,60900,52100000,97300000,7600000000,1641
4,6885250012909948166,1603097524,realwoahvicky,[],original sound,6885250553350294277,14,3100000,67600,83500,20400000,5100000,141300000,1352
5,6870854022073502981,1599745393,foodies,[],original sound,6870853788496972549,6,407400,46500,6009,2700000,12200000,480400000,688
6,6885884502858534149,1603244960,naiyhagulley7,"['productoverload', 'fyp', 'cleaningtiktok']",original sound,6885884566163311366,59,866500,11700,19600,4700000,43700,1200000,215
7,6890500404191300870,1604319846,slowmotion563,[],Ouch,6732161888362170369,22,779200,13000,12800,16600000,288600,3200000,59
8,6890701628538719494,1604366525,livdady6,"['5millionviews', 'dance']",original sound,6879132277599701762,15,580300,8160,3684,3600000,427600,8800000,600
9,6887001373548842245,1603505001,hay_babe_,"['striptok', 'exoticdancer', 'friyay']",original sound,6887001476909026053,54,307000,458,968,2800000,39800,364900,24


In [59]:
# Notebook-level TikTokApi client.
# NOTE(review): bySongID below creates its own TikTokApi instance, so this
# object appears unused in the visible cells -- confirm before removing.
api = TikTokApi()


def bySongID(song_id, n=10):
    """Collect videos that use a given sound and return them as a DataFrame.

    Input:  song_id -- sound identifier forwarded to ``TikTokApi.bySound``.
            n       -- forwarded to ``bySound`` as ``count``.
    Output: DataFrame with one row per returned video and the columns listed
            below, read from each record's 'itemInfos', 'authorInfos',
            'musicInfos' and 'authorStats' entries.
    """
    import pandas as pd

    client = TikTokApi()

    def to_row(tiktok):
        # Flatten one bySound record into a single-level dictionary.
        return {
            'id': tiktok['itemInfos']['id'],
            'create_time': tiktok['itemInfos']['createTime'],
            'user_name': tiktok['authorInfos']['uniqueId'],
            'song': tiktok['musicInfos']['musicName'],
            'song_id': tiktok['musicInfos']['musicId'],
            'n_likes': tiktok['itemInfos']['diggCount'],
            'n_shares': tiktok['itemInfos']['shareCount'],
            'n_comments': tiktok['itemInfos']['commentCount'],
            'n_plays': tiktok['itemInfos']['playCount'],
            'n_followers': tiktok['authorStats']['followerCount'],
            'n_total_likes': tiktok['authorStats']['heartCount'],
            'n_total_vids': tiktok['authorStats']['videoCount'],
        }

    return pd.DataFrame(
        [to_row(tiktok) for tiktok in client.bySound(song_id, count=n)]
    )


# Snowball sample: request up to 200 videos that use the sound of the video
# stored at row label 5 of `load`.
song_snowball = bySongID(load.loc[5, 'song_id'], n=200)

# Disabled: a second seed (row 7) that would be appended to the first sample.
#song_snowball2 = bySongID(load['song_id'][7], n=200)
#song_snowball = song_snowball.append(song_snowball2)


In [60]:
# Save the snowball sample as song_trial3.csv.
to_csv(song_snowball, name="song_trial3")