In [1]:
import pandas as pd
from sqlalchemy import create_engine
import json
from pandas.io.json import json_normalize
import pprint
import numpy as np

## Extract Youtube Data

In [2]:
csv_file = "./Resources/USvideos.csv"
youtube_data_df = pd.read_csv(csv_file, usecols=['video_id', 'title', 'views', 'tags'])


video_preurl = "https://www.youtube.com/watch?v="
youtube_data_df = youtube_data_df.drop_duplicates(subset="video_id", keep="last")

youtube_data_df['video_url'] = video_preurl + youtube_data_df['video_id']

youtube_data_df.head()

Unnamed: 0,video_id,title,tags,views,video_url
10,9wRQljFNDW8,Dion Lewis' 103-Yd Kick Return TD vs. Denver! ...,"NFL|""Football""|""offense""|""defense""|""afc""|""nfc""...",81377,https://www.youtube.com/watch?v=9wRQljFNDW8
36,Om_zGhJLZ5U,TL;DW - Every DCEU Movie Before Justice League,"screenjunkies|""screen junkies""|""sj news""|""hone...",288922,https://www.youtube.com/watch?v=Om_zGhJLZ5U
41,goP4Z5wyOlM,Iraq-Iran earthquake: Deadly tremor hits borde...,"bbc|""bbc news""|""news""|""iran""|""iran news""|""iraq...",34785,https://www.youtube.com/watch?v=goP4Z5wyOlM
55,8NHA23f7LvU,Jason Momoa Wows Hugh Grant With Some Dothraki...,"Graham Norton|""Graham Norton Show Official""|""E...",1496225,https://www.youtube.com/watch?v=8NHA23f7LvU
76,IE-xepGLVt8,Mayo Clinic's first face transplant patient me...,"Mayo Clinic|""Health Care (Issue)""|""Healthcare ...",237307,https://www.youtube.com/watch?v=IE-xepGLVt8


In [3]:
youtube_data_df[youtube_data_df['video_id']=='JcC5VGOx8I8']

Unnamed: 0,video_id,title,tags,views,video_url
3597,JcC5VGOx8I8,Calum Scott - You Are The Reason (Lyric Video),"Pop|""calum scott""|""you are the reason""|""you ar...",1728219,https://www.youtube.com/watch?v=JcC5VGOx8I8


## Extract TikTot Data

In [4]:
json_file = './Resources/trending.json'
with open(json_file, 'r', encoding='utf-8') as j:
     data = json.loads(j.read())

tiktok_data_df = pd.DataFrame(data['collector'], columns=['id', 'text', 'webVideoUrl', 'playCount', 'hashtags'])
tiktok_data_df.head()

Unnamed: 0,id,text,webVideoUrl,playCount,hashtags
0,6907228749016714497,Confidence went 📈,https://www.tiktok.com/@ninakleij/video/690722...,44800,[]
1,6875468410612993286,Quiet Zone... follow me on insta: joeysofo. Co...,https://www.tiktok.com/@joeysofo/video/6875468...,838100,[]
2,6898699405898059010,Iphone bend test🤗 #tiktok #viral #fyp #iphone ...,https://www.tiktok.com/@jackeyephone/video/689...,15300000,"[{'id': '23428', 'name': 'tiktok', 'title': ''..."
3,6902819837345533186,,https://www.tiktok.com/@naomivaneeren/video/69...,94900,[]
4,6905635666588192002,小技です👟✨#tiktok教室#tutorial,https://www.tiktok.com/@io.dreamer_mk/video/69...,115300,"[{'id': '1627704798586882', 'name': 'tiktok教室'..."


In [5]:
# Connect to Database 
rds_connection_string = "trends_project:123456@localhost:5432/trending_db"
engine = create_engine(f'postgresql://{rds_connection_string}')

In [6]:
engine.table_names() 

  engine.table_names()


['trending', 'tags']

## Transform Youtube Data 

In [7]:
new_youtube_df = youtube_data_df.rename(columns={"title": "video_title", "views": "video_view_count"})

new_youtube_df.head()

Unnamed: 0,video_id,video_title,tags,video_view_count,video_url
10,9wRQljFNDW8,Dion Lewis' 103-Yd Kick Return TD vs. Denver! ...,"NFL|""Football""|""offense""|""defense""|""afc""|""nfc""...",81377,https://www.youtube.com/watch?v=9wRQljFNDW8
36,Om_zGhJLZ5U,TL;DW - Every DCEU Movie Before Justice League,"screenjunkies|""screen junkies""|""sj news""|""hone...",288922,https://www.youtube.com/watch?v=Om_zGhJLZ5U
41,goP4Z5wyOlM,Iraq-Iran earthquake: Deadly tremor hits borde...,"bbc|""bbc news""|""news""|""iran""|""iran news""|""iraq...",34785,https://www.youtube.com/watch?v=goP4Z5wyOlM
55,8NHA23f7LvU,Jason Momoa Wows Hugh Grant With Some Dothraki...,"Graham Norton|""Graham Norton Show Official""|""E...",1496225,https://www.youtube.com/watch?v=8NHA23f7LvU
76,IE-xepGLVt8,Mayo Clinic's first face transplant patient me...,"Mayo Clinic|""Health Care (Issue)""|""Healthcare ...",237307,https://www.youtube.com/watch?v=IE-xepGLVt8


In [8]:
lst_col = 'name'
new_youtube_df['name'] = new_youtube_df['tags'].str.split('|')

new_youtube_df2 = pd.DataFrame({col:np.repeat(new_youtube_df[col].values, new_youtube_df[lst_col].str.len())
              for col in new_youtube_df.columns.difference([lst_col])
             }).assign(**{lst_col:np.concatenate(new_youtube_df[lst_col].values)})[new_youtube_df.columns.tolist()]

#new_youtube_df2 = youtube_data_df.rename(columns={"video_id": "platform_id", "views": "view_count", "split_tags":"name"})

new_youtube_df2 = new_youtube_df2.drop(columns=['tags'])
new_youtube_df2.head()

Unnamed: 0,video_id,video_title,video_view_count,video_url,name
0,9wRQljFNDW8,Dion Lewis' 103-Yd Kick Return TD vs. Denver! ...,81377,https://www.youtube.com/watch?v=9wRQljFNDW8,NFL
1,9wRQljFNDW8,Dion Lewis' 103-Yd Kick Return TD vs. Denver! ...,81377,https://www.youtube.com/watch?v=9wRQljFNDW8,"""Football"""
2,9wRQljFNDW8,Dion Lewis' 103-Yd Kick Return TD vs. Denver! ...,81377,https://www.youtube.com/watch?v=9wRQljFNDW8,"""offense"""
3,9wRQljFNDW8,Dion Lewis' 103-Yd Kick Return TD vs. Denver! ...,81377,https://www.youtube.com/watch?v=9wRQljFNDW8,"""defense"""
4,9wRQljFNDW8,Dion Lewis' 103-Yd Kick Return TD vs. Denver! ...,81377,https://www.youtube.com/watch?v=9wRQljFNDW8,"""afc"""


In [9]:
test_tiktok_data_df = pd.json_normalize(data['collector'], record_path="hashtags", meta=['id', 'text', 'webVideoUrl', 'playCount'], meta_prefix='video_')
test_tiktok_data_df = test_tiktok_data_df.drop(columns=['id','title','cover'])
test_tiktok_data_df

Unnamed: 0,name,video_id,video_text,video_webVideoUrl,video_playCount
0,tiktok,6898699405898059010,Iphone bend test🤗 #tiktok #viral #fyp #iphone ...,https://www.tiktok.com/@jackeyephone/video/689...,15300000
1,viral,6898699405898059010,Iphone bend test🤗 #tiktok #viral #fyp #iphone ...,https://www.tiktok.com/@jackeyephone/video/689...,15300000
2,fyp,6898699405898059010,Iphone bend test🤗 #tiktok #viral #fyp #iphone ...,https://www.tiktok.com/@jackeyephone/video/689...,15300000
3,iphone,6898699405898059010,Iphone bend test🤗 #tiktok #viral #fyp #iphone ...,https://www.tiktok.com/@jackeyephone/video/689...,15300000
4,test,6898699405898059010,Iphone bend test🤗 #tiktok #viral #fyp #iphone ...,https://www.tiktok.com/@jackeyephone/video/689...,15300000
...,...,...,...,...,...
5029,horsegirl,6899116261746281730,"The collab you didn’t know you needed, myself ...",https://www.tiktok.com/@erinwilliams_1/video/6...,309000
5030,equestrian,6899116261746281730,"The collab you didn’t know you needed, myself ...",https://www.tiktok.com/@erinwilliams_1/video/6...,309000
5031,equestriangirl,6899116261746281730,"The collab you didn’t know you needed, myself ...",https://www.tiktok.com/@erinwilliams_1/video/6...,309000
5032,riding,6899116261746281730,"The collab you didn’t know you needed, myself ...",https://www.tiktok.com/@erinwilliams_1/video/6...,309000


## Transform Tiktok Data

In [10]:
test_tiktok_data_df = test_tiktok_data_df.rename(columns={"video_text": "video_title", "video_webVideoUrl": "video_url", "video_playCount": "video_view_count"})
test_tiktok_data_df.head()

Unnamed: 0,name,video_id,video_title,video_url,video_view_count
0,tiktok,6898699405898059010,Iphone bend test🤗 #tiktok #viral #fyp #iphone ...,https://www.tiktok.com/@jackeyephone/video/689...,15300000
1,viral,6898699405898059010,Iphone bend test🤗 #tiktok #viral #fyp #iphone ...,https://www.tiktok.com/@jackeyephone/video/689...,15300000
2,fyp,6898699405898059010,Iphone bend test🤗 #tiktok #viral #fyp #iphone ...,https://www.tiktok.com/@jackeyephone/video/689...,15300000
3,iphone,6898699405898059010,Iphone bend test🤗 #tiktok #viral #fyp #iphone ...,https://www.tiktok.com/@jackeyephone/video/689...,15300000
4,test,6898699405898059010,Iphone bend test🤗 #tiktok #viral #fyp #iphone ...,https://www.tiktok.com/@jackeyephone/video/689...,15300000


### Merge Data

In [11]:
combined_tags_df = test_tiktok_data_df.append(new_youtube_df2)
combined_tags_df.head()

Unnamed: 0,name,video_id,video_title,video_url,video_view_count
0,tiktok,6898699405898059010,Iphone bend test🤗 #tiktok #viral #fyp #iphone ...,https://www.tiktok.com/@jackeyephone/video/689...,15300000
1,viral,6898699405898059010,Iphone bend test🤗 #tiktok #viral #fyp #iphone ...,https://www.tiktok.com/@jackeyephone/video/689...,15300000
2,fyp,6898699405898059010,Iphone bend test🤗 #tiktok #viral #fyp #iphone ...,https://www.tiktok.com/@jackeyephone/video/689...,15300000
3,iphone,6898699405898059010,Iphone bend test🤗 #tiktok #viral #fyp #iphone ...,https://www.tiktok.com/@jackeyephone/video/689...,15300000
4,test,6898699405898059010,Iphone bend test🤗 #tiktok #viral #fyp #iphone ...,https://www.tiktok.com/@jackeyephone/video/689...,15300000


## Load Data 

In [12]:
combined_tags_df.to_sql('tags', if_exists='replace', con=engine, index=False) 
#engine.execute('select * from tags').fetchall()

In [13]:
tagcount = combined_tags_df['name'].nunique()
print(f'Total unique tags: {tagcount:,d}')

Total unique tags: 59,970


In [14]:
videocount = combined_tags_df['video_id'].nunique()
print(f'Total unique videos: {videocount:,d}')

Total unique videos: 7,156


# Analyze Data !

In [15]:
summarydf = combined_tags_df.groupby(['name'])['video_view_count'].sum().reset_index()
summarydf.sort_values('video_view_count', ascending=False)

Unnamed: 0,name,video_view_count
11033,"""Pop""",1350499920
28423,"""funny""",906181483
22441,"""comedy""",725433295
11385,"""Rap""",536997523
11478,"""Records""",517401466
...,...,...
34497,"""kentucky""",559
58436,mondkapje,484
57832,ikdoenietmeermee,484
58883,regering,484


## Top 10 Tags

In [16]:
summarydf.sort_values('video_view_count', ascending=False).head(10)

Unnamed: 0,name,video_view_count
11033,"""Pop""",1350499920
28423,"""funny""",906181483
22441,"""comedy""",725433295
11385,"""Rap""",536997523
11478,"""Records""",517401466
40702,"""official""",465012220
36191,"""live""",464683615
31844,"""humor""",338880750
56196,[none],334763113
50628,"""trailer""",328545440


## Bottom 10 Tags

In [17]:
summarydf.sort_values('video_view_count', ascending=False).tail(10)

Unnamed: 0,name,video_view_count
26983,"""fatality""",559
37404,"""marshall county high school""",559
59073,shooting,559
51205,"""u.s. news""",559
29531,"""governor matt bevin""",559
34497,"""kentucky""",559
58436,mondkapje,484
57832,ikdoenietmeermee,484
58883,regering,484
57831,ikdoenietmee,484


## Top 10 Videos by "Pop"

In [18]:
def make_clickable(val):
    return '<a href="{}">{}</a>'.format(val,val)

In [19]:
popdf = combined_tags_df[combined_tags_df['name'] == "\"Pop\""]
popdf.head(10).style.format({'video_url': make_clickable})

Unnamed: 0,name,video_id,video_title,video_url,video_view_count
193,"""Pop""",pz95u3UVpaM,Camila Cabello - Havana (Vertical Video) ft. Young Thug,https://www.youtube.com/watch?v=pz95u3UVpaM,5476737
306,"""Pop""",7fm7mll2qvg,Sigrid - Strangers (Lyric Video),https://www.youtube.com/watch?v=7fm7mll2qvg,91776
445,"""Pop""",9wg3v-01yKQ,Harry Styles - Kiwi,https://www.youtube.com/watch?v=9wg3v-01yKQ,9632678
626,"""Pop""",08nkwgZIE4I,P!nk - Barbies (Audio),https://www.youtube.com/watch?v=08nkwgZIE4I,651036
944,"""Pop""",UFPSIa1cLRQ,Phillip Phillips - Magnetic (Audio),https://www.youtube.com/watch?v=UFPSIa1cLRQ,38068
1873,"""Pop""",cYw-oyJ7AEY,"Pitbull, Stereotypes - Jungle (Lyric Video) ft. E-40, Abraham Mateo",https://www.youtube.com/watch?v=cYw-oyJ7AEY,304700
2003,"""Pop""",eHIY3HNNqzM,The Script - Arms Open (Acoustic) [Audio],https://www.youtube.com/watch?v=eHIY3HNNqzM,157037
2007,"""Pop""",5gFpcEKayz4,MØ - When I Was Young (Official Video),https://www.youtube.com/watch?v=5gFpcEKayz4,895507
2891,"""Pop""",e4FApt6z55c,Kimbra - Top of the World (Official Music Video),https://www.youtube.com/watch?v=e4FApt6z55c,226659
3889,"""Pop""",ObIQ0s02UHg,Jason Derulo - Tip Toe feat. French Montana (Official Lyric Video),https://www.youtube.com/watch?v=ObIQ0s02UHg,2954882


## Top 10 Videos by "funny"

In [20]:
funnydf = combined_tags_df[combined_tags_df['name'] == "\"funny\""]
funnydf.head(10).style.format({'video_url': make_clickable})

Unnamed: 0,name,video_id,video_title,video_url,video_view_count
127,"""funny""",cOc3tsFWoRs,Jason Momoa & Lisa Bonet: Love at First Sight,https://www.youtube.com/watch?v=cOc3tsFWoRs,1497519
143,"""funny""",wnwF1FHybDQ,Mark Wahlberg's Kids Use Him for His Celeb Connections,https://www.youtube.com/watch?v=wnwF1FHybDQ,225286
235,"""funny""",k29YnfttqEU,Mean Tweets – Country Music Edition #3,https://www.youtube.com/watch?v=k29YnfttqEU,1315873
875,"""funny""",510KQ_LXGww,My Every Day Autumn Makeup | Zoella,https://www.youtube.com/watch?v=510KQ_LXGww,1390440
900,"""funny""",mwpHSMv1pI4,Girlfriend Gives Me A Surprise Makeover!? | Hannah Hart,https://www.youtube.com/watch?v=mwpHSMv1pI4,359120
1129,"""funny""",MCW5HUkrr-o,Impossible Active Audio Noise Cancelling by Muzo,https://www.youtube.com/watch?v=MCW5HUkrr-o,560569
1154,"""funny""",VsYmwBOYfW8,Mean Tweets – Jimmy Kimmel Edition,https://www.youtube.com/watch?v=VsYmwBOYfW8,2765121
1218,"""funny""",TSqwDQHE1J8,Carey Mulligan Doesn't Tell People She's an Actress,https://www.youtube.com/watch?v=TSqwDQHE1J8,137749
1318,"""funny""",HBS9783c0BI,Jaden Smith: Watch Me,https://www.youtube.com/watch?v=HBS9783c0BI,102639
1689,"""funny""",Rq9-mW6HgQE,Sean Astin Knew Nothing About 'Lord Of The Rings' Once Upon A Time,https://www.youtube.com/watch?v=Rq9-mW6HgQE,908489
