# YouTube Data Analysis

## Import Channel Data

In [1]:
import pandas as pd

from MySQL_DB_connect_functions import *
from MySQL_DB_update_functions import *
from youtube_api_functions import *

In [2]:
import numpy as np

In [2]:
# Unpack the six connection settings returned by get_db_info().
# NOTE(review): dbname is not used by any cell visible here — confirm it is still needed.
host_name, dbname, schema_name, port, username, password = get_db_info()

In [3]:
# Clear any previous handle before (re)connecting.
# NOTE(review): presumably this keeps a stale connection from lingering when
# connect_to_db fails on a re-run — confirm against connect_to_db's behavior.
cnx = None
cnx = connect_to_db(username, password, host_name, schema_name, port)
# NOTE(review): cursor is not used by any cell visible here.
cursor = cnx.cursor()

Connected to Database


In [4]:
# Load the stored video records into a DataFrame through the open connection.
table_name = 'videos' # Name of the database table holding the requested data
df = return_df_from_db(table_name, cnx)

In [5]:
# Preview the first rows to sanity-check the columns that were loaded.
df.head()

Unnamed: 0,video_id,channelTitle,title,description,publishedAt,viewCount,likeCount,favoriteCount,commentCount,caption,publishDayName,durationSecs,tagCount
0,_UoTTq651dE,3Blue1Brown,Why 5/3 is a fundamental constant for turbulence,Some mathematical order amidst the chaos of tu...,2018-11-07,622119,23357,0,822,1,Wednesday,688,10
1,-9OUyo8NFZg,3Blue1Brown,Euler's Formula and Graph Duality,"A description of planar graph duality, and how...",2015-06-21,412304,9630,0,331,1,Sunday,447,12
2,-qgreAUpPwM,3Blue1Brown,Pure Fourier series animation montage,Because why not?\nLearn the math behind this: ...,2019-07-03,600345,18533,0,1241,1,Wednesday,749,8
3,-RdOwhmqP5s,3Blue1Brown,"Newton’s method produces this fractal, why don...",Who knew root-finding could be so complicated?...,2021-10-12,1832565,74928,0,3107,1,Tuesday,1566,7
4,1SMmc9gQmHQ,3Blue1Brown,How to count to 1000 on two hands,Typically when we think of counting on two han...,2015-09-18,385566,11316,0,1087,1,Friday,154,11


## Natural Language Processing

In [6]:
import nltk
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Apply matplotlib's 'ggplot' style sheet to figures created after this point.
plt.style.use('ggplot')

In [None]:
# Fetch the comments of a single video via get_video_comments.
# NOTE(review): youtube_obj is not defined in any cell visible here, and this cell
# has no execution count — confirm the API client is created before running it.
comments_df = get_video_comments(youtube_obj, video_id='VYQVlVoWoPY')

In [None]:
# Display the fetched comments.
comments_df

In [7]:
# Pick the title stored under row label 50 as a worked example for the NLTK cells.
example = df.loc[50, 'title']
print(example)

What is backpropagation really doing? | Chapter 3, Deep learning


In [8]:
# Split the example title into word and punctuation tokens, then show the first ten.
tokens = nltk.word_tokenize(example)
tokens[:10]

['What',
 'is',
 'backpropagation',
 'really',
 'doing',
 '?',
 '|',
 'Chapter',
 '3',
 ',']

In [9]:
# Attach a part-of-speech tag to every token, producing (token, tag) pairs.
tagged = nltk.pos_tag(tokens)
tagged[:10]

[('What', 'WP'),
 ('is', 'VBZ'),
 ('backpropagation', 'NN'),
 ('really', 'RB'),
 ('doing', 'VBG'),
 ('?', '.'),
 ('|', 'JJ'),
 ('Chapter', 'NN'),
 ('3', 'CD'),
 (',', ',')]

In [10]:
# Group the tagged tokens into a named-entity chunk tree and print it.
# NOTE(review): the printed tree labels "Deep" as a GPE, which looks like a
# misclassification for this title — treat the entity labels with caution.
entities = nltk.chunk.ne_chunk(tagged)
entities.pprint() # Pretty-print the chunk tree

(S
  What/WP
  is/VBZ
  backpropagation/NN
  really/RB
  doing/VBG
  ?/.
  |/JJ
  Chapter/NN
  3/CD
  ,/,
  (GPE Deep/NNP)
  learning/NN)


In [11]:
from nltk.sentiment import SentimentIntensityAnalyzer
from tqdm.notebook import tqdm
# Create a single sentiment analyzer that the scoring cells reuse.
sia = SentimentIntensityAnalyzer()

In [12]:
# Score the example title; the result holds 'neg', 'neu', 'pos' and 'compound' values.
sia.polarity_scores(example)

{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

In [13]:
# Score every video title, keyed by video_id so the scores can be joined back
# onto the video table afterwards. tqdm shows progress over the rows.
id_title_pairs = zip(df['video_id'], df['title'])
results = {
    vid: sia.polarity_scores(title)
    for vid, title in tqdm(id_title_pairs, total=len(df))
}

  0%|          | 0/124 [00:00<?, ?it/s]

In [19]:
# Build one row per video_id with the sentiment score columns, then attach the
# video metadata from df.
# `results` maps video_id -> score dict, so the raw frame has ids as columns;
# transposing puts one video per row with the id in the index.
# The join key is given explicitly: without `on=`, merge joins on every column
# name the two frames share, so the result would silently change if df ever
# gained a column named like one of the score columns.
vaders = (
    pd.DataFrame(results).T
    .reset_index()
    .rename(columns={'index': 'video_id'})
    .merge(df, how='left', on='video_id')
)

In [20]:
# Show the sentiment scores alongside the video metadata.
vaders

Unnamed: 0,video_id,neg,neu,pos,compound,channelTitle,title,description,publishedAt,viewCount,likeCount,favoriteCount,commentCount,caption,publishDayName,durationSecs,tagCount
0,_UoTTq651dE,0.0,1.0,0.0,0.0,3Blue1Brown,Why 5/3 is a fundamental constant for turbulence,Some mathematical order amidst the chaos of tu...,2018-11-07,622119,23357,0,822,1,Wednesday,688,10
1,-9OUyo8NFZg,0.0,1.0,0.0,0.0,3Blue1Brown,Euler's Formula and Graph Duality,"A description of planar graph duality, and how...",2015-06-21,412304,9630,0,331,1,Sunday,447,12
2,-qgreAUpPwM,0.0,1.0,0.0,0.0,3Blue1Brown,Pure Fourier series animation montage,Because why not?\nLearn the math behind this: ...,2019-07-03,600345,18533,0,1241,1,Wednesday,749,8
3,-RdOwhmqP5s,0.0,1.0,0.0,0.0,3Blue1Brown,"Newton’s method produces this fractal, why don...",Who knew root-finding could be so complicated?...,2021-10-12,1832565,74928,0,3107,1,Tuesday,1566,7
4,1SMmc9gQmHQ,0.0,1.0,0.0,0.0,3Blue1Brown,How to count to 1000 on two hands,Typically when we think of counting on two han...,2015-09-18,385566,11316,0,1087,1,Friday,154,11
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
119,ZA4JkHKZM50,0.0,1.0,0.0,0.0,3Blue1Brown,Why “probability of 0” does not mean “impossib...,An introduction to probability density functio...,2020-04-12,2125788,58621,0,3247,1,Sunday,601,7
120,zjMuIxRvygQ,0.0,1.0,0.0,0.0,3Blue1Brown,"Quaternions and 3d rotation, explained interac...",Go experience the explorable videos: https://e...,2018-10-26,815190,21020,0,673,1,Friday,359,9
121,zLzLxVeqdQg,0.0,1.0,0.0,0.0,3Blue1Brown,Euler's Formula Poem,"A poem about why e to the pi i equals -1, base...",2015-03-05,140989,4316,0,139,1,Thursday,64,10
122,zwAD6dRSVyI,0.0,1.0,0.0,0.0,3Blue1Brown,Thinking outside the 10-dimensional box,Visualizing high-dimensional spheres to unders...,2017-08-11,2787477,59021,0,3496,1,Friday,1627,15
