# YouTube Data Scraping

In [1]:
from urllib.request import urlopen
import json
import pafy
import pandas as pd

In [2]:
from google_auth import api_key

In [3]:
def get_all_video_in_channel(channel_id):
    """Return the watch URL of every video the search API lists for a channel.

    Pages through the YouTube Data API v3 ``search`` endpoint (``order=date``,
    25 results per request), following ``nextPageToken`` until a response
    arrives without one. The request is authenticated with the module-level
    ``api_key``.

    Args:
        channel_id: ID of the YouTube channel to list.

    Returns:
        A list of ``https://www.youtube.com/watch?v=<videoId>`` strings in the
        order the API returned them. Search hits whose ``id.kind`` is not
        ``youtube#video`` are skipped.

    Raises:
        Any exception raised by ``urlopen`` or ``json.load`` propagates to the
        caller, as does ``KeyError`` if a response has no ``items`` field.
    """
    base_video_url = 'https://www.youtube.com/watch?v='
    base_search_url = 'https://www.googleapis.com/youtube/v3/search?'

    first_url = base_search_url+'key={}&channelId={}&part=snippet,id&order=date&maxResults=25'.format(api_key, channel_id)

    video_links = []
    url = first_url
    while True:
        # Close each HTTP response as soon as its JSON body has been parsed.
        with urlopen(url) as inp:
            resp = json.load(inp)

        for item in resp['items']:
            if item['id']['kind'] == "youtube#video":
                video_links.append(base_video_url + item['id']['videoId'])

        # The last page carries no nextPageToken; test for it explicitly
        # instead of relying on a catch-all exception handler.
        next_page_token = resp.get('nextPageToken')
        if not next_page_token:
            break
        url = first_url + '&pageToken={}'.format(next_page_token)
    return video_links

In [4]:
# ID of the YouTube channel whose videos are listed by get_all_video_in_channel below.
channel_id = "UClzR7HySZTVb6g-tSzAhi5A"

In [5]:
# Collect the watch URL of every video on the channel.
# This issues one search-API request per page of 25 results.
video_urls = get_all_video_in_channel(channel_id)

In [6]:
from multiprocessing import Pool

# Pool of ten worker processes.
# NOTE(review): `p` is not referenced by any cell visible in this notebook;
# confirm it is still needed, and close it when done if so.
p = Pool(processes=10)

In [7]:
#getvideo(video_urls)

In [8]:
# Fetch metadata for each video. Every entry of `results` is a tuple of
# (published, title, rating, viewcount, duration, likes, url).
results = []

for url in video_urls:
    video = pafy.new(url)
    results.append((
        video.published,
        video.title,
        video.rating,
        video.viewcount,
        video.duration,
        video.likes,
        url,
    ))
    # Echo the title so progress is visible while the loop runs.
    print(video.title)

Word boundaries in Chinese
Chinese and English word order comparison
The power of review with reading Chinese words
Adorable Chinese words (alternate title: logical Chinese words)
Rocket learning : Back on track with learning to read Chinese
Real life Chinese language interference
5,000 views celebration
Burning Question: What Emergency language in Chinese have you used?
Online Chinese tutoring success!
Chinese language update AND Duo Lingo Chinese app review
Back in hanzi heaven, errr, I mean China
European trip endcap ponderings: Spanish and German
German, English positive transfer wit writing
Burning questions about the languages around you
Chinese character walkabout in Madrid, Spain's Chinatown
Academia & Polyglots collide (in a good way) : a Podcast episode review
Hebrew Scripts in Synogogue (Cordoba, Spain)
Burning question: language vocabulary stories
Aha moment: Writing Scripts are magical, mystical almost (more images in show notes)
Burning question: Should I include writing 

Tools: Color coding Chinese tones with Pleco app (June 15, 2017)
Aha Moment: Return on motivation w/ Chinese characters (June 14, 2017)
Chinese characters walkabout: Redtown, Shanghai, China (June 10, 2017)
Burning Question (June 13, 2017)
Update (June 9, 2017)
Chinese character walkabout: Hongqiao Train Station, Shanghai (June 8, 2017)
Chinese character walkabout : French Concession, Shanghai, China (June 8, 2017)
About this language learning channel and me! (June 6, 2017)
Update (June 5, 2017)
Tools: Chinese character tracing, the Zorro stroke & tracing app of truth (May 31, 2017)
Update (May 30, 2017)
Chinese characters walkabout: NOT letter B (is it called fu?) (May 29, 2017)
Update: Like character stress & impressing locals on the train (May 25, 2017)
Chinese character walkabout: Shanghai (May 21, 2017)
Update: Characters IRL & Anki flashcards (May 14, 2017)
Chinese language space issues & character matching survival skill (May 11, 2017)
Update: language learning failure (May 9, 2

In [9]:
# The URL is the last field of each result tuple. The search request asks for
# order=date, and the recorded output shows the newest video first, so the
# final entry is the channel's first upload.
first_video_url = results[-1][-1]
most_recent_video_url = results[0][-1]
print("The link to the first video is: " + first_video_url)
print("The link to the most recent video is: " + most_recent_video_url)

The link to the first video is: https://www.youtube.com/watch?v=_zYrd1JnU3E
The link to the most recent video is: https://www.youtube.com/watch?v=91aIIU5ZWZY


In [10]:
# Column labels, in the same order as the fields of each tuple in `results`.
column_names = ["Date", "Title", "Rating", "View Count", "Duration", "Likes", "URL"]
df = pd.DataFrame(results, columns=column_names)
df.head()

Unnamed: 0,Date,Title,Rating,View Count,Duration,Likes,URL
0,2017-12-22 08:35:37,Word boundaries in Chinese,0.0,16,00:04:41,0,https://www.youtube.com/watch?v=91aIIU5ZWZY
1,2017-12-15 05:33:44,Chinese and English word order comparison,0.0,22,00:16:30,0,https://www.youtube.com/watch?v=uK7LFu_Rvak
2,2017-12-13 14:37:04,The power of review with reading Chinese words,0.0,17,00:12:04,0,https://www.youtube.com/watch?v=LE-EvaQzWUE
3,2017-12-11 13:57:55,Adorable Chinese words (alternate title: logic...,5.0,39,00:04:08,1,https://www.youtube.com/watch?v=ZaxSyqghW5Y
4,2017-12-10 13:42:05,Rocket learning : Back on track with learning ...,5.0,18,00:06:48,1,https://www.youtube.com/watch?v=rVzMwgp2wk4


In [11]:
# Show the last rows of the frame (the oldest uploads in the recorded output).
df.tail()

Unnamed: 0,Date,Title,Rating,View Count,Duration,Likes,URL
153,2017-05-21 05:58:54,"Chinese character walkabout: Shanghai (May 21,...",5.0,44,00:07:27,2,https://www.youtube.com/watch?v=Gw029m_5EUE
154,2017-05-14 04:39:09,Update: Characters IRL & Anki flashcards (May ...,0.0,35,00:12:00,0,https://www.youtube.com/watch?v=QKeBZmpdNe8
155,2017-05-11 11:54:15,Chinese language space issues & character matc...,5.0,22,00:26:37,1,https://www.youtube.com/watch?v=8DQ5muRI90M
156,2017-05-09 07:56:52,"Update: language learning failure (May 9, 2017)",5.0,31,00:11:29,2,https://www.youtube.com/watch?v=JHzjuRBczMI
157,2017-04-30 13:05:54,"Update: beginning strokes (April 30, 2017)",5.0,217,00:13:10,4,https://www.youtube.com/watch?v=_zYrd1JnU3E


In [12]:
# Order the videos from fewest to most views; `df` itself is left unchanged.
least_viewed = df.sort_values(by='View Count', ascending=True)
least_viewed.head()

Unnamed: 0,Date,Title,Rating,View Count,Duration,Likes,URL
30,2017-10-19 06:21:17,Aha moment: when countries/languages collide,0.0,6,00:00:48,0,https://www.youtube.com/watch?v=ke7didLHqVs
19,2017-10-29 10:23:18,Burning question: Should I include writing scr...,0.0,6,00:01:15,0,https://www.youtube.com/watch?v=aGc1RtMPiCo
16,2017-11-01 20:50:03,"Hebrew Scripts in Synogogue (Cordoba, Spain)",0.0,6,00:02:11,0,https://www.youtube.com/watch?v=MvLaCaYsgHU
128,2017-06-25 11:01:01,Challenge: Chinese flashcard making marathon (...,0.0,6,00:09:31,0,https://www.youtube.com/watch?v=_CS6QdcT74M
23,2017-10-26 10:48:26,Multi-lingual tourism pondering,0.0,7,00:03:40,0,https://www.youtube.com/watch?v=WPz-Z_ET9W0


In [13]:
# Take the top row of the view-count-sorted frame and read its fields by
# column label rather than by position, so the lookup keeps working if the
# column order of the frame ever changes.
least_viewed_row = least_viewed.iloc[0]
least_viewed_url = least_viewed_row["URL"]
least_view_title = least_viewed_row["Title"]
print("The video with the fewest views is '{}' and it's link is: {}.".format(least_view_title, least_viewed_url))

The video with the fewest views is 'Aha moment: when countries/languages collide' and it's link is: https://www.youtube.com/watch?v=ke7didLHqVs.


In [14]:
# Build the promotional text for the least-viewed video: title first, then link.
message_template = "Learning #Chinese ? Me too! Check out my video '{}' here: {}"
message = message_template.format(least_view_title, least_viewed_url)
print(message)

Learning #Chinese ? Me too! Check out my video 'Aha moment: when countries/languages collide' here: https://www.youtube.com/watch?v=ke7didLHqVs


In [15]:
from twython import Twython
from twitter_auth import (
    consumer_key, 
    consumer_secret,
    access_token, 
    access_token_secret
)

In [16]:
# Build the Twython client from the four credentials imported from
# twitter_auth, passed positionally in this order.
twitter_credentials = (
    consumer_key,
    consumer_secret,
    access_token,
    access_token_secret,
)
twitter = Twython(*twitter_credentials)

In [17]:
# Post `message` as a status update through the Twython client.
# NOTE(review): this is a real external side effect; every run of this cell
# sends another post request, so avoid re-running it casually.
twitter.update_status(status=message)

{'contributors': None,
 'coordinates': None,
 'created_at': 'Mon Dec 25 04:22:16 +0000 2017',
 'entities': {'hashtags': [{'indices': [9, 17], 'text': 'Chinese'}],
  'symbols': [],
  'urls': [{'display_url': 'youtube.com/watch?v=ke7did…',
    'expanded_url': 'https://www.youtube.com/watch?v=ke7didLHqVs',
    'indices': [100, 123],
    'url': 'https://t.co/iHxfmwvmaR'}],
  'user_mentions': []},
 'favorite_count': 0,
 'favorited': False,
 'geo': None,
 'id': 945147658636738560,
 'id_str': '945147658636738560',
 'in_reply_to_screen_name': None,
 'in_reply_to_status_id': None,
 'in_reply_to_status_id_str': None,
 'in_reply_to_user_id': None,
 'in_reply_to_user_id_str': None,
 'is_quote_status': False,
 'lang': 'en',
 'place': None,
 'possibly_sensitive': False,
 'retweet_count': 0,
 'retweeted': False,
 'source': '<a href="https://github.com/evansimpson79" rel="nofollow">Auto_tweeter_youtube</a>',
 'text': "Learning #Chinese ? Me too! Check out my video 'Aha moment: when countries/languages