In [1]:
# playing around with Youtube API to extract comments from videos about past US presidents' presidential inaugrations

In [2]:
# Import packages
import google.oauth2.credentials

from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request

import os
import pickle
import pandas as pd

pd.options.mode.chained_assignment = None

In [3]:
# Client Secret File containing OAuth client ID
CLIENT_SECRET_FILE = r"C:\Users\leejak\Desktop\Projects\youtube-comments-scraper\client_secret.json"

In [4]:
# Specify the scope to the YouTube Data API, v3
# Link to API documentation: https://developers.google.com/identity/protocols/googlescopes#youtubev3
# This scrope can "See, edit, and permanently delete your YouTube videos, ratings, comments and captions"
SCOPES = ['https://www.googleapis.com/auth/youtube.force-ssl']
API_SERVICE_NAME = 'youtube'
API_VERSION = 'v3'

In [5]:
# Function to create and return the service that will interact with the API
def create_authenticated_service():
    credentials = None
    # Load pickle file containing cached credentials if exists in project directory 
    if os.path.exists('token.pickle'):
        with open('token.pickle', 'rb') as token:
            credentials = pickle.load(token)
    #  Check if the credentials do not exist in project directory or are invalid
    if not credentials or not credentials.valid:
        # If the credentials exist in project directory and has expired, refresh credentials
        if credentials and credentials.expired and credentials.refresh_token:
            credentials.refresh(Request())
        # Else, run console to generate authorization URL
        else:
            flow = InstalledAppFlow.from_client_secrets_file(CLIENT_SECRET_FILE, SCOPES)
            credentials = flow.run_console()
        # Write newly created credentials to pickle file
        with open('token.pickle', 'wb') as token:
            pickle.dump(credentials, token)
    # Create and return the service
    return build(API_SERVICE_NAME, API_VERSION, credentials=credentials)

In [6]:
# create service
os.environ['OAUTHLIB_INSECURE_TRANSPORT'] = '1'
service = create_authenticated_service()

In [7]:
def get_comments(service, **kwargs):
    comments = []
    results = service.commentThreads().list(**kwargs).execute()
    while results:
        for item in results['items']:
            comment = item['snippet']['topLevelComment']['snippet']['textDisplay']
            comments.append(comment)
        # Check if another page exists
        if 'nextPageToken' in results:
            kwargs['pageToken'] = results['nextPageToken']
            results = service.commentThreads().list(**kwargs).execute()
        else:
            break
    return comments

In [8]:
# import file
videolinks_df = pd.read_csv('presidential_inauguration_youtube_ids.csv')

In [9]:
output_df = pd.DataFrame(columns=['video_id', 'video_subject'])
for index, row in videolinks_df.iterrows():
    video_id = row["video_id"]
    video_subject = row["video_subject"]
    comments = get_comments(service, part='snippet', videoId=video_id, textFormat='plainText')
    # dictionary of lists  
    temp_dict = {'video_id': video_id,
                 'video_subject': video_subject,
                 'comment': comments} 
    temp_df = pd.DataFrame(temp_dict)
    output_df = output_df.append(temp_df, ignore_index=True)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


In [10]:
output_df.head(15)

Unnamed: 0,comment,video_id,video_subject
0,I feel the audio has been heavily edited for n...,SwenOlpbvTA,"Dwight D. Eisenhower, 1953"
1,"Hoje não se pode falar em Deus, ofende os filh...",SwenOlpbvTA,"Dwight D. Eisenhower, 1953"
2,DWIGHT D EISENHOWER (1953-1961) \n\nPros: agre...,SwenOlpbvTA,"Dwight D. Eisenhower, 1953"
3,Ike was a good President!,SwenOlpbvTA,"Dwight D. Eisenhower, 1953"
4,A WWII hero and great president.,SwenOlpbvTA,"Dwight D. Eisenhower, 1953"
5,Ask yourself how did America go from that! Bei...,SwenOlpbvTA,"Dwight D. Eisenhower, 1953"
6,A stirring inaugural address. Poignant words ...,SwenOlpbvTA,"Dwight D. Eisenhower, 1953"
7,President Gabel,SwenOlpbvTA,"Dwight D. Eisenhower, 1953"
8,Eisenhower really was a great President. Instr...,SwenOlpbvTA,"Dwight D. Eisenhower, 1953"
9,big president rest in peace Dwight Eisenhauer,SwenOlpbvTA,"Dwight D. Eisenhower, 1953"
