In [None]:
# Enable IPython's autoreload extension; mode 2 re-imports all modules before
# each cell executes, so edits to imported modules are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [None]:
import youtubecollector as ytc
import pandas as pd
from tqdm import tqdm_notebook as tqdm

## YouTube client setup

In [None]:
# Build the API client that every collection step below receives.
# NOTE(review): "./api.conf" is resolved relative to the working directory and
# presumably holds the API credentials -- confirm it is kept out of version control.
youtube_client = ytc.youtube_client.create_youtube_client("./api.conf")

## Channel Seed
The pipeline starts from a list of seed channels. All videos of each channel are retrieved, and for every video the comments, recommendations and captions are collected.

In [None]:
# CSV read as the channel seed list, and CSV the collected channels are written to.
channel_seed_filename = "input/seeds_right.csv"
channel_outputfile = "output/channels_right.csv"

In [None]:
# Load the seed list of channels.
channel_seed_df = pd.read_csv(channel_seed_filename)

# `.loc[0:]` selects the rows from index label 0 onwards and passes them, with
# the client, to get_channels.
# NOTE(review): presumably the start label is edited by hand to resume a partial run -- confirm.
channels = ytc.channels.get_channels(channel_seed_df.loc[0:], youtube_client)

# Persist the collected channels to the channel output file.
ytc.channels.write_channels(channels, channel_outputfile)

## Videos

In [None]:
# File that each page of collected videos is written to by the video loop below.
video_output_file = "output/videos_right1.csv"

In [None]:
# Keep only the first collected channel for the steps that follow.
# NOTE(review): looks like a manual batching/debugging limit -- confirm before a full run.
channels = channels[0:1]

In [None]:
# Collect every video of every selected channel, one result page at a time.
# Each page is converted, appended to `all_videos`, and written to
# `video_output_file` straight away.
all_videos = []

for channel in tqdm(channels):
    # The first page comes from get_videos; later pages come from get_more_videos.
    response = ytc.video.get_videos(channel.channel_uploads, youtube_client)

    while True:
        next_page_token = response.get('nextPageToken')
        videos = ytc.video.convert_to_videos(response, youtube_client)
        all_videos.extend(videos)
        ytc.video.write_videos(videos, video_output_file)

        # No token on the response means this was the last page for the channel.
        if not next_page_token:
            break

        response = ytc.video.get_more_videos(channel.channel_uploads, youtube_client, next_page_token)

## Comments

In [None]:
# File that each page of collected comments is written to by the comment loop below.
comments_output_file = "output/comments_right1.csv"

In [None]:
# Collect all comments for the selected videos, page by page. Each page is
# appended to `all_comments` and written to `comments_output_file` immediately.
#
# NOTE(review): the slice below rebinds `all_videos`, so re-running this cell
# drops a further 1015 videos, and the later cells that iterate `all_videos`
# also see the shortened list. Presumably a manual resume offset -- confirm.
all_videos = all_videos[1015:]

all_comments = list()
for video in tqdm(all_videos):
    response = ytc.comments.get_comments(video.video_id, youtube_client)
    comments = ytc.comments.convert_to_comments(response)
    all_comments.extend(comments)
    ytc.comments.write_comments(comments_output_file, comments)
    try:
        next_page_token = response.get('nextPageToken')
    except AttributeError:
        # The response has no `.get` (presumably None when the request
        # failed -- confirm against ytc.comments); move on to the next video.
        continue

    while next_page_token:
        response = ytc.comments.get_more_comments(video.video_id, youtube_client, next_page_token)
        try:
            next_page_token = response.get('nextPageToken')
        except AttributeError:
            # Stop paging this video. A `continue` here would re-enter the
            # loop with the same token and repeat the failing request forever.
            break
        comments = ytc.comments.convert_to_comments(response)
        all_comments.extend(comments)
        ytc.comments.write_comments(comments_output_file, comments)
    
    

## Recommendations

In [None]:
# File that each video's recommendations are written to by the loop below.
recommendations_output_file = "output/recommendations_right1.csv"

In [None]:
# Collect the recommendations for every video. Results are kept in
# `video_to_recommendations` (keyed by video id) and written to
# `recommendations_output_file` as soon as they are fetched.
video_to_recommendations = dict()
for video in tqdm(all_videos):
    try:
        response = ytc.recommendations.get_recommendations(video.video_id, youtube_client)
    except Exception as error:
        # The exception class raised on a rate limit is not importable from
        # this notebook, so the API error reason is matched in the error text.
        # NOTE(review): assumes the reason string "rateLimitExceeded" appears
        # in the exception message or its `content` attribute -- confirm.
        error_text = "{} {}".format(error, getattr(error, "content", ""))
        if "rateLimitExceeded" not in error_text:
            raise
        # Rebuild the client and retry this video once. Without the retry,
        # `response` would still hold the previous video's data (or be
        # undefined) and be stored under the wrong video id.
        youtube_client = ytc.youtube_client.create_youtube_client("./api.conf")
        response = ytc.recommendations.get_recommendations(video.video_id, youtube_client)

    recommendations = ytc.recommendations.convert_to_recommendations(response, video.video_id)
    video_to_recommendations[video.video_id] = recommendations

    ytc.recommendations.write_recommendations(recommendations_output_file, recommendations)
    

## Transcripts

In [None]:
# File the extracted transcripts are written to.
# NOTE(review): this is an absolute path inside one user's home directory, unlike
# the relative "output/..." paths used for the other outputs; it will not exist
# on another machine -- consider a relative or configurable path.
transcripts_output_file = "/home/dim/Documents/projecten/extremisme/youtube/yt/YouTubeExtremism/DataCollection/transcripts_right1.csv"

In [None]:
# Fetch captions for every video in `all_videos`.
# NOTE(review): presumably this leaves .vtt files in the working directory, since
# the next cell reads "./*.vtt" -- confirm against ytc.transcripts.
ytc.transcripts.get_captions(all_videos)

In [None]:
# Extract transcripts from the .vtt files matching the glob in the working directory.
video_id_transcripts = ytc.transcripts.extract_transcripts("./*.vtt")

# Write the extracted transcripts to the transcripts output file.
ytc.transcripts.write_transcripts(transcripts_output_file, video_id_transcripts)

In [None]:
import shutil
import glob
import os


# Delete every file in the data-collection directory whose name ends in "vtt".
# NOTE(review): this pattern is an absolute path, while the transcripts were read
# from "./*.vtt"; both refer to the same files only when the notebook runs from
# that directory -- confirm.
stale_caption_files = glob.glob('/home/dim/Documents/projecten/extremisme/youtube/yt/YouTubeExtremism/DataCollection/*vtt')
for filename in stale_caption_files:
    os.remove(filename)