# Imports / Global Setup

In [None]:
import pandas as pd
import json
import os
import json
import pandas as pd
import time
from PIL import Image
import requests
from io import BytesIO
import numpy as np
from datetime import datetime
import dateutil.relativedelta
from dateutil.parser import parse
from scipy import stats
import ast
import sys

import google_auth_oauthlib.flow
import googleapiclient.discovery
import googleapiclient.errors

In [None]:
# Load the global variables used by the full test run at the bottom of the notebook.
# These should all be provided in run.py using only the config file.
# NOTE(review): generate_metadata indexes api_keys by position, so the
# "api_keys" entry of api_key.json is presumably a list of keys -- confirm.
with open("../../api_key.json") as json_file:
    cred = json.load(json_file)
api_keys = cred['api_keys'] # change this
# Service name and version handed to googleapiclient.discovery.build.
api_service_name = "youtube"
api_version = "v3"
# CSV written by metadata_main with the final per-search-result metadata.
out_fp = "../../data/test/fortnite/video_data/fortnite_full_metadata.csv"
# JSON file that the (possibly extended) request cache is written back to.
master_dic_write_fp = "../../data/local/fortnite/video_data/fortnite_requests.json"
# JSON search scrape that metadata_main loads as its input data.
init_data_fp = "../../data/test/fortnite/video_data/scrape_04_16_20.json"
# Title matched (case-insensitively) against video title/description/tags.
game_title = "fortnite"
# JSON file the request cache is initially read from; same path as
# master_dic_write_fp, so the cache is updated in place.
master_dic_fp = "../../data/local/fortnite/video_data/fortnite_requests.json"

# Library Functions

In [None]:
def check_vid_game(vid_stats, game_title):
    """Return True if *game_title* appears in a video's title, description or tags.

    Matching is a case-insensitive substring test.

    Parameters
    ----------
    vid_stats : mapping-like
        Anything subscriptable by the keys 'title', 'description' and 'tags'
        (e.g. the dict built by get_vid_stats). Missing keys and non-string
        values such as NaN or None are treated as empty text.
    game_title : str
        Title to look for.

    Returns
    -------
    bool
    """
    needle = game_title.lower()

    def lookup(key):
        # Only "this field is not available" errors are swallowed.
        try:
            return vid_stats[key]
        except (KeyError, IndexError, TypeError):
            return None

    def lowered_text(key):
        value = lookup(key)
        return value.lower() if isinstance(value, str) else ""

    if needle in lowered_text('title') or needle in lowered_text('description'):
        return True

    tags = lookup('tags')
    if isinstance(tags, str):
        # A bare string (e.g. a stringified tag list read back from CSV) is
        # searched as a whole instead of being iterated character by character.
        tags = [tags]
    else:
        try:
            tags = list(tags)
        except TypeError:
            # NaN / None / other non-iterables mean "no tags".
            tags = []

    # Non-string entries are skipped rather than crashing on .lower().
    return any(isinstance(tag, str) and needle in tag.lower() for tag in tags)



def generate_metadata(master_dic, data, game_title, api_keys, api_service_name, api_version):
    """Collect metadata for every channel video that matches *game_title*.

    For each entry of ``data['data']`` the ids in its ``'channel_videos'``
    are resolved to video details, either from *master_dic* or, when absent,
    through request_video_details (the response is then stored in
    *master_dic*, which is mutated in place). Videos accepted by
    check_vid_game are gathered into one DataFrame per entry, and z-scores of
    the view/like/dislike/comment counts are computed within that group.

    Parameters
    ----------
    master_dic : dict
        Cache mapping video id -> raw video details. Updated in place.
    data : dict
        Must contain ``data['data']``, an iterable of entries that each have
        a ``'channel_videos'`` iterable of video ids.
    game_title : str
        Title forwarded to check_vid_game.
    api_keys : sequence of str
        API keys, used round-robin for uncached videos.
    api_service_name, api_version : str
        Forwarded to request_video_details.

    Returns
    -------
    pandas.DataFrame
        All matching rows with exact duplicate rows removed. Empty when no
        video matched.

    Raises
    ------
    Exception
        Any error raised while requesting an uncached video is reported on
        stdout and re-raised.
    """
    all_metadata = pd.DataFrame()
    # Position of the next key in api_keys. It was previously never
    # initialised, so the first uncached video raised UnboundLocalError.
    api_idx = 0
    searched_vids = data['data']
    for progress_count, searched_vid in enumerate(searched_vids):
        if progress_count % 10 == 0:
            print("Progress:",progress_count,"of",len(searched_vids))
        channel_game_vids = []
        for channel_vid in searched_vid['channel_videos']:
            if channel_vid in master_dic:
                cur_vid_details = master_dic[channel_vid]
            else:
                # TODO: Handle missing / incorrect API key
                try:
                    api_key = api_keys[api_idx]
                    # Rotate through the available keys.
                    api_idx = (api_idx + 1) % len(api_keys)
                    response = request_video_details(channel_vid,
                                                     api_key,
                                                     api_service_name,
                                                     api_version)
                    items = response['items']
                    # An empty item list is cached as {} so the video is not
                    # requested again.
                    cur_vid_details = items[0] if items else {}
                    master_dic[channel_vid] = cur_vid_details
                except Exception:
                    print("Video was not in local storage and there was a problem scraping:")
                    print(sys.exc_info()[0])
                    raise
            cur_vid_stats = get_vid_stats(cur_vid_details)
            if check_vid_game(cur_vid_stats, game_title):
                channel_game_vids.append(cur_vid_stats)
        if not channel_game_vids:
            # Nothing matched for this entry; an empty frame has no 'tags'
            # column and would raise KeyError below.
            continue
        cur_metadata = pd.DataFrame(channel_game_vids)
        # Lists/dicts are stringified so rows are hashable for drop_duplicates.
        cur_metadata['tags'] = cur_metadata['tags'].apply(str)
        cur_metadata['thumbnails'] = cur_metadata['thumbnails'].apply(str)
        cur_metadata['z_views'] = stats.zscore(cur_metadata['viewCount'])
        cur_metadata['z_likes'] = stats.zscore(cur_metadata['likeCount'])
        cur_metadata['z_dislikes'] = stats.zscore(cur_metadata['dislikeCount'])
        cur_metadata['z_comments'] = stats.zscore(cur_metadata['commentCount'])
        all_metadata = pd.concat([all_metadata,cur_metadata],sort=True).reset_index(drop=True)
    unique_metadata = all_metadata.drop_duplicates().reset_index(drop=True)
    return unique_metadata

def generate_search_result_df(unique_metadata,data):
    """Build one output row per searched video, in search order.

    Parameters
    ----------
    unique_metadata : pandas.DataFrame
        Metadata rows; the 'videoId' column is used to match searched videos.
        May be empty.
    data : dict
        Must contain ``data['data']``, an iterable of entries with
        ``'video_id'`` and ``'position'`` keys.

    Returns
    -------
    pandas.DataFrame
        For each searched video: the first metadata row with the same
        'videoId', or a row of NaN carrying only the id, plus a 'position'
        column. *unique_metadata* itself is not modified.
    """
    # Map each id to the position of its first row once, instead of
    # rescanning the whole column for every searched video.
    first_row_of = {}
    if 'videoId' in unique_metadata.columns:
        for row_pos, known_id in enumerate(unique_metadata['videoId'].values):
            first_row_of.setdefault(known_id, row_pos)

    out_data = []
    for searched_vid in data['data']:
        vid_id = searched_vid['video_id']
        row_pos = first_row_of.get(vid_id)
        if row_pos is not None:
            # Copy so adding 'position' never writes into unique_metadata.
            row = unique_metadata.iloc[row_pos].copy()
        else:
            # Unknown video: all-NaN placeholder built from the column labels,
            # which also works when unique_metadata has no rows at all.
            row = pd.Series(np.nan, index=unique_metadata.columns, dtype=object)
            row['videoId'] = vid_id
        row['position'] = searched_vid['position']
        out_data.append(row)
    out_df = pd.DataFrame(out_data).reset_index(drop=True)
    return out_df

def _dig(record, *path, default=np.nan):
    """Follow *path* through nested containers; return *default* if a step is missing."""
    current = record
    try:
        for key in path:
            current = current[key]
    except (KeyError, IndexError, TypeError):
        return default
    return current


def _as_float(value):
    """Convert *value* to float, returning NaN when it cannot be converted."""
    try:
        return float(value)
    except (TypeError, ValueError, OverflowError):
        return np.nan


def get_vid_stats(vid):
    """Flatten a raw video-details record into a flat dict of fields.

    Parameters
    ----------
    vid : dict
        Record with optional 'id', 'snippet', 'contentDetails' and
        'statistics' entries. Any missing piece (including ``vid == {}``)
        yields NaN for the affected fields.

    Returns
    -------
    dict
        Keys: videoId, channelId, channelTitle, thumbnails, title, date,
        duration, viewCount, likeCount, dislikeCount, commentCount,
        favoriteCount, tags, defaultLanguage, categoryId, description.
        The count fields and categoryId are floats (NaN when missing or not
        convertible); 'date' is the parsed 'publishedAt' value or NaN.
    """
    published = _dig(vid, 'snippet', 'publishedAt', default=None)
    if published is None:
        date = np.nan
    else:
        try:
            date = parse(published)
        except (TypeError, ValueError, OverflowError):
            # Unparseable timestamps are treated like missing ones.
            date = np.nan

    # Named vid_stats so the scipy `stats` import is not shadowed.
    vid_stats = {"videoId": _dig(vid, 'id'),
                 "channelId": _dig(vid, 'snippet', 'channelId'),
                 "channelTitle": _dig(vid, 'snippet', 'channelTitle'),
                 "thumbnails": _dig(vid, 'snippet', 'thumbnails'),
                 "title": _dig(vid, 'snippet', 'title'),
                 "date": date,
                 "duration": _dig(vid, 'contentDetails', 'duration'),
                 "viewCount": _as_float(_dig(vid, 'statistics', 'viewCount')),
                 "likeCount": _as_float(_dig(vid, 'statistics', 'likeCount')),
                 "dislikeCount": _as_float(_dig(vid, 'statistics', 'dislikeCount')),
                 "commentCount": _as_float(_dig(vid, 'statistics', 'commentCount')),
                 "favoriteCount": _as_float(_dig(vid, 'statistics', 'favoriteCount')),
                 "tags": _dig(vid, 'snippet', 'tags'),
                 "defaultLanguage": _dig(vid, 'snippet', 'defaultAudioLanguage'),
                 "categoryId": _as_float(_dig(vid, 'snippet', 'categoryId')),
                 "description": _dig(vid, 'snippet', 'description')}
    return vid_stats

def init_master_dic(dic_fp):
    """Load the local request cache from *dic_fp*.

    Parameters
    ----------
    dic_fp : str or None
        Path to a JSON file, or None to start with an empty cache.

    Returns
    -------
    The parsed JSON content, or ``{}`` when *dic_fp* is None.

    Raises
    ------
    FileNotFoundError
        If *dic_fp* is given but does not exist. (A bare ``raise`` was used
        here before, which only produced "No active exception to reraise".)
    """
    if dic_fp is None:
        return {}
    if not os.path.exists(dic_fp):
        raise FileNotFoundError(
            "Requests Dictionary path does not exist. "
            "If you do not have a local requests dic, enter None: " + str(dic_fp)
        )
    with open(dic_fp) as json_file:
        out_dic = json.load(json_file)
    return out_dic
    
    
def metadata_main(api_keys, api_service_name, api_version,
                  out_fp, master_dic_write_fp, 
                  init_data_fp, game_title, master_dic_fp):
    """Run the metadata pipeline from cached requests to the output CSV.

    Steps: load the request cache from *master_dic_fp*, load the search data
    JSON from *init_data_fp*, build the metadata with generate_metadata,
    write a non-empty cache to *master_dic_write_fp*, and save the table from
    generate_search_result_df to *out_fp* without the index.
    """
    request_cache = init_master_dic(master_dic_fp)

    with open(init_data_fp) as search_file:
        search_data = json.load(search_file)

    metadata = generate_metadata(request_cache, search_data, game_title,
                                 api_keys, api_service_name, api_version)

    # The cache is only written back when it holds at least one entry.
    if request_cache.keys():
        save_requests_dic(master_dic_write_fp, request_cache)

    result_table = generate_search_result_df(metadata, search_data)
    result_table.to_csv(out_fp, index=False)
    print("Metadata Saved at: " + out_fp)

def request_video_details(video_id, api_key, api_service_name, api_version):
    """Request snippet, statistics and contentDetails for *video_id*.

    Builds an API client with *api_key* and returns the raw response of the
    videos().list call. Quota note carried over from the original docstring:
    "API cost of 7".
    """
    client = googleapiclient.discovery.build(
        api_service_name,
        api_version,
        developerKey=api_key,
    )
    # Deliberately the videos endpoint rather than the search endpoint.
    videos_endpoint = client.videos()
    return videos_endpoint.list(
        part="snippet,statistics,contentDetails",
        id=video_id,
    ).execute()

def save_requests_dic(fp, data):
    """Write *data* as JSON to *fp* (overwriting it) and print a confirmation."""
    with open(fp, mode="w") as cache_file:
        json.dump(data, cache_file)
    confirmation = "API Requests logged locally at: " + fp
    print(confirmation)

# Full Test Run

In [None]:
# Run the whole pipeline once with the notebook-level configuration:
# loads the request cache and search data, then writes the cache and the CSV.
metadata_main(api_keys, api_service_name, api_version, 
              out_fp, master_dic_write_fp, init_data_fp, 
              game_title, master_dic_fp)

# Work in Progress

In [None]:
# placeholder

# Old / Test Code

In [None]:
# with open("../../api_key.json") as json_file:
#     cred = json.load(json_file)
# cwynne_api_key = cred['api_key']
# api_keys = [cwynne_api_key]
# api_service_name = "youtube"
# api_version = "v3"
# api_idx = 0

In [None]:
# unique_metadata = generate_metadata(master_dic, data, api_keys, api_service_name, api_version)

In [None]:
# out_df.to_csv("../../../fortnite_full_metadata.csv",index=False)

In [None]:
# # save current master dictionary to local storage
# save_requests_dic(requests_local_fp, master_dic)

In [None]:
# with open("../../api_key.json") as json_file:
#     cred = json.load(json_file)
# api_key = cred['api_key']
# api_service_name = "youtube"
# api_version = "v3"
# game_title = "fortnite"

# requests_local_fp = "../../../youtube_videos.json"
# if os.path.exists(requests_local_fp):
#     with open(requests_local_fp) as json_file:
#         master_dic = json.load(json_file)
        
# init_data_fp = "../../data/out/fortnite/scrape_04_16_20.json"
# with open(init_data_fp) as json_file:
#     data = json.load(json_file)

In [None]:
# cur_vid_details = request_video_details("rGiNqQnNNYQ",
#                                                     api_key,
#                                                     api_service_name,
#                                                     api_version)

In [None]:
# response = request_video_details("jAfLfd_EmtM",api_key, api_service_name, api_version)

In [None]:
# response

In [None]:
# sample_vals = data['data'][0]
# sample = {"data": [sample_vals]}

In [None]:
# sample_vid = sample_vals['channel_videos'][0]
# sample_vid

In [None]:
# db_path = "../../../" + game_title + "_metadata_master.csv"
# # master_dic = {}

In [None]:
# def check_vid_game(vid_stats, game_title):
#     game_title = game_title.lower()
#     try:
#         tags = vid_stats['tags']
#     except:
#         tags = []
#     try:
#         title = vid_stats['title'].lower()
#     except:
#         title = np.nan
#     try:
#         description = vid_stats['description'].lower()
#     except:
#         description = np.nan
#     if type(tags) == float:
#         tags = []
#     if type(title) == float:
#         title = ""
#     if type(description) == float:
#         description = "" 
#     if game_title in title or game_title in description:
#         return True
#     else:
#         for tag in tags:
#             if game_title in tag.lower():
#                 return True
#     return False

# def request_video_details(video_id, api_key, api_service_name, api_version):
#     """API cost of 7"""
#     youtube = googleapiclient.discovery.build(
#         api_service_name, api_version, developerKey=api_key)
#     # note that this uses youtube.videos instead of youtube.search
#     request = youtube.videos().list(
#         part="snippet,statistics",
#         id=video_id
#     )
#     response = request.execute()
#     return response

# def get_vid_stats(vid):
#     try:
#         channel_id = vid['snippet']['channelId']
#     except:
#         channel_id = np.nan
#     try:
#         channel_title = vid['snippet']['channelTitle']
#     except:
#         channel_title = np.nan
#     try:
#         thumbnail_links = vid['snippet']['thumbnails']
#     except:
#         thumbnail_links = np.nan
#     try:
#         title = vid['snippet']['title']
#     except:
#         title = np.nan
#     try:
#         language = vid['snippet']['defaultAudioLanguage']
#     except:
#         language = np.nan
#     try:
#         date = parse(vid['snippet']['publishedAt'])
#     except:
#         date = np.nan
#     try:
#         views = vid['statistics']['viewCount']
#     except:
#         views = np.nan
#     try:
#         likes = vid['statistics']['likeCount']
#     except:
#         likes = np.nan
#     try:
#         dislikes = vid['statistics']['dislikeCount']
#     except:
#         dislikes = np.nan
#     try:
#         comments = vid['statistics']['commentCount']
#     except:
#         comments = np.nan
#     try:
#         favorites = vid['statistics']['favoriteCount']
#     except:
#         favorites = np.nan
#     try:
#         description = vid['snippet']['description']
#     except:
#         description = np.nan
#     try:
#         tags = vid['snippet']['tags']
#     except:
#         tags = np.nan
#     try:
#         cat_id = vid['snippet']['categoryId']
#     except:
#         cat_id = np.nan
#     stats = {"channelId":channel_id,
#              "channelTitle":channel_title,
#              "thumbnails":thumbnail_links,
#              "title":title,
#              "date":date,
#              "viewCount":float(views),
#              "likeCount":float(likes),
#              "dislikeCount":float(dislikes),
#              "commentCount":float(comments),
#              "favoriteCount":float(favorites),
#              "tags":tags,
#              "defaultLanguage":language,
#              "categoryId":float(cat_id),
#              "description": description}
#     return stats

# def save_requests_dic(fp, data):
#     with open(fp,"w") as json_file:
#         json.dump(data, json_file)

In [None]:
# response = request_video_details(sample_vid, api_key, api_service_name, api_version)['items'][0]

In [None]:
# to_df = [get_vid_stats(response)]
# pd.DataFrame(to_df)

In [None]:
# check_vid_game(to_df[0],"fortnite")

In [None]:
# meta = pd.read_csv("../../../fortnite_metadata.csv")
# meta.head()

In [None]:
# def initialize_video_db(db_path):
#     if os.path.exists(db_path):
#         return pd.read_csv(db_path)
#     else:
#         return pd.DataFrame()