In [107]:
class Video:
    """Plain record holding one video's metadata and its collected comments."""

    def __init__(self, id, title, desc, category_id, comments, tags, channel_title, view_count, comment_count) -> None:
        # Identity and descriptive text.
        self.id, self.title, self.desc = id, title, desc
        # The category is normalised to an int; anything int() rejects raises here.
        self.category_id = int(category_id)
        # Collections gathered for the video; __str__ reports only their lengths.
        self.comments, self.tags = comments, tags
        # Channel name and the counters, stored exactly as passed in.
        self.channel_title = channel_title
        self.view_count, self.comment_count = view_count, comment_count

    def __str__(self):
        """Return the video as one comma-separated line.

        Field order: id, title, first 10 characters of the description,
        category id, number of comments, number of tags, channel title,
        view count, comment count. Values are not quoted or escaped.
        """
        columns = (
            self.id,
            self.title,
            self.desc[:10],
            self.category_id,
            len(self.comments),
            len(self.tags),
            self.channel_title,
            self.view_count,
            self.comment_count,
        )
        # format(value) is what an f-string placeholder with no spec applies.
        return ','.join(format(value) for value in columns)


<a href='https://colab.research.google.com/github/cdwangco/SentimentAnalysisProject/blob/main/MLProjectYTSentimentAnalysis.ipynb' target='_parent'><img src='https://colab.research.google.com/assets/colab-badge.svg' alt='Open In Colab'/></a>

In [108]:
import requests, sys, time, os

# File whose first line is read as the API key (see get_video_data).
key_path = 'api_key.txt'
# Directory the CSV is written into; created by write_to_file if missing.
output_dir = 'output/'

# Column names, joined with commas and written as the first row of the CSV.
header = ['video_id', 'title', 'desc', 'categoryId', 'comments', 'tags', 'channel_title',  'view_count', 'comment_count']
# Sent as maxResults on the videos request.
# NOTE(review): the YouTube Data API reference lists 1-50 as the accepted
# maxResults range for videos.list - confirm that 100 behaves as intended.
MAX_VIDEOS = 100
# Sent as maxResults on each per-video commentThreads request.
MAX_COMMENTS = 100

def _fetch_comments(video_id, api_key):
    """Return the text of up to MAX_COMMENTS top-level comments for one video.

    A response without an 'items' list yields an empty list.
    """
    # No pageToken is sent: page tokens from the videos listing do not belong
    # to this endpoint, so only the first page of comment threads is read.
    request_url = f'https://www.googleapis.com/youtube/v3/commentThreads?key={api_key}&textFormat=plainText&part=snippet&videoId={video_id}&maxResults={MAX_COMMENTS}'
    payload = requests.get(request_url).json()
    return [
        thread.get('snippet', {}).get('topLevelComment', {}).get('snippet', {}).get('textDisplay', '')
        for thread in payload.get('items', [])
    ]


def api_request(page_token, api_key):
    """Fetch one page of the US 'mostPopular' chart with comments per video.

    Args:
        page_token: Raw query-string fragment spliced between the ``part`` and
            ``chart`` parameters of the videos URL, so it must end with ``&``.
            The caller in this file passes ``'&'`` (no page token).
        api_key: API key placed in the ``key`` query parameter.

    Returns:
        dict mapping each video id to a ``Video`` built from the response.
        Empty when the response has no ``items``.

    Exits the process via ``sys.exit()`` when the videos request returns
    HTTP 429.
    """
    country_code = 'US'
    chart = 'mostPopular'
    request_url = f'https://www.googleapis.com/youtube/v3/videos?part=id,statistics,snippet{page_token}chart={chart}&regionCode={country_code}&maxResults={MAX_VIDEOS}&key={api_key}'
    response = requests.get(request_url)
    if response.status_code == 429:
        print('Temp-Banned due to excess requests, please wait and continue later')
        sys.exit()

    video_dict = {}
    for item in response.json().get('items', []):
        video_id = item['id']
        snippet = item['snippet']
        statistics = item['statistics']

        title = snippet.get('title', '')
        desc = snippet.get('description', '')
        # Video() passes this through int(), so a missing categoryId raises
        # ValueError there.
        category_id = snippet.get('categoryId', '')
        tags = snippet.get('tags', ['[none]'])
        channel_title = snippet.get('channelTitle', '')
        view_count = statistics.get('viewCount', 0)
        # Defaulted like viewCount so a response without commentCount does
        # not raise KeyError.
        comment_count = statistics.get('commentCount', 0)

        comments = _fetch_comments(video_id, api_key)
        video_dict[video_id] = Video(video_id, title, desc, category_id, comments, tags, channel_title, view_count, comment_count)

    return video_dict

def write_to_file(country_data):
    """Write each row of ``country_data`` as one line of a dated file.

    The file is ``<output_dir>/<yy.dd.mm>_videos.csv`` (local time, in
    year.day.month order); an existing file of that name is overwritten.
    ``output_dir`` is created first if it does not exist.

    Args:
        country_data: Iterable of rows; each row is formatted with ``str()``
            semantics and followed by a newline. No CSV quoting is applied.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_dir, exist_ok=True)

    # os.path.join avoids a doubled separator when output_dir ends with '/'.
    csv_path = os.path.join(output_dir, f'{time.strftime("%y.%d.%m")}_videos.csv')
    with open(csv_path, 'w', encoding='utf-8') as file:
        file.writelines(f'{row}\n' for row in country_data)


def get_video_data(should_write_to_file=False):
    """Read the API key, fetch the video data, and optionally write it to disk.

    Args:
        should_write_to_file: When true, the header row followed by every
            fetched video is passed to ``write_to_file``.

    Returns:
        The dict returned by ``api_request`` (video id -> ``Video``).
    """
    with open(key_path, 'r') as file:
        # readline() keeps the trailing newline; strip it so the key can be
        # interpolated into request URLs cleanly.
        api_key = file.readline().strip()

    # '&' is the bare separator: no page token is supplied.
    result = api_request('&', api_key)

    if should_write_to_file:
        country_data = [','.join(header), *result.values()]
        write_to_file(country_data)
    return result


{'u3V5KDHRQvk': <__main__.Video at 0x7fda7712ddc0>,
 'ZfVYgWYaHmE': <__main__.Video at 0x7fda7712d5b0>,
 '2IwhkJ0XzRE': <__main__.Video at 0x7fda74a99880>,
 'js-bcssOQyo': <__main__.Video at 0x7fda76740e20>,
 'u18be_kRmC0': <__main__.Video at 0x7fda76021d90>,
 'RcTLBsXvzQk': <__main__.Video at 0x7fda7624b9d0>,
 'EPWrVyyd3U4': <__main__.Video at 0x7fda7378ee80>,
 'mkHQDPch6fo': <__main__.Video at 0x7fda76b288b0>,
 'CKg3FV5gwMc': <__main__.Video at 0x7fda75d747c0>,
 'qyi1DaFZzXQ': <__main__.Video at 0x7fda75d74d90>}