In [77]:
class Video:
    """Metadata and sampled comments for a single YouTube video.

    Every constructor argument is stored unchanged on an attribute of the
    same name; no validation or type conversion is performed.
    """

    def __init__(self, id, title, desc, category_id, comments, tags, channel_title, view_count, comment_count) -> None:
        """Store each argument as-is on the instance."""
        self.id = id
        self.title = title
        self.desc = desc
        self.category_id = category_id
        self.comments = comments
        self.tags = tags
        self.channel_title = channel_title
        self.view_count = view_count
        self.comment_count = comment_count

    @staticmethod
    def _csv_field(value):
        """Format ``value`` as one CSV field, quoting it only when necessary.

        A field containing a comma, a double quote or a line break is wrapped
        in double quotes with embedded quotes doubled, so it cannot split the
        row. Any other value is returned exactly as an f-string renders it.
        """
        text = f'{value}'
        if any(ch in text for ch in ',"\r\n'):
            return '"' + text.replace('"', '""') + '"'
        return text

    def __str__(self):
        """Return the video as one comma-separated row of nine fields.

        The description, comments and tags positions hold the literal words
        ``desc``, ``comments`` and ``tags`` instead of the stored values; the
        remaining six fields are the attribute values, quoted when they
        contain CSV-special characters.
        """
        fields = (
            self._csv_field(self.id),
            self._csv_field(self.title),
            'desc',
            self._csv_field(self.category_id),
            'comments',
            'tags',
            self._csv_field(self.channel_title),
            self._csv_field(self.view_count),
            self._csv_field(self.comment_count),
        )
        return ','.join(fields)


<a href='https://colab.research.google.com/github/cdwangco/SentimentAnalysisProject/blob/main/MLProjectYTSentimentAnalysis.ipynb' target='_parent'><img src='https://colab.research.google.com/assets/colab-badge.svg' alt='Open In Colab'/></a>

In [87]:
import requests, sys, time, os
# Folder that receives the generated CSV files.
output_dir = 'output/'

# Characters that tend to break CSV files: line breaks and double quotes.
unsafe_characters = ['\n', '"']

# CSV column names, in a fixed, hardcoded order.
header = [
    'video_id',
    'title',
    'desc',
    'categoryId',
    'comments',
    'tags',
    'channel_title',
    'view_count',
    'comment_count',
]

# maxResults values sent with the videos and commentThreads requests.
MAX_VIDEOS = MAX_COMMENTS = 10

def setup(api_path, code_path):
    with open(api_path, 'r') as file:
        api_key = file.readline()

    with open(code_path) as file:
        country_codes = [x.rstrip() for x in file]

    return api_key, country_codes

def _fetch_comments(video_id):
    """Return the text of up to MAX_COMMENTS top-level comments for a video.

    A response without an ``items`` list yields an empty list, and a thread
    without comment text contributes an empty string.
    """
    # No pageToken is sent: the token handled by api_request belongs to the
    # videos listing, not to this video's comment threads.
    request_url = f'https://www.googleapis.com/youtube/v3/commentThreads?key={api_key}&textFormat=plainText&part=snippet&videoId={video_id}&maxResults={MAX_COMMENTS}'
    threads = requests.get(request_url).json().get('items', [])
    return [
        thread.get('snippet', {}).get('topLevelComment', {}).get('snippet', {}).get('textDisplay', '')
        for thread in threads
    ]


def api_request(page_token, country_code):
    """Fetch one page of most-popular videos for a region, with comments.

    Args:
        page_token: URL fragment spliced between the ``part`` and ``chart``
            query parameters of the videos request. It must start and end
            with ``&``; a bare ``'&'`` sends no page token.
        country_code: Value sent as ``regionCode``.

    Returns:
        A dict mapping each video id to a ``Video`` built from the item's
        snippet, statistics and fetched comments. Empty when the response
        has no ``items``.

    Raises:
        SystemExit: If the videos request is answered with HTTP 429.
        KeyError: If a returned item lacks ``id`` or ``snippet``.
    """
    # Builds the URL and requests the JSON from it
    request_url = f'https://www.googleapis.com/youtube/v3/videos?part=id,statistics,snippet{page_token}chart=mostPopular&regionCode={country_code}&maxResults={MAX_VIDEOS}&key={api_key}'
    request = requests.get(request_url)
    if request.status_code == 429:
        print('Temp-Banned due to excess requests, please wait and continue later')
        sys.exit()

    items = request.json().get('items', [])
    video_dict = {}
    for item in items:
        video_id = item['id']
        snippet = item['snippet']
        statistics = item.get('statistics', {})
        video_dict[video_id] = Video(
            video_id,
            snippet.get('title', ''),
            snippet.get('description', ''),
            snippet.get('categoryId', ''),
            _fetch_comments(video_id),
            snippet.get('tags', ['[none]']),
            snippet.get('channelTitle', ''),
            statistics.get('viewCount', 0),
            # commentCount can be absent from the statistics; default it the
            # same way as viewCount instead of raising KeyError.
            statistics.get('commentCount', 0),
        )

    return video_dict

def write_to_file(country_code, country_data):
    """Write one country's rows to a dated CSV file inside ``output_dir``.

    The file is named ``<date>_<country_code>_videos.csv``, where the date is
    the current local time formatted as ``%y.%d.%m``. An existing file with
    that name is overwritten.

    Args:
        country_code: Used verbatim in the file name.
        country_data: Iterable of rows; each row is written through its
            string form followed by a newline.

    Raises:
        OSError: If the directory cannot be created or the file cannot be
            opened for writing.
    """
    # exist_ok avoids the race between checking for the directory and
    # creating it.
    os.makedirs(output_dir, exist_ok=True)

    file_name = f'{time.strftime("%y.%d.%m")}_{country_code}_videos.csv'
    # os.path.join avoids a doubled separator when output_dir already ends
    # with one.
    with open(os.path.join(output_dir, file_name), 'w', encoding='utf-8') as file:
        file.writelines(f'{row}\n' for row in country_data)


def get_video_data(should_write_to_file=False):
    """Fetch the most-popular videos for every entry of ``country_codes``.

    Args:
        should_write_to_file: When true, each country's rows (the joined
            ``header`` first, then that country's videos) are also passed to
            ``write_to_file``.

    Returns:
        A dict mapping video id to video object, combined over all country
        codes. A video returned for several countries appears once, holding
        the object from the last country that listed it. The dict is empty
        when there are no country codes.
    """
    all_videos = {}
    for country_code in country_codes:
        # '&' is the bare separator, so no page token is sent.
        country_videos = api_request('&', country_code)
        if should_write_to_file:
            country_data = [','.join(header), *country_videos.values()]
            write_to_file(country_code, country_data)
        # Accumulate so that earlier countries are not discarded when the
        # loop moves on to the next one.
        all_videos.update(country_videos)
    return all_videos


# Files read by setup(): the API key and the list of country codes.
key_path = 'api_key.txt'
# Kept under its own name so country_codes only ever holds the loaded list.
codes_path = 'country_codes.txt'
api_key, country_codes = setup(key_path, codes_path)

videos = get_video_data()
# Print the sampled comments of each fetched video, one list per line.
for video in videos.values():
    print(video.comments)

['Shout out to Spacehog for having "In the Meantime" as the theme song here!\nOne of the coolest tunes from 90\'s making quite a comeback!!!', 'Those 5 color....\n....\nMORPHIN TIME', '0:47 among us', "Please don't suck like the last one.", 'Let me guess, they kill Adam with a dance off.', 'Perfect Movies On 2023!', 'I like the part where Rocket said "It\'s Rockin\' time", and started Rocking out', '1:28 Adam Warlock?', 'I LOVE IT!', "not excited for this honestly, 1) did they seriously make an Among Us reference ( 0:49 )  2) Groot looks awful, a big Baby Groot head and Adult Groot body. It's just bad design and is really distracting"]
['Well this looks bad.', '😊 looking forward to this movie.', 'Holy shit can we just stop making sequels to dead franchises for five minutes', 'Crystal Skull: “I won’t be the worst Indy movie anymore”', 'But will they bring back Short Round?', 'please let this be as good as I want it to be', 'This is going to make Crystal Skull look good.', "It's going to