In [36]:
import os
import re
import datetime
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from unidecode import unidecode

DATA_DIR = "data"

def clean_text(text):
    cleaned_text = unidecode(text)
    cleaned_text = re.sub(r'[^\w\s]', '', cleaned_text)
    cleaned_text = re.sub(r'[\uD800-\uDBFF][\uDC00-\uDFFF]', '', cleaned_text)
    return cleaned_text.lower()

def get_comment_replies(youtube, parent_id):
    replies = []
    try:
        response = youtube.comments().list(
            part="snippet",
            parentId=parent_id,
            maxResults=100,
            textFormat="plainText"
        ).execute()
        for element in response["items"]:
            replies.append(element["snippet"]["textDisplay"])

    except HttpError as e:
        print(e)
    return replies

def save_video_comments(youtube, video_id, filename):
    video_info = youtube.videos().list(
        part="snippet",
        id=video_id
    ).execute()
    video_title = video_info["items"][0]["snippet"]["title"]
    published_at = video_info["items"][0]["snippet"]["publishedAt"]

    video_published_date = datetime.datetime.strptime(published_at, '%Y-%m-%dT%H:%M:%SZ')
    start_date = datetime.datetime(2022, 1, 1)
    end_date = datetime.datetime(2022, 12, 31, 23, 59, 59)  # Último segundo de 2022

    if start_date <= video_published_date <= end_date:
        request = youtube.commentThreads().list(
            part="snippet",
            videoId=video_id,
            maxResults=100,
            textFormat="plainText"
        )

        try:
            response = request.execute()
            has_next = True
            page_idx = 0
            while has_next:
                page_idx += 1
                print(f"Page: {page_idx}")
                comments = []
                for item in response["items"]:
                    snippet = item["snippet"]
                    comment = snippet["topLevelComment"]
                    text = comment["snippet"]["textDisplay"]
                    comments.append(f"{video_title}: {text}")  # Adiciona o título do vídeo ao comentário
                    if snippet["totalReplyCount"] > 0:
                        replies = get_comment_replies(youtube, comment["id"])
                        comments.extend(replies)
                
                with open(os.path.join(DATA_DIR, filename), "a") as savefile:
                    cleaned_comments = [clean_text(comment) for comment in comments]
                    savefile.write("\n".join(cleaned_comments))

                if "nextPageToken" in response:
                    token = response["nextPageToken"]
                    response = youtube.commentThreads().list(
                        part="snippet",
                        videoId=video_id,
                        maxResults=100,
                        textFormat="plainText",
                        pageToken=token
                    ).execute()
                else:
                    has_next = False

        except HttpError as e:
            print(e)

if __name__ == '__main__':
    with open("apikey.txt") as apifile:
        api_key = apifile.read().strip()
    api_name = "youtube"
    api_version = "v3"
    playlist_url = "https://www.youtube.com/playlist?list=PLaE_mZALZ0V2E0lVJowee_oerd3OMvyJu"
    filename = "anitta.txt"

    youtube = build(api_name, api_version, developerKey=api_key)

    playlist_id = playlist_url.split("list=")[-1]
    request = youtube.playlistItems().list(
        part="snippet",
        playlistId=playlist_id,
        maxResults=50
    )
    response = request.execute()

    video_ids = []
    while "items" in response:
        for item in response["items"]:
            snippet = item["snippet"]
            video_ids.append(snippet["resourceId"]["videoId"])

        if "nextPageToken" in response:
            token = response["nextPageToken"]
            request = youtube.playlistItems().list(
                part="snippet",
                playlistId=playlist_id,
                maxResults=50,
                pageToken=token
            )
            response = request.execute()
        else:
            break

    for video_id in video_ids:
        save_video_comments(youtube, video_id, filename)

    with open(os.path.join(DATA_DIR, filename)) as commentsfile:
        comments = commentsfile.readlines()
    
    hashtag = ""  # Coloque a hashtag desejada aqui
    found = sum(1 for comment in comments if hashtag in comment.lower())
    print(f"Total de comentários: {len(comments)}\nCom {hashtag}: {found}")


IndexError: list index out of range