## **Q1**

In [None]:
import os
import pandas as pd
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from gensim import corpora, models
from googleapiclient.discovery import build
# Fetch every NLTK resource used by the preprocessing step below.
nltk.download('stopwords')  # stop-word list read by stopwords.words('english')
nltk.download('punkt')      # tokenizer models used by word_tokenize
# Newer NLTK releases load the tokenizer from 'punkt_tab' instead of 'punkt';
# downloading both keeps word_tokenize working across versions.
nltk.download('punkt_tab')
# WordNetLemmatizer raises LookupError unless the WordNet corpus is installed.
nltk.download('wordnet')

# Function to preprocess comments
def preprocess_comments(comments):
    """Tokenize, clean and lemmatize raw comment strings.

    Each comment is lower-cased and tokenized with ``word_tokenize``. English
    stop words and tokens made up solely of punctuation/symbols are dropped,
    and the remaining tokens are lemmatized with ``WordNetLemmatizer``.

    Args:
        comments: Iterable of comment strings.

    Returns:
        A list with one entry per input comment, each entry being the list of
        cleaned, lemmatized tokens for that comment (possibly empty).
    """
    comments = list(comments)
    if not comments:
        # Nothing to process; avoid loading the NLTK resources at all.
        return []

    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()

    processed = []
    for comment in comments:
        tokens = word_tokenize(comment.lower())
        processed.append([
            lemmatizer.lemmatize(token)
            for token in tokens
            # word_tokenize emits punctuation as separate tokens ('!', ',',
            # '...'); keep only tokens containing at least one letter or digit
            # so they do not dominate the topic model.
            if token not in stop_words and any(ch.isalnum() for ch in token)
        ])

    return processed

# Function to perform topic modeling
def perform_topic_modeling(comments, *, num_topics=5, num_words=5, passes=10,
                           random_state=None):
    """Fit an LDA topic model on tokenized comments and return its topics.

    Args:
        comments: Iterable of token lists, one list per comment.
        num_topics: Number of topics to fit.
        num_words: Number of top words to report per topic.
        passes: Number of training passes over the corpus.
        random_state: Optional seed forwarded to ``LdaModel`` for
            reproducible topics; ``None`` keeps the default behaviour.

    Returns:
        The result of ``LdaModel.print_topics`` for all fitted topics, or an
        empty list when no comment contains any token.
    """
    documents = list(comments)

    # LdaModel raises ValueError on an empty vocabulary, which happens when
    # there are no comments or every comment was filtered down to nothing.
    if not any(documents):
        return []

    dictionary = corpora.Dictionary(documents)
    corpus = [dictionary.doc2bow(document) for document in documents]
    lda_model = models.LdaModel(
        corpus,
        num_topics=num_topics,
        id2word=dictionary,
        passes=passes,
        random_state=random_state,
    )

    # Request all fitted topics explicitly; print_topics would otherwise cap
    # the output at its own default when num_topics is large.
    return lda_model.print_topics(num_topics=num_topics, num_words=num_words)

# Function to authenticate and retrieve comments using YouTube Data API
def get_video_comments(api_key, video_id, max_comments=None):
    """Download the top-level comments of a YouTube video.

    Pages through ``commentThreads().list`` until there is no further page
    token or, when ``max_comments`` is given, until that many comments have
    been collected.

    Args:
        api_key: YouTube Data API key passed to ``build`` as ``developerKey``.
        video_id: ID of the video whose comment threads are listed.
        max_comments: Optional upper bound on the number of comments to
            fetch. ``None`` (the default) fetches every page, which can
            consume a lot of API quota on heavily commented videos.

    Returns:
        A list of comment texts (the ``textDisplay`` of each top-level
        comment), in the order the API returned them.
    """
    if max_comments is not None and max_comments <= 0:
        # Nothing requested; do not create a client or spend quota.
        return []

    youtube = build('youtube', 'v3', developerKey=api_key)

    comments = []
    page_token = None

    while True:
        page_size = 100
        if max_comments is not None:
            # Never request more than is still needed to reach the cap.
            page_size = min(page_size, max_comments - len(comments))

        response = youtube.commentThreads().list(
            part='snippet',
            videoId=video_id,
            textFormat='plainText',
            pageToken=page_token,
            maxResults=page_size,
        ).execute()

        comments.extend(
            item['snippet']['topLevelComment']['snippet']['textDisplay']
            for item in response.get('items', [])
        )

        page_token = response.get('nextPageToken')
        if not page_token:
            break
        if max_comments is not None and len(comments) >= max_comments:
            break

    return comments

def extract_video_id(video_url):
    """Return the YouTube video ID contained in ``video_url``.

    Handles ``watch?v=<id>`` URLs (with any extra query parameters or
    fragment), ``youtu.be/<id>`` short links, and ``/embed/<id>``,
    ``/shorts/<id>``, ``/live/<id>`` and ``/v/<id>`` paths. Anything else,
    including a bare video ID, falls back to the text after the last ``v=``,
    which is the whole string when ``v=`` is absent.

    Args:
        video_url: URL (or bare video ID) as typed by the user.

    Returns:
        The extracted video ID string.
    """
    from urllib.parse import parse_qs, urlparse

    url = video_url.strip()
    fallback = url.split('v=')[-1]

    try:
        # Without a scheme urlparse treats the host as part of the path.
        parsed = urlparse(url if '://' in url else 'https://' + url)
    except ValueError:
        return fallback

    query_ids = parse_qs(parsed.query).get('v')
    if query_ids:
        return query_ids[0]

    host = (parsed.hostname or '').lower()
    segments = [segment for segment in parsed.path.split('/') if segment]
    if host in ('youtu.be', 'www.youtu.be') and segments:
        return segments[0]
    if len(segments) >= 2 and segments[0] in ('embed', 'shorts', 'live', 'v'):
        return segments[1]

    return fallback


# Run the interactive pipeline only when executed as a script or notebook
# cell, so importing this file does not prompt for input. The names below are
# kept at module level so later cells can still use them.
if __name__ == "__main__":
    # Get YouTube video URL and API key from user
    video_url = input("Enter the YouTube video URL: ")
    api_key = input("Enter your YouTube Data API key: ")

    # Extract video ID from the URL
    video_id = extract_video_id(video_url)

    # Retrieve comments using YouTube Data API
    comments = get_video_comments(api_key, video_id)

    # Store comments in a CSV file
    df = pd.DataFrame(comments, columns=['Comment'])
    df.to_csv('youtube_comments.csv', index=False)

    # Preprocess comments
    preprocessed_comments = preprocess_comments(comments)

    # Perform topic modeling; LDA cannot be fitted without any tokens
    if any(preprocessed_comments):
        topics = perform_topic_modeling(preprocessed_comments)
    else:
        topics = []
        print("No usable comment text found; skipping topic modeling.")

    # Print the identified topics
    for topic in topics:
        print(topic)