### Extract Comments and Video Information

In [None]:
import os
from dotenv import load_dotenv
import sys
sys.path.append('src')
import googleapiclient.discovery
import googleapiclient.errors
from ETL.extract.fetch_comments import getYoutubeComments
from ETL.extract.fetch_video_information import getVideos

# --- YouTube Data API client configuration -------------------------------
api_service_name = "youtube"
api_version = "v3"

# Populate the process environment from a local .env file before reading the
# key. `load_dotenv` was imported but never called, so API_KEY resolved to
# None unless it had already been exported in the environment that started
# the kernel. By default load_dotenv() does not override variables that are
# already set, so an exported API_KEY still takes precedence.
load_dotenv()
DEVELOPER_KEY = os.getenv("API_KEY")

# NOTE(review): 'video_IDs' looks like a placeholder entry — replace it with
# real video IDs (examples are kept in the comment below) before running.
video_IDs = ['video_IDs']
#'zxYjTTXc-J8', 'fMfipiV_17o', 'FM7Z-Xq8Drc', '1WEAJ-DFkHE', 'r7zJ8srwwjk', 'iogcY_4xGjo', '0e3GPea1Tyg','48h57PspBec','GLoeAJUcz38','9bqk6ZUsKyA'

# Build the API client used by both extraction helpers.
youtube = googleapiclient.discovery.build(
    api_service_name,
    api_version,
    developerKey=DEVELOPER_KEY
)

# getVideos receives the whole ID list in a single call.
youtubeVideoInfo = getVideos(youtube, video_IDs)

# Comments are fetched one video at a time and accumulated into one flat list.
youtubeComments = []
for ID in video_IDs:
    youtubeComments.extend(getYoutubeComments(youtube, ID))


In [None]:
# Convert the raw extraction results (video info and comments) into tabular
# objects used by every later cell.
# NOTE(review): presumably pandas DataFrames, since later cells call
# .head() / .shape on them — confirm in ETL.transform.preprocess.
from ETL.transform.preprocess import toDataframeComments
from ETL.transform.preprocess import toDataFrameVideoInfo

videoinfo_df = toDataFrameVideoInfo(youtubeVideoInfo)
all_comments_df = toDataframeComments(youtubeComments)

In [None]:
# Quick preview of the comments table (output of .head()).
all_comments_df.head()

In [None]:
# Quick preview of the video information table (output of .head()).
videoinfo_df.head()

### Preprocessing: lowercase, remove stopwords and punctuation, and tokenize

In [None]:
# Run text preprocessing on the 'Original_Comment_Text' column.
# NOTE(review): later cells read a 'No_Stopwords_Text' column from the result;
# presumably preprocessText adds it — confirm in ETL.transform.preprocess.
from ETL.transform.preprocess import preprocessText

all_comments_df_preprocessed = preprocessText(all_comments_df, 'Original_Comment_Text')

In [None]:
# Quick preview of the preprocessed comments (output of .head()).
all_comments_df_preprocessed.head()

In [None]:
# Show the dimensions of the preprocessed comments
# (presumably a (rows, columns) tuple for a pandas DataFrame — confirm).
all_comments_df_preprocessed.shape

### Sentiment Analysis

In [None]:
# Run sentiment analysis over the comments.
# Note that the column passed is 'Original_Comment_Text' (the unprocessed
# text), not the stopword-free column used for topic modelling.
# Later cells read 'Sentiment_Label' and 'Comment_ID' from the result.
from ETL.transform.sentiment_analysis import sentiment_analysis

sentiment_result = sentiment_analysis(all_comments_df_preprocessed, 'Original_Comment_Text')

In [None]:
# Quick preview of the sentiment analysis output (output of .head()).
sentiment_result.head()

### Topic Modelling

In [None]:
# Fit a topic model on the 'No_Stopwords_Text' column, passing 'Comment_ID'
# as the identifier column. The helper returns four values: the model, the
# topics, the probabilities and a result table.
# NOTE(review): the meaning of the trailing 15 is not visible here
# (presumably a topic-count or minimum-topic-size setting) — confirm in
# ETL.transform.topic_modelling.create_bertopic_model.
from ETL.transform.topic_modelling import create_bertopic_model

model, topics, probabilities, topic_modelling_df = create_bertopic_model(
    all_comments_df_preprocessed, 'No_Stopwords_Text', 'Comment_ID', 15
)

In [None]:
# Display the topic summary reported by the fitted model.
# NOTE(review): presumably a BERTopic instance — confirm.
model.get_topic_info()

In [None]:
# Quick preview of the topic modelling result table (output of .head()).
topic_modelling_df.head()

### Upload Data to AWS S3

In [None]:
# Upload the result tables to S3; arguments are (data, bucket name, object name).
# NOTE(review): the bucket name is spelled 'youtube-commets-info' — confirm
# this matches the real bucket before "correcting" the spelling.
from ETL.load.store_data import saveToS3

# The upload of the preprocessed comments is currently disabled.
# saveToS3(all_comments_df_preprocessed, 'youtube-commets-info', 'YouTube_Comments.csv')
saveToS3(videoinfo_df, 'youtube-commets-info', 'Video_Info.csv')
saveToS3(sentiment_result, 'youtube-commets-info', 'Sentiment_Result.csv')
saveToS3(topic_modelling_df, 'youtube-commets-info', 'Topic_Modelling_Result.csv')

### Load Data: Save Results to Local CSV Files

In [None]:
# Write every result table to a CSV file under data/.
# NOTE(review): saveToJson is imported but not used in this cell.
# NOTE(review): presumably the data/ directory must already exist — confirm
# whether saveToCsv creates it.
from ETL.load.store_data import saveToJson
from ETL.load.store_data import saveToCsv

saveToCsv(all_comments_df_preprocessed, 'data/preprocess_comments.csv')
saveToCsv(videoinfo_df, 'data/video_information.csv')
saveToCsv(sentiment_result, 'data/sentiment_result.csv')
saveToCsv(topic_modelling_df, 'data/topic_modelling_result.csv')


### Visualize the results

In [None]:
# Plot the distribution of the 'Sentiment_Label' column of the sentiment results.
from analyze.visualize import plotSentimentDistribution

plotSentimentDistribution(sentiment_result, 'Sentiment_Label')

In [None]:
# Draw a word cloud from the 'No_Stopwords_Text' column of the preprocessed comments.
from analyze.visualize import plotWordcloud

plotWordcloud(all_comments_df_preprocessed, 'No_Stopwords_Text')

In [None]:
# Count n-grams in 'No_Stopwords_Text' and plot them by the 'count' column.
# No n-gram size is passed, so both helpers fall back to their own defaults
# (not visible here — confirm in analyze.visualize).
from analyze.visualize import visualizeNgram
from analyze.visualize import ngramCount

visualizeNgram(ngramCount(all_comments_df_preprocessed, 'No_Stopwords_Text'), 'count')

In [None]:
# Same n-gram analysis with size 3 passed to both helpers.
# Relies on ngramCount / visualizeNgram imported in the preceding cell.
threegram = ngramCount(all_comments_df_preprocessed, 'No_Stopwords_Text', 3)
visualizeNgram(threegram, 'count', 3)

In [None]:
# Plot like counts from the video information table.
from analyze.visualize import visualizeLikeCount

visualizeLikeCount(videoinfo_df)

In [None]:
# Plot reply counts; the input is the video information table
# (not the comments table).
from analyze.visualize import visualizeReplyCount

visualizeReplyCount(videoinfo_df)

In [None]:
# Plot view counts from the video information table.
from analyze.visualize import visualizeViewCount

visualizeViewCount(videoinfo_df)

In [None]:
# Resample the preprocessed comments by publish time and plot the trend.
# NOTE(review): the resampling frequency and the timestamp column used are
# decided inside resamplePublishTime — confirm in analyze.visualize.
from analyze.visualize import visualizeCommentPublishTrend
from analyze.visualize import resamplePublishTime

visualizeCommentPublishTrend(resamplePublishTime(all_comments_df_preprocessed))

In [None]:
# Combine topic assignments with sentiment labels and plot them together.
from ETL.transform.preprocess import joinDataFrame
from analyze.visualize import visualizeSemtimentandRTopic

# Despite its name, `sentiment_model` holds the joined table, not a model.
# NOTE(review): 'Comment_ID' is presumably the join key; the join type is
# decided inside joinDataFrame — confirm.
sentiment_model = joinDataFrame(topic_modelling_df, sentiment_result, 'Comment_ID')
visualizeSemtimentandRTopic(    
    sentiment_model,
    topic_column='Topic',
    sentiment_column='Sentiment_Label'
)
