In [31]:
import dash
from dash import dcc
import dash_core_components as dcc
from dash import html
import dash_html_components as html
import plotly.express as px
from wordcloud import WordCloud
from googleapiclient.discovery import build
from jupyter_dash import JupyterDash
from dash.dependencies import Output, Input, State
from dash.exceptions import PreventUpdate
import joblib
import pandas as pd
from functools import lru_cache
from pythainlp.corpus.common import thai_stopwords
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.svm import LinearSVC

model = joblib.load('text_svm.pkl')
vectorizer = joblib.load('vectorizer.pkl')

# Set up the Dash app
app = JupyterDash(__name__)

app.layout = html.Div(
    style={'text-align': 'center'},
    children=[
        html.H1("YouTube Comment Sentiment Analysis"),
        html.Div([
            html.Label("Enter a YouTube video ID:"),
            dcc.Input(id="video-id-input", type="text"),
            html.Button("Submit", id="submit-button"),
        ]),
        html.Div(
            style={'text-align': 'center'},
            children=
            [html.Label("Choose a word cloud to display:"),
            dcc.Dropdown(
                id='dropdown',
                options=[
                    {'label': 'All comments', 'value': 'all'},
                    {'label': 'Positive comments only', 'value': 'positive'},
                    {'label': 'Negative comments only', 'value': 'negative'}
                ],
                value='all'
            )
        ]),
        html.H2("YouTube Comment Word Cloud"),
        html.Div(id="output-div",
        children=html.Div(style={'display': 'flex', 'align-items': 'center', 'justify-content': 'center'})
            ),
        html.Div(
            style={'margin': 'auto'},
             children=[
                html.H3("Sentiment and Likes by Comment"),
                dcc.Graph(id='sunburst-chart', figure={})
                ]),
        html.H4('Time Series Plot'),
        dcc.Graph(id='time-series-plot', figure={}), # new graph component
    ])






@lru_cache(maxsize=1024)
def get_comments(video_id):
    # Scrape the comments for the specified video
    api_key = 'AIzaSyC5cwg9T2jF5GvVBCJIN-tcK7MMW1MnDS4'

    # Replace VIDEO_ID with the ID of the video you want to scrape comments from
    video_id = str(video_id)

    # Create a YouTube service object
    service = build('youtube', 'v3', developerKey=api_key)

    data = []
    max_results = 1000
    next_page_token = ''

    while True:
        # Call the YouTube API to retrieve comments for the video
        request = service.commentThreads().list(
            part='snippet',
            videoId=video_id,
            textFormat='plainText',
            maxResults=max_results,
            pageToken=next_page_token)
        response = request.execute()

        # Iterate over the response items
        for item in response['items']:
            comment = item['snippet']['topLevelComment']['snippet']
            data.append({
                'text': comment['textDisplay'],
                'author': comment['authorDisplayName'],
                'likes': comment['likeCount'],
                'publishedAt': comment['publishedAt']})

        # Check if there is a next page of results
        if 'nextPageToken' in response:
            next_page_token = response['nextPageToken']
        else:
            break

    df = pd.DataFrame(data)

    # Process the comments to extract the text and clean it up
    stop_words = thai_stopwords()
    df['text'] = df['text'].apply(lambda x: ' '.join([word for word in x.split() if word not in stop_words]))

    # Classify the comments as positive or negative
    X = vectorizer.transform(df['text'])
    predictions = model.predict(X)
    df['sentiment'] = predictions

    # Create dataframes for positive and negative comments
    positive_df = df.loc[df['sentiment'] == 'pos']
    negative_df = df.loc[df['sentiment'] == 'neg']

    return df, positive_df, negative_df

#wordcloud
@app.callback(
    Output("output-div", "children"),
    [Input("submit-button", "n_clicks"), Input("dropdown", "value")],
    [State("video-id-input", "value")],
)
def update_output(n_clicks, dropdown_value, video_id):
    if not n_clicks:
        raise PreventUpdate
    else:
        df, positive_df, negative_df = get_comments(video_id)

        # Generate word clouds
        reg = r"[ก-๙a-zA-Z']+"
        fp = "THSarabunNew.ttf"
        all_wordcloud = WordCloud(background_color = 'white', max_words=1000, height = 500, width=800, font_path=fp, regexp=reg).generate(" ".join(df['text']))
        positive_wordcloud = WordCloud(background_color = 'white', max_words=1000, height = 500, width=800, font_path=fp, regexp=reg).generate(" ".join(positive_df['text']))
        negative_wordcloud = WordCloud(background_color = 'white', max_words=1000, height = 500, width=800, font_path=fp, regexp=reg).generate(" ".join(negative_df['text']))

        if dropdown_value == 'all':
            return html.Img(src=all_wordcloud.to_image())
        elif dropdown_value == 'positive':
            return html.Img(src=positive_wordcloud.to_image())
        elif dropdown_value == 'negative':
            return html.Img(src=negative_wordcloud.to_image())
#sunburst
@app.callback(
    Output("sunburst-chart", "figure"),
    [Input("submit-button", "n_clicks"), Input("dropdown", "value")],
    [State("video-id-input", "value")],
)
def update_sunburst_chart(n_clicks, dropdown_value, video_id):
    if not n_clicks:
        raise PreventUpdate
    else:
        df, positive_df, negative_df = get_comments(video_id)

        if dropdown_value == 'all':
            df_plot = df
        elif dropdown_value == 'positive':
            df_plot = positive_df
        elif dropdown_value == 'negative':
            df_plot = negative_df

        # Create the sunburst chart
        fig = px.sunburst(df_plot, path=['sentiment', 'likes', 'text'], values='likes', height = 1000, width=1500,color='sentiment',
        color_discrete_map={'pos':'rgb(136,204,238)', 'neg':'rgb(251,128,114)'})
        
        return fig
#line plot
@app.callback(
    Output('time-series-plot', 'figure'),
    [Input('video-id-input', 'value')]
)
def update_time_series(video_id):
    if video_id is None:
        raise PreventUpdate
    else:
        df, positive_df, negative_df = get_comments(video_id)
        positive_df['publishedAt'] = pd.to_datetime(positive_df['publishedAt'])
        negative_df['publishedAt'] = pd.to_datetime(negative_df['publishedAt'])
        positive_df['month'] = positive_df['publishedAt'].apply(lambda x: x.strftime("%Y-%m"))
        negative_df['month'] = negative_df['publishedAt'].apply(lambda x: x.strftime("%Y-%m"))
        positive_monthly_summary = positive_df.groupby('month').agg({'sentiment': 'count'})
        negative_monthly_summary = negative_df.groupby('month').agg({'sentiment': 'count'})
        positive_monthly_summary.columns = ['positive_comment_count']
        negative_monthly_summary.columns = ['negative_comment_count']
        monthly_summary = pd.merge(positive_monthly_summary, negative_monthly_summary, left_index=True, right_index=True)
        monthly_summary['positive_cumsum'] = monthly_summary['positive_comment_count'].cumsum()
        monthly_summary['negative_cumsum'] = monthly_summary['negative_comment_count'].cumsum()

        # create the plotly figure
        fig = px.line(monthly_summary, y=['positive_cumsum', 'negative_cumsum'])
        fig.update_layout(xaxis_title="Month",yaxis_title="comment count")
        

        return fig


if __name__ == '__main__':
    app.run_server(debug=True,port=8052)

Dash app running on http://127.0.0.1:8052/




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/