# Plot trailer comments (on App)

This notebook simulates what happens when a user enters a movie name in the App:
how the trailers and their comments are fetched.
- If the movie exists in the `data` folder, fetch the trailer comments directly from the local files.
- If not, use the YouTube Data API to search for and download the trailer comments.

In [1]:
import os
import pandas as pd
import time
import json
import matplotlib.pyplot as plt

from textblob import TextBlob

import chart_studio
import chart_studio.plotly as py
import plotly.graph_objects as go

from mojo_api import Movie
from youtube_api import *
from data_util import *
from model_util import *

# for auto reload a module after editing.
%load_ext autoreload
%autoreload 2

In [2]:
# Register the Chart Studio (plotly) credentials. The key file is read as text
# and split on newlines: its first entry is the username, its second the API key.
with open('./private_api_key/plotly_API_key.txt', 'r') as f:
    s = f.read().split('\n')
plotly_user, plotly_key = s[0], s[1]
chart_studio.tools.set_credentials_file(username=plotly_user, api_key=plotly_key)

# Input locations used by the rest of the notebook.
cache_file = "./data/movie_2014-2019.csv"
trailer_list_file = "./data/trailer_list/trailer_list_updated.csv"
trailer_comments_dir = "./data/trailer_comments"

# Load both CSV tables up front; trailer_comments_dir is only joined with
# per-trailer file names later on.
cache_df, trailer_list_df = (
    pd.read_csv(csv_path) for csv_path in (cache_file, trailer_list_file)
)

In [3]:
# An example
# Simulated user input: the movie title as it would be typed into the App.
user_movie = "Avengers: Endgame"
movie = Movie()
# Look the title up through the project's mojo_api.Movie helper. Later cells read
# movie.tt_id and movie.title, so this call presumably fills in those attributes,
# and find_movie presumably reports whether the lookup succeeded — TODO confirm
# against mojo_api. NOTE(review): find_movie is not checked by the cells shown here.
find_movie = movie.get_app_movie_info(user_movie)

In [4]:
# get trailer list and trailer comments
# Select the trailers of the chosen movie (matched on tt_id) whose comment
# section is not disabled.
trailer_mask = (trailer_list_df.tt_id == movie.tt_id) & \
               (trailer_list_df.comment_disabled == False)
trailer_list = trailer_list_df[trailer_mask].trailer_id.to_list()

# Reset comment_df on every run: without this, a movie with no trailers would
# leave comment_df untouched and the following cells would silently process
# whatever frame an earlier run left in the kernel.
comment_df = pd.DataFrame()
if len(trailer_list) == 0:
    print("Sorry, trailers not found for {}.".format(movie.title))
else:
    print(trailer_list)
    trailer_frames = []
    for video_id in trailer_list:
        # Each trailer's comments live in <trailer_comments_dir>/<video_id>.csv.
        comment_file = os.path.join(trailer_comments_dir, video_id + ".csv")
        tr_df = pd.read_csv(comment_file)
        # Keep only the rows flagged for visualization.
        tr_df = tr_df[tr_df["used_for_visualization"] == True]
        trailer_frames.append(tr_df)
    # DataFrame.append was removed in pandas 2.0; a single concat gives the same
    # re-indexed result and avoids re-copying the accumulated frame per trailer.
    comment_df = pd.concat(trailer_frames, ignore_index=True)

['TcMBFSGVi1c', 'hA6hldpSTF8']


In [5]:
# process trailer comments
# Take an explicit copy of the visualization rows: the boolean filter returns a
# new object, and adding columns to it without .copy() triggers pandas'
# SettingWithCopyWarning.
comment_df = comment_df[comment_df["used_for_visualization"] == True].copy()

# Assign whole columns by name instead of through .loc[:, col]: since pandas 2.0
# a .loc[:, col] assignment first tries to write into the existing column in
# place, which would leave 'datetime' with its original dtype rather than
# converting the column to datetime64.
comment_df['datetime'] = pd.to_datetime(comment_df['datetime'], format="%Y-%m-%d %H:%M:%S")

# Zero-padded calendar-day key (YYYY-MM-DD) used for the daily aggregation.
comment_df['Year-Month-Day'] = comment_df['datetime'].dt.strftime("%Y-%m-%d")

# A comment counts as positive when its sentiment_score is above +threshold and
# as negative when it is below -threshold; anything in between counts as neither.
threshold = 0.2
comment_df['isPos'] = (comment_df['sentiment_score'] > threshold).astype(int)
comment_df['isNeg'] = (comment_df['sentiment_score'] < -threshold).astype(int)

In [6]:
# Aggregate the comments per calendar day: the timestamp and used_for_model flag
# of the day's first row, the number of non-null comment texts, and the number
# of positive / negative comments.
comment_df_agg = comment_df.groupby(['Year-Month-Day']).agg({
                "datetime": "first",
                "text": "count",
                "isPos": "sum",
                "isNeg": "sum",
                "used_for_model": "first"
            })
comment_df_agg['pos_ratio'] = comment_df_agg['isPos'] / comment_df_agg['text']
comment_df_agg['neg_ratio'] = comment_df_agg['isNeg'] / comment_df_agg['text']

# Order the days chronologically *before* smoothing, so the rolling window below
# explicitly runs over time-ordered rows instead of relying on the group keys
# happening to sort the same way.
comment_df_agg = comment_df_agg.sort_values('datetime')

# Rolling mean over 7 rows (i.e. 7 days that have comments, not necessarily
# 7 consecutive calendar days); the first 6 rows are NaN.
rolling_window = 7
comment_df_agg['pos_ratio_roll'] = comment_df_agg['pos_ratio'].rolling(rolling_window).mean()
comment_df_agg['neg_ratio_roll'] = comment_df_agg['neg_ratio'].rolling(rolling_window).mean()

comment_df_agg = comment_df_agg.set_index("datetime", drop=True)[['text', 'pos_ratio', 'neg_ratio', 'pos_ratio_roll', 'neg_ratio_roll', 'used_for_model']]

x_start = comment_df_agg.index[0]
x_end = comment_df_agg.index[-1]
# Last day whose first comment is flagged used_for_model; used as the movie
# release date in the plot. Raises IndexError if no day carries the flag.
x_open = comment_df_agg[comment_df_agg.used_for_model == True].index[-1]  # movie release date.

# Upper limit of the sentiment axis: 0.1 above the highest rolling ratio, but at
# least 0.5. With fewer than `rolling_window` days both rolling columns are all
# NaN; the built-in max() would then return NaN (comparisons with NaN are always
# False), so fall back to 0.5 explicitly.
roll_peak = comment_df_agg[['pos_ratio_roll', 'neg_ratio_roll']].max().max()
y2_max = 0.5 if pd.isna(roll_peak) else max(roll_peak + 0.1, 0.5)

comment_df_agg

Unnamed: 0_level_0,text,pos_ratio,neg_ratio,pos_ratio_roll,neg_ratio_roll,used_for_model
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-12-07 13:01:42,156348,0.184121,0.055006,,,True
2018-12-08 00:00:01,32082,0.193348,0.065177,,,True
2018-12-09 00:00:06,9830,0.189318,0.068464,,,True
2018-12-10 00:00:13,5094,0.194543,0.066549,,,True
2018-12-11 00:00:07,2743,0.194313,0.069996,,,True
...,...,...,...,...,...,...
2019-07-20 00:33:08,54,0.296296,0.074074,0.250449,0.068366,False
2019-07-21 00:06:57,175,0.137143,0.051429,0.222422,0.072539,False
2019-07-22 00:10:28,91,0.164835,0.076923,0.220573,0.074004,False
2019-07-23 00:10:56,79,0.227848,0.037975,0.213256,0.069462,False


In [7]:
# plot comments sentiment
# Daily comment count goes on the left axis (y1); the rolling positive and
# negative ratios share the right axis (y2).
day_axis = comment_df_agg.index

trace_cnt = go.Scatter(
    x=day_axis,
    y=comment_df_agg['text'],
    name='Comment Count',
    yaxis='y1',
    line={'color': 'blue'},
)
trace_pos = go.Scatter(
    x=day_axis,
    y=comment_df_agg['pos_ratio_roll'],
    name='Positive Comment Ratio',
    yaxis='y2',
    line={'color': 'red'},
)
trace_neg = go.Scatter(
    x=day_axis,
    y=comment_df_agg['neg_ratio_roll'],
    name='Negative Comment Ratio',
    yaxis='y2',
    line={'color': 'red', 'dash': 'dash'},
)

# Grey band from x_open to x_end: the step-shaped trace sits at -0.3 (below the
# y2 range, which starts at 0) until x_open and at 1 afterwards, and is filled
# down to zero.
trace_shade = go.Scatter(
    x=[x_start, x_open, x_end],
    y=[-0.3, 1, 1],
    yaxis='y2',
    name="Shaded region",
    mode="none",
    fill="tozeroy",
    fillcolor="rgba(135, 143, 135, 0.2)",
    line={'shape': 'hv', 'width': 0},
    hoverinfo="none",
    showlegend=False,
)

# Label the release point only when there is data after it.
if x_open < x_end:
    annode_text = "<b>Movie<br>released</b>"
else:
    annode_text = ""
trace_annode = go.Scatter(
    x=[x_open],
    y=[y2_max],
    yaxis='y2',
    name="movie release data",
    mode="text",
    text=[annode_text],
    textfont={'size': 16},
    textposition="bottom right",
    showlegend=False,
)

data = [trace_cnt, trace_pos, trace_neg, trace_shade, trace_annode]
layout = go.Layout(
    title='',
    legend={'x': 0.05, 'y': 0.95},
    xaxis={'title': 'Date'},
    yaxis={'title': 'Comment Count', 'color': 'blue'},
    yaxis2={
        'title': 'Comment Sentiment',
        'color': 'red',
        'overlaying': 'y',
        'side': 'right',
        'range': [0, y2_max],
    },
)

fig = go.Figure(data=data, layout=layout)
py.iplot(fig, sharing='public')
