In [18]:
# Movie Sentiment Analysis (v1.0)
# Authors: Ankita Gadage, Swapnil Borse, Nikhil Yathindra, Mrunal Maniar
# Term Project - Social Media and Data Mining - Syracuse University - 2019
# Guidance - Prof. Martin Harrison

###################################################################################
# required libraries
###################################################################################
import os
import re
import json, requests
import facebook
from math import *


from textblob import TextBlob
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from apiclient.discovery import build
from apiclient.errors import HttpError
from oauth2client.tools import argparser
import google.oauth2.credentials
import google_auth_oauthlib.flow
import matplotlib.pyplot as plt
from bokeh.io import output_file, show 
from bokeh.layouts import layout 
from bokeh.models import ( 
  HoverTool, ColumnDataSource, Legend, LegendItem 
) 
from bokeh.plotting import figure 
from bokeh.palettes import brewer 
from numpy import pi 

import pandas as pd
import pprint 
import numpy as np 
import matplotlib.pyplot as pd
import tmdbsimple as tmdb

#plotting libraries
from bokeh.plotting import figure, output_file, output_notebook, show
from bokeh.layouts import column, row
from bokeh.plotting import figure, curdoc
from bokeh.models import Button
from bokeh.models.widgets import TextInput, Button, Paragraph
from bokeh.plotting import figure
from bokeh.layouts import layout, widgetbox
from bokeh.plotting import figure
import ipywidgets as widgets


In [19]:
###################################################################################
# redirecting output to python notebook
###################################################################################

# Configure Bokeh to render plots inline in the notebook's output cells.
# NOTE(review): the final cell of this notebook also calls output_file();
# confirm which output mode is actually intended when both are active.
output_notebook()

In [20]:
###################################################################################
# function to return the video id of the movie
###################################################################################
def get_video_id_yt(movie_name):
    '''
    Look up a movie on TMDb and return the key of its first listed video.

    The first search hit for ``movie_name`` is used, and the ``key`` of the
    first entry in that movie's video list is returned (presumably a YouTube
    video id, since it is passed on to the YouTube comment lookup).

    Returns False, after printing a message, when the search yields no
    results or the matched movie has no videos.
    '''
    search = tmdb.Search()
    response = search.movie(query=movie_name)
    # The search response is a dict and therefore truthy even with zero hits,
    # so the hit list itself has to be checked.
    results = response.get('results') if response else None
    if not results:
        print("Invalid movie name. No video found.")
        return False

    movie = tmdb.Movies(results[0]['id'])
    videos = movie.videos().get('results')
    if not videos:
        print("Invalid movie name. No video found.")
        return False
    return videos[0]['key']

In [21]:
###################################################################################
# function to remove extra characters from the comment
###################################################################################
def clean_comment(comment):
    '''
    Utility function to clean comment text by removing @mentions, links and
    special characters using a single regex substitution.

    Every match is replaced by a space and the result is re-joined on single
    spaces, so runs of whitespace collapse and the ends are trimmed.
    '''
    # Raw string: the regex escapes (\w, \/, \S) are not valid Python string
    # escapes and trigger a SyntaxWarning on newer interpreters when written
    # in a normal string literal. \t is now resolved by the regex engine
    # instead of the Python parser, which matches the same tab character.
    pattern = r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)"
    return ' '.join(re.sub(pattern, " ", comment).split())

In [22]:
###################################################################################
# function to perform the sentiment analysis
###################################################################################
def get_sentiment(comment):
    '''
    Label a comment as 'positive', 'neutral' or 'negative'.

    The comment is cleaned with clean_comment() and scored with TextBlob;
    the label depends only on the sign of the polarity score.
    '''
    # polarity of the cleaned comment text as reported by TextBlob
    polarity = TextBlob(clean_comment(comment)).sentiment.polarity
    if polarity > 0:
        return 'positive'
    # anything that is neither above zero nor exactly zero is negative
    return 'neutral' if polarity == 0 else 'negative'

In [23]:
###################################################################################
#   function to return sentiment scores for youtube comments
###################################################################################
def get_sentiment_scores_yt(youtube,video_id):
    '''
    Classify the top-level comments of a YouTube video and return the share
    of each sentiment as percentages.

    Only a single page of comment threads (maxResults = 100) is requested.

    Parameters:
        youtube  -- client object exposing commentThreads().list(...).execute()
        video_id -- id of the video whose comment threads are fetched

    Returns:
        (pos, neg, neu) percentages; (0.0, 0.0, 0.0) when the response holds
        no comments, instead of failing with a division by zero.
    '''
    response = youtube.commentThreads().list(
        maxResults=100, part='snippet', videoId=video_id, textFormat="plainText"
    ).execute()

    sentiments = [
        get_sentiment(item['snippet']['topLevelComment']['snippet']['textDisplay'])
        for item in response.get('items', [])
    ]

    total = len(sentiments)
    if total == 0:
        # no comments (or comments disabled): nothing to average over
        return 0.0, 0.0, 0.0

    pos = 100 * sentiments.count('positive') / total
    neg = 100 * sentiments.count('negative') / total
    neu = 100 * sentiments.count('neutral') / total
    return pos, neg, neu

In [24]:
###################################################################################
# function to extract video id from the url
###################################################################################
def get_video_id(url):
    '''
    Return the last '/'-separated segment of url, falling back to the
    second-to-last segment when the url ends with a slash.
    '''
    segments = url.split('/')
    tail = segments[-1]
    # a trailing slash leaves an empty final segment; step back one
    return tail if tail != "" else segments[-2]

In [25]:
###################################################################################
# function to return the facebook comments
###################################################################################
def get_facebook_comments(graph, iVidId, comment_limit=500):
    '''
    Collect comment messages for a Facebook object, following pagination.

    Parameters:
        graph         -- Graph API client exposing get_object(id=..., fields=...)
        iVidId        -- id of the object (video) whose comments are read
        comment_limit -- stop requesting further pages once at least this
                         many messages are collected; the page that crosses
                         the limit is kept whole, so the result may be longer

    Returns:
        list of comment message strings; empty when the object has no
        'comments' field.
    '''
    allComments = []
    video = graph.get_object(id = iVidId, fields = "comments")
    # an object without comments simply has no 'comments' key
    wrapper = video.get('comments')

    while wrapper:
        for comment in wrapper.get('data', []):
            message = comment.get('message')
            # skip entries without text instead of aborting the whole crawl
            if message is not None:
                allComments.append(message)

        # check the limit before paying for another page request
        if len(allComments) >= comment_limit:
            break
        next_url = wrapper.get('paging', {}).get('next')
        if not next_url:
            break
        wrapper = requests.get(next_url).json()
    return allComments

In [26]:
###################################################################################
# function to get facebook comments sentiment scores
###################################################################################
def get_sentiment_scores_fb(fbComments):
    '''
    Classify a list of Facebook comment strings and return the share of each
    sentiment as percentages.

    Returns:
        (pos, neg, neu) percentages; (0.0, 0.0, 0.0) for an empty list,
        instead of failing with a division by zero.
    '''
    # get_sentiment() cleans the text itself, so no separate clean_comment()
    # pass is needed here.
    sentiments = [get_sentiment(comment) for comment in fbComments]

    total = len(sentiments)
    if total == 0:
        return 0.0, 0.0, 0.0

    pos = 100 * sentiments.count('positive') / total
    neg = 100 * sentiments.count('negative') / total
    neu = 100 * sentiments.count('neutral') / total
    return pos, neg, neu

In [27]:
###################################################################################
# function to fetch and parse the TMDB and OMDb JSON rating responses
###################################################################################

def _omdb_score_out_of_ten(ratings, source):
    '''Return the OMDb rating published by ``source``, rescaled from 0-100 to 0-10.'''
    for entry in ratings:
        if entry.get('Source') == source:
            # "85%" and "67/100" both carry the 0-100 score as the leading number
            leading = entry['Value'].rstrip('%').split('/')[0]
            return float(leading) / 10
    raise KeyError(source)


def getRating(name):
    '''
    Fetch ratings for a movie title from TMDb and OMDb.

    Parameters:
        name -- movie title (converted with str())

    Returns:
        (avg_vote, imdb_rating, r_rating, critic_rating), where r_rating is
        the Rotten Tomatoes score and critic_rating the Metacritic score,
        both divided by 10. Returns None, after printing a message, when a
        request fails or an expected field is missing.

    The API keys can be overridden through the TMDB_API_KEY and OMDB_API_KEY
    environment variables; the embedded values are kept only as fallbacks.
    NOTE(review): credentials should not live in the notebook - consider
    rotating these keys and dropping the fallbacks.
    '''
    try:
        title = str(name)
        tmdb_key = os.environ.get('TMDB_API_KEY', 'ab40854b6a982808ea3933d299c15bb7')
        omdb_key = os.environ.get('OMDB_API_KEY', '85626e7e')

        # Passing the query via params lets requests URL-encode the title
        # (spaces, '&', '#', ...) instead of concatenating it by hand.
        tmdb_response = requests.get(
            'https://api.themoviedb.org/3/search/movie',
            params={'query': title, 'api_key': tmdb_key},
        )
        # NOTE(review): the fixed i=tt3896198 id is sent together with the
        # title, as before - confirm which of the two OMDb actually honours.
        omdb_response = requests.get(
            'http://www.omdbapi.com/',
            params={'i': 'tt3896198', 'apikey': omdb_key, 't': title},
        )
        tmdb_results = json.loads(tmdb_response.text)['results']
        omdb = json.loads(omdb_response.text)

        avg_vote = float(tmdb_results[0]['vote_average'])
        imdb_rating = float(omdb['imdbRating'])
        # Look ratings up by source and divide numerically: slicing the
        # digits turned "100%" into 1.0 and "7%" into 7.0.
        r_rating = _omdb_score_out_of_ten(omdb['Ratings'], 'Rotten Tomatoes')
        critic_rating = _omdb_score_out_of_ten(omdb['Ratings'], 'Metacritic')
        return avg_vote, imdb_rating, r_rating, critic_rating
    except Exception:
        print("Error: Invalid movie name for TMDB/OMDB")
        return None

In [28]:
###################################################################################
# function to return the youtube object
###################################################################################
def getYtObject():
    '''
    Build and return a YouTube Data API v3 client authenticated with a
    developer key.

    Side effect: sets tmdb.API_KEY, which the tmdb lookups elsewhere in this
    notebook rely on.

    The keys can be overridden through the YOUTUBE_API_KEY and TMDB_API_KEY
    environment variables; the embedded values are kept only as fallbacks.
    NOTE(review): credentials should not live in the notebook - consider
    rotating these keys and dropping the fallbacks.

    Returns:
        the client object, or None (after printing a message) if it could
        not be built.
    '''
    try:
        tmdb.API_KEY = os.environ.get('TMDB_API_KEY', 'd2b3777109943b9daed543eab2138338')
        developer_key = os.environ.get(
            'YOUTUBE_API_KEY', "AIzaSyDzMnUCNoVkszY_0dPf8pThteeS9t1mZVg"
        )
        return build("youtube", "v3", developerKey=developer_key)
    except Exception:
        print("Error: Invalid YouTube token")
        return None
In [29]:
###################################################################################
# function to return the facebook graph object
###################################################################################
def getFbObject():
    '''
    Create and return a Facebook Graph API client (API version 2.7).

    The access token can be supplied through the FACEBOOK_ACCESS_TOKEN
    environment variable; the embedded token is kept only as a fallback.
    NOTE(review): credentials should not live in the notebook, and the
    embedded token may well have expired - confirm and remove the fallback.

    Returns:
        the client object, or None (after printing a message) if it could
        not be created.
    '''
    try:
        access_token = os.environ.get(
            'FACEBOOK_ACCESS_TOKEN',
            "EAACEdEose0cBABtHKe13UctdL0oRTP3LCeEnw2JRi95lffZCDmjLZBXc7QB53qZAJjw2HWN8GpWOSCL8wC6blGZBTUjgx0BPZBBwE67DQGrqg4GPu99wexMVW8IXbnMQubKFqHlQ1bZANFdJXvulZAjNBMJPCq6EBWTS3PrhG9QgPfjRgZBnydyG1XKyHzMeLf0ZD",
        )
        # version as a string: a float literal cannot represent versions
        # such as "2.10" (it would collapse to 2.1)
        return facebook.GraphAPI(access_token = access_token, version = "2.7")
    except Exception:
        print("Error: Invalid FaceBook token")
        return None

In [30]:
###################################################################################
# driver function for YouTube sentiment analysis pipeline
###################################################################################
def youTubeDriver(movie_name):
    '''
    Run the YouTube pipeline for a movie: build the client, find the movie's
    video id, and score the sentiment of that video's comments.

    Returns:
        (pos, neg, neu) percentages, or None when any step fails.
    '''
    try:
        youtube = getYtObject()
        if youtube is None:
            # getYtObject() has already reported why the client is missing
            return None
        video_id = get_video_id_yt(movie_name)
        if not video_id:
            # lookup helper signalled "no video" without raising
            return None
        pos, neg, neu = get_sentiment_scores_yt(youtube, video_id)
        return pos, neg, neu
    except Exception:
        # Exception rather than a bare except, so Ctrl-C / kernel interrupt
        # is not swallowed while the network calls are running
        print("Error: Invalid movie name for youtube")
        return None

In [31]:
###################################################################################
# driver function for Facebook sentiment analysis pipeline
###################################################################################
def faceBookDriver(url):
    '''
    Run the Facebook pipeline for a video url: build the Graph client,
    extract the video id from the url, fetch the comments and score them.

    Returns the (pos, neg, neu) percentages from get_sentiment_scores_fb().
    '''
    fb_graph = getFbObject()
    fetched = get_facebook_comments(fb_graph, get_video_id(url))
    positive, negative, neutral = get_sentiment_scores_fb(fetched)
    return positive, negative, neutral

In [32]:
###################################################################################
# function to plot graphs
###################################################################################
def plot_graphs(b):
    '''
    Button callback: fetch ratings and sentiment scores for the movie named
    in the movieName widget and the video url in the movieUrl widget, then
    show four stacked plots (sentiment bars, rating bars, YouTube pie,
    Facebook pie).

    Parameters:
        b -- the value passed by the button's on_click hook; not used

    Returns None. Any failure is reported with a printed message instead of
    being raised.
    '''
    # dodge and value are not imported at file level, so the plotting
    # imports stay local to the callback (each one listed once).
    from bokeh.core.properties import value
    from bokeh.io import show
    from bokeh.layouts import column
    from bokeh.models import ColumnDataSource
    from bokeh.plotting import figure
    from bokeh.transform import dodge
    try:
        categories = ['Average Vote', 'Imdb Rating', 'Rotton Tomato Rating', 'Critic rating']

        p = figure(x_range=categories, plot_height=250, title="Rating Graph",
                   toolbar_location=None, tools="")
        # fetch once and reuse: every getRating() call costs two HTTP requests
        ratings = getRating(movieName.value)
        p.vbar(x=categories, top=ratings, width=0.2)

        p.xgrid.grid_line_color = None
        p.y_range.start = 0

        pos, neg, neu = youTubeDriver(movieName.value)
        youtube_data = [pos, neg, neu]
        facebook_data = faceBookDriver(movieUrl.value)
        pie1 = createPieChart(pos, neg, neu)
        pos, neg, neu = facebook_data
        pie2 = createPieChart(pos, neg, neu)
        sentiments = ['Positive ', 'Negative', 'Neutral']

        data = {'sentiments' : sentiments,
                'youtube'   : youtube_data,
                'facebook'   : [pos, neg, neu]}

        source = ColumnDataSource(data=data)

        p1 = figure(x_range=sentiments, y_range=(0, 100), plot_height=250, title="Sentiment analysis",
                   toolbar_location=None, tools="")

        # one bar per platform, offset side by side within each sentiment
        p1.vbar(x=dodge('sentiments', -0.25, range=p1.x_range), top='youtube', width=0.2, source=source,
               color="#e84d60", legend=value("youtube"))

        p1.vbar(x=dodge('sentiments',  0.0,  range=p1.x_range), top='facebook', width=0.2, source=source,
               color="#718dbf", legend=value("facebook"))

        p1.x_range.range_padding = 0.1
        p1.xgrid.grid_line_color = None
        p1.legend.location = "top_left"
        p1.legend.orientation = "horizontal"
        show(column(p1, p,pie1,pie2))
    except Exception as exc:
        print("Error in plotting graph")
        # surface the cause; the generic message alone made failures opaque
        print(repr(exc))
        return None

In [33]:
###################################################################################
# function to create a pie chart
###################################################################################
def createPieChart(pos, neg, neu):
    '''
    Build a Bokeh pie chart with one wedge each for the positive, negative
    and neutral percentages, plus a hover tool and a legend on the right.

    Returns the figure; it is not shown here.
    '''
    # cumulative fractions of the full circle at each wedge boundary
    cumulative = (0, (0+pos)/100, (pos+neg)/100, 1)
    angles = [fraction*2*pi for fraction in cumulative]

    wedge_data = dict(
        starts=angles[:-1],
        ends=angles[1:],
        labels=['Positive', 'Negative', 'Nuetral'],
        colors=['green', 'red','blue'],
        amounts=[pos, neg, neu],
    )
    source = ColumnDataSource(wedge_data)

    plot = figure(toolbar_location="above")
    plot.add_tools(
        HoverTool(
            tooltips=[
                ('type', '@labels'),
                ('quantity','@amounts'),
            ]
        )
    )

    wedges = plot.wedge(0, 0, radius=1, start_angle='starts', end_angle='ends', color='colors', source=source)

    plot.add_layout(
        Legend(items=[LegendItem(label=dict(field="labels"), renderers=[wedges])], location=(0, 0)),
        'right',
    )
    return plot


In [35]:

# Input widgets. plot_graphs() reads movieName.value and movieUrl.value as
# globals when the button is clicked, so these names must exist at module level.
movieName = widgets.Text(description="Movie name:",value = "Red Sparrow")
movieUrl = widgets.Text(description="Movie url:",value = "https://www.facebook.com/RedSparrowMovie/videos/575551206148549/")
button = widgets.Button(description='Enter')
# NOTE(review): nothing visible in this notebook ever writes to `status`;
# it is displayed but stays empty.
status = widgets.Label()
# on_click passes one argument to the callback (plot_graphs' `b` parameter).
button.on_click(plot_graphs)
# Runs once, when this cell executes, so the file name is built from the
# widget's value at that moment rather than from whatever is typed later.
output_file(movieName.value+"_analysis.html")

# Last expression of the cell: displays the form.
widgets.VBox([movieName,movieUrl,button,status])


# Sample Input:
# Movie name - FaceBook URL
#
# Red Sparrow - https://www.facebook.com/RedSparrowMovie/videos/575551206148549/
# Avengers - https://www.facebook.com/avengers/videos/870485249676149/
# The Revenant - https://www.facebook.com/RevenantMovie/videos/782677141866187/




A Jupyter Widget