In [10]:
#Tweet Retrieval Import

import GetOldTweets3 as got
import sys
import json
from dateutil.parser import parse
#Sentiment Analysis Imports
from textblob import TextBlob
import pandas as pd
import numpy as np

# Configuration

# Tweet Retrieval

In [51]:
class tweetFetcher:
    """Fetch tweets about each movie's actors and archive them as JSON.

    The fetcher works from ``movie_actor_list``: a list of dictionaries, each
    read with the keys ``'title'``, ``'release_date'``, ``'end_date'`` and
    ``'actors'`` (an iterable of actor names used as search queries).
    """

    # Movies released before this date are skipped by query_tweets_as_JSON.
    EARLIEST_RELEASE_DATE = '2007-01-01'

    def __init__(self):
        # Start empty so the getter and the query loop are usable before a
        # list has been loaded (previously this raised AttributeError).
        self.movie_actor_list = []

    ### FUNCTIONS THAT HANDLE ACTOR LIST ###

    #TODO needs to be changed to update actor_list based on how jennifer creates actor_lists in the other notebook

    def create_movie_actor_list_from_file(self, input_actor_file):
        """Load ``movie_actor_list`` from the JSON file at ``input_actor_file``."""
        with open(input_actor_file, 'r') as input_file:
            self.movie_actor_list = json.load(input_file)

    def set_movie_actor_list(self, provided_actor_list):
        """Replace ``movie_actor_list`` with ``provided_actor_list``."""
        self.movie_actor_list = provided_actor_list

    def get_movie_actor_list(self):
        """Return the current ``movie_actor_list``."""
        return self.movie_actor_list

    ### FUNCTIONS THAT QUERY DATA

    def query_tweets_as_JSON(self, output_file, tweets_per_actor, movie_limit, verbose = False):
        """Query tweets for every actor of every movie and dump them as JSON.

        Movies released before ``EARLIEST_RELEASE_DATE`` are skipped and do not
        count towards ``movie_limit``.

        Args:
            output_file: Path of the JSON file to write. It is opened (and
                truncated) before any query runs, so a bad path fails early.
            tweets_per_actor: Maximum number of tweets requested per actor.
            movie_limit: Maximum number of movies to process.
            verbose: When true, print progress every 50 tweets, 25 actors and
                25 movies, plus a final confirmation.

        The file receives a list of
        ``{'title': ..., 'actors': [{'name': ..., 'tweets': [...]}]}`` entries.
        Each element of ``'tweets'`` is itself a one-item list holding a
        ``{'date', 'text'}`` dictionary; sentiment_analyzer reads ``tweet[0]``,
        so that nesting is kept deliberately.
        """
        with open(output_file, 'w') as write_file:

            processed_query_results = []

            total_actor_count = 0
            total_tweet_count = 0
            movie_counter = 0

            # Loop-invariant cutoff, parsed once instead of once per movie.
            earliest_release = parse(self.EARLIEST_RELEASE_DATE)

            for movie in self.movie_actor_list:
                # '>=' so that exactly movie_limit movies are processed; the
                # previous '>' comparison let one extra movie through.
                if movie_counter >= movie_limit:
                    break

                title = movie['title']
                release_date = movie['release_date']
                end_date = movie['end_date']
                actors = movie['actors']

                if parse(release_date) < earliest_release:
                    continue

                movie_dictionary = {'title': title, 'actors': []}

                for actor in actors:

                    # Retrieve tweet objects for an actor within the movie's date window.
                    actor_tweet_object_list = self.query_tweets(actor, release_date, end_date, tweets_per_actor)

                    actor_dictionary = {'name': actor, 'tweets': []}
                    movie_dictionary['actors'].append(actor_dictionary)

                    # Convert each tweet object into its plain-data form.
                    for actor_tweet_object in actor_tweet_object_list:
                        actor_dictionary['tweets'].append(self.parse_tweet_object([actor_tweet_object]))

                        total_tweet_count += 1
                        if verbose and total_tweet_count % 50 == 0:
                            print("Total Tweets Processed: " + str(total_tweet_count))

                    total_actor_count += 1
                    if verbose and total_actor_count % 25 == 0:
                        print("Total Actors Processed: " + str(total_actor_count))

                processed_query_results.append(movie_dictionary)
                movie_counter += 1
                if verbose and movie_counter % 25 == 0:
                    print('Movies Processed: ' + str(movie_counter))

            json.dump(processed_query_results, write_file)
            if verbose:
                print("JSON written to "+ output_file)

    # Use the GOT3 module to get tweets
    # Provide dates in "YYYY-MM-DD" Format
    def query_tweets(self, query, start_date = "2006-03-21", end_date = "2019-06-30", max_tweets = 1):
        """Return the tweet objects GetOldTweets3 finds for ``query``.

        Args:
            query: Search text passed to ``setQuerySearch``.
            start_date: Passed to ``setSince``, "YYYY-MM-DD".
            end_date: Passed to ``setUntil``, "YYYY-MM-DD".
            max_tweets: Passed to ``setMaxTweets``.
        """
        tweetCriteria = got.manager.TweetCriteria().setQuerySearch(query)\
                                               .setSince(start_date)\
                                               .setUntil(end_date)\
                                               .setMaxTweets(max_tweets)

        return got.manager.TweetManager.getTweets(tweetCriteria)

    # Given a list of tweet objects, return a list of dictionaries
    # where each dictionary looks like {date: strTweetDate, text: strText}
    def parse_tweet_object(self, tweet_object_list):
        """Convert tweet objects into ``{"date": str, "text": ...}`` dictionaries.

        Only the ``date`` and ``text`` attributes of each object are read;
        ``date`` is converted with ``str``.
        """
        return [{"date": str(tweet.date), "text": tweet.text} for tweet in tweet_object_list]

## Example of Using TweetFetcher

In [37]:
#Example of how to use TweetFetcher

def fetch_tweets(input_actor_file, output_tweet_file, tweets_per_actor, number_of_movies, verbose = False):
    """Load a movie/actor list from JSON and write the fetched tweets to JSON.

    Args:
        input_actor_file: Path of the JSON movie/actor list to read.
        output_tweet_file: Path of the JSON tweet archive to write.
        tweets_per_actor: Number of tweets requested for each actor.
        number_of_movies: Forwarded to the fetcher as its movie limit.
        verbose: Forwarded to the fetcher to enable progress printing.
    """
    fetcher = tweetFetcher()
    fetcher.create_movie_actor_list_from_file(input_actor_file)
    fetcher.query_tweets_as_JSON(
        output_file=output_tweet_file,
        tweets_per_actor=tweets_per_actor,
        movie_limit=number_of_movies,
        verbose=verbose,
    )

    
# Run the fetch: read the movie/actor list from intermediates/actors.json,
# request 5 tweets per actor, and write the archive to intermediates/data_file.json.
# number_of_movies is forwarded to the fetcher as its movie limit; verbose turns on
# the periodic progress lines.
fetch_tweets(input_actor_file = "intermediates/actors.json",
             output_tweet_file = "intermediates/data_file.json",
             tweets_per_actor = 5,
             number_of_movies = 100,
             verbose = True)

Movies Processed: 25
Movies Processed: 50
Movies Processed: 75
Movies Processed: 100


TypeError: can only concatenate str (not "_io.TextIOWrapper") to str

# Sentiment Analysis

In [44]:
class sentiment_analyzer:
    """Score archived tweets with TextBlob and roll the scores up per actor and movie."""

    #Given a selection of raw text, return the sentiment of that text
    def get_text_sentiment(self, raw_text):
        """Return the polarity component of TextBlob's sentiment for ``raw_text``."""
        return TextBlob(raw_text).sentiment.polarity

    @staticmethod
    def _write_log(log_location, rows, row_format):
        """Write one ``row_format % row`` line to ``log_location`` for each row."""
        with open(log_location, 'w') as log_file:
            for row in rows:
                log_file.write(row_format % row)

    def mass_tweet_analysis(self, archive_file, write_file, minimum_sentiment_threshold = 0, verbose = False, logging_mode = False, actor_log_location = None, movie_log_location = None):
        """Annotate a tweet archive with tweet, actor and movie sentiment.

        Reads the JSON list of movies in ``archive_file``; each movie is read
        via ``'title'`` and ``'actors'``, each actor via ``'name'`` and
        ``'tweets'``, and each tweet entry is a list whose first element is a
        dictionary with a ``'text'`` key. The annotated structure is written
        to ``write_file`` as JSON.

        Args:
            archive_file: Path of the JSON tweet archive to read.
            write_file: Path of the annotated JSON file to write.
            minimum_sentiment_threshold: Tweets whose absolute sentiment is
                below this value are ignored and get no ``'tweet_sentiment'``.
            verbose: When true, print per-movie and per-actor progress and
                final totals.
            logging_mode: When true, also write the actor and movie logs.
            actor_log_location: Path for the actor log; each line is
                ``title name sentiment`` separated by spaces.
            movie_log_location: Path for the movie log; each line is
                ``title sentiment`` separated by a space.

        Raises:
            TypeError: If ``logging_mode`` is true and either log location is
                ``None``.

        An actor's sentiment is the mean over the tweets that pass the
        threshold; a movie's sentiment is the mean over its scored actors.
        Actors with no remaining tweets and movies with no scored actors get
        no sentiment key. Regardless of ``verbose``, a progress line is
        printed after every 100 movies.
        """
        # Fail before doing any work rather than after all tweets are scored,
        # which is where open(None) used to raise.
        if logging_mode and (actor_log_location is None or movie_log_location is None):
            raise TypeError(
                "logging_mode requires both actor_log_location and movie_log_location"
            )

        with open(archive_file) as json_file:
            parsed = json.load(json_file)

        # Just in case - for ease of regression
        movie_sentiment_log = []
        actor_sentiment_log = []

        movie_count = 0
        total_tweet_count = 0
        total_actor_count = 0

        for movie in parsed:

            movie_count += 1

            if verbose:
                print("Parsing: " + movie['title'])

            movie_sentiment_sum = 0
            actor_count = 0

            for actor in movie['actors']:

                actor_sentiment_sum = 0
                tweet_count = 0

                for tweet in actor['tweets']:
                    # Each archived tweet is wrapped in a one-item list.
                    tweet = tweet[0]
                    tweet_sentiment = self.get_text_sentiment(tweet['text'])

                    if abs(tweet_sentiment) < minimum_sentiment_threshold:
                        continue

                    tweet['tweet_sentiment'] = tweet_sentiment

                    tweet_count += 1
                    total_tweet_count += 1
                    actor_sentiment_sum += tweet_sentiment

                # No usable tweets: leave the actor unscored and uncounted.
                if tweet_count == 0:
                    continue

                actor_sentiment = actor_sentiment_sum / tweet_count
                actor['actor_sentiment'] = actor_sentiment

                if verbose:
                    print("\t Parsed " + actor['name'])

                if logging_mode:
                    actor_sentiment_log.append((movie['title'], actor['name'], actor_sentiment))

                actor_count += 1
                total_actor_count += 1
                movie_sentiment_sum += actor_sentiment

            if actor_count > 0:
                movie_sentiment = movie_sentiment_sum / actor_count
                movie['movie_sentiment'] = movie_sentiment

                if verbose:
                    print("Finished Parsing " + movie['title'])
                    print()

                if logging_mode:
                    movie_sentiment_log.append((movie['title'], movie_sentiment))

            # Reported for every 100th movie, including ones with no scored
            # actors; previously an early 'continue' skipped this line.
            if movie_count % 100 == 0:
                print("Parsed " + str(movie_count) + " movies")

        if verbose:
            print("Parsed "+str(movie_count)+" movies")
            print("Parsed "+str(total_actor_count)+ " actors")
            print("Parsed "+str(total_tweet_count)+ " tweets")
            print()

        if logging_mode:

            self._write_log(actor_log_location, actor_sentiment_log, '%s %s %s \n')

            if verbose:
                print("Wrote actor sentiment logs to " + actor_log_location)
                print()

            self._write_log(movie_log_location, movie_sentiment_log, '%s %s \n')

            if verbose:
                print("Wrote movie sentiment logs to " + movie_log_location)
                print()

        with open(write_file, 'w') as json_file:
            json.dump(parsed, json_file)

        if verbose:
            print("Wrote JSON with sentiment to "+write_file)

## Example of Using Sentiment Analyzer

In [45]:
# This cell rebinds the class name to an instance, so running it a second time
# used to fail with "'sentiment_analyzer' object is not callable". Only
# instantiate while the name still refers to the class; the cells below keep
# using the name `sentiment_analyzer` for the instance.
if isinstance(sentiment_analyzer, type):
    sentiment_analyzer = sentiment_analyzer()

### No Log, Silent Mode

Computes sentiment from tweets in JSON archive_file and writes a new json blob to write_file.

The only console output is a progress line printed after every 100 movies, reporting how many movies have been processed so far.

In [6]:
# Defaults apply here: minimum_sentiment_threshold = 0, verbose = False, logging_mode = False.
sentiment_analyzer.mass_tweet_analysis(archive_file = "intermediates/data_file.json", 
                                       write_file = "intermediates/sentiment_file.json")

Parsed 100 movies
Parsed 200 movies
Parsed 300 movies
Parsed 400 movies
Parsed 500 movies


### In Logging Mode and Silent Mode

Computes sentiment from tweets in JSON archive_file and writes a new json blob to write_file.

The only console output is a progress line printed after every 100 movies, reporting how many movies have been processed so far.

Also produces two log files. 

The actor_log_location parameter is the path of a text file that gets one line per scored actor, written as the space-separated values `movie_title actor_name actor_sentiment`.

The movie_log_location parameter is the path of a text file that gets one line per scored movie, written as the space-separated values `movie_title movie_sentiment`.

In [7]:
# logging_mode = True additionally writes the two log files; both log locations
# must be supplied because each is opened for writing.
sentiment_analyzer.mass_tweet_analysis(archive_file = "intermediates/data_file.json", 
                                       write_file = "intermediates/sentiment_file.json",
                                       logging_mode = True,
                                       actor_log_location = "intermediates/actor_log",
                                       movie_log_location = "intermediates/movie_log")

Parsed 100 movies
Parsed 200 movies
Parsed 300 movies
Parsed 400 movies
Parsed 500 movies


### No Log, Verbose Mode

Computes sentiment from tweets in JSON archive_file and writes a new json blob to write_file.

Along the way, provides updates on completion.

In [8]:
# verbose = True prints a line per movie and per scored actor, then the totals
# and the output path; no log files are written because logging_mode is left off.
sentiment_analyzer.mass_tweet_analysis(archive_file = "intermediates/data_file.json", 
                                       write_file = "intermediates/sentiment_file.json",
                                       verbose = True)

Parsing: Avatar
	 Parsed Sam Worthington
	 Parsed Zoe Saldana
	 Parsed Sigourney Weaver
Finished Parsing Avatar

Parsing: Pirates of the Caribbean: At World's End
	 Parsed Johnny Depp
	 Parsed Orlando Bloom
	 Parsed Keira Knightley
Finished Parsing Pirates of the Caribbean: At World's End

Parsing: Spectre
	 Parsed Daniel Craig
	 Parsed Christoph Waltz
	 Parsed Léa Seydoux
Finished Parsing Spectre

Parsing: The Dark Knight Rises
	 Parsed Christian Bale
	 Parsed Michael Caine
	 Parsed Gary Oldman
Finished Parsing The Dark Knight Rises

Parsing: John Carter
	 Parsed Taylor Kitsch
	 Parsed Lynn Collins
	 Parsed Samantha Morton
Finished Parsing John Carter

Parsing: Spider-Man 3
	 Parsed Tobey Maguire
	 Parsed Kirsten Dunst
	 Parsed James Franco
Finished Parsing Spider-Man 3

Parsing: Tangled
	 Parsed Zachary Levi
	 Parsed Mandy Moore
	 Parsed Donna Murphy
Finished Parsing Tangled

Parsing: Avengers: Age of Ultron
	 Parsed Robert Downey Jr.
	 Parsed Chris Hemsworth
	 Parsed Mark Ruffalo
Fi

	 Parsed Ben Kingsley
	 Parsed Sacha Baron Cohen
	 Parsed Asa Butterfield
Finished Parsing Hugo

Parsing: The Mummy: Tomb of the Dragon Emperor
	 Parsed Brendan Fraser
	 Parsed Jet Li
	 Parsed John Hannah
Finished Parsing The Mummy: Tomb of the Dragon Emperor

Parsing: Suicide Squad
	 Parsed Will Smith
	 Parsed Margot Robbie
	 Parsed Joel Kinnaman
Finished Parsing Suicide Squad

Parsing: Evan Almighty
	 Parsed Steve Carell
	 Parsed Lauren Graham
	 Parsed John Goodman
Finished Parsing Evan Almighty

Parsing: Edge of Tomorrow
	 Parsed Tom Cruise
	 Parsed Emily Blunt
	 Parsed Brendan Gleeson
Finished Parsing Edge of Tomorrow

Parsing: G.I. Joe: The Rise of Cobra
	 Parsed Dennis Quaid
	 Parsed Channing Tatum
	 Parsed Marlon Wayans
Finished Parsing G.I. Joe: The Rise of Cobra

Parsing: Inside Out
	 Parsed Amy Poehler
	 Parsed Phyllis Smith
	 Parsed Richard Kind
Finished Parsing Inside Out

Parsing: The Jungle Book
	 Parsed Neel Sethi
	 Parsed Bill Murray
	 Parsed Ben Kingsley
Finished Parsi

	 Parsed David Schwimmer
Finished Parsing Madagascar 3: Europe's Most Wanted

Parsing: Ghostbusters
	 Parsed Melissa McCarthy
	 Parsed Kristen Wiig
	 Parsed Kate McKinnon
Finished Parsing Ghostbusters

Parsing: Beowulf
	 Parsed Ray Winstone
	 Parsed Angelina Jolie
	 Parsed Anthony Hopkins
Finished Parsing Beowulf

Parsing: Kung Fu Panda 3
	 Parsed Jack Black
	 Parsed Bryan Cranston
	 Parsed Dustin Hoffman
Finished Parsing Kung Fu Panda 3

Parsing: Mission: Impossible - Ghost Protocol
	 Parsed Tom Cruise
	 Parsed Jeremy Renner
	 Parsed Simon Pegg
Finished Parsing Mission: Impossible - Ghost Protocol

Parsing: Rise of the Guardians
	 Parsed Chris Pine
	 Parsed Alec Baldwin
	 Parsed Jude Law
Finished Parsing Rise of the Guardians

Parsing: Exodus: Gods and Kings
	 Parsed Christian Bale
	 Parsed Joel Edgerton
	 Parsed John Turturro
Finished Parsing Exodus: Gods and Kings

Parsing: Star Trek
	 Parsed Chris Pine
	 Parsed Zachary Quinto
	 Parsed Leonard Nimoy
Finished Parsing Star Trek

Parsi

	 Parsed Jayma Mays
Finished Parsing The Smurfs

Parsing: Allegiant
	 Parsed Shailene Woodley
	 Parsed Theo James
	 Parsed Zoë Kravitz
Finished Parsing Allegiant

Parsing: Real Steel
	 Parsed Hugh Jackman
	 Parsed Dakota Goyo
	 Parsed Evangeline Lilly
Finished Parsing Real Steel

Parsing: The Smurfs 2
	 Parsed Hank Azaria
	 Parsed Neil Patrick Harris
	 Parsed Brendan Gleeson
Finished Parsing The Smurfs 2

Parsing: Ender's Game
	 Parsed Asa Butterfield
	 Parsed Harrison Ford
	 Parsed Hailee Steinfeld
Finished Parsing Ender's Game

Parsing: Live Free or Die Hard
	 Parsed Bruce Willis
	 Parsed Justin Long
Finished Parsing Live Free or Die Hard

Parsing: The Princess and the Frog
	 Parsed Anika Noni Rose
	 Parsed Bruno Campos
	 Parsed Keith David
Finished Parsing The Princess and the Frog

Parsing: The Martian
	 Parsed Matt Damon
	 Parsed Jessica Chastain
	 Parsed Kristen Wiig
Finished Parsing The Martian

Parsing: Public Enemies
	 Parsed Christian Bale
	 Parsed Johnny Depp
	 Parsed Giovan

Finished Parsing It's Complicated

Parsing: Ocean's Thirteen
	 Parsed George Clooney
	 Parsed Brad Pitt
	 Parsed Matt Damon
Finished Parsing Ocean's Thirteen

Parsing: Divergent
	 Parsed Shailene Woodley
	 Parsed Theo James
	 Parsed Kate Winslet
Finished Parsing Divergent

Parsing: Arthur Christmas
	 Parsed James McAvoy
	 Parsed Hugh Laurie
	 Parsed Bill Nighy
Finished Parsing Arthur Christmas

Parsing: Mirror Mirror
	 Parsed Julia Roberts
	 Parsed Lily Collins
	 Parsed Armie Hammer
Finished Parsing Mirror Mirror

Parsing: Scott Pilgrim vs. the World
	 Parsed Michael Cera
	 Parsed Mary Elizabeth Winstead
	 Parsed Kieran Culkin
Finished Parsing Scott Pilgrim vs. the World

Parsing: Dredd
	 Parsed Karl Urban
	 Parsed Olivia Thirlby
	 Parsed Lena Headey
Finished Parsing Dredd

Parsing: Cats & Dogs 2 : The Revenge of Kitty Galore
	 Parsed James Marsden
	 Parsed Nick Nolte
	 Parsed Christina Applegate
Finished Parsing Cats & Dogs 2 : The Revenge of Kitty Galore

Parsing: Jumper
	 Parsed Hay

	 Parsed Ramón Rodríguez
	 Parsed Will Rothhaar
Finished Parsing Battle: Los Angeles

Parsing: War Horse
	 Parsed Tom Hiddleston
	 Parsed Benedict Cumberbatch
	 Parsed Toby Kebbell
Finished Parsing War Horse

Parsing: The Monuments Men
	 Parsed Matt Damon
	 Parsed Cate Blanchett
	 Parsed George Clooney
Finished Parsing The Monuments Men

Parsing: Wall Street: Money Never Sleeps
	 Parsed Michael Douglas
	 Parsed Shia LaBeouf
	 Parsed Josh Brolin
Finished Parsing Wall Street: Money Never Sleeps

Parsing: Dracula Untold
	 Parsed Luke Evans
	 Parsed Sarah Gadon
	 Parsed Dominic Cooper
Finished Parsing Dracula Untold

Parsing: Stardust
	 Parsed Claire Danes
	 Parsed Michelle Pfeiffer
Finished Parsing Stardust

Parsing: The Dilemma
	 Parsed Kevin James
	 Parsed Vince Vaughn
	 Parsed Winona Ryder
Finished Parsing The Dilemma

Parsing: Underworld: Awakening
	 Parsed Kate Beckinsale
	 Parsed Stephen Rea
	 Parsed Michael Ealy
Finished Parsing Underworld: Awakening

Parsing: Rock of Ages
	 Parsed

	 Parsed Matt Damon
	 Parsed Tony Kgoroge
Finished Parsing Invictus

Parsing: State of Play
	 Parsed Russell Crowe
	 Parsed Ben Affleck
	 Parsed Rachel McAdams
Finished Parsing State of Play

Parsing: Duplicity
	 Parsed Clive Owen
	 Parsed Julia Roberts
	 Parsed Paul Giamatti
Finished Parsing Duplicity

Parsing: Planet 51
	 Parsed Dwayne Johnson
	 Parsed Seann William Scott
	 Parsed Jessica Biel
Finished Parsing Planet 51

Parsing: Trouble with the Curve
	 Parsed Clint Eastwood
	 Parsed Amy Adams
	 Parsed Justin Timberlake
Finished Parsing Trouble with the Curve

Parsing: Edge of Darkness
	 Parsed Mel Gibson
	 Parsed Ray Winstone
	 Parsed Danny Huston
Finished Parsing Edge of Darkness

Parsing: Righteous Kill
	 Parsed Robert De Niro
	 Parsed Carla Gugino
	 Parsed 50 Cent
Finished Parsing Righteous Kill

Parsing: The Soloist
	 Parsed Robert Downey Jr.
	 Parsed Jamie Foxx
	 Parsed Catherine Keener
Finished Parsing The Soloist

Parsing: Priest
	 Parsed Paul Bettany
	 Parsed Karl Urban
	 P

	 Parsed Scarlett Johansson
	 Parsed Thomas Haden Church
Finished Parsing We Bought a Zoo

Parsing: Knowing
	 Parsed Nicolas Cage
	 Parsed Rose Byrne
	 Parsed Chandler Canterbury
Finished Parsing Knowing

Parsing: Crazy, Stupid, Love.
	 Parsed Steve Carell
	 Parsed Julianne Moore
	 Parsed Ryan Gosling
Finished Parsing Crazy, Stupid, Love.

Parsing: Moneyball
	 Parsed Brad Pitt
	 Parsed Jonah Hill
	 Parsed Philip Seymour Hoffman
Finished Parsing Moneyball

Parsing: Non-Stop
	 Parsed Liam Neeson
	 Parsed Julianne Moore
	 Parsed Scoot McNairy
Finished Parsing Non-Stop

Parsing: Race to Witch Mountain
	 Parsed Dwayne Johnson
	 Parsed AnnaSophia Robb
	 Parsed Alexander Ludwig
Finished Parsing Race to Witch Mountain

Parsing: Hansel & Gretel: Witch Hunters
	 Parsed Jeremy Renner
	 Parsed Gemma Arterton
	 Parsed Famke Janssen
Finished Parsing Hansel & Gretel: Witch Hunters

Parsing: I Am Number Four
	 Parsed Alex Pettyfer
	 Parsed Timothy Olyphant
	 Parsed Teresa Palmer
Finished Parsing I Am 

###  Logging Mode, Verbose Mode

Computes sentiment from tweets in JSON archive_file and writes a new json blob to write_file.

Along the way, provides updates, and produces log files as described above.

In [50]:
# With minimum_sentiment_threshold = 0.2, tweets whose absolute sentiment is below
# 0.2 are ignored; an actor left with no tweets is skipped, so no "Parsed <name>"
# line is printed for that actor.
sentiment_analyzer.mass_tweet_analysis(archive_file = "intermediates/data_file.json", 
                                       write_file = "intermediates/sentiment_file.json",
                                       minimum_sentiment_threshold = 0.2,
                                       verbose = True,
                                       logging_mode = True,
                                       actor_log_location = "intermediates/actor_log",
                                       movie_log_location = "intermediates/movie_log")

Parsing: Avatar
	 Parsed Sam Worthington
	 Parsed Zoe Saldana
	 Parsed Sigourney Weaver
Finished Parsing Avatar

Parsing: Pirates of the Caribbean: At World's End
	 Parsed Johnny Depp
	 Parsed Orlando Bloom
Finished Parsing Pirates of the Caribbean: At World's End

Parsing: Spectre
	 Parsed Daniel Craig
	 Parsed Christoph Waltz
	 Parsed Léa Seydoux
Finished Parsing Spectre

Parsing: The Dark Knight Rises
	 Parsed Christian Bale
	 Parsed Michael Caine
	 Parsed Gary Oldman
Finished Parsing The Dark Knight Rises

Parsing: John Carter
	 Parsed Taylor Kitsch
	 Parsed Lynn Collins
	 Parsed Samantha Morton
Finished Parsing John Carter

Parsing: Spider-Man 3
	 Parsed Tobey Maguire
	 Parsed Kirsten Dunst
	 Parsed James Franco
Finished Parsing Spider-Man 3

Parsing: Tangled
	 Parsed Zachary Levi
	 Parsed Mandy Moore
	 Parsed Donna Murphy
Finished Parsing Tangled

Parsing: Avengers: Age of Ultron
	 Parsed Chris Hemsworth
	 Parsed Mark Ruffalo
Finished Parsing Avengers: Age of Ultron

Parsing: Har

	 Parsed Keanu Reeves
	 Parsed Hiroyuki Sanada
	 Parsed Kou Shibasaki
Finished Parsing 47 Ronin

Parsing: Captain America: The Winter Soldier
	 Parsed Samuel L. Jackson
	 Parsed Scarlett Johansson
Finished Parsing Captain America: The Winter Soldier

Parsing: Shrek Forever After
	 Parsed Mike Myers
	 Parsed Eddie Murphy
	 Parsed Cameron Diaz
Finished Parsing Shrek Forever After

Parsing: Tomorrowland
	 Parsed George Clooney
	 Parsed Raffey Cassidy
Finished Parsing Tomorrowland

Parsing: Big Hero 6
	 Parsed Ryan Potter
	 Parsed Daniel Henney
Finished Parsing Big Hero 6

Parsing: Wreck-It Ralph
	 Parsed John C. Reilly
	 Parsed Sarah Silverman
	 Parsed Jack McBrayer
Finished Parsing Wreck-It Ralph

Parsing: Independence Day: Resurgence
	 Parsed Liam Hemsworth
	 Parsed Jeff Goldblum
	 Parsed Bill Pullman
Finished Parsing Independence Day: Resurgence

Parsing: How to Train Your Dragon
	 Parsed Jay Baruchel
	 Parsed Gerard Butler
	 Parsed Craig Ferguson
Finished Parsing How to Train Your Dra