In [38]:
#Tweet Retrieval Import

import GetOldTweets3 as got
import sys
import json
from dateutil.parser import parse
#Sentiment Analysis Imports
from textblob import TextBlob
import pandas as pd
import numpy as np

# Configuration

# Tweet Retrieval

In [39]:
class tweetFetcher:
    """Query tweets about each movie's actors and archive them as JSON.

    The fetcher works from ``self.movie_actor_list``: a list of dicts that are
    read with the keys ``'title'``, ``'release_date'``, ``'end_date'`` and
    ``'actors'`` (see ``query_tweets_as_JSON``).
    """

    def __init__(self):
        # Start with an empty list so the getter and the query method can be
        # used (and simply produce nothing) before a list is loaded or set.
        self.movie_actor_list = []

    ### FUNCTIONS THAT HANDLE ACTOR LIST ###

    #TODO needs to be changed to update actor_list based on how jennifer creates actor_lists in the other notebook

    def create_movie_actor_list_from_file(self, input_actor_file):
        """Replace the internal movie/actor list with the JSON content of ``input_actor_file``."""
        with open(input_actor_file, 'r') as input_file:
            self.movie_actor_list = json.load(input_file)

    def set_movie_actor_list(self, provided_actor_list):
        """Replace the internal movie/actor list with ``provided_actor_list`` (stored by reference)."""
        self.movie_actor_list = provided_actor_list

    def get_movie_actor_list(self):
        """Return the internal movie/actor list."""
        return self.movie_actor_list

    ### FUNCTIONS THAT QUERY DATA

    def query_tweets_as_JSON(self, output_file, tweets_per_actor, movie_limit):
        """Query tweets for every actor of every movie and dump the results as JSON.

        Parameters
        ----------
        output_file : path of the JSON file to write.
        tweets_per_actor : passed to ``query_tweets`` as ``max_tweets``.
        movie_limit : maximum number of movies written to the archive. Movies
            skipped by the release-date filter do not count towards it.

        The file receives a list of
        ``{'title': ..., 'actors': [{'name': ..., 'tweets': [...]}, ...]}``
        dicts. Each entry of ``'tweets'`` is itself a one-element list holding
        a ``{'date', 'text'}`` dict; that nesting is kept because
        ``sentiment_analyzer.mass_tweet_analysis`` reads ``tweet[0]``.

        The output file is only opened once every query has finished, so a
        failing query does not leave a truncated archive behind.
        """
        # Movies released before this date are skipped.
        # NOTE(review): presumably because tweets are scarce that early -- confirm.
        earliest_release = parse('2007-01-01')

        processed_query_results = []

        for movie in self.movie_actor_list:
            # ``>=`` (not ``>``): stop as soon as exactly ``movie_limit``
            # movies have been collected.
            if len(processed_query_results) >= movie_limit:
                break

            title = movie['title']
            release_date = movie['release_date']
            end_date = movie['end_date']
            actors = movie['actors']

            if parse(release_date) < earliest_release:
                continue

            movie_dictionary = {'title': title, 'actors': []}

            for actor in actors:
                # Tweets matching the actor's name between the movie's
                # release date and end date.
                actor_tweet_object_list = self.query_tweets(actor, release_date, end_date, tweets_per_actor)

                movie_dictionary['actors'].append({
                    'name': actor,
                    'tweets': [self.parse_tweet_object([tweet_object])
                               for tweet_object in actor_tweet_object_list],
                })

            processed_query_results.append(movie_dictionary)

        with open(output_file, 'w') as write_file:
            json.dump(processed_query_results, write_file)

    # Use the GOT3 module to get a tweet
    #Provide dates in "YYYY-MM-DD" Format
    def query_tweets(self, query, start_date = "2006-03-21", end_date = "2019-06-30", max_tweets = 1):
        """Return the GetOldTweets3 tweet objects found for ``query``.

        ``start_date`` and ``end_date`` are forwarded to ``setSince`` and
        ``setUntil``; ``max_tweets`` is forwarded to ``setMaxTweets``.
        """
        tweetCriteria = got.manager.TweetCriteria().setQuerySearch(query)\
                                               .setSince(start_date)\
                                               .setUntil(end_date)\
                                               .setMaxTweets(max_tweets)

        return got.manager.TweetManager.getTweets(tweetCriteria)

    # Given a list of tweet objects, return a list of dictionaries
    # where each dictionary looks like {date: strTweetDate, text: strText}
    def parse_tweet_object(self, tweet_object_list):
        """Convert tweet objects into ``{"date": str, "text": ...}`` dicts.

        Only the ``date`` and ``text`` attributes of each tweet are read;
        ``date`` is converted with ``str``.
        """
        return [{"date": str(tweet.date), "text": tweet.text}
                for tweet in tweet_object_list]

## Example of Using TweetFetcher

In [40]:
#Example of how to use TweetFetcher

def fetch_tweets(input_actor_file, output_tweet_file, tweets_per_actor, number_of_movies):
    """Load a movie/actor list from JSON and archive the tweets queried for it.

    ``input_actor_file`` is read with
    ``tweetFetcher.create_movie_actor_list_from_file``; the query results are
    written to ``output_tweet_file`` by ``tweetFetcher.query_tweets_as_JSON``,
    which receives ``tweets_per_actor`` and, as its ``movie_limit``,
    ``number_of_movies``.
    """
    fetcher = tweetFetcher()
    fetcher.create_movie_actor_list_from_file(input_actor_file)
    fetcher.query_tweets_as_JSON(
        output_file=output_tweet_file,
        tweets_per_actor=tweets_per_actor,
        movie_limit=number_of_movies,
    )

    
# Build the tweet archive: 6 tweets requested per actor, movie limit of 50.
fetch_tweets(
    "intermediates/actors.json",
    "intermediates/data_file.json",
    tweets_per_actor=6,
    number_of_movies=50,
)

# Sentiment Analysis

In [44]:
class sentiment_analyzer:
    """Score archived tweets with TextBlob and aggregate the scores per actor and per movie."""

    #Given a selection of raw text, return the sentiment of that text
    def get_text_sentiment(self, raw_text):
        """Return the TextBlob sentiment polarity of ``raw_text``."""
        return TextBlob(raw_text).sentiment.polarity

    def mass_tweet_analysis(self, archive_file, write_file, verbose = False, logging_mode = False, actor_log_location = None, movie_log_location = None):
        """Add sentiment scores to a tweet archive and write the enriched JSON.

        ``archive_file`` must contain a JSON list of movies, each with a
        ``'title'`` and a list of ``'actors'``; each actor has a ``'name'`` and
        a list of ``'tweets'``, where every tweet is a one-element list
        wrapping a dict with a ``'text'`` key.

        The following keys are added before the data is dumped to
        ``write_file``:

        * ``'tweet_sentiment'`` on every tweet dict;
        * ``'actor_sentiment'`` (mean of the actor's tweet sentiments) on
          every actor with at least one tweet -- actors without tweets are
          left untouched;
        * ``'movie_sentiment'`` (mean of the actor sentiments) on every movie,
          or ``None`` when none of its actors has tweets.

        Parameters
        ----------
        archive_file : path of the JSON archive to read.
        write_file : path of the JSON file to write.
        verbose : if true, print progress for every movie and actor plus a
            final summary. Independently of this flag, a progress line is
            printed every 100 movies.
        logging_mode : if true, also write two plain-text logs:
            ``actor_log_location`` gets one ``title name sentiment`` line per
            scored actor and ``movie_log_location`` one ``title sentiment``
            line per scored movie.
        actor_log_location, movie_log_location : log paths; both are required
            when ``logging_mode`` is true.

        Raises
        ------
        ValueError
            If ``logging_mode`` is true and a log location is missing. This is
            checked up front so the failure does not happen after all tweets
            have already been scored.
        """
        if logging_mode and (actor_log_location is None or movie_log_location is None):
            raise ValueError("logging_mode requires both actor_log_location and movie_log_location")

        with open(archive_file) as json_file:
            parsed = json.load(json_file)

        # Just in case - for ease of regression
        movie_sentiment_log = []
        actor_sentiment_log = []

        movie_count = 0
        total_tweet_count = 0
        total_actor_count = 0

        for movie in parsed:

            movie_count += 1

            if verbose:
                print("Parsing: " + movie['title'])

            movie_sentiment_sum = 0
            actor_count = 0

            for actor in movie['actors']:

                tweets = actor['tweets']
                if len(tweets) == 0:
                    continue

                actor_sentiment_sum = 0

                for tweet in tweets:
                    # Each archived tweet is a one-element list around the dict.
                    tweet = tweet[0]
                    tweet['tweet_sentiment'] = self.get_text_sentiment(tweet['text'])
                    actor_sentiment_sum += tweet['tweet_sentiment']

                total_tweet_count += len(tweets)

                actor_sentiment = actor_sentiment_sum / len(tweets)
                actor['actor_sentiment'] = actor_sentiment

                if verbose:
                    print("\t Parsed " + actor['name'])

                if logging_mode:
                    actor_sentiment_log.append((movie['title'], actor['name'], actor_sentiment))

                actor_count += 1
                total_actor_count += 1
                movie_sentiment_sum += actor_sentiment

            # Average over the actors that were actually scored. (Dividing by
            # the last actor's tweet count gave wrong values and could divide
            # by zero.)
            if actor_count:
                movie_sentiment = movie_sentiment_sum / actor_count
            else:
                movie_sentiment = None
            movie['movie_sentiment'] = movie_sentiment

            if verbose:
                print("Finished Parsing " + movie['title'])
                print()

            if movie_count % 100 == 0:
                print("Parsed " + str(movie_count) + " movies")

            # Movies without a score are left out of the log, mirroring the
            # actor log, which only lists actors that have tweets.
            if logging_mode and movie_sentiment is not None:
                movie_sentiment_log.append((movie['title'], movie_sentiment))

        if verbose:
            print("Parsed "+str(movie_count)+" movies")
            print("Parsed "+str(total_actor_count)+ " actors")
            print("Parsed "+str(total_tweet_count)+ " tweets")
            print()

        if logging_mode:

            with open(actor_log_location, 'w') as log_file:
                for item in actor_sentiment_log:
                    log_file.write('%s %s %s \n' % item)

            if verbose:
                print("Wrote actor sentiment logs to " + actor_log_location)
                print()

            with open(movie_log_location, 'w') as log_file:
                for item in movie_sentiment_log:
                    log_file.write('%s %s \n' % item)

            if verbose:
                print("Wrote movie sentiment logs to " + movie_log_location)
                print()

        with open(write_file, 'w') as json_file:
            json.dump(parsed, json_file)

        if verbose:
            print("Wrote JSON with sentiment to "+write_file)

## Example of Using Sentiment Analyzer

In [45]:
# The instance reuses the class's name, and the cells below call
# `sentiment_analyzer.mass_tweet_analysis(...)` on it. Only instantiate while
# the name still refers to the class, so that re-running this cell does not
# fail with "'sentiment_analyzer' object is not callable".
if isinstance(sentiment_analyzer, type):
    sentiment_analyzer = sentiment_analyzer()

### No Log, Silent Mode

Computes sentiment from tweets in JSON archive_file and writes a new json blob to write_file.

Only one progress message is printed: after every 100 movies parsed, it reports the number of movies processed so far.

In [None]:
# Default flags: verbose output and log files are both off.
sentiment_analyzer.mass_tweet_analysis(
    "intermediates/data_file.json",
    "intermediates/sentiment_file.json",
)

### In Logging Mode and Silent Mode

Computes sentiment from tweets in JSON archive_file and writes a new json blob to write_file.

Only one progress message is printed: after every 100 movies parsed, it reports the number of movies processed so far.

Also produces two log files. 

`actor_log_location` is the path of a log file in which each line holds three space-separated fields: movie_title actor_name actor_sentiment

`movie_log_location` is the path of a log file in which each line holds two space-separated fields: movie_title movie_sentiment

In [None]:
# Log files on, verbose output off.
sentiment_analyzer.mass_tweet_analysis(
    "intermediates/data_file.json",
    "intermediates/sentiment_file.json",
    logging_mode=True,
    actor_log_location="intermediates/actor_log",
    movie_log_location="intermediates/movie_log",
)

### No Log, Verbose Mode

Computes sentiment from tweets in JSON archive_file and writes a new json blob to write_file.

Along the way, provides updates on completion.

In [None]:
# Verbose output on, log files off.
sentiment_analyzer.mass_tweet_analysis(
    "intermediates/data_file.json",
    "intermediates/sentiment_file.json",
    verbose=True,
)

###  Logging Mode, Verbose Mode

Computes sentiment from tweets in JSON archive_file and writes a new json blob to write_file.

Along the way, provides updates, and produces log files as described above.

In [46]:
# Verbose output and log files both on.
sentiment_analyzer.mass_tweet_analysis(
    "intermediates/data_file.json",
    "intermediates/sentiment_file.json",
    verbose=True,
    logging_mode=True,
    actor_log_location="intermediates/actor_log",
    movie_log_location="intermediates/movie_log",
)

Parsing: Avatar
	 Parsed Sam Worthington
	 Parsed Zoe Saldana
	 Parsed Sigourney Weaver
Finished Parsing Avatar

Parsing: Pirates of the Caribbean: At World's End
	 Parsed Johnny Depp
	 Parsed Orlando Bloom
	 Parsed Keira Knightley
Finished Parsing Pirates of the Caribbean: At World's End

Parsing: Spectre
	 Parsed Daniel Craig
	 Parsed Christoph Waltz
	 Parsed Léa Seydoux
Finished Parsing Spectre

Parsing: The Dark Knight Rises
	 Parsed Christian Bale
	 Parsed Michael Caine
	 Parsed Gary Oldman
Finished Parsing The Dark Knight Rises

Parsing: John Carter
	 Parsed Taylor Kitsch
	 Parsed Lynn Collins
	 Parsed Samantha Morton
Finished Parsing John Carter

Parsing: Spider-Man 3
	 Parsed Tobey Maguire
	 Parsed Kirsten Dunst
	 Parsed James Franco
Finished Parsing Spider-Man 3

Parsing: Tangled
	 Parsed Zachary Levi
	 Parsed Mandy Moore
	 Parsed Donna Murphy
Finished Parsing Tangled

Parsing: Avengers: Age of Ultron
	 Parsed Robert Downey Jr.
	 Parsed Chris Hemsworth
	 Parsed Mark Ruffalo
Fi