A notebook for performing sentiment analysis on celebrity chef tweets, to assess how positively—and how objectively—their fan bases express excitement about them. While this analysis didn't get a place at the table in the final version of this project's current iteration, it was fun to do. Perhaps you'll find it intriguing, too.

# 0. Setup

In [9]:
import pymongo
import pandas as pd
import numpy as np
from tqdm import tqdm
from textblob import TextBlob

In [3]:
# Open a MongoDB client; no arguments are passed, so pymongo's default
# connection settings apply.
client = pymongo.MongoClient()

# Handle on the project database, and on the collection of stored tweets
# that the rest of the notebook reads from.
chefs = client["chefs_db"]
tweets = chefs["celebrity_tweets"]

In [4]:
# Load every stored tweet into a DataFrame with one row per tweet.
#
# The rows are collected in a plain list and the frame is built once at the
# end. Growing a DataFrame with `.append` inside the loop copies the whole
# frame on every iteration (quadratic time), and `DataFrame.append` no longer
# exists in pandas >= 2.0.
#
# Note: "Terms" is stored exactly as it comes from the database; retweet
# markers (u'RT') are NOT filtered out here.
columns = ["Chef", "Tweet", "Hashtags", "Terms"]

# Empty filter -> all documents; the projection keeps only the four fields
# used below and drops Mongo's `_id`.
projection = {"chef": 1, "tweet": 1, "hashtags": 1, "terms": 1, "_id": 0}

records = [
    {
        "Chef": tweet["chef"],
        "Tweet": tweet["tweet"],
        "Hashtags": tweet["hashtags"],
        "Terms": tweet["terms"],
    }
    for tweet in tqdm(tweets.find({}, projection))
]

# Passing `columns` fixes the column order and still yields the right
# (empty) frame when the collection has no documents. Rows are indexed
# 0..n-1, matching the counter-based index used previously.
tweets_df = pd.DataFrame(records, columns=columns)

# Total number of tweets loaded (kept for any later cell that reads it).
count = len(tweets_df)



In [5]:
# Group the tweet rows by chef; the per-chef groups are reused by the
# counting and sentiment cells below.
tweets_by_chef = tweets_df.groupby(by='Chef')

In [11]:
# Map each chef to the number of tweet rows in that chef's group.
num_tweets = {chef: len(group) for chef, group in tweets_by_chef}

In [12]:
# Display the per-chef tweet counts computed above.
num_tweets

{u'Alex Guarnaschelli': 177,
 u'Alton Brown': 3638,
 u'Bobby Flay': 952,
 u'Duff Goldman': 277,
 u'Giada De Laurentiis': 323,
 u'Guy Fieri': 1011,
 u'Ina Garten': 184,
 u'Marcela Valladolid': 77,
 u'Ree Drummond': 1363,
 u'Robert Irvine': 1566,
 u'Sunny Anderson': 473,
 u'Trisha Yearwood': 3353,
 u'Valerie Bertinelli': 546}

# 1. Analysis

In [6]:
# For each chef, join all of that chef's tweets into a single
# space-separated string and take TextBlob's sentiment of the combined text.
# Each value is TextBlob's `Sentiment(polarity, subjectivity)` result.
overall_sentiment = {
    chef: TextBlob(' '.join(group['Tweet'])).sentiment
    for chef, group in tqdm(tweets_by_chef)
}



In [7]:
# Display the per-chef (polarity, subjectivity) results computed above.
overall_sentiment

{u'Alex Guarnaschelli': Sentiment(polarity=0.32679404953642155, subjectivity=0.5520099192146438),
 u'Alton Brown': Sentiment(polarity=0.20729714841301863, subjectivity=0.5234751930168455),
 u'Bobby Flay': Sentiment(polarity=0.27242927743836776, subjectivity=0.477308313155771),
 u'Duff Goldman': Sentiment(polarity=0.27260972626355223, subjectivity=0.4632258812615951),
 u'Giada De Laurentiis': Sentiment(polarity=0.25374179468244723, subjectivity=0.5691501630478002),
 u'Guy Fieri': Sentiment(polarity=0.2867979275928453, subjectivity=0.5849825331604992),
 u'Ina Garten': Sentiment(polarity=0.2680021740141368, subjectivity=0.5603314112103178),
 u'Marcela Valladolid': Sentiment(polarity=0.6810437694099379, subjectivity=0.853105590062112),
 u'Ree Drummond': Sentiment(polarity=0.31747155031574215, subjectivity=0.6002326562686514),
 u'Robert Irvine': Sentiment(polarity=0.32760423214821466, subjectivity=0.5731099333236773),
 u'Sunny Anderson': Sentiment(polarity=0.2236912304945533, subjectivity=0

# 2. Scoring

In [13]:
def _sentiment_score(polarity, subjectivity, n_tweets):
    """Turn one chef's sentiment into a grade-like integer score.

    The score is

        100 - ln(polarity) / ln(subjectivity) * ln(n_tweets)

    rounded to the nearest integer. It is the last of four iterations:

    1. naive baseline: ``polarity / subjectivity``
    2. weighted by volume: ``polarity / subjectivity * n_tweets``
    3. log-scaled: ``ln(polarity) / ln(subjectivity) * ln(n_tweets)``
    4. subtracted from 100 so the result reads like a grade (used here)

    Parameters
    ----------
    polarity : float
        Sentiment polarity of the chef's combined tweets.
    subjectivity : float
        Sentiment subjectivity of the chef's combined tweets.
    n_tweets : int
        Number of tweets collected for the chef.

    Returns
    -------
    int or None
        The rounded score, or ``None`` when the formula has no finite value
        (e.g. polarity <= 0, where the logarithm is undefined, or
        subjectivity == 1, where the denominator is zero). Previously these
        inputs made ``int(np.round(...))`` raise on NaN/inf.
    """
    # The non-finite cases are handled explicitly below, so silence numpy's
    # divide/invalid warnings for this one expression.
    with np.errstate(divide="ignore", invalid="ignore"):
        score = 100 - (np.log(polarity) / np.log(subjectivity) * np.log(n_tweets))

    if not np.isfinite(score):
        return None
    return int(np.round(score))


# Score every chef; chefs whose score is undefined are recorded as None
# rather than aborting the whole cell.
sentiment_scores = {}
for chef, sentiment in overall_sentiment.items():
    polarity, subjectivity = sentiment
    sentiment_scores[chef] = _sentiment_score(polarity, subjectivity, num_tweets[chef])

In [14]:
# Display the final per-chef sentiment scores.
sentiment_scores 

{u'Alex Guarnaschelli': 90,
 u'Alton Brown': 80,
 u'Bobby Flay': 88,
 u'Duff Goldman': 91,
 u'Giada De Laurentiis': 86,
 u'Guy Fieri': 84,
 u'Ina Garten': 88,
 u'Marcela Valladolid': 89,
 u'Ree Drummond': 84,
 u'Robert Irvine': 85,
 u'Sunny Anderson': 85,
 u'Trisha Yearwood': 82,
 u'Valerie Bertinelli': 77}