In [None]:
### data
import pandas as pd
import numpy as np

### APIs
import tweepy

## Plots
from IPython.display import display
import matplotlib.pyplot as plt
import seaborn as sea
# Render matplotlib figures inline. `InteractiveShell.magic()` is deprecated,
# so call the supported `run_line_magic(name, line)` API instead.
get_ipython().run_line_magic('matplotlib', 'inline')

# Sentiment analysis
from textblob import TextBlob
import re, os, json

In [None]:
# Read the Twitter API credentials from data/credentials.json, resolved
# against the current working directory, into `key`.
credentials_path = os.path.join(os.getcwd(), "data/credentials.json")
with open(credentials_path) as data_file:
    key = json.load(data_file)

# Entries read later during authentication: "API_KEY", "API_SECRET",
# "ACCESS_TOKEN", "ACCESS_TOKEN_SECRET". Do not print them here: cell output
# is saved with the notebook.

In [None]:
def twitter_setup():
    """Build a tweepy API client from the credentials in the global `key`.

    Creates an OAuth handler from the API key/secret pair, attaches the
    access token pair to it, and returns `tweepy.API` wrapping that handler.
    """
    oauth_handler = tweepy.OAuthHandler(key["API_KEY"], key["API_SECRET"])
    access_token, access_secret = key["ACCESS_TOKEN"], key["ACCESS_TOKEN_SECRET"]
    oauth_handler.set_access_token(access_token, access_secret)
    client = tweepy.API(oauth_handler)
    return client

# Shared API client; the timeline request below goes through it.
extractor = twitter_setup()



In [None]:
# Fetch the timeline of the `cdvel` account (count=100) and report how many
# tweets came back.
timeline_request = {"screen_name": "cdvel", "count": 100}
tweets = extractor.user_timeline(**timeline_request)
print("No. tweets extracted: {}\n".format(len(tweets)))

In [None]:
# Print the text of the first five entries of `tweets`, each followed by a
# blank line.
print("Latest 5 tweets")
for tweet in tweets[:5]:
    print(tweet.text, end="\n\n")


In [None]:
# One row per entry of `tweets`; the first column holds the tweet text.
data = pd.DataFrame(data=[status.text for status in tweets], columns=['Tweets'])

# (column name, per-status extractor) pairs, listed in the order the columns
# are appended to the frame.
column_extractors = [
    ('length', lambda status: len(status.text)),
    ('id', lambda status: status.id),
    ('created', lambda status: status.created_at),
    ('source', lambda status: status.source),
    ('no_likes', lambda status: status.favorite_count),
    ('no_retweets', lambda status: status.retweet_count),
    # "<screen_name>/status/<id>" path fragment for the tweet.
    ('slug', lambda status: ''.join([status.user.screen_name, '/status/', str(status.id)])),
]
for column_name, extract in column_extractors:
    data[column_name] = np.array([extract(status) for status in tweets])

# Preview the first ten rows.
display(data.head(10))

In [None]:
# Summary statistics: average tweet length, plus the most liked and most
# retweeted tweets.
mean_length = data['length'].mean()

print("Tweet's avg length: {}".format(mean_length))

likes_max = data['no_likes'].max()
retweets_max = data['no_retweets'].max()

# idxmax returns the label of the first row holding the maximum, which is
# what filtering on the max and taking index[0] produced.
most_likes = data['no_likes'].idxmax()
most_retweets = data['no_retweets'].idxmax()

# Format the tweet text as str. Encoding it to bytes first makes Python 3
# print a b'...' repr with escaped non-ASCII characters.
print("\nMost liked: \n> {} length={} ♡={}".format(data.loc[most_likes, 'Tweets'], data.loc[most_likes, 'length'], likes_max))
print("\nMost retweeted: \n> {} length={} RTs={}".format(data.loc[most_retweets, 'Tweets'], data.loc[most_retweets, 'length'], retweets_max))

In [None]:
# Series of each metric's values, indexed by the 'created' column.
created_index = data['created']
series_length, series_likes, series_retweets = (
    pd.Series(data=data[column].values, index=created_index)
    for column in ('length', 'no_likes', 'no_retweets')
)

# Tweet length against creation time, drawn in red.
series_length.plot(figsize=(16,4), color='r')

In [None]:
# Plot likes and retweets against creation time, each with its own legend entry.
engagement_figsize = (16, 4)
series_likes.plot(figsize=engagement_figsize, label='Likes', legend=True)
series_retweets.plot(figsize=engagement_figsize, label='Retweets', legend=True)

In [None]:
# Distinct values of the 'source' column as strings. They are de-duplicated
# through a set, so the resulting list order is arbitrary.
all_sources = list({str(source) for source in data['source']})
print("Sources: {}".format(all_sources))


In [None]:
# Fraction of tweets coming from each source, aligned with `all_sources`.
# Map each source label to its slot once instead of scanning the list per tweet.
source_slot = {label: slot for slot, label in enumerate(all_sources)}

percent = np.zeros(len(all_sources))
for src in data['source']:
    # all_sources holds str(source) values, so look up the same string form.
    percent[source_slot[str(src)]] += 1

# Normalise by the number of tweets actually present instead of a fixed 100,
# so the shares stay correct when the timeline returns a different count.
tweet_total = len(data['source'])
if tweet_total:
    percent /= tweet_total

pie_chart = pd.Series(percent, index=all_sources, name='sources')
pie_chart.plot.pie(fontsize=11, autopct='%.2f', figsize=(6,6))

In [None]:
def analyse(tweet):
    """Classify the polarity that TextBlob reports for `tweet`.

    Returns 1 when the polarity is greater than zero, 0 when it equals zero,
    and -1 otherwise.
    """
    polarity = TextBlob(tweet).polarity
    if polarity > 0:
        return 1
    if polarity == 0:
        return 0
    return -1

# Score every tweet text with analyse() and store the results as the
# 'sentiment' column.
sentiment_scores = [analyse(text) for text in data['Tweets']]
data['sentiment'] = np.array(sentiment_scores)

# Preview the first ten rows, now including the new column.
display(data.head(10))

In [None]:
# Pair each tweet text with its sentiment score, then split the texts into
# positive (> 0), neutral (== 0) and negative (< 0) buckets.
scored_tweets = list(zip(data['Tweets'], data['sentiment']))
positives = [text for text, score in scored_tweets if score > 0]
neutral = [text for text, score in scored_tweets if score == 0]
negatives = [text for text, score in scored_tweets if score < 0]

print("👍: {}, 👎: {}, 😐: {} ". format(len(positives), len(negatives), len(neutral)))

### References 
- https://github.com/avisaxena33/Twitter-Data-Sentiment-Analysis-with-Python-Workshop/blob/master/twitter.py

### Notes
Conda packages of oauthlib and textblob for macOS (osx-64):
- https://anaconda.org/asmeurer/oauthlib
- https://anaconda.org/sloria/textblob
    