## Hands on Análise de Sentimentos 
#### Betina Costa

Verifique se você está com as bibliotecas instaladas (vide requirements.txt)

https://github.com/betinacosta/handson-sentiment-analysis/blob/master/README.md

In [43]:
import tweepy
import numpy as np
import json
from textblob import TextBlob
import pandas as pd

In [44]:
def twitter_api(key_file):
    """Build an authenticated tweepy API client from a JSON credentials file.

    Args:
        key_file: Path to a JSON file holding the entries
            ``TWITTER_API_KEY``, ``TWITTER_API_SECRET_KEY``,
            ``TWITTER_ACCESS_TOKEN`` and ``TWITTER_SECRET_ACCESS_TOKEN``.

    Returns:
        The ``tweepy.API`` object created from those credentials.

    Raises:
        OSError: If ``key_file`` cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
        KeyError: If any of the four expected entries is missing.
    """
    # The context manager closes the credentials file even when parsing fails.
    with open(key_file, "r", encoding="utf-8") as handle:
        keys = json.load(handle)

    # Read every credential up front so a missing entry fails before any
    # tweepy object is created.
    consumer_key = keys['TWITTER_API_KEY']
    consumer_secret = keys['TWITTER_API_SECRET_KEY']
    access_token = keys['TWITTER_ACCESS_TOKEN']
    access_token_secret = keys['TWITTER_SECRET_ACCESS_TOKEN']

    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    return tweepy.API(auth)

In [45]:
def is_english(text):
    """Tell whether *text* reports its language as English.

    Args:
        text: An object exposing ``detect_language()`` (a ``TextBlob`` in
            this file).

    Returns:
        ``True`` when ``detect_language()`` yields ``'en'``, else ``False``.
    """
    detected = text.detect_language()
    return detected == 'en'

In [46]:
def get_weighted_polarity_mean(valid_tweets):
    """Return the mean polarity weighted by each tweet's subjectivity.

    Args:
        valid_tweets: Mapping or DataFrame with numeric ``'polarity'`` and
            ``'subjectivity'`` columns of equal length.

    Returns:
        The subjectivity-weighted average of the polarities. When the
        weights sum to zero (no rows, or every subjectivity is 0.0) the
        weighted mean is undefined, so a neutral ``0.0`` is returned
        instead of letting ``np.average`` raise ``ZeroDivisionError``.
    """
    polarities = np.asarray(valid_tweets['polarity'], dtype=float)
    weights = np.asarray(valid_tweets['subjectivity'], dtype=float)

    # np.average cannot normalise weights whose sum is zero.
    if weights.sum() == 0:
        return 0.0
    return np.average(polarities, weights=weights)

In [47]:
def get_polarity_mean(valid_tweets):
    """Return the plain (unweighted) mean of the ``'polarity'`` column.

    Args:
        valid_tweets: Mapping or DataFrame with a numeric ``'polarity'``
            column.

    Returns:
        The result of ``np.mean`` over that column.
    """
    polarity_column = valid_tweets['polarity']
    return np.mean(polarity_column)

In [48]:
def tweet_analysis(query,items,api):
    """Search tweets matching *query* and score the sentiment of each one.

    Args:
        query: Search expression; ``" -filter:retweets"`` is appended so
            retweets are excluded.
        items: Maximum number of tweets to pull from the cursor.
        api: tweepy API client used to run the search.

    Returns:
        A DataFrame with one row per tweet and the columns ``created_at``
        (converted with ``pd.to_datetime``), ``id_str``, ``text``,
        ``user_id_str``, ``polarity`` and ``subjectivity``, plus
        ``weighted_polarity_mean`` and ``polarity_mean``, which repeat the
        aggregate values over all rows on every row.
    """
    cols = ['created_at','id_str','text','user_id_str','polarity','subjectivity']

    # tweepy 4 renamed ``API.search`` to ``API.search_tweets``; accept either
    # so the function works with old and new clients.
    search = getattr(api, 'search_tweets', None) or api.search
    tweets = tweepy.Cursor(search, q=query + " -filter:retweets").items(items)

    # Rows are collected in a list and turned into a DataFrame once:
    # ``DataFrame.append`` copied the frame on every call and no longer
    # exists in pandas 2.x.
    rows = []
    for tweet in tweets:
        phrase = TextBlob(tweet.text)

        # Non-English tweets are translated to English before scoring.
        if not is_english(phrase):
            phrase = TextBlob(str(phrase.translate(to='en')))

        sentiment = phrase.sentiment
        rows.append([
            tweet._json['created_at'],
            tweet._json['id_str'],
            tweet._json['text'],
            tweet._json['user']['id_str'],
            sentiment.polarity,
            sentiment.subjectivity,
        ])

    df = pd.DataFrame(rows, columns=cols)
    df['created_at'] = pd.to_datetime(df['created_at'])

    # Scalars are broadcast, so every row carries the same aggregate values.
    df['weighted_polarity_mean'] = get_weighted_polarity_mean(df)
    df['polarity_mean'] = get_polarity_mean(df)

    return df

In [49]:
def print_result(mean):
    """Print the sentiment label that corresponds to *mean*.

    Prints ``POSITIVE`` when ``mean`` is greater than zero, ``NEUTRO`` when
    it equals zero, and ``NEGATIVE`` for every other value.

    Args:
        mean: Numeric polarity value to classify.
    """
    # Start from the fallback label and override it for the two other cases.
    verdict = 'NEGATIVE'
    if mean > 0.0:
        verdict = 'POSITIVE'
    elif mean == 0.0:
        verdict = 'NEUTRO'
    print(verdict)

In [None]:
if __name__ == "__main__":
    # Ask for the search expression and the number of tweets to analyse,
    # then run the analysis with credentials read from 'twitter.key'.
    query = input("Entre a query de analise: ")
    items = int(input("Entre a qtd de analise: "))
    analysis = tweet_analysis(
        query=query,
        items=items,
        api=twitter_api('twitter.key'),
    )

In [34]:
# Display two randomly sampled rows of the analysis result.
analysis.sample(2)

Unnamed: 0,created_at,id_str,text,user_id_str,polarity,subjectivity,weighted_polarity_mean,polarity_mean
1,2018-10-17 14:34:28,1052568530837757952,@diana1dlz @cloverc8 @dbongino Stop diverting ...,1010996827657854978,0.0,0.0,0.4,0.2
0,2018-10-17 14:34:28,1052568533832486912,Khashoggi was a friend of Osama Bin Laden. Kha...,47133990,0.4,0.9,0.4,0.2
