In [65]:
import re 
import tweepy 
from tweepy import OAuthHandler 
from textblob import TextBlob 
import pandas as pd
from retrying import retry
from tqdm import tqdm_notebook
from datetime import tzinfo, timedelta, datetime

In [146]:
# 1)     polarity: negative vs. positive    (-1.0 => +1.0)
# 2) subjectivity: objective vs. subjective (+0.0 => +1.0)

In [44]:
class TwitterClient(object): 

    def __init__(self):
        self.auth = None
        self.api = None
  
    def clean_tweet(self, tweet): 
        return' '.join(re.sub("(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)", " ", tweet).split())
  
    def get_tweet_sentiment(self, tweet): 

        # create TextBlob object of passed tweet text 
        analysis = TextBlob(self.clean_tweet(tweet)) 
        # set sentiment 
        if analysis.sentiment.polarity > 0.01: 
            return('positive', analysis.sentiment)
        elif analysis.sentiment.polarity < -0.01: 
            return('negative', analysis.sentiment)
        else: 
            return('neutral', analysis.sentiment)
  
    #@retry(wait_fixed=960000) # Waits 16 minutes to ask again in case of exceeding limits
    def get_tweets(self, query, count): 
        
        # keys and tokens from the Twitter Dev Console 
        consumer_key = 'LikHDXdKm5uI5fnNVJqRcurmi'
        consumer_secret = 'kSvyN9dMKYMMbVd9IPsyPhWUEqdmzzVR0po6DUhwrmLARDB6y4'
        access_token = '236803367-0J9UCR2IOas4zlp6pDziQdmXq5yvkROpldq70a0O'
        access_token_secret = 'tOVubG5jpLvtw8WGEW4usjdeTSHXCmZboVCbx32VgmRrG'
  
        # attempt authentication 
        try: 
            # create OAuthHandler object 
            self.auth = OAuthHandler(consumer_key, consumer_secret) 
            # set access token and secret 
            self.auth.set_access_token(access_token, access_token_secret) 
            # create tweepy API object to fetch tweets 
            self.api = tweepy.API(self.auth) 
        except: 
            print("Error: Authentication Failed") 

        # empty list to store parsed tweets 
        tweets = {"Tweet": [],
                 "Sentiment": [],
                 "Kind": []}
  
        # Excluding retweets
        query = query + ' -filter:retweets'
        
        try: 
            # call twitter api to fetch tweets 
            fetched_tweets = tweepy.Cursor(self.api.search,
                                           q=query,
                                           lang = "en").items(count)
  
            # parsing tweets one by one 
            for tweet in fetched_tweets: 
  
                # saving text of tweet 
                parsed_tweet = tweet.text 
                # saving sentiment of tweet 
                kind, sentiment = self.get_tweet_sentiment(tweet.text) 
                
                tweets['Tweet'].append(parsed_tweet)
                tweets["Sentiment"].append(sentiment)
                tweets['Kind'].append(kind)
  
            # return parsed tweets 
            return tweets 
  
        except tweepy.TweepError as e: 
            # print error (if any) 
            print("Error : " + str(e)) 

In [73]:
def main(query_df, count_int): 
    
    # creating object of TwitterClient Class 
    api = TwitterClient() 
    
    # creating empty df
    tweets_df = pd.DataFrame(columns=['Tweet','Sentiment','Kind','Keyword','Link'])
    
    # calling function to get tweets 
    for each_kw, each_link in tqdm_notebook(zip(query_df.keyword, query_df.link)):
        player_tweets = api.get_tweets(query = each_kw, count = count_int) 
        player_tweets_df = pd.DataFrame(player_tweets)
        player_tweets_df["Keyword"] = each_kw
        player_tweets_df['Link'] = each_link
        tweets_df = pd.concat([tweets_df, player_tweets_df], axis=0)
        print(each_link)
    
    return(tweets_df)

def obtain_summary(tweets):
    
    players = list(tweets.Keyword.value_counts().index)
    
    # creating empty df
    sentiment_df = pd.DataFrame(columns=['Keyword', 'Link', 'Positive','Negative','Neutral'])
    
    for each in players:
        
        # percentage of positive tweets
        ptweets = [tweet for tweet in tweets.Tweet[(tweets.Keyword == each) & (tweets.Kind == 'positive')]]  
        pospercentage = (100*len(ptweets)/len(tweets.Tweet[tweets.Keyword == each]))
        # percentage of negative tweets 
        ntweets = [tweet for tweet in tweets.Tweet[(tweets.Keyword == each) & (tweets.Kind == 'negative')]]
        negpercentage = (100*len(ntweets)/len(tweets.Tweet[tweets.Keyword == each]))
        # percentage of neutral tweets 
        neupercentage = (100-pospercentage-negpercentage)
        
        link = tweets.Link[tweets.Keyword == each]
        
        summary = [each, link, pospercentage, negpercentage, neupercentage]
        
        sentiment_df.loc[len(sentiment_df)] = summary
            
    return(sentiment_df)

def print_status():
    
    consumer_key = ''
    consumer_secret = ''
    access_token = ''
    access_token_secret = ''


    auth = OAuthHandler(consumer_key, consumer_secret) 
    auth.set_access_token(access_token, access_token_secret) 
    api = tweepy.API(auth)
    
    data = api.rate_limit_status()
    remaining_tweets = data['resources']['search']['/search/tweets']['remaining']
    remaining_time = data['resources']['search']['/search/tweets']['reset']
    
    print('Remaining queries: ' + str(remaining_tweets))
    if remaining_tweets == 0:
        print('Remaining time: ' + datetime.utcfromtimestamp(remaining_time).strftime('%Y-%m-%d %H:%M:%S'))

### Running functions

In [85]:
final_summary = pd.DataFrame(columns=['Keyword', 'Link', 'Positive','Negative','Neutral'])

In [5]:
sampled_players = pd.read_csv('/Users/gonzaloferreiro/Documents/GA_Materials/DS_Inmersive/DSI8-lessons/projects/project-capstone/Moving_forward/Final_datasets/final_list_of_sampled_players.csv')

In [6]:
sampled_players.head()

Unnamed: 0,player,lastname,team,link_tm,link_ws
0,Max Clark,Clark,Vitesse,/max-clark/profil/spieler/183291,/Players/313996/Show/Max-Clark
1,Sergio Postigo,Postigo,Levante,/sergio-postigo/profil/spieler/158791,/Players/109896/Show/Sergio-Postigo
2,Stanislav Iljutcenko,Iljutcenko,Duisburg,/stanislav-iljutcenko/profil/spieler/191292,/Players/132901/Show/Stanislav-Iljutcenko
3,Iván Marcone,Marcone,Boca Juniors,/ivan-marcone/profil/spieler/90451,/Players/125246/Show/Iván-Marcone
4,Florent Mollet,Mollet,Montpellier,/florent-mollet/profil/spieler/222859,/Players/114169/Show/Florent-Mollet


In [52]:
# Creating query dataframe

query_dict = {'keyword':[], 'link':[]}
for each_team, each_name, each_link in zip(sampled_players.team, sampled_players.player, sampled_players.link_ws):
    concatenated = each_name + " " + each_team
    query_dict['keyword'].append(concatenated)
    query_dict['link'].append(each_link)

In [53]:
query_df = pd.DataFrame(query_dict)

In [135]:
pre_summary_df = obtain_summary(tweets_for_sentiment)

In [None]:
# Function to run query every 15 minutes

import time

its = 0
from_pos = 220
while True:
    to_pos = from_pos + 55
    tweets_for_sentiment = main(query_df.iloc[from_pos:to_pos], 200)
    pre_summary_df = obtain_summary(tweets_for_sentiment)
    final_summary = pd.concat([final_summary, pre_summary_df], axis=0)
    its = its + 1
    from_pos = from_pos + 55
    if its == 17:
        break
    else:
        print(from_pos)
        print(final_summary.shape)
        time.sleep(960)

In [147]:
print_status()

Remaining queries: 180


In [None]:
# Obtaining final players

tweets_for_sentiment = main(query_df.iloc[1188], 200)

In [135]:
pre_summary_df = obtain_summary(tweets_for_sentiment)

In [131]:
final_summary = pd.concat([final_summary, pre_summary_df], axis=0)

In [132]:
final_summary.head()

Unnamed: 0,Keyword,Link,Positive,Negative,Neutral
0,Trent Alexander-Arnold Liverpool,0 /Players/318871/Show/Trent-Alexander-Ar...,60.0,4.5,35.5
1,Pierre-Emerick Aubameyang Arsenal,0 /Players/44120/Show/Pierre-Emerick-Auba...,33.0,3.0,64.0
2,Neymar Paris Saint-Germain,0 /Players/50835/Show/Neymar 1 /Play...,24.0,10.5,65.5
3,Jordan Henderson Liverpool,0 /Players/68659/Show/Jordan-Henderson 1 ...,54.5,8.0,37.5
4,Shkodran Mustafi Arsenal,0 /Players/80921/Show/Shkodran-Mustafi 1 ...,40.397351,4.635762,54.966887


In [133]:
final_summary.shape

(551, 5)

In [144]:
final_uniques = final_summary.drop_duplicates(subset=['Keyword'])

In [145]:
final_uniques.shape

(408, 5)

### Saving and testing loading

In [163]:
final_uniques.to_csv(r'/Users/gonzaloferreiro/Documents/GA_Materials/DS_Inmersive/DSI8-lessons/projects/project-capstone/Moving_forward/Final_datasets/final_tweets.csv',index=False)

In [165]:
test_loading = pd.read_csv('/Users/gonzaloferreiro/Documents/GA_Materials/DS_Inmersive/DSI8-lessons/projects/project-capstone/Moving_forward/Final_datasets/final_tweets.csv')

In [167]:
final_uniques.head()

Unnamed: 0,Keyword,Link,Positive,Negative,Neutral
0,Trent Alexander-Arnold Liverpool,0 /Players/318871/Show/Trent-Alexander-Ar...,60.0,4.5,35.5
1,Pierre-Emerick Aubameyang Arsenal,0 /Players/44120/Show/Pierre-Emerick-Auba...,33.0,3.0,64.0
2,Neymar Paris Saint-Germain,0 /Players/50835/Show/Neymar 1 /Play...,24.0,10.5,65.5
3,Jordan Henderson Liverpool,0 /Players/68659/Show/Jordan-Henderson 1 ...,54.5,8.0,37.5
4,Shkodran Mustafi Arsenal,0 /Players/80921/Show/Shkodran-Mustafi 1 ...,40.397351,4.635762,54.966887


In [168]:
test_loading.head()

Unnamed: 0,Keyword,Link,Positive,Negative,Neutral
0,Trent Alexander-Arnold Liverpool,0 /Players/318871/Show/Trent-Alexander-Ar...,60.0,4.5,35.5
1,Pierre-Emerick Aubameyang Arsenal,0 /Players/44120/Show/Pierre-Emerick-Auba...,33.0,3.0,64.0
2,Neymar Paris Saint-Germain,0 /Players/50835/Show/Neymar\n1 /Pla...,24.0,10.5,65.5
3,Jordan Henderson Liverpool,0 /Players/68659/Show/Jordan-Henderson\n1...,54.5,8.0,37.5
4,Shkodran Mustafi Arsenal,0 /Players/80921/Show/Shkodran-Mustafi\n1...,40.397351,4.635762,54.966887


### Checking just in case players not founded 

In [155]:
test = query_df[~query_df.keyword.isin(final_uniques.Keyword)]

In [None]:
tweets_for_sentiment = main(test.iloc[:100], 200)

In [158]:
test_summary_df = obtain_summary(tweets_for_sentiment)

In [159]:
test_summary_df

Unnamed: 0,Keyword,Link,Positive,Negative,Neutral
0,Max Clark Vitesse,0 /Players/313996/Show/Max-Clark 1 /Play...,33.333333,66.666667,-1.421085e-14
