# Final Project

In [3]:
import re
import tweepy
from tweepy import OAuthHandler
from textblob import TextBlob
import json
import pandas as pd
import csv
import time

class TwitterClient(object):
    """Offline analyser for a file of JSON-encoded tweets.

    The input file holds one JSON tweet object per line.  The public
    ``get_*_rating`` / ``get_tweets_by_date_count`` methods scan that file for
    the configured team and player names and write their results as CSV files
    into the current working directory.

    A name is considered "mentioned" when its lower-cased form occurs anywhere
    in the lower-cased ``str()`` of the decoded tweet object, i.e. not only in
    the tweet text but in any field of the tweet.
    """

    # Layout of the ``created_at`` field expected by get_tweets_by_date.
    CREATED_AT_FORMAT = '%a %b %d %H:%M:%S +0000 %Y'

    # Removed by clean_tweet: @mentions, every character that is not an ASCII
    # letter, digit, space or tab, and scheme://... URLs.
    _CLEAN_PATTERN = re.compile(
        r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)")

    def __init__(self, fname='raw_tweets_static.json'):
        """Store the name lists and the path of the tweet file.

        The file is not opened here.  Every query method opens it, reads it
        and closes it again, so no handle is leaked and each method can be
        called any number of times.

        Args:
            fname: Path of the tweet file (one JSON object per line).
        """
        self.team_names = [
            'DelhiDaredevils', 'KingsXIPunjab', 'KolkataKnightRiders',
            'MumbaiIndians', 'RoyalChallengersBangalore',
            'SunrisersHyderabad', 'PuneSupergiants', 'GujratLions',
        ]
        self.top_batsman = [
            'Cullam', 'Rana', 'Pandey', 'Raina', 'Warner', 'Amla', 'Gambhir',
            'Samson', 'Dhawan', 'Buttler', 'Smith', 'Uthappa', 'Vohra',
            'Jadhav', 'Pollard', 'Maxwell', 'Kohli', 'Pant', 'Billings',
            'Henriques', 'Villers', 'Gayle', 'Karthik', 'Patel', 'Lynn',
            'Rahane', 'Finch', 'Stokes', 'Iyer', 'Tiwary',
        ]
        self.top_bowler = [
            'Kumar', 'Morris', 'Rashid', 'Clenaghan', 'Tahir', 'Tye',
            'Chahal', 'Cummins', 'Zahir', 'Woakes', 'Yadav', 'Kuldeep',
            'ARPatel', 'Badree', 'CoulterNile', 'Aaron', 'Mishra', 'Pandey',
            'Nehra', 'SandeepSharma', 'Bumrah', 'Nadeem', 'Thakur', 'Negi',
            'Stokes', 'Narine', 'Malinga', 'MMSharma', 'Rajpoot',
        ]
        self.fname = fname

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    def _iter_tweets(self):
        """Yield ``(tweet, haystack)`` for every non-blank line of the file.

        ``tweet`` is the decoded JSON object and ``haystack`` is its
        lower-cased ``str()`` form, computed once per tweet so callers can
        test many names against it cheaply.

        Raises:
            IOError/OSError: if the file cannot be opened.
            ValueError: if a non-blank line is not valid JSON.
        """
        with open(self.fname, 'r', encoding='utf-8') as tweet_file:
            for line in tweet_file:
                if not line.strip():
                    # Tolerate blank separator lines between tweets.
                    continue
                tweet = json.loads(line)
                yield tweet, str(tweet).lower()

    def _collect_mentions(self, names, label, add_sentiment=False):
        """Return one record per (tweet, mentioned name) pair.

        Each record is ``{'text': ..., [ 'sentiment': ..., ] label: name}``
        with ``name`` lower-cased.  A tweet whose ``retweet_count`` is
        positive is recorded only if an identical record is not already
        present; tweets without retweets are always recorded.

        Args:
            names: Names to look for.
            label: Key under which the matched name is stored.
            add_sentiment: When true, also store the tweet's sentiment.
        """
        lowered_names = [name.lower() for name in names]
        records = []
        # (text, name) pairs already recorded; replaces a linear scan of
        # ``records``.  Sentiment depends only on the text, so this key is
        # equivalent to comparing whole records.
        seen = set()
        for tweet, haystack in self._iter_tweets():
            for name in lowered_names:
                if name not in haystack:
                    continue
                text = tweet['text']
                key = (text, name)
                if tweet['retweet_count'] > 0 and key in seen:
                    continue
                record = {'text': text}
                if add_sentiment:
                    record['sentiment'] = self.get_tweet_sentiment(text)
                record[label] = name
                seen.add(key)
                records.append(record)
        return records

    def _write_popularity(self, names, mentions, label, name_column,
                          csv_path):
        """Write a ``name, mention count`` CSV sorted by descending count.

        Args:
            names: Names to report, in their original spelling.
            mentions: Records as returned by ``_collect_mentions``.
            label: Key of the lower-cased name inside each record.
            name_column: Header of the name column.
            csv_path: Destination CSV file.
        """
        tally = {}
        for mention in mentions:
            tally[mention[label]] = tally.get(mention[label], 0) + 1
        columns = [name_column, 'Popularity Count']
        rows = [[name, tally.get(name.lower(), 0)] for name in names]
        frame = pd.DataFrame(rows, columns=columns).sort_values(
            ['Popularity Count'], ascending=[False])
        frame.to_csv(csv_path, index=False, na_rep="", columns=columns)

    # ------------------------------------------------------------------
    # Text processing
    # ------------------------------------------------------------------
    def clean_tweet(self, tweet):
        """Return ``tweet`` stripped of mentions, URLs and punctuation.

        Every match of the cleaning pattern is replaced by a space and runs
        of whitespace are then collapsed to single spaces.
        """
        return ' '.join(self._CLEAN_PATTERN.sub(" ", tweet).split())

    def get_tweet_sentiment(self, tweet):
        """Classify ``tweet`` as ``'positive'``, ``'neutral'`` or ``'negative'``.

        The decision is the sign of the TextBlob polarity of the cleaned
        tweet text.
        """
        polarity = TextBlob(self.clean_tweet(tweet)).sentiment.polarity
        if polarity > 0:
            return 'positive'
        if polarity == 0:
            return 'neutral'
        return 'negative'

    # ------------------------------------------------------------------
    # Tweet collection
    # ------------------------------------------------------------------
    def get_tweets_by_date(self):
        """Return ``{'team': name, 'Date': 'YYYY-MM-DD'}`` per team mention.

        One record is produced for every (tweet, mentioned team) pair; no
        de-duplication is applied.  The date is taken from the tweet's
        ``created_at`` field.
        """
        lowered_teams = [team.lower() for team in self.team_names]
        records = []
        for tweet, haystack in self._iter_tweets():
            mentioned = [team for team in lowered_teams if team in haystack]
            if not mentioned:
                continue
            # Parse the timestamp once per tweet rather than once per team.
            created = time.strptime(tweet['created_at'],
                                    self.CREATED_AT_FORMAT)
            day = time.strftime('%Y-%m-%d', created)
            records.extend({'team': team, 'Date': day} for team in mentioned)
        return records

    def get_team_tweets(self):
        """Return ``{'text', 'sentiment', 'team'}`` records per team mention."""
        return self._collect_mentions(self.team_names, 'team',
                                      add_sentiment=True)

    def get_batsman_tweets(self):
        """Return ``{'text', 'batsman'}`` records per batsman mention."""
        return self._collect_mentions(self.top_batsman, 'batsman')

    def get_bowler_tweets(self):
        """Return ``{'text', 'bowler'}`` records per bowler mention."""
        return self._collect_mentions(self.top_bowler, 'bowler')

    # ------------------------------------------------------------------
    # Reports
    # ------------------------------------------------------------------
    def get_team_rating(self):
        """Write per-team sentiment counts to ``team_review.csv``.

        Rows are sorted by descending positive count.  When no tweet matches
        any team, all percentages are reported as 0.0 instead of dividing by
        zero.
        """
        tweets = self.get_team_tweets()
        # NOTE(review): ``TotalTweet`` and the percentage denominators are the
        # number of matched tweets across *all* teams, not per team, exactly
        # as in the previous implementation - confirm this is intended.
        total = len(tweets)

        tally = {}
        for tweet in tweets:
            key = (tweet['team'], tweet['sentiment'])
            tally[key] = tally.get(key, 0) + 1

        def percent(count):
            return round(100.0 * count / total, 2) if total else 0.0

        columns = ['Team Name', 'PositiveCount', 'NegativeCount',
                   'NeutralCount', 'TotalTweet', 'PositivePercent',
                   'NegativePercent', 'NeutralPercent']
        rows = []
        for team in self.team_names:
            lowered = team.lower()
            positive = tally.get((lowered, 'positive'), 0)
            negative = tally.get((lowered, 'negative'), 0)
            neutral = tally.get((lowered, 'neutral'), 0)
            rows.append([team, positive, negative, neutral, total,
                         percent(positive), percent(negative),
                         percent(neutral)])
        team_df = pd.DataFrame(rows, columns=columns).sort_values(
            ['PositiveCount'], ascending=[False])
        team_df.to_csv("team_review.csv", index=False, na_rep="",
                       columns=columns)

    def get_batsman_rating(self):
        """Write per-batsman mention counts to ``batsman_review.csv``."""
        self._write_popularity(self.top_batsman, self.get_batsman_tweets(),
                               'batsman', 'Batsman Name',
                               "batsman_review.csv")

    def get_bowler_rating(self):
        """Write per-bowler mention counts to ``bowler_review.csv``."""
        self._write_popularity(self.top_bowler, self.get_bowler_tweets(),
                               'bowler', 'Bowler Name', "bowler_review.csv")

    def get_tweets_by_date_count(self, year='2017', month='04',
                                 first_day=22, last_day=29):
        """Write per-team, per-day tweet counts to ``tweets_by_date.csv``.

        Only days ``first_day``..``last_day`` (inclusive) of the given month
        are reported, and only (team, day) pairs with at least one tweet.
        Rows are ordered by day, then by the order of ``team_names``.  If
        nothing matches, a CSV containing only the header is written.

        Args:
            year: Year of the reporting window (string or int).
            month: Month of the reporting window (string or int).
            first_day: First day of the month to report.
            last_day: Last day of the month to report.
        """
        tally = {}
        for tweet in self.get_tweets_by_date():
            key = (tweet['team'], tweet['Date'])
            tally[key] = tally.get(key, 0) + 1

        columns = ['Team', 'Date', 'Count']
        rows = []
        for day in range(first_day, last_day + 1):
            # Zero-padded so the value matches the '%Y-%m-%d' dates produced
            # by get_tweets_by_date even for single-digit days.
            date = '{:04d}-{:02d}-{:02d}'.format(int(year), int(month), day)
            for team in self.team_names:
                count = tally.get((team.lower(), date), 0)
                if count > 0:
                    rows.append([team, date, count])
        # Passing ``columns`` at construction keeps an empty result valid.
        tweets_by_date_df = pd.DataFrame(rows, columns=columns)
        tweets_by_date_df.to_csv("tweets_by_date.csv", index=False,
                                 na_rep="", columns=columns)
                   
# Generate every CSV report.  Guarded so that importing this file does not
# read the tweet dump or write files; running it as a script or notebook cell
# still executes the reports and leaves ``twitter_client`` available.
if __name__ == "__main__":
    twitter_client = TwitterClient()
    twitter_client.get_tweets_by_date_count()
    twitter_client.get_team_rating()
    twitter_client.get_batsman_rating()
    twitter_client.get_bowler_rating()