# **Tweet Sentiment Analyzer**

Some necessary libray imports.

In [0]:
import pandas as pd
from pandas import DataFrame
import numpy as np
from matplotlib import style
from sklearn import preprocessing
from nltk.corpus import stopwords
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn import svm
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer




# Data loading and preprocessing.

In [48]:

df = pd.read_csv(r'train.csv',encoding='latin-1')
tweet, sentiment= df['text'],df['sentiment']



df.sentiment.value_counts()


df = df[~df.sentiment.str.contains('\|')]
df = df[df.sentiment != 'nocode']
df = df[df.sentiment != 'not-relevant']


tweet, sentiment= df['text'],df['sentiment']
df.sentiment.value_counts()


print(len (sentiment),len (tweet))


1267 1267


# Vectorizing Data using TF-IDF vectorizer

In [49]:
stopwords = set("from for are & of the a is we they @ in on an . if has had have and or as but was were he she it but and".split())

#vectorizer = CountVectorizer(ngram_range=(1,2), stop_words=stopwords.words('english'))
#vectorizer = TfidfVectorizer(ngram_range=(1,2), stop_words=stopwords.words('english'))
vectorizer = TfidfVectorizer(ngram_range=(1,2), stop_words=stopwords)
#features = vectorizer.fit_transform(review)
vectorizer = vectorizer.fit(tweet.values.astype('U'))
features = vectorizer.transform(tweet.values.astype('U'))
features_nd = features.toarray()


print(len(features_nd[0]))



15983


# Label Encoding and Train Test Split

In [0]:
encoder = preprocessing.LabelEncoder()
encoder = encoder.fit(sentiment)
labels = encoder.transform(sentiment)


x_train, x_test, y_train, y_test  = train_test_split(features_nd, labels, train_size=.70,random_state=134, stratify=df.sentiment.values)



# Naive Bayes

In [51]:

nb = MultinomialNB(alpha=0.21)
nb.fit(x_train, y_train)
nb_pred = nb.predict(x_test)
accuracy_score(y_test, nb_pred)





0.910761154855643

# Random Forest

In [52]:

rf = RandomForestClassifier(verbose=True)
rf.fit(x_train, y_train)
rf_pred = rf.predict(x_test)
accuracy_score(y_test, rf_pred)



[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    5.0s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished


0.9081364829396326

# K-Nearest Neighbour

In [55]:
knn = KNeighborsClassifier( n_neighbors=5, weights='distance' )
knn.fit(x_train, y_train)
knn_pred = knn.predict(x_test)
accuracy_score(y_test, knn_pred)



0.910761154855643

# Gradient Boosting

In [54]:
gb = GradientBoostingClassifier(n_estimators=20, learning_rate=0.095 ,verbose=True)
gb.fit(x_train, y_train)
gb_pred = gb.predict(x_test)
accuracy_score(y_test, gb_pred)


      Iter       Train Loss   Remaining Time 
         1         302.4260           46.58s
         2         276.6801           42.83s
         3         255.7457           40.90s
         4         229.8184           38.62s
         5         207.3746           36.42s
         6         187.6178           34.18s
         7         171.8021           31.88s
         8         161.0296           29.49s
         9         150.8930           27.13s
        10         142.0803           24.57s
        20         105.9333            0.00s


0.9028871391076115

# Multi-Layer Perceptron (Neural Network)

In [58]:
mlp = MLPClassifier(hidden_layer_sizes=99, verbose=True)
mlp.fit(x_train, y_train)
mlp_pred = mlp.predict(x_test)
accuracy_score(y_test, mlp_pred)



Iteration 1, loss = 1.53906989
Iteration 2, loss = 1.43703120
Iteration 3, loss = 1.32792211
Iteration 4, loss = 1.20392135
Iteration 5, loss = 1.06970509
Iteration 6, loss = 0.93222188
Iteration 7, loss = 0.79679651
Iteration 8, loss = 0.67009562
Iteration 9, loss = 0.55796078
Iteration 10, loss = 0.46164664
Iteration 11, loss = 0.38386292
Iteration 12, loss = 0.32209132
Iteration 13, loss = 0.27393038
Iteration 14, loss = 0.23585467
Iteration 15, loss = 0.20558630
Iteration 16, loss = 0.18100567
Iteration 17, loss = 0.16031315
Iteration 18, loss = 0.14243863
Iteration 19, loss = 0.12686048
Iteration 20, loss = 0.11330128
Iteration 21, loss = 0.10133174
Iteration 22, loss = 0.09066126
Iteration 23, loss = 0.08137394
Iteration 24, loss = 0.07307426
Iteration 25, loss = 0.06593868
Iteration 26, loss = 0.05943189
Iteration 27, loss = 0.05396792
Iteration 28, loss = 0.04896791
Iteration 29, loss = 0.04463764
Iteration 30, loss = 0.04097735
Iteration 31, loss = 0.03767039
Iteration 32, los

0.916010498687664

# Support Vector Machine

In [57]:
lr = svm.SVC(C=1,degree=1, kernel = 'poly', verbose=True)
lr.fit(x_train, y_train)
lr_pred = lr.predict(x_test)
accuracy_score(y_test, lr_pred)


[LibSVM]

0.905511811023622

# Tweet Extractor

The following code is used to extract tweets using the Twitter API


Note: Please enter valid twitter API keys in the twitter_credentials.py file before running this app. 

In [0]:
from tweepy import API 
from tweepy import Cursor
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
 
import twitter_credentials
import numpy as np
import pandas as pd
import time


################################# ENTER VALID KEYS IN twitter_credentials.py BEFORE RUNNING THIS APP ###########################


class TwitterClient():
    def __init__(self, twitter_user=None):
        self.auth = TwitterAuthenticator().authenticate_twitter_app()
        self.twitter_client = API(self.auth)

        self.twitter_user = twitter_user

    def get_twitter_client_api(self):
        return self.twitter_client

    def get_user_timeline_tweets(self, num_tweets):
        tweets = []
        for tweet in Cursor(self.twitter_client.user_timeline, id=self.twitter_user).items(num_tweets):
            tweets.append(tweet)
        return tweets

    def get_friend_list(self, num_friends):
        friend_list = []
        for friend in Cursor(self.twitter_client.friends, id=self.twitter_user).items(num_friends):
            friend_list.append(friend)
        return friend_list

    def get_home_timeline_tweets(self, num_tweets):
        home_timeline_tweets = []
        for tweet in Cursor(self.twitter_client.home_timeline, id=self.twitter_user).items(num_tweets):
            home_timeline_tweets.append(tweet)
        return home_timeline_tweets


class TwitterAuthenticator():

    def authenticate_twitter_app(self):
        auth = OAuthHandler(twitter_credentials.CONSUMER_KEY, twitter_credentials.CONSUMER_SECRET)
        auth.set_access_token(twitter_credentials.ACCESS_TOKEN, twitter_credentials.ACCESS_TOKEN_SECRET)
        return auth


class TwitterStreamer():
    """
    Class for streaming and processing live tweets.
    """
    def __init__(self):
        self.twitter_autenticator = TwitterAuthenticator()    

    def stream_tweets(self, fetched_tweets_filename, hash_tag_list):
        # This handles Twitter authetification and the connection to Twitter Streaming API
        listener = TwitterListener(fetched_tweets_filename)
        auth = self.twitter_autenticator.authenticate_twitter_app() 
        stream = Stream(auth, listener)

        # This line filter Twitter Streams to capture data by the keywords: 
        stream.filter(track=hash_tag_list)


class TwitterListener(StreamListener):
    """
    This is a basic listener that just prints received tweets to stdout.
    """
    def __init__(self, fetched_tweets_filename):
        self.fetched_tweets_filename = fetched_tweets_filename

    def on_data(self, data):
        try:
            print(data)
            with open(self.fetched_tweets_filename, 'a') as tf:
                tf.write(data)
            return True
        except BaseException as e:
            print("Error on_data %s" % str(e))
        return True
          
    def on_error(self, status):
        if status == 420:
            # Returning False on_data method in case rate limit occurs.
            return False
        print(status)





def Twitter_Extractor( keyword , Number_of_tweets):
    twitter_client = TwitterClient()
    tweet_analyzer = TweetAnalyzer()

    api = twitter_client.get_twitter_client_api()


    tweets = api.search(q=keyword, lang='en' , count = Number_of_tweets)
    count=0;
    return df = pd.DataFrame(data=[tweet.text for tweet in tweets], columns=['text'])



#Driver Code which fetches 50 tweets with the keyword Trump in it.
Tweets= Twitter_Extractor('Trump' , 50)




In [59]:
#Tweets= tweet[1:50]
Tweets=Tweets['text']

features = vectorizer.transform(Tweets.values.astype('U'))
features_nd = features.toarray()
#print(Tweets)
print(len(features_nd[0]))

pred = mlp.predict(features_nd)

senti= encoder.inverse_transform(pred)

data= {'Tweet': Tweets  , 'Predicted Sentiment': senti}

result=  pd.DataFrame(data)
print(result)

15983
                                                 Tweet Predicted Sentiment
2    @SelectShowcase @Tate_StIves ... Replace with ...               happy
3    @Sofabsports thank you for following me back. ...               happy
4    @britishmuseum @TudorHistory What a beautiful ...               happy
5    @NationalGallery @ThePoldarkian I have always ...               happy
9    Lucky @FitzMuseum_UK! Good luck @MirandaStearn...               happy
12   Yr 9 art students are off to the @britishmuseu...               happy
18                @BarbyWT @britishmuseum so beautiful               happy
20                       @britishmuseum awesome museum               happy
21   @nationalgallery #AskTheGallery Why do you pay...               angry
26                        @britishmuseum soo beautiful               happy
27               @NationalGallery I do, I do, I do. :)               happy
28   @tateliverpool one of my favourite paintings! ...               happy
30   The Magna Cart