#                                      SENTIMENT ANALYSIS

## Importing libraries

In [1]:
import re
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns
import string
import nltk
import warnings 
warnings.filterwarnings("ignore", category=DeprecationWarning)

%matplotlib inline

## Reading training data 

In [2]:
# Download the labelled training tweets (columns shown below: id, label, tweet).
train = pd.read_csv(
    'https://raw.githubusercontent.com/dD2405/Twitter_Sentiment_Analysis/master/train.csv'
)
# Independent copy of the frame exactly as it was loaded.
train_original = train.copy()
train.head()

Unnamed: 0,id,label,tweet
0,1,0,@user when a father is dysfunctional and is s...
1,2,0,@user @user thanks for #lyft credit i can't us...
2,3,0,bihday your majesty
3,4,0,#model i love u take with u all the time in ...
4,5,0,factsguide: society now #motivation


## Reading Test data

In [3]:
# Download the unlabelled test tweets (columns shown below: id, tweet).
test = pd.read_csv(
    'https://raw.githubusercontent.com/dD2405/Twitter_Sentiment_Analysis/master/test.csv'
)
# Independent copy of the frame exactly as it was loaded.
test_original = test.copy()
test.head()

Unnamed: 0,id,tweet
0,31963,#studiolife #aislife #requires #passion #dedic...
1,31964,@user #white #supremacists want everyone to s...
2,31965,safe ways to heal your #acne!! #altwaystohe...
3,31966,is the hp and the cursed child book up for res...
4,31967,"3rd #bihday to my amazing, hilarious #nephew..."


## Combining data

In [4]:
# Stack train and test into one frame so every cleaning step is applied to both.
# pd.concat replaces DataFrame.append (removed in pandas 2.0); the flags are real
# booleans rather than the always-truthy string 'true'.
# ignore_index=True renumbers rows 0..n-1; sort=True orders the columns by name.
data = pd.concat([train, test], ignore_index=True, sort=True)

In [5]:
# Preview the first rows of the combined frame.
data.head()

Unnamed: 0,id,label,tweet
0,1,0.0,@user when a father is dysfunctional and is s...
1,2,0.0,@user @user thanks for #lyft credit i can't us...
2,3,0.0,bihday your majesty
3,4,0.0,#model i love u take with u all the time in ...
4,5,0.0,factsguide: society now #motivation


In [6]:
# Preview the last rows of the combined frame; the rows shown have no label value.
data.tail()

Unnamed: 0,id,label,tweet
49154,49155,,thought factory: left-right polarisation! #tru...
49155,49156,,feeling like a mermaid ð #hairflip #neverre...
49156,49157,,#hillary #campaigned today in #ohio((omg)) &am...
49157,49158,,"happy, at work conference: right mindset leads..."
49158,49159,,"my song ""so glad"" free download! #shoegaze ..."


In [7]:
# (rows, columns) of the combined frame.
data.shape

(49159, 3)

## Removing handles 

In [18]:
def remove_handles(text, pattern):
    """Return ``text`` with every substring that matches ``pattern`` removed.

    Parameters
    ----------
    text : str
        The raw tweet text.
    pattern : str
        Regular expression describing what to strip (e.g. a Twitter handle).

    Returns
    -------
    str
        ``text`` with all matches of ``pattern`` deleted. Surrounding
        whitespace is left untouched.
    """
    # Substitute with the pattern itself in a single pass. Re-using each found
    # match as a new (unescaped) regex removed "@user" from inside "@username"
    # and failed on matches containing regex metacharacters.
    return re.sub(pattern, "", text)

In [19]:
# Strip @handles from every tweet into a new column, leaving `tweet` untouched.
# The pattern is a raw string: "\w" in a normal string literal is an invalid
# escape sequence (a warning today, slated to become an error).
data['filtered_tweets'] = np.vectorize(remove_handles)(data['tweet'], r"@['\w']*")

In [20]:
# Compare the new filtered_tweets column with the original tweet text.
data.head()

Unnamed: 0,id,label,tweet,filtered_tweets
0,1,0.0,@user when a father is dysfunctional and is s...,when a father is dysfunctional and is so sel...
1,2,0.0,@user @user thanks for #lyft credit i can't us...,thanks for #lyft credit i can't use cause th...
2,3,0.0,bihday your majesty,bihday your majesty
3,4,0.0,#model i love u take with u all the time in ...,#model i love u take with u all the time in ...
4,5,0.0,factsguide: society now #motivation,factsguide: society now #motivation


## Removing punctuation, numbers, and special characters (everything except letters and `#`)

In [30]:
# Replace every character that is not an ASCII letter or '#' with a space
# ('#' is kept so hashtags stay recognisable).
# regex=True is explicit because pandas >= 2.0 treats the pattern as a literal
# string by default, which would leave the tweets unchanged.
data['filtered_tweets'] = data['filtered_tweets'].str.replace(r"[^a-zA-Z#]", " ", regex=True)
data.head(5)

Unnamed: 0,id,label,tweet,filtered_tweets
0,1,0.0,@user when a father is dysfunctional and is s...,when a father is dysfunctional and is so sel...
1,2,0.0,@user @user thanks for #lyft credit i can't us...,thanks for #lyft credit i can t use cause th...
2,3,0.0,bihday your majesty,bihday your majesty
3,4,0.0,#model i love u take with u all the time in ...,#model i love u take with u all the time in ...
4,5,0.0,factsguide: society now #motivation,factsguide society now #motivation


## Removing short words (3 characters or fewer) as a stand-in for stop words

In [33]:
# Keep only words longer than 3 characters; short words are dropped.
# The length check must be on each word: testing len(tweet) kept every word of
# any tweet longer than 3 characters, i.e. filtered nothing.
data['filtered_tweets'] = data['filtered_tweets'].apply(
    lambda tweet: ' '.join(word for word in tweet.split() if len(word) > 3)
)
data.head()

Unnamed: 0,id,label,tweet,filtered_tweets
0,1,0.0,@user when a father is dysfunctional and is s...,when father dysfunctional selfish drags kids i...
1,2,0.0,@user @user thanks for #lyft credit i can't us...,thanks #lyft credit cause they offer wheelchai...
2,3,0.0,bihday your majesty,bihday your majesty
3,4,0.0,#model i love u take with u all the time in ...,#model love take with time
4,5,0.0,factsguide: society now #motivation,factsguide society #motivation


## Tokenisation

In [36]:
# Break each cleaned tweet into a list of whitespace-separated tokens.
tweet_words = data['filtered_tweets'].map(str.split)
tweet_words

0        [when, father, dysfunctional, selfish, drags, ...
1        [thanks, #lyft, credit, cause, they, offer, wh...
2                                  [bihday, your, majesty]
3                         [#model, love, take, with, time]
4                       [factsguide, society, #motivation]
                               ...                        
49154    [thought, factory, left, right, polarisation, ...
49155    [feeling, like, mermaid, #hairflip, #neverread...
49156    [#hillary, #campaigned, today, #ohio, used, wo...
49157    [happy, work, conference, right, mindset, lead...
49158    [song, glad, free, download, #shoegaze, #newmu...
Name: filtered_tweets, Length: 49159, dtype: object

## Stemming

In [40]:
from nltk import PorterStemmer

# One Porter stemmer instance, reused for every token.
stem_helper = PorterStemmer()

# Stem each token of each tweet, keeping the one-list-per-tweet layout.
tweet_words = tweet_words.apply(lambda tokens: list(map(stem_helper.stem, tokens)))

tweet_words.head()

0    [when, father, dysfunct, selfish, drag, kid, i...
1    [thank, #lyft, credit, cau, they, offer, wheel...
2                              [bihday, your, majesti]
3                     [#model, love, take, with, time]
4                         [factsguid, societi, #motiv]
Name: filtered_tweets, dtype: object

## Reassembling stemmed words for each tweet

In [41]:
# Join each tweet's stemmed tokens back into one space-separated string.
# One vectorised pass replaces the per-row `tweet_words[i] = ...` loop, which was
# slow and relied on the index labels being exactly 0..n-1.
tweet_words = tweet_words.apply(' '.join)

# Overwrite the cleaned column with the stemmed text (aligned on the row index).
data['filtered_tweets'] = tweet_words
data.head()

Unnamed: 0,id,label,tweet,filtered_tweets
0,1,0.0,@user when a father is dysfunctional and is s...,when father dysfunct selfish drag kid into dys...
1,2,0.0,@user @user thanks for #lyft credit i can't us...,thank #lyft credit cau they offer wheelchair v...
2,3,0.0,bihday your majesty,bihday your majesti
3,4,0.0,#model i love u take with u all the time in ...,#model love take with time
4,5,0.0,factsguide: society now #motivation,factsguid societi #motiv
