# Twitter Sentiment Analysis

### Imports

In [109]:
import pandas as pd
import numpy as np
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords , wordnet
import string
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from nltk.stem.wordnet import WordNetLemmatizer
from nltk import pos_tag
from nltk import FreqDist
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer

from sklearn.svm import SVC
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

### Loading and Cleaning Data

In [2]:
# Read the training and test splits from CSV files in the working directory.
train_df, test_df = (pd.read_csv(csv_path) for csv_path in ('x_train.csv', 'x_test.csv'))

In [3]:
# Peek at the first rows of the test frame.
test_df.head()

Unnamed: 0,tweet_id,airline,airline_sentiment_gold,name,negativereason_gold,retweet_count,text,tweet_coord,tweet_created,tweet_location,user_timezone
0,569682010270101504,American,,zsalim03,,0,@AmericanAir In car gng to DFW. Pulled over 1h...,,2015-02-22 18:15:50 -0800,Texas,Central Time (US & Canada)
1,569608307184242688,American,,sa_craig,,0,"@AmericanAir after all, the plane didn’t land ...",,2015-02-22 13:22:57 -0800,"College Station, TX",Central Time (US & Canada)
2,567879304593408001,Southwest,,DanaChristos,,1,@SouthwestAir can't believe how many paying cu...,,2015-02-17 18:52:31 -0800,CT,Eastern Time (US & Canada)
3,569757651539660801,US Airways,,rossj987,,0,@USAirways I can legitimately say that I would...,,2015-02-22 23:16:24 -0800,"Washington, D.C.",Eastern Time (US & Canada)
4,569900705852608513,American,,tranpham18,,0,@AmericanAir still no response from AA. great ...,,2015-02-23 08:44:51 -0800,New York City,Eastern Time (US & Canada)


In [4]:
# Raw NumPy views of both frames (every column, not just the tweet text).
data, test = train_df.values, test_df.values

In [5]:
# Dimensions (rows, columns) of the training array.
data.shape

(10980, 12)

In [6]:
# Target labels: the 'airline_sentiment' column of the training frame as an array.
Y_train = train_df['airline_sentiment'].values

In [7]:
# Raw tweet text for the training and test sets, as arrays of strings.
tweets, test_tweets = (frame['text'].values for frame in (train_df, test_df))

In [113]:
# Tokens to discard while cleaning: NLTK's English stopword list followed by
# every character in string.punctuation.
punctuations = [*string.punctuation]
stop = [*stopwords.words('english'), *punctuations]

In [9]:
# Single WordNet lemmatizer instance reused by the tweet-cleaning loops.
lemmatizer = WordNetLemmatizer()

In [10]:
def get_pos_tag(word):
    """Return the WordNet part-of-speech constant to use when lemmatizing ``word``.

    The word is tagged on its own with ``pos_tag`` (no sentence context) and
    the first letter of the resulting tag selects the WordNet category.

    Parameters
    ----------
    word : str
        A single token.

    Returns
    -------
    str
        ``wordnet.ADJ`` for tags starting with ``J``, ``wordnet.VERB`` for
        ``V``, ``wordnet.NOUN`` for ``N`` and ``wordnet.ADV`` for ``R``.
        Any other tag falls back to ``wordnet.NOUN``.
    """
    tag = pos_tag([word])[0][1]
    # Tags starting with 'R' are adverbs and must map to wordnet.ADV (not ADJ),
    # otherwise adverbs are looked up as adjectives by the lemmatizer.
    prefix_to_pos = {
        'J': wordnet.ADJ,
        'V': wordnet.VERB,
        'N': wordnet.NOUN,
        'R': wordnet.ADV,
    }
    return prefix_to_pos.get(tag[:1], wordnet.NOUN)
    

In [11]:
# Clean every training tweet: tokenize, drop stopwords/punctuation and any
# token that is not purely alphabetic, lemmatize what is left, and re-join the
# survivors into one string per tweet.
# NOTE(review): the lemmatizer receives tokens in their original case, and
# capitalized tokens (e.g. 'Delays') show up unlemmatized in the displayed
# output — confirm whether lowercasing before lemmatizing is wanted.
stop_lookup = set(stop)  # `stop` is a list; a set avoids a linear scan per token
text = []
for line in tweets:
    kept = [
        lemmatizer.lemmatize(word, pos=get_pos_tag(word))
        for word in word_tokenize(line)
        if word.lower() not in stop_lookup and word.isalpha()
    ]
    # Every kept word is followed by one space, including the last one.
    text.append(''.join(kept_word + ' ' for kept_word in kept))

In [12]:
# Clean every test tweet with the same steps used for the training tweets:
# tokenize, drop stopwords/punctuation and any token that is not purely
# alphabetic, lemmatize what is left, and re-join into one string per tweet.
# NOTE(review): the lemmatizer receives tokens in their original case, and
# capitalized tokens (e.g. 'Pulled') show up unlemmatized in the displayed
# output — confirm whether lowercasing before lemmatizing is wanted.
stop_lookup = set(stop)  # `stop` is a list; a set avoids a linear scan per token
text_test = []
for line in test_tweets:
    kept = [
        lemmatizer.lemmatize(word, pos=get_pos_tag(word))
        for word in word_tokenize(line)
        if word.lower() not in stop_lookup and word.isalpha()
    ]
    # Every kept word is followed by one space, including the last one.
    text_test.append(''.join(kept_word + ' ' for kept_word in kept))

In [13]:
# Inspect the cleaned training tweets.
# NOTE(review): this displays the whole list; a slice such as text[:5] would
# keep the saved output short.
text

['SouthwestAir schedule morning day fact sure even flight one Cancelled Flightled ',
 'SouthwestAir see worker time time go beyond love fly guy Thank ',
 'united Flew ORD Miami back great crew service leg THANKS ',
 'SouthwestAir horse radish ',
 'united flight ORD delayed Air Force One last flight SBN min land ',
 'united load u fly sardine knew pilot hour Late Flight incompetent beyond belief ',
 'JetBlue stock response Delays frustrate poor cust serv amp told ppl wait amp come back ',
 'JetBlue nice Hoping rack enough mile take trip Seattle enjoy perfect latte city coffee ',
 'united frankly bad customer service ever Problems happen deal defines company Never United ',
 'SouthwestAir yeah haha Never one expensive much fun destinationdragons ',
 'SouthwestAir gt DCA flight almost full people screw Cancelled Flightation united USAirways Cancelled Flight ',
 'JetBlue easy way get ticket receipt get one check get one online Thanks ',
 'USAirways love change lounge cheese veggie olive ad

In [14]:
# Inspect the cleaned test tweets.
# NOTE(review): this displays the whole list; a slice such as text_test[:5]
# would keep the saved output short.
text_test

['AmericanAir car gng DFW Pulled ago icy road AA since Ca reach arpt Wat ',
 'AmericanAir plane land identical bad condition GRK accord METARs ',
 'SouthwestAir ca believe many pay customer left high dry reason flight Cancelled Flightlations Monday BDL Wow ',
 'USAirways legitimately say would rather driven cross country flown US Airways ',
 'AmericanAir still response AA great job guy ',
 'united developer fly tmrw morn min layover early flight layover move ',
 'USAirways hello Anyone ',
 'USAirways husainhaqqani Husain u shld protest well one ur party member Rehman Malik delayed PIA flight ',
 'USAirways likely flightaware say plane still Durango depart ',
 'AmericanAir even give option say line busy Plz try Late Flightr ',
 'united announcement pre boarding address mobility disability require travel lot stuff preboard ',
 'USAirways really embarrass ask complimentary detailed http amp argue ',
 'SouthwestAir passport time trip Could still fly photo ID thingsishouldknow ifeeldumb ',


In [168]:
# k is the maximum number of TF-IDF features (vocabulary size) the vectorizer keeps
k = 1300

In [169]:
# TF-IDF vectorizer over unigrams and bigrams (ngram_range=(1, 2)), capped at
# k features (max_features); max_df=0.8 ignores terms that appear in more than
# 80% of the documents.
tf_vector = TfidfVectorizer(max_features=k , max_df=0.8, ngram_range=(1,2))

In [170]:
# Learn the vocabulary/IDF weights from the cleaned training tweets and
# convert the resulting sparse matrix to a dense array in one step.
X_train = tf_vector.fit_transform(text).toarray()

In [171]:
# Project the cleaned test tweets onto the vocabulary fitted on the training
# set (transform only, no refit) and densify the result.
X_test = tf_vector.transform(text_test).toarray()

In [172]:
# Inspect the dense TF-IDF training matrix.
X_train

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [173]:
# Classify with a Multinomial Naive Bayes classifier

In [None]:
# NOTE(review): this cell was never executed (In [None]) and repeats, step for
# step, the three cells that follow it (create, fit/predict, save) — candidate
# for removal.
clf1 = MultinomialNB()

# Fit on the TF-IDF training matrix and predict labels for the test matrix.
clf1.fit(X_train, Y_train)
ypred1 = clf1.predict(X_test)

# Write the predicted labels to YPred1.csv, formatted as strings.
np.savetxt('YPred1.csv', ypred1, fmt='%s')

In [174]:
# Multinomial Naive Bayes classifier created with no arguments (library defaults).
clf1 = MultinomialNB()

In [175]:
# Train on the TF-IDF training matrix, then label the test matrix;
# fit() returns the estimator itself, so the two calls can be chained.
ypred1 = clf1.fit(X_train, Y_train).predict(X_test)

In [176]:
# Write the Naive Bayes predictions to YPred1.csv; fmt='%s' formats each label as a string.
np.savetxt('YPred1.csv', ypred1, fmt='%s')

In [177]:
# Classify with a support vector machine (SVM)

In [181]:
# Support vector classifier with C=6000 and gamma=0.0001; no kernel argument is
# passed, so the library's default kernel applies.
clf3 = SVC(C  = 6000, gamma=0.0001)

In [182]:
# Train the SVM on the TF-IDF training matrix, then label the test matrix;
# fit() returns the estimator itself, so the two calls can be chained.
ypred3 = clf3.fit(X_train, Y_train).predict(X_test)

In [183]:
# Write the SVM predictions to YPred3_SVM.csv; fmt='%s' formats each label as a string.
np.savetxt('YPred3_SVM.csv', ypred3, fmt = '%s')