
# **Classification Twitter Airline Sentiment Menggunakan Naive Bayes Classifier**







## Insert Library

In [1]:
import pandas as pd
import numpy as np
import os
import re
import string
import nltk
from nltk.tokenize import ToktokTokenizer
from nltk.corpus import stopwords
from nltk.stem import LancasterStemmer
import copy
import spacy
from nltk import classify
from nltk import NaiveBayesClassifier
from sklearn.model_selection import train_test_split
import collections
from nltk.metrics import *


In [17]:
tokenizer = ToktokTokenizer()
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

## Function

### Data Cleaner
Lowercase, hapus angka, hapus punctuation, hapus
whitespace, hapus special character, dan hapus sesuatu yang tidak diperlukan.

In [3]:
def cleaner(text, state):
  if state == True:
    text = text.lower()
    text = re.sub(r'\s\s+',' ', text)
    text = re.sub(r'http\S+', '', text) #hapus link http
    text = re.sub(r'@\S+', '', text) #hapus mention
    text = re.sub(r'Membalas', '', text) #hapus membalas
    text = re.sub(r'RT', '', text) #hapus RT
    text = re.sub(r"\.", '', text) #hapus titik
    text = re.sub(r",", '', text) #hapus koma
    text = re.sub(r":", '', text) #hapus titik dua
    text = re.sub(r"\?", '', text) #hapus tanda tanya
    text = re.sub(r"!", '', text) #hapus tanda seru
    text = re.sub(r"#", '', text) #hapus hashtag
    text = re.sub(r"\(", '', text) #hapus (
    text = re.sub(r"\)", '', text) #hapus )
    text = re.sub(r"\[", '', text) #hapus [
    text = re.sub(r"\]", '', text) #hapus ]
    for j in range(0,10):
      text = re.sub(r"{}".format(j), '', text) #hapus ]
    text = re.sub('(\\b[A-Za-z] \\b|\\b [A-Za-z]\\b)', '', text)
    text = re.sub(r"[^A-Za-z0-9().,<_>!?\'`]", ' ', text) #hapus entah apa ini :v
    text = re.sub(r"\s{2,}", ' ', text) #hapus entah apa ini :v (2)
    text = re.sub(r"[^a-zA-z.,!?/:;\"\'\s]", ' ', text) #hapus entah apa ini :v (2)
  else:
    text = ''.join([c for c in text if c not in string.punctuation])

  return text

### Stopwords
Menghapus kata-kata khusus berdasarkan situasi yang ada untuk membuatnya benar secara tata bahasa.


In [4]:
def remove_stopwords(text):
    tokens = tokenizer.tokenize(text)
    tokens = [token.strip() for token in tokens]
    t = [token for token in tokens if token.lower() not in stopword_list]
    text = ' '.join(t)    
    return text

### Stemming
Proses mereduksi kata-kata menjadi bentuk kata dasar.

In [5]:
def get_stem(text):
    tokens = tokenizer.tokenize(text)
    stemmer = LancasterStemmer()
    text = ' '.join([stemmer.stem(word) for word in tokens])
    return text

### Lemmatizing
Proses lanjutan dari stemming. Stemming mungkin tidak menghasilkan kata yang sebenarnya, sedangkan lemmatization melakukan konversi dengan benar dengan penggunaan kosakata, biasanya bertujuan untuk menghilangkan
akhiran infleksional saja dan untuk mengembalikan bentuk dasar dari sebuah kata.

In [6]:
nlp = spacy.load('en',parse=True,tag=True, entity=True)
def get_lem(text):
    text = nlp(text)
    text = ' '.join([word.lemma_ if word.lemma_ != '-PRON-' else word.text for word in text])
    return text

### Bag Of Words

In [7]:
def bag_of_words(text):
    text = tokenizer.tokenize(text)
    words_dictionary = dict([word, True] for word in text)    
    return words_dictionary

### Call all function

In [8]:
def allin1(text):
  text = cleaner(text, True)
  text = remove_stopwords(text)
  text = cleaner(text, False)
  text = get_lem(text)
  text = get_stem(text)
  text = bag_of_words(text)
  return text

In [9]:
def allin2(text):
  text = cleaner(text, True)
  text = remove_stopwords(text)
  text = cleaner(text, False)
  text = get_stem(text)
  text = bag_of_words(text)
  return text

In [10]:
def allin3(text):
  text = cleaner(text, True)
  text = remove_stopwords(text)
  text = cleaner(text, False)
  text = get_stem(text)
  text = get_lem(text)
  text = bag_of_words(text)
  return text

In [11]:
def allin4(text):
  text = cleaner(text, True)
  text = remove_stopwords(text)
  text = cleaner(text, False)
  text = get_lem(text)
  text = bag_of_words(text)
  return text

## Access File dan Preprocessing

In [12]:
# akses file 
df = pd.read_csv(r'/content/drive/MyDrive/Colab Notebooks/Tweets.csv')
df

Unnamed: 0,tweet_id,airline_sentiment,airline_sentiment_confidence,negativereason,negativereason_confidence,airline,airline_sentiment_gold,name,negativereason_gold,retweet_count,text,tweet_coord,tweet_created,tweet_location,user_timezone
0,570306133677760513,neutral,1.0000,,,Virgin America,,cairdin,,0,@VirginAmerica What @dhepburn said.,,2015-02-24 11:35:52 -0800,,Eastern Time (US & Canada)
1,570301130888122368,positive,0.3486,,0.0000,Virgin America,,jnardino,,0,@VirginAmerica plus you've added commercials t...,,2015-02-24 11:15:59 -0800,,Pacific Time (US & Canada)
2,570301083672813571,neutral,0.6837,,,Virgin America,,yvonnalynn,,0,@VirginAmerica I didn't today... Must mean I n...,,2015-02-24 11:15:48 -0800,Lets Play,Central Time (US & Canada)
3,570301031407624196,negative,1.0000,Bad Flight,0.7033,Virgin America,,jnardino,,0,@VirginAmerica it's really aggressive to blast...,,2015-02-24 11:15:36 -0800,,Pacific Time (US & Canada)
4,570300817074462722,negative,1.0000,Can't Tell,1.0000,Virgin America,,jnardino,,0,@VirginAmerica and it's a really big bad thing...,,2015-02-24 11:14:45 -0800,,Pacific Time (US & Canada)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14635,569587686496825344,positive,0.3487,,0.0000,American,,KristenReenders,,0,@AmericanAir thank you we got on a different f...,,2015-02-22 12:01:01 -0800,,
14636,569587371693355008,negative,1.0000,Customer Service Issue,1.0000,American,,itsropes,,0,@AmericanAir leaving over 20 minutes Late Flig...,,2015-02-22 11:59:46 -0800,Texas,
14637,569587242672398336,neutral,1.0000,,,American,,sanyabun,,0,@AmericanAir Please bring American Airlines to...,,2015-02-22 11:59:15 -0800,"Nigeria,lagos",
14638,569587188687634433,negative,1.0000,Customer Service Issue,0.6659,American,,SraJackson,,0,"@AmericanAir you have my money, you change my ...",,2015-02-22 11:59:02 -0800,New Jersey,Eastern Time (US & Canada)


In [13]:
#hapus column yang tidak perlu
df_drop = df.drop(['tweet_id', 'airline_sentiment_confidence', 'negativereason', 'negativereason_confidence', 'airline', 'airline_sentiment_gold', 'name', 'negativereason_gold', 'retweet_count', 'tweet_coord', 'tweet_created', 'tweet_location', 'user_timezone'], axis=1)
df_drop

Unnamed: 0,airline_sentiment,text
0,neutral,@VirginAmerica What @dhepburn said.
1,positive,@VirginAmerica plus you've added commercials t...
2,neutral,@VirginAmerica I didn't today... Must mean I n...
3,negative,@VirginAmerica it's really aggressive to blast...
4,negative,@VirginAmerica and it's a really big bad thing...
...,...,...
14635,positive,@AmericanAir thank you we got on a different f...
14636,negative,@AmericanAir leaving over 20 minutes Late Flig...
14637,neutral,@AmericanAir Please bring American Airlines to...
14638,negative,"@AmericanAir you have my money, you change my ..."


In [14]:
#hapus row yang nan
df_drop.dropna()
df = df_drop.reset_index(drop=True)
df

Unnamed: 0,airline_sentiment,text
0,neutral,@VirginAmerica What @dhepburn said.
1,positive,@VirginAmerica plus you've added commercials t...
2,neutral,@VirginAmerica I didn't today... Must mean I n...
3,negative,@VirginAmerica it's really aggressive to blast...
4,negative,@VirginAmerica and it's a really big bad thing...
...,...,...
14635,positive,@AmericanAir thank you we got on a different f...
14636,negative,@AmericanAir leaving over 20 minutes Late Flig...
14637,neutral,@AmericanAir Please bring American Airlines to...
14638,negative,"@AmericanAir you have my money, you change my ..."


In [15]:
for index, i in df.iterrows():
  i[1] = cleaner(i[1],True)

df

Unnamed: 0,airline_sentiment,text
0,neutral,what said
1,positive,plus you've added commercials to the experien...
2,neutral,didn'today must mean need to take another trip
3,negative,it'really aggressive to blast obnoxious enter...
4,negative,and it'really big bad thing about it
...,...,...
14635,positive,thank you we got on different flight to chicago
14636,negative,leaving over minutes late flight no warnings ...
14637,neutral,please bring american airlines to blackberry
14638,negative,you have my money you change my flight and do...


In [18]:
stopword_list = set(stopwords.words('english'))
for index, i in df.iterrows():
  i[1] = remove_stopwords(i[1])

df

Unnamed: 0,airline_sentiment,text
0,neutral,said
1,positive,plus ' added commercials experience tacky
2,neutral,' today must mean need take another trip
3,negative,' really aggressive blast obnoxious entertainm...
4,negative,' really big bad thing
...,...,...
14635,positive,thank got different flight chicago
14636,negative,leaving minutes late flight warnings communica...
14637,neutral,please bring american airlines blackberry
14638,negative,money change flight ' answer phones suggestion...


In [19]:
for index, i in df.iterrows():
  i[1] = cleaner(i[1], False)
df

Unnamed: 0,airline_sentiment,text
0,neutral,said
1,positive,plus added commercials experience tacky
2,neutral,today must mean need take another trip
3,negative,really aggressive blast obnoxious entertainme...
4,negative,really big bad thing
...,...,...
14635,positive,thank got different flight chicago
14636,negative,leaving minutes late flight warnings communica...
14637,neutral,please bring american airlines blackberry
14638,negative,money change flight answer phones suggestions...


In [20]:
df1 = copy.deepcopy(df)
df2 = copy.deepcopy(df)
df3 = copy.deepcopy(df)
df4 = copy.deepcopy(df)

### DF 1 use lemmatizing

In [21]:
for index, i in df1.iterrows():
  i[1] = get_lem(i[1])

df1

Unnamed: 0,airline_sentiment,text
0,neutral,say
1,positive,plus add commercial experience tacky
2,neutral,today must mean nee take another trip
3,negative,really aggressive blast obnoxious entertainm...
4,negative,really big bad thing
...,...,...
14635,positive,thank get different flight chicago
14636,negative,leave minute late flight warning communication...
14637,neutral,please bring american airlines blackberry
14638,negative,money change flight answer phone suggestion ...


In [22]:
# split dataset jadi train sama test
x_train1, x_test1, y_train1, y_test1 = train_test_split(df1['text'], df1['airline_sentiment'], test_size = 0.3, random_state = 0)

In [23]:
xtr1 = []
for index, i in x_train1.items():
  i = bag_of_words(i)
  xtr1.append(i)

xts1 = []
for index, i in x_test1.items():
  i = bag_of_words(i)
  xts1.append(i)

In [24]:
tr1 = []
ts1 = []

for i,j in zip(xtr1, y_train1):
  tr1.append((i,j))
for i,j in zip(xts1, y_test1):
  ts1.append((i,j))

In [25]:
classifier1 = NaiveBayesClassifier.train(tr1)
accuracy1 = classify.accuracy(classifier1, ts1)

In [27]:
print(accuracy1)
print (classifier1.show_most_informative_features(10))

0.7627504553734062
Most Informative Features
                favorite = True           positi : negati =     31.7 : 1.0
               fantastic = True           positi : negati =     31.7 : 1.0
                    rock = True           positi : negati =     31.0 : 1.0
             outstanding = True           positi : negati =     29.2 : 1.0
                 helpful = True           positi : neutra =     24.7 : 1.0
                  dragon = True           neutra : negati =     24.4 : 1.0
                   kudos = True           positi : negati =     24.1 : 1.0
                   shout = True           positi : negati =     24.1 : 1.0
                    hold = True           negati : positi =     21.7 : 1.0
                   flyfi = True           positi : negati =     21.6 : 1.0
None


In [28]:
trs1 = collections.defaultdict(set)
tss1 = collections.defaultdict(set)
 
for i, (text, label) in enumerate(ts1):
    trs1[label].add(i)
    observed = classifier1.classify(text)
    tss1[observed].add(i)

print ('positive precision:', precision(trs1['positive'], tss1['positive']))
print ('positive recall:', recall(trs1['positive'], tss1['positive']))
print ('positive F-measure:', f_measure(trs1['positive'], tss1['positive']))
print ('===============================================')
print ('neutral precision:', precision(trs1['neutral'], tss1['neutral']))
print ('neutral recall:', recall(trs1['neutral'], tss1['neutral']))
print ('neutral F-measure:', f_measure(trs1['neutral'], tss1['neutral']))
print ('===============================================')
print ('negative precision:', precision(trs1['negative'], tss1['negative']))
print ('negative recall:', recall(trs1['negative'], tss1['negative']))
print ('negative F-measure:', f_measure(trs1['negative'], tss1['negative']))

positive precision: 0.7532956685499058
positive recall: 0.583941605839416
positive F-measure: 0.6578947368421053
neutral precision: 0.6863207547169812
neutral recall: 0.31664853101196955
neutral F-measure: 0.4333581533879375
negative precision: 0.7736398021530404
negative recall: 0.9537302725968436
negative F-measure: 0.85429718875502


### DF 2 use lemmatizing then stemming

In [29]:
for index, i in df2.iterrows():
  i[1] = get_lem(i[1])

df2

Unnamed: 0,airline_sentiment,text
0,neutral,say
1,positive,plus add commercial experience tacky
2,neutral,today must mean nee take another trip
3,negative,really aggressive blast obnoxious entertainm...
4,negative,really big bad thing
...,...,...
14635,positive,thank get different flight chicago
14636,negative,leave minute late flight warning communication...
14637,neutral,please bring american airlines blackberry
14638,negative,money change flight answer phone suggestion ...


In [30]:
for index, i in df2.iterrows():
  i[1] = get_stem(i[1])

df2

Unnamed: 0,airline_sentiment,text
0,neutral,say
1,positive,plu ad commerc expery tacky
2,neutral,today must mean nee tak anoth trip
3,negative,real aggress blast obnoxy entertain guest fac ...
4,negative,real big bad thing
...,...,...
14635,positive,thank get diff flight chicago
14636,negative,leav minut lat flight warn commun minut lat fl...
14637,neutral,pleas bring am airlin blackberry
14638,negative,money chang flight answ phon suggest mak commit


In [31]:
# split dataset jadi train sama test
x_train2, x_test2, y_train2, y_test2 = train_test_split(df2['text'], df2['airline_sentiment'], test_size = 0.3, random_state = 0)

In [32]:
xtr2 = []
for index, i in x_train2.items():
  i = bag_of_words(i)
  xtr2.append(i)

xts2 = []
for index, i in x_test2.items():
  i = bag_of_words(i)
  xts2.append(i)

In [33]:
tr2 = []
ts2 = []

for i,j in zip(xtr2, y_train2):
  tr2.append((i,j))
for i,j in zip(xts2, y_test2):
  ts2.append((i,j))

In [34]:
classifier2 = NaiveBayesClassifier.train(tr2)
accuracy2 = classify.accuracy(classifier2, ts2)

In [35]:
print(accuracy2)
print (classifier2.show_most_informative_features(10))

0.755464480874317
Most Informative Features
                    kudo = True           positi : negati =     46.9 : 1.0
                 favorit = True           positi : negati =     31.7 : 1.0
                 fantast = True           positi : negati =     31.7 : 1.0
                outstand = True           positi : negati =     29.2 : 1.0
                  beauty = True           positi : negati =     26.6 : 1.0
                  dragon = True           neutra : negati =     24.4 : 1.0
                   shout = True           positi : negati =     24.1 : 1.0
                    flyf = True           positi : negati =     21.6 : 1.0
                  awesom = True           positi : negati =     21.3 : 1.0
                     hrs = True           negati : positi =     20.5 : 1.0
None


In [36]:
trs2 = collections.defaultdict(set)
tss2 = collections.defaultdict(set)
 
for i, (text, label) in enumerate(ts2):
    trs2[label].add(i)
    observed = classifier2.classify(text)
    tss2[observed].add(i)

print ('positive precision:', precision(trs2['positive'], tss2['positive']))
print ('positive recall:', recall(trs2['positive'], tss2['positive']))
print ('positive F-measure:', f_measure(trs2['positive'], tss2['positive']))
print ('===============================================')
print ('neutral precision:', precision(trs2['neutral'], tss2['neutral']))
print ('neutral recall:', recall(trs2['neutral'], tss2['neutral']))
print ('neutral F-measure:', f_measure(trs2['neutral'], tss2['neutral']))
print ('===============================================')
print ('negative precision:', precision(trs2['negative'], tss2['negative']))
print ('negative recall:', recall(trs2['negative'], tss2['negative']))
print ('negative F-measure:', f_measure(trs2['negative'], tss2['negative']))

positive precision: 0.7624750499001997
positive recall: 0.5576642335766423
positive F-measure: 0.6441821247892074
neutral precision: 0.6920980926430518
neutral recall: 0.2763873775843308
neutral F-measure: 0.3950233281493002
negative precision: 0.761066969353008
negative recall: 0.9619799139167863
negative F-measure: 0.8498098859315589


### DF 3 use stemming

In [37]:
for index, i in df3.iterrows():
  i[1] = get_stem(i[1])

df3

Unnamed: 0,airline_sentiment,text
0,neutral,said
1,positive,plu ad commerc expery tacky
2,neutral,today must mean nee tak anoth trip
3,negative,real aggress blast obnoxy entertain guest fac ...
4,negative,real big bad thing
...,...,...
14635,positive,thank got diff flight chicago
14636,negative,leav minut lat flight warn commun minut lat fl...
14637,neutral,pleas bring am airlin blackberry
14638,negative,money chang flight answ phon suggest mak commit


In [38]:
# split dataset jadi train sama test
x_train3, x_test3, y_train3, y_test3 = train_test_split(df3['text'], df3['airline_sentiment'], test_size = 0.3, random_state = 0)

In [39]:
xtr3 = []
for index, i in x_train3.items():
  i = bag_of_words(i)
  xtr3.append(i)

xts3 = []
for index, i in x_test3.items():
  i = bag_of_words(i)
  xts3.append(i)

In [40]:
tr3 = []
ts3 = []

for i,j in zip(xtr3, y_train3):
  tr3.append((i,j))
for i,j in zip(xts3, y_test3):
  ts3.append((i,j))

In [41]:
classifier3 = NaiveBayesClassifier.train(tr3)
accuracy3 = classify.accuracy(classifier3, ts3)

In [42]:
print(accuracy3)
print (classifier3.show_most_informative_features(10))

0.7552367941712204
Most Informative Features
                    kudo = True           positi : negati =     46.9 : 1.0
                 favorit = True           positi : negati =     31.7 : 1.0
                 fantast = True           positi : negati =     31.7 : 1.0
                outstand = True           positi : negati =     29.2 : 1.0
                  beauty = True           positi : negati =     26.6 : 1.0
                  dragon = True           neutra : negati =     24.4 : 1.0
                   shout = True           positi : negati =     24.1 : 1.0
                    flyf = True           positi : negati =     21.6 : 1.0
                  awesom = True           positi : negati =     21.3 : 1.0
                     hrs = True           negati : positi =     21.0 : 1.0
None


In [43]:
trs3 = collections.defaultdict(set)
tss3 = collections.defaultdict(set)
 
for i, (text, label) in enumerate(ts3):
    trs3[label].add(i)
    observed = classifier3.classify(text)
    tss3[observed].add(i)

print ('positive precision:', precision(trs3['positive'], tss3['positive']))
print ('positive recall:', recall(trs3['positive'], tss3['positive']))
print ('positive F-measure:', f_measure(trs3['positive'], tss3['positive']))
print ('===============================================')
print ('neutral precision:', precision(trs3['neutral'], tss3['neutral']))
print ('neutral recall:', recall(trs3['neutral'], tss3['neutral']))
print ('neutral F-measure:', f_measure(trs3['neutral'], tss3['neutral']))
print ('===============================================')
print ('negative precision:', precision(trs3['negative'], tss3['negative']))
print ('negative recall:', recall(trs3['negative'], tss3['negative']))
print ('negative F-measure:', f_measure(trs3['negative'], tss3['negative']))

positive precision: 0.7624750499001997
positive recall: 0.5576642335766423
positive F-measure: 0.6441821247892074
neutral precision: 0.6844919786096256
neutral recall: 0.27856365614798695
neutral F-measure: 0.39597834493426143
negative precision: 0.761728746090418
negative recall: 0.9609038737446198
negative F-measure: 0.8498017446471053


### DF 4 use stemming then lemmatizing

In [44]:
for index, i in df4.iterrows():
  i[1] = get_stem(i[1])

df4

Unnamed: 0,airline_sentiment,text
0,neutral,said
1,positive,plu ad commerc expery tacky
2,neutral,today must mean nee tak anoth trip
3,negative,real aggress blast obnoxy entertain guest fac ...
4,negative,real big bad thing
...,...,...
14635,positive,thank got diff flight chicago
14636,negative,leav minut lat flight warn commun minut lat fl...
14637,neutral,pleas bring am airlin blackberry
14638,negative,money chang flight answ phon suggest mak commit


In [45]:
for index, i in df4.iterrows():
  i[1] = get_lem(i[1])

df4

Unnamed: 0,airline_sentiment,text
0,neutral,say
1,positive,plu ad commerc expery tacky
2,neutral,today must mean nee tak anoth trip
3,negative,real aggress blast obnoxy entertain guest fac ...
4,negative,real big bad thing
...,...,...
14635,positive,thank get diff flight chicago
14636,negative,leav minut lat flight warn commun minut lat fl...
14637,neutral,plea bring be airlin blackberry
14638,negative,money chang flight answ phon suggest mak commit


In [46]:
# split dataset jadi train sama test
x_train4, x_test4, y_train4, y_test4 = train_test_split(df4['text'], df4['airline_sentiment'], test_size = 0.3, random_state = 0)

In [47]:
xtr4 = []
for index, i in x_train4.items():
  i = bag_of_words(i)
  xtr4.append(i)

xts4 = []
for index, i in x_test4.items():
  i = bag_of_words(i)
  xts4.append(i)

In [48]:
tr4 = []
ts4 = []

for i,j in zip(xtr4, y_train4):
  tr4.append((i,j))
for i,j in zip(xts4, y_test4):
  ts4.append((i,j))

In [49]:
classifier4 = NaiveBayesClassifier.train(tr4)
accuracy4 = classify.accuracy(classifier4, ts4)

In [50]:
print(accuracy4)
print (classifier4.show_most_informative_features(10))

0.7547814207650273
Most Informative Features
                    kudo = True           positi : negati =     46.9 : 1.0
                 favorit = True           positi : negati =     31.7 : 1.0
                 fantast = True           positi : negati =     31.7 : 1.0
                outstand = True           positi : negati =     29.2 : 1.0
                  beauty = True           positi : negati =     26.6 : 1.0
                  dragon = True           neutra : negati =     24.4 : 1.0
                   shout = True           positi : negati =     24.1 : 1.0
                    flyf = True           positi : negati =     21.6 : 1.0
                  awesom = True           positi : negati =     21.3 : 1.0
                     hrs = True           negati : positi =     20.8 : 1.0
None


In [51]:
trs4 = collections.defaultdict(set)
tss4 = collections.defaultdict(set)
 
for i, (text, label) in enumerate(ts4):
    trs4[label].add(i)
    observed = classifier4.classify(text)
    tss4[observed].add(i)

print ('positive precision:', precision(trs4['positive'], tss4['positive']))
print ('positive recall:', recall(trs4['positive'], tss4['positive']))
print ('positive F-measure:', f_measure(trs4['positive'], tss4['positive']))
print ('===============================================')
print ('neutral precision:', precision(trs4['neutral'], tss4['neutral']))
print ('neutral recall:', recall(trs4['neutral'], tss4['neutral']))
print ('neutral F-measure:', f_measure(trs4['neutral'], tss4['neutral']))
print ('===============================================')
print ('negative precision:', precision(trs4['negative'], tss4['negative']))
print ('negative recall:', recall(trs4['negative'], tss4['negative']))
print ('negative F-measure:', f_measure(trs4['negative'], tss4['negative']))

positive precision: 0.7599206349206349
positive recall: 0.5591240875912409
positive F-measure: 0.6442388561816652
neutral precision: 0.6818181818181818
neutral recall: 0.27747551686615884
neutral F-measure: 0.3944315545243619
negative precision: 0.7618099032441662
negative recall: 0.9601865136298422
negative F-measure: 0.8495715645826721


### check

In [52]:
df['airline_sentiment'].value_counts()

negative    9178
neutral     3099
positive    2363
Name: airline_sentiment, dtype: int64

In [56]:
print(x_train1)

12647    space seat thank pilot   back mn early ohare  ...
3652     change fee exact flight atl hou charge math   add
11570    bad experience ever unable help phone tell las...
1278       book flight reserve seat go check find seat wtf
6052                                             okay sign
                               ...                        
13123                                           followback
3264            thank help wish phone rep could accomidate
9845                          bad ever dca customerservice
10799                             look another apology fly
2732     far bad airline plane delay round trip flight ...
Name: text, Length: 10248, dtype: object


In [54]:
print('rows in test set: ' + str(x_test1.shape))
print('rows in train set: ' + str(x_train1.shape))

rows in test set: (4392,)
rows in train set: (10248,)


## Try it on Custom Tweet

In [57]:
custom_tweet = "I’ve been flying for the past 22 years with reputed companies. But my experience with @emirateshas been the best so far. The food, the service, the staff who are super friendly and always looking forward to serve their passengers with a genuine smile. I’m extremely delighted. Folded hands"

In [59]:
custom_tweet_set = allin1(custom_tweet)
prob_result = classifier1.prob_classify(custom_tweet_set)

print("Classified as: ", prob_result)
print("Classification category: ", prob_result.max())
print("Negative probability : ", prob_result.prob("negative"))
print("Positive probability : ", prob_result.prob("positive"))
print("Neutral probability : ", prob_result.prob("neutral"))

Classified as:  <ProbDist with 3 samples>
Classification category:  positive
Negative probability :  0.0036016969081674975
Positive probability :  0.9962544871022817
Neutral probability :  0.000143815989550069


In [60]:
custom_tweet_set = allin2(custom_tweet)
prob_result = classifier2.prob_classify(custom_tweet_set)

print("Classified as: ", prob_result)
print("Classification category: ", prob_result.max())
print("Negative probability : ", prob_result.prob("negative"))
print("Positive probability : ", prob_result.prob("positive"))
print("Neutral probability : ", prob_result.prob("neutral"))

Classified as:  <ProbDist with 3 samples>
Classification category:  positive
Negative probability :  0.007731762077774628
Positive probability :  0.9922681057624413
Neutral probability :  1.3215977057935248e-07


In [61]:
custom_tweet_set = allin3(custom_tweet)
prob_result = classifier3.prob_classify(custom_tweet_set)

print("Classified as: ", prob_result)
print("Classification category: ", prob_result.max())
print("Negative probability : ", prob_result.prob("negative"))
print("Positive probability : ", prob_result.prob("positive"))
print("Neutral probability : ", prob_result.prob("neutral"))

Classified as:  <ProbDist with 3 samples>
Classification category:  positive
Negative probability :  0.003457511190397746
Positive probability :  0.9965422326390511
Neutral probability :  2.561705645829048e-07


In [62]:
custom_tweet_set = allin4(custom_tweet)
prob_result = classifier4.prob_classify(custom_tweet_set)

print("Classified as: ", prob_result)
print("Classification category: ", prob_result.max())
print("Negative probability : ", prob_result.prob("negative"))
print("Positive probability : ", prob_result.prob("positive"))
print("Neutral probability : ", prob_result.prob("neutral"))

Classified as:  <ProbDist with 3 samples>
Classification category:  positive
Negative probability :  0.080786285233816
Positive probability :  0.9144191893623851
Neutral probability :  0.004794525403803674
