In this, we will see how to dp sentiment analysis of text data using Deep Neural Networks.

In [16]:
import pandas as pd
import numpy as np
import scipy as sp
from sklearn.cross_validation import train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
#from textblob import TextBlob, Word
from nltk.stem.snowball import SnowballStemmer
%matplotlib inline
from importlib import reload
import warnings
warnings.filterwarnings('ignore')
import tensorflow as tf
tf.set_random_seed(42)

### Read the dataset (tweets.csv)

In [17]:
def converttoutf8(a):
    return unicode(a, "utf-8")

In [18]:
data = pd.read_csv('tweets.csv', engine = 'python')

In [19]:
data.shape

(9093, 3)

In [20]:
data.head()

Unnamed: 0,tweet_text,emotion_in_tweet_is_directed_at,is_there_an_emotion_directed_at_a_brand_or_product
0,.@wesley83 I have a 3G iPhone. After 3 hrs twe...,iPhone,Negative emotion
1,@jessedee Know about @fludapp ? Awesome iPad/i...,iPad or iPhone App,Positive emotion
2,@swonderlin Can not wait for #iPad 2 also. The...,iPad,Positive emotion
3,@sxsw I hope this year's festival isn't as cra...,iPad or iPhone App,Negative emotion
4,@sxtxstate great stuff on Fri #SXSW: Marissa M...,Google,Positive emotion


### Consider only rows having Positive emotion and Negative emotion and remove other rows from the dataframe.

In [21]:
data1=data.copy(deep=True)

In [22]:
data2 = data1[~data1['is_there_an_emotion_directed_at_a_brand_or_product'].isin(["No emotion toward brand or product", "I can't tell"])]

In [23]:
data2.is_there_an_emotion_directed_at_a_brand_or_product.unique()

array(['Negative emotion', 'Positive emotion'], dtype=object)

### Change the labels for Positive and Negative emotions as 1 and 0 respectively.

Hint: use map on that column and give labels `or` You can use labelEncoder also.

In [24]:
from sklearn.preprocessing import LabelEncoder
enc = LabelEncoder()
data2['is_there_an_emotion_directed_at_a_brand_or_product'] = enc.fit_transform(data2['is_there_an_emotion_directed_at_a_brand_or_product'])

In [25]:
data2.head()

Unnamed: 0,tweet_text,emotion_in_tweet_is_directed_at,is_there_an_emotion_directed_at_a_brand_or_product
0,.@wesley83 I have a 3G iPhone. After 3 hrs twe...,iPhone,0
1,@jessedee Know about @fludapp ? Awesome iPad/i...,iPad or iPhone App,1
2,@swonderlin Can not wait for #iPad 2 also. The...,iPad,1
3,@sxsw I hope this year's festival isn't as cra...,iPad or iPhone App,0
4,@sxtxstate great stuff on Fri #SXSW: Marissa M...,Google,1


In [28]:
text=data2["tweet_text"]

In [29]:
text.head()

0    .@wesley83 I have a 3G iPhone. After 3 hrs twe...
1    @jessedee Know about @fludapp ? Awesome iPad/i...
2    @swonderlin Can not wait for #iPad 2 also. The...
3    @sxsw I hope this year's festival isn't as cra...
4    @sxtxstate great stuff on Fri #SXSW: Marissa M...
Name: tweet_text, dtype: object

In [30]:
senti=data2['is_there_an_emotion_directed_at_a_brand_or_product']
senti.head()

0    0
1    1
2    1
3    0
4    1
Name: is_there_an_emotion_directed_at_a_brand_or_product, dtype: int64


### Convert Text Into numbers

In [27]:
tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=3000)

#### Build Keras Tokenizer and fit on the text using `fit_on_texts` with no.of words=3000

In [31]:
tokenizer.fit_on_texts(text)

In [32]:
len(tokenizer.word_index)

6230


#### Convert Text Into numbers using `texts_to_matrix` with `TF-IDF` mode

In [35]:
features = tokenizer.texts_to_matrix(text, mode='tfidf')

In [36]:
features.shape

(3548, 3000)

### Build the Graph

#### Normalize the data using BatchNormalization layer, add fully connected layers with `200, 100, 60, 30, 1` neurons  with `relu` activations for hidden layers and `sigmoid` activation for the output layer. Use `binary_crossentropy` loss and `adam` optimizer for training the model. And, report the final validation accuracy.

In [37]:
#Initialize model, reshape & normalize data
model = tf.keras.models.Sequential()

#normalize data
model.add(tf.keras.layers.BatchNormalization(input_shape=(3000,)))

#Add Dense Layers
model.add(tf.keras.layers.Dense(200, activation='relu'))
model.add(tf.keras.layers.Dense(100, activation='relu'))
model.add(tf.keras.layers.Dense(60, activation='relu'))
model.add(tf.keras.layers.Dense(30, activation='relu'))
#Output layer
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

In [38]:
#Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [39]:
tensorboard = tf.keras.callbacks.TensorBoard(log_dir='/tmp/sentiment/dnn_v1')

In [42]:
#Train the model
model.fit(features,senti,          
          validation_split=0.2,
          callbacks=[tensorboard],
          epochs=30,
          batch_size=32)

Train on 2838 samples, validate on 710 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<tensorflow.python.keras.callbacks.History at 0x26e0c828208>

In [44]:
score = model.evaluate(features, senti,batch_size=32, verbose=1)

print('Test accuracy:', score[1])

Test accuracy: 0.9695603157379985
