In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

from sklearn.feature_extraction.text import CountVectorizer
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM, SpatialDropout1D
from sklearn.model_selection import train_test_split
from keras.utils.np_utils import to_categorical
from sklearn.utils import resample
from sklearn.utils import shuffle
from sklearn.metrics import confusion_matrix,classification_report
import re


  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [35]:
data = pd.read_csv('Sentiment.csv')

data = data[['text','sentiment']]

In [36]:
data[0:5]

Unnamed: 0,text,sentiment
0,RT @NancyLeeGrahn: How did everyone feel about...,Neutral
1,RT @ScottWalker: Didn't catch the full #GOPdeb...,Positive
2,RT @TJMShow: No mention of Tamir Rice and the ...,Neutral
3,RT @RobGeorge: That Carly Fiorina is trending ...,Positive
4,RT @DanScavino: #GOPDebate w/ @realDonaldTrump...,Positive


In [37]:
data['text'].apply(lambda x: str(x).lower())

0       rt @nancyleegrahn: how did everyone feel about...
1       rt @scottwalker: didn't catch the full #gopdeb...
2       rt @tjmshow: no mention of tamir rice and the ...
3       rt @robgeorge: that carly fiorina is trending ...
4       rt @danscavino: #gopdebate w/ @realdonaldtrump...
5       rt @gregabbott_tx: @tedcruz: "on my first day ...
6       rt @warriorwoman91: i liked her and was happy ...
7       going on #msnbc live with @thomasaroberts arou...
8       deer in the headlights rt @lizzwinstead: ben c...
9       rt @nancyosborne180: last night's debate prove...
10      @jgreendc @realdonaldtrump in all fairness #bi...
11      rt @waynedupreeshow: just woke up to tweet thi...
12      me reading my family's comments about how grea...
13      rt @arcticfox2016: rt @allenwestrepub "dear @j...
14      rt @pattonoswalt: i loved scott walker as mark...
15      hey @chrischristie exploiting the tragedy of 9...
16      rt @carolcnn: #donaldtrump under fire for comm...
17      rt @jo

In [38]:

#data = data[data.sentiment != "Neutral"]
data['text'] = data['text'].apply(lambda x: str(x).lower())
# removing special chars
data['text'] = data['text'].apply((lambda x: re.sub('[^a-zA-z0-9\s]','',x)))
#
data.head()

Unnamed: 0,text,sentiment
0,rt nancyleegrahn how did everyone feel about t...,Neutral
1,rt scottwalker didnt catch the full gopdebate ...,Positive
2,rt tjmshow no mention of tamir rice and the go...,Neutral
3,rt robgeorge that carly fiorina is trending h...,Positive
4,rt danscavino gopdebate w realdonaldtrump deli...,Positive


In [39]:
print(data[ data['sentiment'] == 'Positive'].size)
print(data[ data['sentiment'] == 'Negative'].size)
print(data[ data['sentiment'] == 'Neutral'].size)
for idx,row in data.iterrows():
    row[0] = row[0].replace('rt','')
data.head()

1586
5846
2576


Unnamed: 0,text,sentiment
0,nancyleegrahn how did everyone feel about the...,Neutral
1,scottwalker didnt catch the full gopdebate la...,Positive
2,tjmshow no mention of tamir rice and the gopd...,Neutral
3,robgeorge that carly fiorina is trending hou...,Positive
4,danscavino gopdebate w realdonaldtrump delive...,Positive


In [40]:
max_fatures = 2000
tokenizer = Tokenizer(num_words=max_fatures, split=' ')
tokenizer.fit_on_texts(data['text'].values)
X = tokenizer.texts_to_sequences(data['text'].values)
X = pad_sequences(X)
X[:2]

array([[   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0, 1357,   46,   60,  223,  384,   19,    2,  231,  198,
         102,   15,   25,  745,    1],
       [   0,    0,    0,    0,    0,    0,    0,    0,    0,  271,  108,
        1855,    2,  529,    1,   15,   25,  224,   27,  141,    5,  127,
         899,    9, 1193,  849,  746]], dtype=int32)

In [44]:
embed_dim = 128
lstm_out = 196

model = Sequential()
model.add(Embedding(max_fatures, embed_dim,input_length = X.shape[1]))
model.add(SpatialDropout1D(0.4))
model.add(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(3,activation='softmax'))
model.compile(loss = 'categorical_crossentropy', optimizer='adam',metrics = ['accuracy'])
print(model.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (None, 27, 128)           256000    
_________________________________________________________________
spatial_dropout1d_3 (Spatial (None, 27, 128)           0         
_________________________________________________________________
lstm_3 (LSTM)                (None, 196)               254800    
_________________________________________________________________
dense_3 (Dense)              (None, 3)                 591       
Total params: 511,391
Trainable params: 511,391
Non-trainable params: 0
_________________________________________________________________
None


In [42]:
Y = pd.get_dummies(data['sentiment']).values
Y

array([[0, 1, 0],
       [0, 0, 1],
       [0, 1, 0],
       ...,
       [0, 0, 1],
       [0, 1, 0],
       [1, 0, 0]], dtype=uint8)

In [43]:
X_train, X_test, Y_train, Y_test = train_test_split(X,Y, test_size = 0.20, random_state = 42)
print(X_train.shape,Y_train.shape)
print(X_test.shape,Y_test.shape)

(4003, 27) (4003, 3)
(1001, 27) (1001, 3)


In [45]:
batch_size = 128
model.fit(X_train, Y_train, epochs = 15, batch_size=batch_size, verbose = 1)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x7f7eb01e6780>

In [32]:
class_weights = {0: 1 ,
                1: 1.6/2 }
model.fit(X_train, Y_train, epochs = 15, batch_size=batch_size, verbose = 1,
          class_weight=class_weights)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x7f7ed3be58d0>