In [11]:
import pandas as pd
import numpy as np
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM
from matplotlib import pyplot
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.utils.np_utils import to_categorical
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasClassifier
import re


In [12]:
# import data
data = pd.read_csv("Sentiment.csv" ,usecols = ["text", "sentiment"])
data.text = data.text.apply(lambda x: x.lower()) 
data['text'] = data['text'].apply((lambda x: re.sub('[^a-zA-z0-9\s]', '', x)))
data.head()


Unnamed: 0,sentiment,text
0,Neutral,rt nancyleegrahn how did everyone feel about t...
1,Positive,rt scottwalker didnt catch the full gopdebate ...
2,Neutral,rt tjmshow no mention of tamir rice and the go...
3,Positive,rt robgeorge that carly fiorina is trending h...
4,Positive,rt danscavino gopdebate w realdonaldtrump deli...


In [13]:
print( 'Positive: ', data[data['sentiment'] == 'Positive'].size)
print( 'Negative: ', data[data['sentiment'] == 'Negative'].size)
print( 'Neutral : ', data[data['sentiment'] == 'Neutral'].size)

Positive:  4472
Negative:  16986
Neutral :  6284


In [14]:
for idx, row in data.iterrows():
    row[1] = row[1].replace('rt', ' ')

In [15]:
data.head()

Unnamed: 0,sentiment,text
0,Neutral,nancyleegrahn how did everyone feel about th...
1,Positive,scottwalker didnt catch the full gopdebate l...
2,Neutral,tjmshow no mention of tamir rice and the gop...
3,Positive,robgeorge that carly fiorina is trending ho...
4,Positive,danscavino gopdebate w realdonaldtrump deliv...


In [16]:
max_fatures = 2000
tokenizer = Tokenizer(num_words=max_fatures, split=' ')
tokenizer.fit_on_texts(data['text'].values)
X = tokenizer.texts_to_sequences(data['text'].values)
X = pad_sequences(X)


In [17]:
embed_dim = 128
lstm_out = 196
def createmodel():
    model = Sequential()
    model.add(Embedding(max_fatures, embed_dim,input_length = X.shape[1]))
    model.add(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2))
    model.add(Dense(3,activation='softmax'))
    model.compile(loss = 'categorical_crossentropy', optimizer='adam',metrics = ['accuracy'])
    return model

In [18]:
labelencoder = LabelEncoder()
integer_encoded = labelencoder.fit_transform(data['sentiment'])
y = to_categorical(integer_encoded)
X_train, X_test, Y_train, Y_test = train_test_split(X,y, test_size = 0.33, random_state = 42)

In [19]:
model = KerasClassifier(build_fn=createmodel, verbose=0)
batch_size = [32, 64]
epochs = [1, 2]
param_grid = dict(batch_size=batch_size, epochs=epochs)

grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=1)
grid_result = grid.fit(X_train, Y_train)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))


Best: 0.680188 using {'batch_size': 64, 'epochs': 2}
