In [2]:
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM, SpatialDropout1D
from matplotlib import pyplot
from sklearn.model_selection import train_test_split
from keras.utils.np_utils import to_categorical
import re
from sklearn.preprocessing import LabelEncoder
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV

In [3]:
#Reading the Data
data = pd.read_csv('Sentiment.csv')

# Keeping only the neccessary columns
data = data[['text','sentiment']]

In [4]:
data['text'] = data['text'].apply(lambda x: x.lower())
data['text'] = data['text'].apply((lambda x: re.sub('[^a-zA-z0-9\s]', '', x)))

In [5]:
for idx, row in data.iterrows():
    row[0] = row[0].replace('rt', ' ')

In [6]:
max_fatures = 2000

#Splitting the data and using Tokenizer
tokenizer = Tokenizer(num_words=max_fatures, split=' ')
tokenizer.fit_on_texts(data['text'].values)
X = tokenizer.texts_to_sequences(data['text'].values)

X = pad_sequences(X)
print(X)

[[   0    0    0 ...   51 1039    1]
 [   0    0    0 ... 1577 1356  847]
 [   0    0    0 ...   10  696  518]
 ...
 [   0    0    0 ...   68   62    3]
 [   0    0    0 ... 1112 1588   81]
 [   0    0    0 ...  196    3  880]]


In [8]:
embed_dim = 128
lstm_out = 196

#Creating The Model
def createmodel():
    model = Sequential()
    model.add(Embedding(max_fatures, embed_dim,input_length = X.shape[1]))
    model.add(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2))
    model.add(Dense(3,activation='softmax'))
    model.compile(loss = 'categorical_crossentropy', optimizer='adam',metrics = ['accuracy'])
    return model

In [9]:
labelencoder = LabelEncoder()
integer_encoded = labelencoder.fit_transform(data['sentiment'])

#Obtaining X Train, Y Train, Xtest and Y Test
y = to_categorical(integer_encoded)
X_train, X_test, Y_train, Y_test = train_test_split(X,y, test_size = 0.33, random_state = 42)

In [10]:
batch_size = 40

#Applying the Keras Classifier
model = KerasClassifier(build_fn=createmodel,verbose=0)

In [11]:
epochs = [1, 2, 3]
param_grid= dict(epochs=epochs)
grid  = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=1)
grid_result= grid.fit(X_train, Y_train,batch_size=40)

#Obtaining the scores
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

Best: 0.680297 using {'epochs': 2}
