### Importing the required libraries


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [60]:
import tensorflow as tf
import keras
from keras.layers import Dense, CuDNNLSTM, Dropout, Embedding, SpatialDropout1D, Recurrent
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras import Sequential
from keras.utils import to_categorical
import re

### Importing the dataset


In [113]:
# Read the tab-separated reviews file, then give its two columns short,
# code-friendly names.
reviews_path = 'Restaurant_Reviews.tsv'
df = pd.read_csv(reviews_path, sep='\t')
df.columns = ['text', 'sentiment']

In [114]:
# Preview the first five rows to confirm the renamed columns.
df.head(n=5)

Unnamed: 0,text,sentiment
0,Wow... Loved this place.,1
1,Crust is not good.,0
2,Not tasty and the texture was just nasty.,0
3,Stopped by during the late May bank holiday of...,1
4,The selection on the menu was great and so wer...,1


### Text Preprocessing
1. Convert all the text to lower case.
2. Remove all punctuation and digits.
3. Tokenize the sentences using the Keras `Tokenizer`.
4. Convert the tokenized sentences into integer sequences.
5. Pad the sequences to a common length (by default Keras uses "pre" padding).


In [18]:
# Lower-case every review.
df['text'] = df['text'].str.lower()

In [20]:
# Replace every character that is not an ASCII letter (punctuation, digits,
# whitespace) with a single space.
non_letter = re.compile('[^a-zA-Z]')
df['text'] = df['text'].map(lambda review: non_letter.sub(' ', review))

In [89]:
# Vocabulary cap: passed to the Tokenizer as num_words here and reused later
# as the input dimension of the Embedding layer.
max_features = 2000

# Fit the tokenizer on the review texts.
corpus = df['text'].values
tokenizer = Tokenizer(num_words=max_features, split=' ')
tokenizer.fit_on_texts(corpus)

# Encode each review as a sequence of word indices, then pad the sequences
# so they can be stacked into the single array X that the model consumes.
sequences = tokenizer.texts_to_sequences(corpus)
X = pad_sequences(sequences)

In [90]:
# Model hyper-parameters:
#   embed_dim - second argument of the Embedding layer (embedding vector size)
#   lstm_out  - number of units given to the CuDNNLSTM layer
embed_dim, lstm_out = 128, 196

### Train & validation set splitting

In [91]:
from sklearn.model_selection import train_test_split

# One-hot encode the sentiment labels so they match the 2-unit softmax output
# and the categorical cross-entropy loss used by the model.
labels = df['sentiment'].values
Y = to_categorical(labels)

# Hold out 25% of the reviews for validation (the same proportion as the
# previous default split: 750 train / 250 validation).
# - random_state pins the shuffle so the partition is identical on every
#   Restart & Run All; without it each run trains and validates on different
#   rows and the results cannot be compared.
# - stratify keeps the positive/negative ratio the same in both subsets,
#   which matters for a validation set this small.
X_train, X_val, Y_train, Y_val = train_test_split(
    X,
    Y,
    test_size=0.25,
    random_state=42,
    stratify=labels,
)

### Model creation

In [110]:
# Layer stack, in order:
#   Embedding -> Dropout -> CuDNNLSTM -> Dropout -> 2-way softmax classifier.
# input_length is taken from the padded width of X.
model = Sequential([
    Embedding(max_features, embed_dim, input_length=X.shape[1]),
    Dropout(0.5),
    CuDNNLSTM(lstm_out),
    Dropout(0.5),
    Dense(2, activation="softmax"),
])

# The targets are one-hot encoded, hence categorical cross-entropy.
model.compile(
    loss="categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)


In [115]:
# Train for 10 epochs with a batch size of 64, passing the held-out split as
# validation data. Left as a bare expression so the returned History object
# is still displayed as the cell output.
model.fit(
    x=X_train,
    y=Y_train,
    batch_size=64,
    epochs=10,
    validation_data=(X_val, Y_val),
)

Train on 750 samples, validate on 250 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f22f65655f8>