In [1]:
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM, SpatialDropout1D
from matplotlib import pyplot
from sklearn.model_selection import train_test_split
from keras.utils.np_utils import to_categorical
import re
from sklearn.preprocessing import LabelEncoder

Using TensorFlow backend.


# New Section

In [0]:
# Read the sentiment CSV and keep only the necessary columns:
# the tweet text and its sentiment label.
data = pd.read_csv('Sentiment.csv')[['text', 'sentiment']]

In [0]:
# Lower-case every tweet, then drop everything that is not a letter, a digit
# or whitespace.
# The character class is written as A-Z (not A-z): the A-z range also spans
# the ASCII characters between 'Z' and 'a' ([ \ ] ^ _ `), which would let
# those symbols survive the clean-up. The raw string keeps \s a regex escape
# instead of an (invalid) Python string escape.
data['text'] = data['text'].apply(
    lambda x: re.sub(r'[^a-zA-Z0-9\s]', '', x.lower())
)

In [0]:
# Remove the retweet marker "rt" from the tweet text.
# The result is assigned back to the column: writing to the rows yielded by
# iterrows() only changes temporary copies and leaves `data` untouched.
# The \b word boundaries restrict the match to the standalone token, so words
# that merely contain the letters "rt" (e.g. "party", "start") stay intact.
data['text'] = data['text'].apply(lambda x: re.sub(r'\brt\b', ' ', x))

In [0]:
# Vocabulary cap handed to the Tokenizer (num_words); the model-building
# function reuses it as the Embedding input dimension.
max_fatures = 2000
tokenizer = Tokenizer(split=' ', num_words=max_fatures)
tokenizer.fit_on_texts(data['text'].values)

# Convert each tweet to a sequence of word indices and pad the sequences into
# the 2-D array whose second dimension the model reads as its input length.
X = pad_sequences(tokenizer.texts_to_sequences(data['text'].values))

In [0]:
def createmodel():
    """Build and compile the three-class LSTM sentiment classifier.

    Reads the notebook-level ``max_fatures`` (Embedding input dimension) and
    ``X`` (its second dimension is used as ``input_length``) at call time.

    Returns:
        A compiled ``Sequential`` model: Embedding -> LSTM -> Dense(3, softmax),
        trained with categorical cross-entropy, the Adam optimizer and an
        accuracy metric.
    """
    embed_dim = 128
    lstm_out = 196
    layers = [
        Embedding(max_fatures, embed_dim, input_length=X.shape[1]),
        LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2),
        Dense(3, activation='softmax'),
    ]
    model = Sequential(layers)
    model.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return model

In [7]:
# Map the string sentiment labels to integers and show both forms so the
# integer <-> label correspondence can be read off the output.
labelencoder = LabelEncoder()
integer_encoded = labelencoder.fit_transform(data['sentiment'])
print(integer_encoded)
print(data['sentiment'])

# One-hot encode the integer labels for the categorical cross-entropy loss.
y = to_categorical(integer_encoded)

# Hold out 33% of the samples for testing; the fixed seed keeps the split
# the same on every run.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)

[1 2 1 ... 2 0 2]
0         Neutral
1        Positive
2         Neutral
3        Positive
4        Positive
           ...   
13866    Negative
13867    Positive
13868    Positive
13869    Negative
13870    Positive
Name: sentiment, Length: 13871, dtype: object


In [8]:
batch_size = 32

# Build a fresh model and train it on the training split.
model = createmodel()
model.fit(X_train, Y_train, batch_size=batch_size, epochs=20, verbose=2)

# Evaluate on the held-out split; evaluate() returns the values in the order
# given by model.metrics_names, which is printed last for reference.
score, acc = model.evaluate(X_test, Y_test, batch_size=batch_size, verbose=2)
print('score', score)
print('accuracy', acc)
print('Model Metrics Names', model.metrics_names)

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 1/20
 - 17s - loss: 0.8292 - accuracy: 0.6368
Epoch 2/20
 - 17s - loss: 0.6871 - accuracy: 0.7080
Epoch 3/20
 - 17s - loss: 0.6229 - accuracy: 0.7386
Epoch 4/20
 - 17s - loss: 0.5807 - accuracy: 0.7572
Epoch 5/20
 - 17s - loss: 0.5423 - accuracy: 0.7745
Epoch 6/20
 - 18s - loss: 0.5025 - accuracy: 0.7939
Epoch 7/20
 - 17s - loss: 0.4727 - accuracy: 0.8085
Epoch 8/20
 - 17s - loss: 0.4381 - accuracy: 0.8198
Epoch 9/20
 - 16s - loss: 0.4054 - accuracy: 0.8373
Epoch 10/20
 - 17s - loss: 0.3783 - accuracy: 0.8454
Epoch 11/20
 - 17s - loss: 0.3625 - accuracy: 0.8513
Epoch 12/20
 - 17s - loss: 0.3370 - accuracy: 0.8629
Epoch 13/20
 - 17s - loss: 0.3184 - accuracy: 0.8698
Epoch 14/20
 - 16s - loss: 0.3009 - accuracy: 0.8767
Epoch 15/20
 - 16s - loss: 0.2874 - accuracy: 0.8834
Epoch 16/20
 - 16s - loss: 0.2775 - accuracy: 0.8883
Epoch 17/20
 - 17s - loss: 0.2689 - accuracy: 0.8929
Epoch 18/20
 - 17s - loss: 0.2538 - accuracy: 0.8964
Epoch 19/20
 - 17s - loss: 0.2482 - accuracy: 0.8953
Ep

In [0]:
# Write the trained model to 'Chakradhar.h5' so it can be reloaded later.
model.save('Chakradhar.h5')

In [10]:
from keras.models import load_model

# Replace the in-memory model with the one read back from 'Chakradhar.h5'.
model = load_model('Chakradhar.h5')

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


In [11]:
import numpy as np

sentence = ['A lot of good things are happening. We are respected again throughout the world, and that is a great thing']
# Vectorize the tweet with the tokenizer that was fitted on the training text.
sentence = tokenizer.texts_to_sequences(sentence)
# Pad to the sequence length the loaded model was built with instead of a
# hard-coded 28, so the cell keeps working if the training data changes.
sentence = pad_sequences(sentence, maxlen=model.input_shape[1], dtype='int32', value=0)
# Take the arg-max over the softmax output; this is what predict_classes did,
# but predict_classes is no longer available in newer Keras releases.
probabilities = model.predict(sentence, batch_size=1, verbose=2)
sentiment = np.argmax(probabilities, axis=-1)[0]
print(sentiment)
# Translate the class index back to its label with the fitted LabelEncoder
# rather than a hand-written index -> name table.
print(labelencoder.inverse_transform([sentiment])[0])

2
Positive


2. Apply GridSearchCV on the source code provided in the class

In [12]:
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV

# Wrap the model-building function so scikit-learn can treat it as an estimator.
model = KerasClassifier(build_fn=createmodel, verbose=2)

# Hyper-parameter values to try; every batch_size/epochs combination is fitted.
batch_size = [10, 20]
epochs = [1, 2]
param_grid = {'batch_size': batch_size, 'epochs': epochs}

grid = GridSearchCV(estimator=model, param_grid=param_grid)
grid_result = grid.fit(X_train, y=Y_train)

# Summarize results: best cross-validated score and the parameters behind it.
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 1/1
 - 43s - loss: 0.8376 - accuracy: 0.6395
Epoch 1/1
 - 42s - loss: 0.8302 - accuracy: 0.6480
Epoch 1/1
 - 45s - loss: 0.8298 - accuracy: 0.6449
Epoch 1/1
 - 43s - loss: 0.8317 - accuracy: 0.6386
Epoch 1/1
 - 44s - loss: 0.8280 - accuracy: 0.6456
Epoch 1/2
 - 44s - loss: 0.8296 - accuracy: 0.6442
Epoch 2/2
 - 43s - loss: 0.6880 - accuracy: 0.7113
Epoch 1/2
 - 45s - loss: 0.8279 - accuracy: 0.6481
Epoch 2/2
 - 44s - loss: 0.6919 - accuracy: 0.7094
Epoch 1/2
 - 46s - loss: 0.8303 - accuracy: 0.6415
Epoch 2/2
 - 45s - loss: 0.6847 - accuracy: 0.7123
Epoch 1/2
 - 44s - loss: 0.8320 - accuracy: 0.6421
Epoch 2/2
 - 44s - loss: 0.6827 - accuracy: 0.7080
Epoch 1/2
 - 43s - loss: 0.8245 - accuracy: 0.6428
Epoch 2/2
 - 42s - loss: 0.6808 - accuracy: 0.7099
Epoch 1/1
 - 22s - loss: 0.8350 - accuracy: 0.6423
Epoch 1/1
 - 22s - loss: 0.8332 - accuracy: 0.6383
Epoch 1/1
 - 23s - loss: 0.8356 - accuracy: 0.6379
Epoch 1/1
 - 23s - loss: 0.8325 - accuracy: 0.6385
Epoch 1/1
 - 23s - loss: 0.8292

3. Apply the code to the spam data set available in the source code (text classification on the spam.csv data set)

In [0]:
# Read the spam CSV (ISO-8859-1 encoded) and keep only the necessary columns:
# 'v1' and 'v2'.
data1 = pd.read_csv('/content/spam.csv', encoding="ISO-8859-1")[['v1', 'v2']]

In [0]:
# Lower-case every message, then drop everything that is not a letter, a
# digit or whitespace.
# The character class is written as A-Z (not A-z): the A-z range also spans
# the ASCII characters between 'Z' and 'a' ([ \ ] ^ _ `), which would let
# those symbols survive the clean-up. The raw string keeps \s a regex escape
# instead of an (invalid) Python string escape.
data1['v2'] = data1['v2'].apply(
    lambda x: re.sub(r'[^a-zA-Z0-9\s]', '', x.lower())
)

In [0]:
# No 'rt' stripping for the spam data. The loop that used to be here wrote to
# row[0], which is the 'v1' column of data1 rather than the message text in
# 'v2', and it wrote to the row copies that iterrows() yields, so data1 was
# never modified. The step is therefore dropped instead of being redirected
# at 'v2'.

In [0]:
# Vocabulary cap handed to the Tokenizer (num_words); the model-building
# function reuses it as the Embedding input dimension.
max_fatures = 2000
tokenizer = Tokenizer(split=' ', num_words=max_fatures)
tokenizer.fit_on_texts(data1['v2'].values)

# Convert each message to a sequence of word indices and pad the sequences
# into the 2-D array whose second dimension the model reads as its input length.
X = pad_sequences(tokenizer.texts_to_sequences(data1['v2'].values))

In [0]:
# Map the string labels in 'v1' to integers, then one-hot encode them for the
# categorical cross-entropy loss.
labelencoder = LabelEncoder()
integer_encoded = labelencoder.fit_transform(data1['v1'])
y = to_categorical(integer_encoded)

# Hold out 33% of the samples for testing; the fixed seed keeps the split
# the same on every run.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)

In [0]:
def createmodel1():
    """Build and compile the two-class LSTM classifier for the spam data.

    Reads the notebook-level ``max_fatures`` (Embedding input dimension) and
    ``X`` (its second dimension is used as ``input_length``) at call time.

    Returns:
        A compiled ``Sequential`` model: Embedding -> LSTM -> Dense(2, softmax),
        trained with categorical cross-entropy, the Adam optimizer and an
        accuracy metric.
    """
    embed_dim = 128
    lstm_out = 196
    model = Sequential()
    model.add(Embedding(max_fatures, embed_dim, input_length=X.shape[1]))
    model.add(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2))
    # The labels are one-hot vectors and the loss is categorical cross-entropy,
    # so the two outputs must form a probability distribution: use softmax
    # (as createmodel does) rather than two independent sigmoid units.
    model.add(Dense(2, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [19]:
batch_size = 32

# Build a fresh spam model and train it on the training split.
model1 = createmodel1()
model1.fit(X_train, Y_train, batch_size=batch_size, epochs=5, verbose=2)

# Evaluate on the held-out split; evaluate() returns the values in the order
# given by model1.metrics_names, which is printed last for reference.
score, acc = model1.evaluate(X_test, Y_test, batch_size=batch_size, verbose=2)
print(score)
print(acc)
print(model1.metrics_names)

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 1/5
 - 33s - loss: 0.1687 - accuracy: 0.9397
Epoch 2/5
 - 33s - loss: 0.0369 - accuracy: 0.9869
Epoch 3/5
 - 33s - loss: 0.0192 - accuracy: 0.9938
Epoch 4/5
 - 34s - loss: 0.0112 - accuracy: 0.9965
Epoch 5/5
 - 33s - loss: 0.0064 - accuracy: 0.9979
0.1101426605290043
0.9809679388999939
['loss', 'accuracy']
