In [32]:
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from tensorflow.keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM, SpatialDropout1D
from matplotlib import pyplot
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
import re

from sklearn.preprocessing import LabelEncoder

data = pd.read_csv('Sentiment.csv')
# Keeping only the neccessary columns
data = data[['text','sentiment']]

data['text'] = data['text'].apply(lambda x: x.lower())
data['text'] = data['text'].apply((lambda x: re.sub('[^a-zA-z0-9\s]', '', x)))

for idx, row in data.iterrows():
    row[0] = row[0].replace('rt', ' ')

max_fatures = 2000
tokenizer = Tokenizer(num_words=max_fatures, split=' ')
tokenizer.fit_on_texts(data['text'].values)
X = tokenizer.texts_to_sequences(data['text'].values)

X = pad_sequences(X)

embed_dim = 128
lstm_out = 196
def createmodel():
    model = Sequential()
    model.add(Embedding(max_fatures, embed_dim,input_length = X.shape[1]))
    model.add(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2))
    model.add(Dense(3,activation='softmax'))
    model.compile(loss = 'categorical_crossentropy', optimizer='adam',metrics = ['accuracy'])
    return model
# print(model.summary())

labelencoder = LabelEncoder()
integer_encoded = labelencoder.fit_transform(data['sentiment'])
y = to_categorical(integer_encoded)
X_train, X_test, Y_train, Y_test = train_test_split(X,y, test_size = 0.33, random_state = 42)

batch_size = 32
model = createmodel()
model.fit(X_train, Y_train, epochs = 1, batch_size=batch_size, verbose = 2)
score,acc = model.evaluate(X_test,Y_test,verbose=2,batch_size=batch_size)
print(score)
print(acc)
print(model.metrics_names)


  row[0] = row[0].replace('rt', ' ')
  row[0] = row[0].replace('rt', ' ')


291/291 - 25s - 85ms/step - accuracy: 0.6403 - loss: 0.8359
144/144 - 4s - 30ms/step - accuracy: 0.6680 - loss: 0.7538
0.7538431286811829
0.6679772734642029
['loss', 'compile_metrics']


In [33]:
model.save('sentimentAnalysis.keras')

In [34]:
from keras.models import load_model
model= load_model('sentimentAnalysis.keras')
print(integer_encoded)
print(data['sentiment'])

[1 2 1 ... 2 0 2]
0         Neutral
1        Positive
2         Neutral
3        Positive
4        Positive
           ...   
13866    Negative
13867    Positive
13868    Positive
13869    Negative
13870    Positive
Name: sentiment, Length: 13871, dtype: object


In [35]:
sentence = ['A lot of good things are happening. We are respected again throughout the world, and that\'s a great thing.@realDonaldTrump']
sentence = tokenizer.texts_to_sequences(sentence)
sentence = pad_sequences(sentence, maxlen=28, dtype='int32', value=0)
sentiment_probs = model.predict(sentence, batch_size=1, verbose=2)[0]
sentiment = np.argmax(sentiment_probs)

print(sentiment_probs)
if sentiment == 0:
    print("Neutral")
elif sentiment < 0:
    print("Negative")
elif sentiment > 0:
    print("Positive")
else:
    print("Cannot be determined")

1/1 - 0s - 240ms/step
[0.6358445  0.16726568 0.19688971]
Neutral


In [52]:
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import GridSearchCV
from tensorflow import keras
from tensorflow.keras.layers import Dense
def createmodel(optimizer='adam'):
    """
    Creates and returns a compiled Keras sequential model.
    """
    model = keras.Sequential([
        Dense(12, input_shape=(X_train.shape[1],), activation='relu'),
        Dense(8, activation='relu'),
        Dense(3, activation='softmax')

    ])
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model
model = KerasClassifier(model=createmodel,verbose=2)
batch_size= [10, 20, 40]
epochs = [1, 2]
param_grid= {'batch_size':batch_size, 'epochs':epochs, 'model__optimizer': ['adam', 'rmsprop']}
grid  = GridSearchCV(estimator=model, param_grid=param_grid)
grid_result= grid.fit(X_train,Y_train)

print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


744/744 - 3s - 3ms/step - accuracy: 0.4404 - loss: 8.0728
186/186 - 0s - 2ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


744/744 - 2s - 3ms/step - accuracy: 0.4580 - loss: 23.8758
186/186 - 0s - 2ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


744/744 - 2s - 3ms/step - accuracy: 0.4832 - loss: 24.2315
186/186 - 0s - 2ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


744/744 - 2s - 3ms/step - accuracy: 0.5167 - loss: 23.8616
186/186 - 0s - 2ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


744/744 - 3s - 4ms/step - accuracy: 0.4629 - loss: 32.0518
186/186 - 0s - 2ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


744/744 - 2s - 2ms/step - accuracy: 0.5082 - loss: 39.0573
186/186 - 0s - 2ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


744/744 - 2s - 2ms/step - accuracy: 0.5250 - loss: 18.0290
186/186 - 0s - 2ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


744/744 - 2s - 2ms/step - accuracy: 0.4755 - loss: 24.3952
186/186 - 0s - 2ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


744/744 - 2s - 3ms/step - accuracy: 0.4323 - loss: 34.7261
186/186 - 0s - 2ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


744/744 - 2s - 2ms/step - accuracy: 0.5077 - loss: 16.1130
186/186 - 0s - 2ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


744/744 - 2s - 3ms/step - accuracy: 0.4477 - loss: 33.4638
Epoch 2/2
744/744 - 1s - 1ms/step - accuracy: 0.4922 - loss: 4.0621
186/186 - 0s - 2ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


744/744 - 2s - 3ms/step - accuracy: 0.4489 - loss: 13.0033
Epoch 2/2
744/744 - 2s - 2ms/step - accuracy: 0.5907 - loss: 1.1010
186/186 - 0s - 2ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


744/744 - 2s - 3ms/step - accuracy: 0.5336 - loss: 8.6064
Epoch 2/2
744/744 - 1s - 2ms/step - accuracy: 0.5990 - loss: 1.1643
186/186 - 0s - 2ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


744/744 - 2s - 3ms/step - accuracy: 0.4927 - loss: 20.6209
Epoch 2/2
744/744 - 1s - 1ms/step - accuracy: 0.5961 - loss: 1.7844
186/186 - 0s - 2ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


744/744 - 2s - 3ms/step - accuracy: 0.5100 - loss: 22.1801
Epoch 2/2
744/744 - 2s - 2ms/step - accuracy: 0.6004 - loss: 1.9880
186/186 - 0s - 2ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


744/744 - 2s - 3ms/step - accuracy: 0.4206 - loss: 37.5820
Epoch 2/2
744/744 - 1s - 2ms/step - accuracy: 0.4954 - loss: 4.8419
186/186 - 0s - 2ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


744/744 - 2s - 2ms/step - accuracy: 0.4687 - loss: 34.4718
Epoch 2/2
744/744 - 1s - 2ms/step - accuracy: 0.4999 - loss: 3.5372
186/186 - 0s - 2ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


744/744 - 2s - 2ms/step - accuracy: 0.4824 - loss: 15.8124
Epoch 2/2
744/744 - 1s - 2ms/step - accuracy: 0.5927 - loss: 1.4073
186/186 - 0s - 2ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


744/744 - 3s - 4ms/step - accuracy: 0.5194 - loss: 18.5985
Epoch 2/2
744/744 - 1s - 1ms/step - accuracy: 0.5964 - loss: 1.2614
186/186 - 0s - 2ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


744/744 - 2s - 2ms/step - accuracy: 0.4799 - loss: 19.2390
Epoch 2/2
744/744 - 1s - 1ms/step - accuracy: 0.5879 - loss: 1.5048
186/186 - 0s - 2ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


372/372 - 2s - 4ms/step - accuracy: 0.5533 - loss: 26.7119
93/93 - 0s - 2ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


372/372 - 2s - 4ms/step - accuracy: 0.3865 - loss: 90.4810
93/93 - 0s - 2ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


372/372 - 2s - 5ms/step - accuracy: 0.4196 - loss: 39.6575
93/93 - 0s - 3ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


372/372 - 2s - 5ms/step - accuracy: 0.4605 - loss: 28.4120
93/93 - 0s - 2ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


372/372 - 2s - 4ms/step - accuracy: 0.3961 - loss: 42.1058
93/93 - 0s - 2ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


372/372 - 1s - 3ms/step - accuracy: 0.3889 - loss: 78.0988
93/93 - 0s - 2ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


372/372 - 1s - 3ms/step - accuracy: 0.5078 - loss: 25.3225
93/93 - 0s - 2ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


372/372 - 1s - 3ms/step - accuracy: 0.4518 - loss: 22.0363
93/93 - 0s - 2ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


372/372 - 2s - 4ms/step - accuracy: 0.4395 - loss: 44.5336
93/93 - 0s - 3ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


372/372 - 2s - 5ms/step - accuracy: 0.4779 - loss: 26.9182
93/93 - 0s - 2ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


372/372 - 2s - 4ms/step - accuracy: 0.4169 - loss: 61.4988
Epoch 2/2
372/372 - 1s - 2ms/step - accuracy: 0.4882 - loss: 7.9128
93/93 - 0s - 2ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


372/372 - 3s - 9ms/step - accuracy: 0.4810 - loss: 26.7632
Epoch 2/2
372/372 - 1s - 2ms/step - accuracy: 0.4853 - loss: 3.4741
93/93 - 0s - 2ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


372/372 - 2s - 5ms/step - accuracy: 0.3956 - loss: 122.9918
Epoch 2/2
372/372 - 1s - 2ms/step - accuracy: 0.4673 - loss: 18.7424
93/93 - 0s - 3ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


372/372 - 2s - 7ms/step - accuracy: 0.4803 - loss: 52.5100
Epoch 2/2
372/372 - 1s - 3ms/step - accuracy: 0.4636 - loss: 13.0117
93/93 - 0s - 2ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


372/372 - 2s - 5ms/step - accuracy: 0.4443 - loss: 33.6356
Epoch 2/2
372/372 - 1s - 2ms/step - accuracy: 0.6017 - loss: 1.7470
93/93 - 0s - 2ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


372/372 - 1s - 4ms/step - accuracy: 0.4556 - loss: 20.7431
Epoch 2/2
372/372 - 1s - 4ms/step - accuracy: 0.6049 - loss: 1.3118
93/93 - 0s - 3ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


372/372 - 1s - 3ms/step - accuracy: 0.4938 - loss: 42.5803
Epoch 2/2
372/372 - 1s - 2ms/step - accuracy: 0.5307 - loss: 3.7698
93/93 - 0s - 3ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


372/372 - 2s - 5ms/step - accuracy: 0.4416 - loss: 19.2253
Epoch 2/2
372/372 - 1s - 1ms/step - accuracy: 0.5764 - loss: 1.3625
93/93 - 0s - 2ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


372/372 - 1s - 3ms/step - accuracy: 0.4664 - loss: 28.7092
Epoch 2/2
372/372 - 1s - 2ms/step - accuracy: 0.5598 - loss: 1.9552
93/93 - 0s - 2ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


372/372 - 1s - 3ms/step - accuracy: 0.5560 - loss: 8.5412
Epoch 2/2
372/372 - 0s - 1ms/step - accuracy: 0.6067 - loss: 1.0744
93/93 - 0s - 2ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


186/186 - 1s - 8ms/step - accuracy: 0.4159 - loss: 60.0366
47/47 - 0s - 3ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


186/186 - 1s - 7ms/step - accuracy: 0.4013 - loss: 53.7771
47/47 - 0s - 3ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


186/186 - 2s - 9ms/step - accuracy: 0.4535 - loss: 12.5877
47/47 - 0s - 4ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


186/186 - 2s - 9ms/step - accuracy: 0.3323 - loss: 156.3919
47/47 - 0s - 3ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


186/186 - 1s - 7ms/step - accuracy: 0.4130 - loss: 76.8344
47/47 - 0s - 3ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


186/186 - 1s - 5ms/step - accuracy: 0.4720 - loss: 41.0690
47/47 - 0s - 3ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


186/186 - 1s - 6ms/step - accuracy: 0.4364 - loss: 59.4260
47/47 - 0s - 3ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


186/186 - 1s - 5ms/step - accuracy: 0.4492 - loss: 22.3193
47/47 - 0s - 3ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


186/186 - 1s - 6ms/step - accuracy: 0.4697 - loss: 29.5599
47/47 - 0s - 3ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


186/186 - 1s - 6ms/step - accuracy: 0.4106 - loss: 68.6645
47/47 - 0s - 3ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


186/186 - 2s - 10ms/step - accuracy: 0.4105 - loss: 65.1865
Epoch 2/2
186/186 - 1s - 3ms/step - accuracy: 0.5347 - loss: 9.3579
47/47 - 0s - 4ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


186/186 - 2s - 8ms/step - accuracy: 0.4149 - loss: 61.5512
Epoch 2/2
186/186 - 0s - 1ms/step - accuracy: 0.4832 - loss: 15.2841
47/47 - 0s - 3ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


186/186 - 1s - 7ms/step - accuracy: 0.4174 - loss: 69.7202
Epoch 2/2
186/186 - 0s - 2ms/step - accuracy: 0.4942 - loss: 14.8806
47/47 - 0s - 3ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


186/186 - 1s - 8ms/step - accuracy: 0.4549 - loss: 42.8598
Epoch 2/2
186/186 - 0s - 2ms/step - accuracy: 0.5145 - loss: 5.2513
47/47 - 0s - 3ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


186/186 - 2s - 8ms/step - accuracy: 0.4691 - loss: 24.7538
Epoch 2/2
186/186 - 1s - 3ms/step - accuracy: 0.5286 - loss: 3.9106
47/47 - 0s - 3ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


186/186 - 1s - 6ms/step - accuracy: 0.5128 - loss: 23.2734
Epoch 2/2
186/186 - 0s - 2ms/step - accuracy: 0.6106 - loss: 1.4052
47/47 - 0s - 4ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


186/186 - 2s - 8ms/step - accuracy: 0.4735 - loss: 34.6471
Epoch 2/2
186/186 - 0s - 3ms/step - accuracy: 0.5024 - loss: 4.7018
47/47 - 0s - 5ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


186/186 - 1s - 7ms/step - accuracy: 0.4883 - loss: 58.1516
Epoch 2/2
186/186 - 0s - 2ms/step - accuracy: 0.3682 - loss: 9.8485
47/47 - 0s - 3ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


186/186 - 1s - 6ms/step - accuracy: 0.5071 - loss: 11.3799
Epoch 2/2
186/186 - 0s - 1ms/step - accuracy: 0.6019 - loss: 1.3048
47/47 - 0s - 3ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


186/186 - 3s - 17ms/step - accuracy: 0.4683 - loss: 31.0739
Epoch 2/2
186/186 - 0s - 2ms/step - accuracy: 0.4269 - loss: 2.7053
47/47 - 0s - 3ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


930/930 - 3s - 3ms/step - accuracy: 0.4537 - loss: 15.3592
Epoch 2/2
930/930 - 2s - 2ms/step - accuracy: 0.5291 - loss: 1.9162
Best: 0.601422 using {'batch_size': 10, 'epochs': 2, 'model__optimizer': 'adam'}
