In [None]:
!pip install contractions
!pip install textsearch
!pip install tqdm
import nltk
nltk.download('punkt')

In [None]:
import pandas as pd
import os
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Conv1D, LSTM, GRU, ConvLSTM1D, Bidirectional, Dropout
from tensorflow.keras.layers import MaxPooling1D
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing import sequence
from sklearn.preprocessing import LabelEncoder


import matplotlib
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report


seed = 42
np.random.seed(seed)

In [None]:
DATA_PATH = './data/IMDB/'
OUTPUT_MODELS_PATH = './models/'

## Load Dataset

In [None]:
#dataset = pd.read_csv(os.path.join(DATA_PATH, 'data.csv'))
#dataset.info()

In [None]:
dataset = pd.read_csv('../input/gamereview/review_train.csv')
#dataset = pd.read_csv('../input/imdb-data/data.csv')

In [None]:
dataset.info()

In [None]:
dataset = dataset.dropna()

In [None]:
print(dataset.shape)
dataset.head()

### Prepare Train Test Split

In [None]:
# Build the train and held-out test sets from the review text and its score.
# NOTE(review): the split is positional (no shuffling) — confirm that the rows
# of the CSV are not ordered by score.
TRAIN_SIZE = 19000  # number of leading rows used for training

reviews = dataset['content'].values
sentiments = dataset['score'].values

train_reviews, test_reviews = reviews[:TRAIN_SIZE], reviews[TRAIN_SIZE:]
train_sentiments, test_sentiments = sentiments[:TRAIN_SIZE], sentiments[TRAIN_SIZE:]

# Text Wrangling & Normalization

In [None]:
import contractions
from bs4 import BeautifulSoup
import numpy as np
import re
import tqdm
import unicodedata


def strip_html_tags(text):
  """Return `text` with HTML markup removed.

  <iframe> and <script> elements are dropped together with their contents,
  the remaining text is extracted, and every run of carriage-return /
  line-feed characters is collapsed into a single newline.
  """
  soup = BeautifulSoup(text, "html.parser")
  for tag in soup(['iframe', 'script']):
    tag.extract()
  stripped_text = soup.get_text()
  # The original class `[\r|\n|\r\n]` also matched a literal '|' and turned
  # pipe characters into newlines; only line-break characters are meant here.
  return re.sub(r'[\r\n]+', '\n', stripped_text)

def remove_accented_chars(text):
  """Strip diacritics: NFKD-decompose `text`, then drop every non-ASCII code point."""
  decomposed = unicodedata.normalize('NFKD', text)
  ascii_bytes = decomposed.encode('ascii', 'ignore')
  return ascii_bytes.decode('utf-8', 'ignore')

def pre_process_corpus(docs):
  """Normalize an iterable of raw review strings.

  Each document is stripped of HTML, has newlines and tabs replaced by
  spaces, is lower-cased, de-accented, has contractions expanded, loses every
  character that is not an ASCII letter, digit or whitespace, and finally has
  runs of spaces collapsed and both ends trimmed.

  Returns a list of cleaned strings in the same order as `docs`.
  """
  whitespace_map = str.maketrans("\n\t\r", "   ")
  norm_docs = []
  for doc in tqdm.tqdm(docs):
    doc = strip_html_tags(doc)
    doc = doc.translate(whitespace_map)
    doc = doc.lower()
    doc = remove_accented_chars(doc)
    doc = contractions.fix(doc)
    # `flags` must be passed by keyword: the fourth positional argument of
    # re.sub is `count`, so the original call capped the number of removed
    # characters instead of setting the regex flags.
    doc = re.sub(r'[^a-zA-Z0-9\s]', '', doc, flags=re.I|re.A)
    doc = re.sub(' +', ' ', doc)
    doc = doc.strip()
    norm_docs.append(doc)

  return norm_docs

In [None]:
# The former `demoji.download_codes()` call was removed: `demoji` is neither
# installed nor imported anywhere in this notebook, so the call raised
# NameError on a fresh kernel. Nothing else uses demoji, and non-ASCII
# characters (emoji included) are already dropped inside `pre_process_corpus`.


In [None]:
%%time

norm_train_reviews = pre_process_corpus(train_reviews)
norm_test_reviews = pre_process_corpus(test_reviews)

## Preprocessing

To prepare text data for our deep learning model, we transform each review into a sequence.
Every word in the review is mapped to an integer index and thus the sentence turns into a sequence of numbers.

To perform this transformation, keras provides the ```Tokenizer```

In [None]:
# Build the vocabulary from the normalized training reviews only; the
# tokenizer is created with '<UNK>' as its out-of-vocabulary token.
t = Tokenizer(oov_token='<UNK>')
t.fit_on_texts(norm_train_reviews)
# Register index 0 under '<PAD>' so that len(t.word_index), used later as the
# vocabulary size, also counts the zero value used for padding.
t.word_index['<PAD>'] = 0

In [None]:
max([(k, v) for k, v in t.word_index.items()], key = lambda x:x[1]), min([(k, v) for k, v in t.word_index.items()], key = lambda x:x[1]), t.word_index['<UNK>']

In [None]:
train_sequences = t.texts_to_sequences(norm_train_reviews)

In [None]:
test_sequences = t.texts_to_sequences(norm_test_reviews)

In [None]:
print("Vocabulary size={}".format(len(t.word_index)))
print("Number of Documents={}".format(t.document_count))

### Sequence Normalization

Not all reviews are of same length. To handle this difference in length of reviews, we define a maximum length.
For reviews that are shorter than this length, we pad them with zeros, while longer ones are truncated.

In [None]:
MAX_SEQUENCE_LENGTH = 5000

In [None]:
# pad dataset to a maximum review length in words
X_train = sequence.pad_sequences(train_sequences, maxlen=MAX_SEQUENCE_LENGTH)
X_test = sequence.pad_sequences(test_sequences, maxlen=MAX_SEQUENCE_LENGTH)
X_train.shape, X_test.shape

### Encoding Labels

If the dataset contains labels of the form positive/negative, they have to be converted to integers before the model can use them. The following step encodes the labels using ```sklearn's``` ```LabelEncoder```

In [None]:
le = LabelEncoder()
# Number of target classes: 2 for positive/negative labels, 3 when a neutral
# class is present; this run is configured for 5.
num_classes=5

In [None]:
y_train = le.fit_transform(train_sentiments)
y_test = le.transform(test_sentiments)

In [None]:
y_train

In [None]:
train_sentiments

In [None]:
train_sentiments[7]

In [None]:
VOCAB_SIZE = len(t.word_index)

## Prepare the Models

Since textual data is a sequence of words, we utilize ```1D``` convolutions to scan through the sentences.
The model first transforms each word into a lower-dimensional embedding/vector space, then applies 1D convolutions, and finally passes the data through dense layers before the final classification layer.

In [None]:
EMBED_SIZE = 500
EPOCHS=20
BATCH_SIZE=100

In [None]:
  # Candidate architectures keyed by a short name. Every network starts with the
  # same Embedding layer and ends with a single sigmoid unit.
  # NOTE(review): a one-unit sigmoid head is a binary classifier, while
  # `num_classes` is set to 5 earlier in this notebook — confirm the labels are
  # binary before training these networks, or switch the heads to
  # Dense(num_classes, activation='softmax') together with the compile cell.
  models = {
    # One convolution + pooling stage.
    'CNN1': Sequential([Embedding(VOCAB_SIZE, EMBED_SIZE, input_length=MAX_SEQUENCE_LENGTH),
                        Conv1D(filters=256, kernel_size=4, padding='same', activation='relu'),
                        MaxPooling1D(pool_size=4),
                        Flatten(),
                        Dense(64, activation='relu'), 
                        Dropout(rate=0.5),
                        Dense(1, activation='sigmoid')]),
    # Three convolution + pooling stages.
    'CNN3': Sequential([Embedding(VOCAB_SIZE, EMBED_SIZE, input_length=MAX_SEQUENCE_LENGTH),
                        Conv1D(filters=256, kernel_size=4, padding='same', activation='relu'),
                        MaxPooling1D(pool_size=4),
                        Conv1D(filters=128, kernel_size=4, padding='same', activation='relu'),
                        MaxPooling1D(pool_size=4),
                        Conv1D(filters=64, kernel_size=4, padding='same', activation='relu'),
                        MaxPooling1D(pool_size=2),
                        Flatten(),
                        Dense(256, activation='relu'), 
                        Dropout(rate=0.5),
                        Dense(1, activation='sigmoid')]),
    # Five convolution + pooling stages.
    'CNN5': Sequential([Embedding(VOCAB_SIZE, EMBED_SIZE, input_length=MAX_SEQUENCE_LENGTH),
                        Conv1D(filters=512, kernel_size=4, padding='same', activation='relu'),
                        MaxPooling1D(pool_size=4),
                        Conv1D(filters=256, kernel_size=4, padding='same', activation='relu'),
                        MaxPooling1D(pool_size=2),
                        Conv1D(filters=128, kernel_size=4, padding='same', activation='relu'),
                        MaxPooling1D(pool_size=2),
                        Conv1D(filters=64, kernel_size=4, padding='same', activation='relu'),
                        MaxPooling1D(pool_size=2),
                        Conv1D(filters=32, kernel_size=4, padding='same', activation='relu'),
                        MaxPooling1D(pool_size=2), 
                        Flatten(),
                        Dense(256, activation='relu'), 
                        Dropout(rate=0.5),
                        Dense(1, activation='sigmoid')]),
    # Single LSTM layer.
    'LSTM1': Sequential([Embedding(VOCAB_SIZE, EMBED_SIZE, input_length=MAX_SEQUENCE_LENGTH),
                        LSTM(units=64),
                        Dense(64, activation='relu'), 
                        Dropout(rate=0.5),
                        Dense(1, activation='sigmoid')]),
    # Two stacked LSTM layers (the first returns the full sequence).
    'LSTM2': Sequential([Embedding(VOCAB_SIZE, EMBED_SIZE, input_length=MAX_SEQUENCE_LENGTH),
                        LSTM(units=64, return_sequences=True),
                        LSTM(units=64),
                        Dense(64, activation='relu'), 
                        Dropout(rate=0.5),
                        Dense(1, activation='sigmoid')]),
    # Convolution + pooling followed by an LSTM.
    'LSTMCNN': Sequential([Embedding(VOCAB_SIZE, EMBED_SIZE, input_length=MAX_SEQUENCE_LENGTH),
                        Conv1D(filters=128, kernel_size=8, padding='same', activation='relu'),
                        MaxPooling1D(pool_size=4),
                        LSTM(units=64),
                        Dense(64, activation='relu'), 
                        Dropout(rate=0.5),
                        Dense(1, activation='sigmoid')]),
    # Single GRU layer.
    'GRU1': Sequential([Embedding(VOCAB_SIZE, EMBED_SIZE, input_length=MAX_SEQUENCE_LENGTH),
                        GRU(units=64),
                        Dense(64, activation='relu'), 
                        Dropout(rate=0.5),
                        Dense(1, activation='sigmoid')]),
    # Two stacked GRU layers (the first returns the full sequence).
    'GRU2': Sequential([Embedding(VOCAB_SIZE, EMBED_SIZE, input_length=MAX_SEQUENCE_LENGTH),
                        GRU(units=64, return_sequences=True),
                        GRU(units=64),
                        Dense(64, activation='relu'), 
                        Dropout(rate=0.5),
                        Dense(1, activation='sigmoid')]),
    # Convolution + pooling followed by a GRU.
    'GRUCNN': Sequential([Embedding(VOCAB_SIZE, EMBED_SIZE, input_length=MAX_SEQUENCE_LENGTH),
                        Conv1D(filters=128, kernel_size=8, padding='same', activation='relu'),
                        MaxPooling1D(pool_size=4),
                        GRU(units=64),
                        Dense(64, activation='relu'), 
                        Dropout(rate=0.5),
                        Dense(1, activation='sigmoid')])
    }

In [None]:
# Compile every candidate network with the same loss, optimizer and metric.
# The loop variables are deliberately not named `model`, so the standalone
# `model` used by other cells is never replaced by a dictionary key (a str).
# NOTE(review): binary cross-entropy matches the one-unit sigmoid heads of
# these networks, but `num_classes` is 5 in this notebook — confirm the labels
# are binary before training them.
for model_name, network in models.items():
    network.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

Use the cells below if you need to train additional models.

In [None]:
# Standalone classifier: Embedding -> Conv1D -> LSTM -> GRU -> Dense head.
# `Dropout` comes from the public `tensorflow.keras.layers` import at the top
# of the notebook; the former re-import from the private
# `tensorflow.python.keras` package rebound the name to a different Keras
# implementation than the one `Sequential` belongs to.
model = Sequential()
model.add(Embedding(VOCAB_SIZE, EMBED_SIZE, input_length=MAX_SEQUENCE_LENGTH))
model.add(Conv1D(filters=256, kernel_size=5, padding='same', activation='relu'))
model.add(LSTM(units=64, return_sequences=True))
model.add(GRU(units=256))
model.add(Dense(256, activation='relu'))
model.add(Dropout(rate=0.5))
# One softmax output per class; `num_classes` replaces the hard-coded 5.
model.add(Dense(num_classes, activation='softmax'))
# The sparse loss takes the integer class ids produced by the LabelEncoder.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['mae', 'accuracy'])
model.summary()

In [None]:
# Standalone classifier: Embedding -> Conv1D -> MaxPooling -> LSTM -> LSTM ->
# Dense head. `Dropout` and `Bidirectional` are already imported from the
# public `tensorflow.keras.layers` package at the top of the notebook; the
# former re-imports from the private `tensorflow.python.keras` package rebound
# them to a different Keras implementation than the one `Sequential` belongs to.
model = Sequential()
model.add(Embedding(VOCAB_SIZE, EMBED_SIZE, input_length=MAX_SEQUENCE_LENGTH))
model.add(Conv1D(filters=256, kernel_size=5, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=4))
model.add(LSTM(units=128, return_sequences=True))
model.add(LSTM(units=64))
model.add(Dense(256, activation='relu'))
model.add(Dropout(rate=0.5))
# One softmax output per class; `num_classes` replaces the hard-coded 5.
model.add(Dense(num_classes, activation='softmax'))
# The sparse loss takes the integer class ids produced by the LabelEncoder.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['mae', 'accuracy'])
model.summary()

In [None]:
history = model.fit(X_train, y_train, 
            validation_split=0.1,
            epochs=EPOCHS, 
            batch_size=BATCH_SIZE, 
            verbose=1)

In [None]:
# Evaluate on the held-out split with the LabelEncoder-encoded targets
# (`y_test`), i.e. the same label space the model was fitted on; the raw
# `test_sentiments` values are not the class ids the model was trained with.
scores = model.evaluate(X_test, y_test, verbose=1)
print(scores)

In [None]:
from sklearn.metrics import mean_absolute_error

In [None]:
pred = model.predict(X_train)

In [None]:
pred + 0.2

In [None]:
# `pred` holds one probability per class for every sample, so it is reduced to
# the most likely class id and mapped back to the original score values before
# being compared with `train_sentiments`.
# NOTE(review): assumes the scores are numeric — confirm against the dataset.
train_pred_scores = le.inverse_transform(pred.argmax(axis=1))
mean_absolute_error(train_sentiments, train_pred_scores)

In [None]:
# Mean absolute error on the held-out split, in original score units.
# `scores` (the list returned by `model.evaluate`) is left untouched: the
# former `scores += 1` raised TypeError on a list, and that list holds
# aggregate metrics rather than per-sample predictions.
# NOTE(review): assumes the scores are numeric — confirm against the dataset.
test_pred_scores = le.inverse_transform(model.predict(X_test).argmax(axis=1))
mean_absolute_error(test_sentiments, test_pred_scores)

In [None]:
test_data = pd.read_csv('../input/gamereview/review_test.csv')
test_data

In [None]:
test_data.iloc[219, ]

In [None]:
submission = pd.DataFrame()
test_X = test_data['content']

In [None]:
test_X.shape

In [None]:
test_reviews = pre_process_corpus(test_X)

In [None]:
import tensorflow as tf
from tensorflow import keras
# Public Keras API instead of the private `tensorflow.python.keras` package.
# `sequence` and `os` are already imported at the top of the notebook, so the
# former `keras_preprocessing` / `os` re-imports are not needed here.
from tensorflow.keras import Input
from tensorflow.keras.layers import Concatenate

rnn_cell_size = 128


class Attention(tf.keras.Model):
    """Additive (tanh-scored) attention over a sequence of RNN outputs.

    `call(features, hidden)` scores every time step of `features` against
    `hidden`, converts the scores into weights with a softmax over axis 1,
    and returns the weighted sum of `features` together with the weights.
    """

    def __init__(self, units):
        super(Attention, self).__init__()
        self.W1 = Dense(units)
        self.W2 = Dense(units)
        self.V = Dense(1)

    def call(self, features, hidden):
        # Insert an axis at position 1 so `hidden` broadcasts against `features`.
        hidden_with_time_axis = tf.expand_dims(hidden, 1)
        score = tf.nn.tanh(self.W1(features) + self.W2(hidden_with_time_axis))
        attention_weights = tf.nn.softmax(self.V(score), axis=1)
        context_vector = attention_weights * features
        context_vector = tf.reduce_sum(context_vector, axis=1)

        return context_vector, attention_weights


sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,))

# Embedding requires an output dimension; it was missing, which raised a
# TypeError before any layer was built.
embedded_sequences = Embedding(VOCAB_SIZE, EMBED_SIZE, input_length=MAX_SEQUENCE_LENGTH)(sequence_input)

# NOTE(review): with return_state=True this layer returns the outputs plus the
# final states, and the whole result is handed to the next layer exactly as in
# the original — Keras may treat the extra tensors as initial state; confirm
# this is intended or drop return_state here.
lstm = Bidirectional(
    LSTM(rnn_cell_size,
         dropout=0.3,
         return_sequences=True,
         return_state=True,
         recurrent_activation='relu'),
    name="bi_lstm_0")(embedded_sequences)

lstm, forward_h, forward_c, backward_h, backward_c = Bidirectional(
    LSTM(rnn_cell_size,
         dropout=0.2,
         return_sequences=True,
         return_state=True,
         recurrent_activation='relu'))(lstm)

state_h = Concatenate()([forward_h, backward_h])
state_c = Concatenate()([forward_c, backward_c])

# The layer was never instantiated (`attention` was an undefined name).
# NOTE(review): the number of attention units is not given anywhere in the
# notebook; `rnn_cell_size` is used as a placeholder — tune as needed.
attention = Attention(rnn_cell_size)
context_vector, attention_weights = attention(lstm, state_h)

output = Dense(1)(context_vector)

# Stored under its own name: this network is neither compiled nor trained, so
# it must not replace the fitted `model` that the prediction cells rely on.
attention_model = keras.Model(inputs=sequence_input, outputs=output)

# summarize layers (summary() prints by itself and returns None)
attention_model.summary()

In [None]:
sq = t.texts_to_sequences(test_reviews)
test_X = sequence.pad_sequences(sq, maxlen=MAX_SEQUENCE_LENGTH)

In [None]:
len(sq)

In [None]:
predictions = model.predict(test_X)

In [None]:
predictions

In [None]:
# `predictions` holds one probability per class for every row, so it cannot be
# wrapped in a single-column frame directly: keep the most likely class and
# map it back to the original score value.
submission = pd.DataFrame({'score': le.inverse_transform(predictions.argmax(axis=1))})

In [None]:
submission.to_csv('./submission.csv', index=None)

In [None]:
# Show the architecture of every candidate network. `summary()` prints by
# itself and returns None, so it is not wrapped in print(); the loop variables
# avoid the name `model` so the fitted standalone model is not replaced by a
# dictionary key (a str).
for model_name, network in models.items():
    print(model_name)
    network.summary()

## Models Training

In [None]:
# Fit every candidate network and keep its training history, in the insertion
# order of `models`.
import tensorflow as tf
from datetime import datetime

# `run_functions_eagerly` is the current name of this switch;
# `experimental_run_functions_eagerly` is deprecated.
# NOTE(review): eager execution presumably slows training down — confirm it is
# still required here.
tf.config.run_functions_eagerly(True)

start = datetime.now()
histories = []
# The loop variables avoid the name `model` so the standalone `model` is not
# replaced by a dictionary key (a str).
for model_name, network in models.items():
    history = network.fit(X_train, y_train,
            validation_split=0.1,
            epochs=EPOCHS,
            batch_size=BATCH_SIZE,
            verbose=1)
    histories.append(history)
print(datetime.now() - start)

# Model Evaluation

In [None]:
# Final evaluation of the model.
# Metrics are looked up by name: the standalone model is compiled with
# ['mae', 'accuracy'], so position 1 of the plain result list is the MAE,
# not the accuracy.
scores = model.evaluate(X_test, y_test, verbose=1, return_dict=True)
print("Accuracy: %.2f%%" % (scores['accuracy']*100))

In [None]:
# Remove the last layer of `model`, so that the following `model.predict`
# calls return the output of the layer that preceded it.
# NOTE(review): this mutates `model` in place — re-running the cell removes one
# more layer each time.
model.pop()

In [None]:
model.summary()

In [None]:
check = model.predict(X_train)

In [None]:
check.shape

In [None]:
import sklearn
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
clf1 = GradientBoostingClassifier()
clf2 = SVC()
clf3 = RandomForestClassifier()
clf1.fit(check, y_train)
clf2.fit(check, y_train)
clf3.fit(check, y_train)

In [None]:
pr = model.predict(X_test)

In [None]:
int(0.5)

In [None]:
# Blend the three classifiers with a weighted average of their predicted
# class ids.
pred1 = clf1.predict(pr)
pred2 = clf2.predict(pr)
pred3 = clf3.predict(pr)
vote_weights = (2, 2, 1.55)
weight_total = sum(vote_weights)
# Normalize by the sum of the weights and round to the nearest class. The
# original divided by 4 although the weights add up to 5.55, so a unanimous
# vote for class 3 became 4 and a unanimous vote for class 4 became 5.
pred = [
    int(round(sum(w * v for w, v in zip(vote_weights, votes)) / weight_total))
    for votes in zip(pred1, pred2, pred3)
]
print(len(pred))
print(len(y_test))
print(accuracy_score(y_test, pred))

# Models Evaluation

In [None]:
# Evaluate every candidate network on the held-out split. Each entry of
# `values` is the list returned by `evaluate`; position 1 is the accuracy
# because these networks are compiled with metrics=['accuracy'].
# The loop variables avoid the name `model` so the standalone `model` is not
# replaced by a dictionary key (a str).
values = []
for model_name, network in models.items():
    values.append(network.evaluate(X_test, y_test, verbose=1))
    print(values[-1][1])

# Saving

In [None]:
# Persist the weights of every candidate network as '<name>.h5'.
# The output folder is created first so saving does not fail on a fresh
# checkout; the loop variables avoid the name `model` so the standalone
# `model` is not replaced by a dictionary key (a str).
os.makedirs(OUTPUT_MODELS_PATH, exist_ok=True)
for model_name, network in models.items():
  network.save_weights(os.path.join(OUTPUT_MODELS_PATH, f'{model_name}.h5'))

# Different Batches

In [None]:
# Train the same small CNN with several batch sizes and keep each history.
batchs = [64, 128, 200]
histories_batches = []

# The output head and loss follow `num_classes`: one sigmoid unit with binary
# cross-entropy only fits two classes; otherwise a softmax over all classes is
# paired with the sparse loss, which takes the integer ids in `y_train`.
if num_classes == 2:
    output_units, output_activation, loss_name = 1, 'sigmoid', 'binary_crossentropy'
else:
    output_units, output_activation, loss_name = num_classes, 'softmax', 'sparse_categorical_crossentropy'

for batch_size in batchs:
    model = Sequential()
    model.add(Embedding(VOCAB_SIZE, EMBED_SIZE, input_length=MAX_SEQUENCE_LENGTH))
    model.add(Conv1D(filters=128, kernel_size=4, padding='same', activation='relu'))
    model.add(MaxPooling1D(pool_size=4))
    model.add(Flatten())
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(rate=0.5))
    model.add(Dense(output_units, activation=output_activation))
    model.compile(loss=loss_name, optimizer='adam', metrics=['accuracy'])
    history = model.fit(X_train, y_train,
          validation_split=0.1,
          epochs=5,
          batch_size=batch_size,
          verbose=1)
    histories_batches.append({'history': history, 'batch': batch_size})

# Different Optimizers

In [None]:
# Train the same GRU network with several optimizers and keep each history.
# `Dropout` is the public `tensorflow.keras.layers` class imported at the top
# of the notebook; the private `tensorflow.python.keras` re-import is dropped.
import tensorflow as tf

optimizers = [tf.optimizers.Adam(), tf.optimizers.Nadam(), tf.optimizers.RMSprop()]
histories_optimizers = []

# The output head and loss follow `num_classes`: one sigmoid unit with binary
# cross-entropy only fits two classes; otherwise a softmax over all classes is
# paired with the sparse loss, which takes the integer ids in `y_train`.
if num_classes == 2:
    output_units, output_activation, loss_name = 1, 'sigmoid', 'binary_crossentropy'
else:
    output_units, output_activation, loss_name = num_classes, 'softmax', 'sparse_categorical_crossentropy'

for optimizer in optimizers:
    model = Sequential()
    model.add(Embedding(VOCAB_SIZE, EMBED_SIZE, input_length=MAX_SEQUENCE_LENGTH))
    model.add(GRU(units=256))
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(rate=0.3))
    model.add(Dense(output_units, activation=output_activation))
    model.compile(loss=loss_name, optimizer=optimizer, metrics=['accuracy'])
    history = model.fit(X_train, y_train,
          validation_split=0.1,
          epochs=5,
          batch_size=BATCH_SIZE,
          verbose=1)
    # 'optimizer' is the correctly spelled key; the misspelled 'oprimizer' is
    # kept as well so any existing reader of that key keeps working.
    histories_optimizers.append({'history': history, 'optimizer': optimizer, 'oprimizer': optimizer})

# Plots

In [None]:
matplotlib.rcParams['figure.figsize'] = (20,10)

# `histories` follows the insertion order of `models`:
# CNN1, CNN3, CNN5, LSTM1, LSTM2, LSTMCNN, GRU1, GRU2, GRUCNN.
# The original plotted histories[7] under the one-layer CNN label and
# histories[0] under the two-block GRU label; the indices below match the
# labels. Two label typos are fixed as well.
accuracy_curves = [
    (0, 'CNN (1 слой)', 'x'),
    (1, 'CNN (3 слоя + 3 пулинг)', '.'),
    (2, 'CNN (5 слоев + 5 пулинг)', 'v'),
    (3, 'LSTM (1 блок)', '^'),
    (4, 'LSTM (2 блока)', '4'),
    (5, 'LSTM + CNN', 's'),
    (6, 'GRU (1 блок)', '8'),
    (7, 'GRU (2 блока)', 'P'),
    (8, 'GRU + CNN', '*'),
]
for history_index, curve_label, curve_marker in accuracy_curves:
    plt.plot(histories[history_index].history['accuracy'], label=curve_label, marker=curve_marker)
plt.xlabel('Эпоха обучения')
plt.ylabel('Accuracy')
plt.legend()

In [None]:
matplotlib.rcParams['figure.figsize'] = (85,35)
# A single 2x3 grid: accuracy on the top row, loss on the bottom row, one
# column per batch size. The original created a 3x3 grid first and then drew
# a 2x3 grid of subplots over it.
fig, axes = plt.subplots(2, 3)
metric_rows = [('accuracy', 'val_accuracy', 'Accuracy'), ('loss', 'val_loss', 'Loss')]
for row, (train_key, val_key, metric_label) in enumerate(metric_rows):
  for col, entry in enumerate(histories_batches[:3]):
    ax = axes[row, col]
    curves = entry['history'].history
    # Epoch numbers start at 1 and follow the recorded history length.
    epochs = np.arange(1, len(curves[train_key]) + 1)
    ax.set_title("Batch_size = {}".format(entry['batch']), fontsize=60)
    ax.plot(epochs, curves[train_key], color='blue',
            label='{} на обучающем наборе данных'.format(metric_label), marker='x')
    ax.plot(epochs, curves[val_key], color='red',
            label='{} на валидационном наборе данных'.format(metric_label), marker='x')
    ax.set_xlabel('Эпоха обучения')
    ax.set_ylabel(metric_label)
    ax.legend(fontsize=53)
# Kept after the drawing code as in the original, so it only influences text
# created from this point on.
matplotlib.rcParams.update({'font.size': 40})
fig.savefig('saved.png')

In [None]:
matplotlib.rcParams['figure.figsize'] = (30,10)
# 2x3 grid: accuracy on the top row, loss on the bottom row, one column per
# optimizer. The six copy-pasted panels are replaced by one loop, and every
# panel now uses the same 1-based epoch axis (the original passed explicit
# epoch numbers to the first panel only).
fig, axes = plt.subplots(2, 3)
optimizer_panels = [
    ('Adam', histories_optimizers[0]),
    ('NAdam', histories_optimizers[1]),
    ('RMSProp', histories_optimizers[-1]),
]
metric_rows = [('accuracy', 'val_accuracy', 'Accuracy'), ('loss', 'val_loss', 'Loss')]
for row, (train_key, val_key, metric_label) in enumerate(metric_rows):
    for col, (panel_title, entry) in enumerate(optimizer_panels):
        ax = axes[row, col]
        curves = entry['history'].history
        epochs = np.arange(1, len(curves[train_key]) + 1)
        ax.set_title(panel_title)
        ax.plot(epochs, curves[train_key],
                label='{} на обучающем наборе данных'.format(metric_label), marker='x')
        ax.plot(epochs, curves[val_key],
                label='{} на валидационном наборе данных'.format(metric_label), marker='x')
        ax.set_xlabel('Эпоха обучения')
        ax.set_ylabel(metric_label)
        ax.legend()
plt.show()


# Classification report and confusion matrix

In [None]:
predict_x=model.predict(X_test) 

In [None]:
y_test

In [None]:
if num_classes == 2:
  # Threshold each predicted value at 0.5 to obtain a label. The former
  # argmax line was dead code: its result was overwritten immediately.
  classes_x = ['negative' if x < 0.5 else 'positive' for x in predict_x]
  # scikit-learn expects (y_true, y_pred); with the arguments swapped the
  # report exchanged precision and recall.
  print(classification_report(test_sentiments, classes_x))
  cm = confusion_matrix(test_sentiments, classes_x)
  print(cm)
else:
  # Multi-class case: take the most likely class of every row.
  cm = confusion_matrix(y_test, predict_x.argmax(axis=1))
# FIXME(review): this hard-coded 3x3 matrix unconditionally overwrites the
# matrix computed above, so the heatmap never shows this run's results.
# It is kept only because the plotting cell below uses three class labels;
# remove it together with adapting that cell.
cm = [[ 12003, 109, 306], [111, 9183, 52],[441,181,5583]]

In [None]:
# Font scaling is configured before drawing. The original called sns.set()
# after the heatmap, so it only took effect when the cell was run a second time.
sns.set(font_scale=3)
matplotlib.rcParams['figure.figsize'] = (20,15)

# Pick as many tick labels as the matrix has rows: the three sentiment names
# for a 3x3 matrix, the encoder's classes when their count matches, and plain
# row indices otherwise.
sentiment_names = ['Positive', 'Neutral', 'Negative']
if len(cm) == len(sentiment_names):
    tick_labels = sentiment_names
elif len(cm) == len(le.classes_):
    tick_labels = [str(label) for label in le.classes_]
else:
    tick_labels = [str(index) for index in range(len(cm))]

ax = sns.heatmap(cm, annot=True, fmt='g',
                 cmap='coolwarm', linecolor='green',
                 xticklabels=tick_labels, yticklabels=tick_labels)
ax.xaxis.set_ticklabels(tick_labels, fontsize=20)
ax.yaxis.set_ticklabels(tick_labels, fontsize=20)

# Save before showing, so the file is written while the figure is still open.
ax.figure.savefig('saved_figure.png')
plt.show()