**Installation**

In [None]:
!pip install pandas
!pip install -q hazm
!pip install -q clean-text[gpl]
!pip install sklearn

**Import required packages**

In [None]:
import pandas as pd
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM, SpatialDropout1D
from sklearn.model_selection import train_test_split
from keras.utils.np_utils import to_categorical
from keras.callbacks import EarlyStopping
from keras.layers import Dropout
from sklearn.metrics import classification_report, confusion_matrix
from hazm import *
from nltk.tokenize import RegexpTokenizer
import numpy as np
import math
import re
import matplotlib as mpl
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from plotly.offline import iplot, plot, download_plotlyjs, init_notebook_mode
from sklearn import metrics
import seaborn as sns
from numpy import asarray
#import seaborn as sb
from mpl_toolkits.axes_grid1 import make_axes_locatable
from copy import deepcopy
from string import punctuation
import random

# Classification

**LSTM model**

In [None]:
# Training hyper-parameters.
epochs = 6
batch_size = 64

# Network: embedding -> spatial dropout -> one LSTM layer -> softmax over 82 outputs.
model = Sequential([
    Embedding(MAX_NB_WORDS, EMBEDDING_DIM, input_length=X.shape[1]),
    SpatialDropout1D(0.25),
    LSTM(100, dropout=0.25, recurrent_dropout=0.25),
    Dense(82, activation='softmax'),
])
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()

# Stop training once the validation loss has not improved by at least
# min_delta for `patience` consecutive epochs.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, min_delta=0.0001)

# 10% of the training data is set aside for validation during fitting.
history = model.fit(
    X_train,
    Y_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_split=0.1,
    callbacks=[early_stopping],
)


**Prediction**

In [None]:
# Run the fitted model on the test inputs.
# NOTE(review): a later cell reduces this with np.argmax(Y_pred, axis=1), so it is
# presumably one row of per-class scores per test sample — confirm.
Y_pred = model.predict(X_test)

**Evaluation**

In [None]:
# Category label of every post in the corpus.
cat = Corpus['Cat']

# Sorted distinct categories and how many posts fall in each one.
# np.unique does the counting in a single pass, which replaces the original
# manual loop (`for cat in cat:`) that shadowed the `cat` column while iterating it.
unique_cat, cat_counts = np.unique(cat, return_counts=True)

# Mapping category -> number of posts, in the same (sorted) order as unique_cat.
cat_news_count = {name: int(count) for name, count in zip(unique_cat, cat_counts)}

# Print how many posts are in each category.
for cat_name, cnt in cat_news_count.items():
  print('Category "{}" contains {} posts.'.format(cat_name, cnt))

Confusion matrix

In [None]:
# Collapse each row (predicted scores / encoded true label) to a single class index.
new_Y_pred = np.argmax(Y_pred, axis=1)
new_Y_test = np.argmax(Y_test, axis=1)

## Plot confusion matrix
# Pin the label set so the matrix always has one row/column per category, even when
# a category never appears in the test split or in the predictions. Without this the
# matrix shrinks and the tick labels below no longer line up with its rows/columns.
# NOTE(review): assumes class index i corresponds to unique_cat[i] (the original tick
# labelling relied on the same assumption) — confirm against the label encoding.
class_ids = np.arange(len(unique_cat))
cm = metrics.confusion_matrix(new_Y_test, new_Y_pred, labels=class_ids)

fig, ax = plt.subplots()
# Tick labels are handed to seaborn directly: with many classes seaborn may thin out
# the automatic ticks, and setting the labels afterwards would then mismatch the ticks.
sns.heatmap(cm, annot=True, fmt='d', ax=ax, cmap=plt.cm.Blues,
            cbar=False, xticklabels=list(unique_cat),
            yticklabels=list(unique_cat))
ax.set(xlabel="Pred", ylabel="True", title="Confusion matrix")
plt.yticks(rotation=0)
plt.show()

Accuracy

In [None]:
# Compute the accuracy on the test data by comparing predicted and true class indices.
# The comparison uses value equality (==). The original used `is`, which tests object
# identity and is only correct by accident for CPython's cached small integers.
corrects = int(np.sum(np.asarray(new_Y_pred) == np.asarray(new_Y_test)))

accuracy = float(corrects / len(new_Y_pred))*100
print('Accuracy (using "{}" column): {} %'.format ('Text', accuracy))

# Cross-check with Keras' own evaluation on the same test data; the model was compiled
# with metrics=['accuracy'], so the result is [loss, accuracy].
accr = model.evaluate(X_test,Y_test)
print('Test set\n  Loss: {:0.3f}\n  Accuracy: {:0.3f}'.format(accr[0],accr[1]))

**Test model**

In [None]:
# Spot-check the model on 100 test samples drawn at random (with replacement,
# so the same sample may be picked more than once).
random_X_test = []
random_Y_test = []
counter = 0

for _ in range(100):
  rnd = random.randint(0, len(X_test)-1)
  random_X_test.append(X_test[rnd])
  random_Y_test.append(Y_test[rnd])

for sample, label in zip(random_X_test, random_Y_test):
  # Predict one sample at a time; wrap it in a list so the model receives a batch of one.
  # The result is kept in a local name so the notebook-level Y_pred (predictions for
  # the whole test set, used by the evaluation cells) is not overwritten.
  sample_pred = model.predict(asarray([sample]))
  pred_class = np.argmax(sample_pred)
  print(pred_class)

  # The true class is the position of the largest entry in the encoded label.
  # The original compared against the tuple returned by np.where(...), which only
  # worked through NumPy broadcasting of a one-element result.
  true_class = np.argmax(label)
  if pred_class == true_class:
    print('predicted correctly')
    counter += 1
  else:
    print('predicted wrongly')

# Summary line (the original format string ended with a stray double quote).
print('Number of correct predictions: {}'.format(counter))