In [None]:
import warnings
# Filters are installed before the third-party imports so that warnings
# raised while importing them are suppressed as well.
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=DeprecationWarning)

import logging

import gensim.models.keyedvectors as word2vec
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from keras.layers import Conv1D
from keras.layers import LSTM
from keras.layers import MaxPooling1D
from keras.layers.core import Dense, Dropout
from keras.layers.embeddings import Embedding
from keras.models import Sequential
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer
from nltk.tokenize import RegexpTokenizer
from sklearn.metrics import confusion_matrix, roc_curve,  roc_auc_score, classification_report
from sklearn.model_selection import train_test_split

In [None]:
# Configure the root logger: INFO and above, "time : level : message" layout.
# NOTE(review): the blank inside '%(levelname) s' looks like a typo — confirm
# it is intended before normalising the format string.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s : %(levelname) s : %(message)s',
)

In [None]:
# Fix NumPy's global RNG so NumPy-driven randomness repeats between runs.
SEED = 24
np.random.seed(SEED)

In [None]:
# Load the dataset; the path is relative to the notebook's working directory.
DATASET_PATH = "FinalDataset.csv"
df = pd.read_csv(DATASET_PATH)

In [None]:
# Preview the first five rows.
df.head(n=5)

In [None]:
# Model input (tweet text column) and target (stance label column).
tweets, label_stance = df["finalCleanText"], df["STANCE"]

In [None]:
# Class balance: bar chart plus the raw counts per stance label.
labels_count = label_stance.value_counts()
labels_count.plot.bar()
print(labels_count)

In [None]:
# Tokens are maximal runs of ASCII letters and '@'; everything else separates.
TOKEN_PATTERN = '[a-zA-Z@]+'
tkr = RegexpTokenizer(TOKEN_PATTERN)

In [None]:
# Lower-case every tweet and split it into tokens with `tkr`.
#
# The tokenizer is applied to the tweet text itself. Previously it ran on the
# repr() of a whitespace-split list; repr() escapes non-printable characters
# (a zero-width space becomes the six characters backslash-u-2-0-0-b), and the
# letters of such escapes leaked into the vocabulary as bogus tokens.
tweets_split = []
for line in tweets:
    # A missing tweet yields no tokens rather than the literal word "nan".
    text = "" if pd.isnull(line) else str(line).lower()
    tweets_split.append(tkr.tokenize(text))

In [None]:
# Spot-check the token list produced for the second tweet.
sample_tokens = tweets_split[1]
print(sample_tokens)

In [None]:
# Load pretrained word2vec vectors from the binary file; `limit` caps the
# number of vectors read to the first 50000 entries.
w2vModel = word2vec.KeyedVectors.load_word2vec_format(
    r'GoogleNews-vectors-negative300.bin',
    binary=True,
    limit=50000,
)

In [None]:
# Build a word -> integer index from the tokenised tweets, then encode every
# tweet as the list of its token indices.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts=tweets_split)
X = tokenizer.texts_to_sequences(texts=tweets_split)

In [None]:
# Force every encoded tweet to exactly `maxlentweet` indices (Keras pads or
# truncates as needed) so the batch forms a rectangular array.
maxlentweet = 30
X = pad_sequences(sequences=X, maxlen=maxlentweet)
print(X.shape)

In [None]:
# Build the embedding weights in the Keras tokenizer's index space.
#
# The network's inputs are indices from `tokenizer.word_index`, which has no
# relation to the row order of the word2vec matrix. Feeding the raw word2vec
# matrix as weights therefore gave each word an unrelated vector, and would
# index out of range if the corpus had more words than word2vec rows.
# Row i below holds the word2vec vector of the word whose tokenizer index is i.
vocab_size = len(tokenizer.word_index) + 1  # +1: index 0 is the padding value
embedding_dim = w2vModel.vector_size
embedding_matrix = np.zeros((vocab_size, embedding_dim), dtype='float32')
for word, idx in tokenizer.word_index.items():
    # Words unknown to word2vec (and the padding row) keep an all-zero vector.
    if word in w2vModel:
        embedding_matrix[idx] = w2vModel[word]

embedding_layer = Embedding(input_dim=vocab_size, output_dim=embedding_dim, weights=[embedding_matrix],
                            input_length=X.shape[1])

In [None]:
lstm_out = 150

# Embedding -> causal 1-D convolution -> max-pool -> LSTM -> sigmoid unit,
# with heavy dropout (0.7) after the pooling and after the LSTM.
# NOTE(review): a single sigmoid output with binary cross-entropy models two
# classes, while other cells of this notebook handle three stance labels
# (0/1/2) — confirm which is intended.
model = Sequential([
    embedding_layer,
    Conv1D(filters=64, kernel_size=5, activation='relu', padding='causal'),
    MaxPooling1D(pool_size=2),
    Dropout(0.7),
    LSTM(units=lstm_out),
    Dropout(0.7),
    Dense(1, activation='sigmoid'),
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
print(model.summary())

In [None]:
# Hold out 25% of the samples for testing; random_state fixes the shuffle.
# X was already padded to `maxlentweet` columns before the split, so the
# resulting arrays need no second pad_sequences pass.
X_train, X_test, Y_train, Y_test = train_test_split(X, label_stance, test_size=0.25, random_state=13)

In [None]:
# Train for 35 epochs. The test split doubles as the validation set here, so
# the per-epoch validation metrics are computed on the same data that is used
# for the final evaluation.
batch_size = 1000
# validation_data is passed as a tuple: newer Keras releases treat a list as
# a single multi-input `x` rather than an (x, y) pair.
history = model.fit(X_train, Y_train, epochs=35, verbose=1, batch_size=batch_size,
                    validation_data=(X_test, Y_test))

In [None]:
# Raw model outputs for the test split (thresholded in a later cell) ...
y_pred = model.predict(X_test)
# ... and the compiled loss / accuracy on the same data.
score, acc = model.evaluate(
    X_test,
    Y_test,
    batch_size=batch_size,
    verbose=2,
)

In [None]:
# Turn the sigmoid outputs into boolean decisions at the 0.5 cut-off.
y_pred = y_pred > 0.5

# Rows are true labels, columns are predicted labels.
cm = confusion_matrix(y_true=Y_test, y_pred=y_pred)
print(cm)



In [None]:
# Accuracy returned by model.evaluate on the test split.
print("Testing Accuracy : ", acc, sep=" ")

In [None]:
# Per-class precision, recall and F1 on the test split.
# `labels` is pinned to the three stance ids so the three `target_names`
# always line up with them, even when a class is absent from the test labels
# or from the predictions (otherwise the name count can mismatch).
# Assumes the stance ids are the integers 0, 1, 2 — TODO confirm against the data.
print(classification_report(Y_test, y_pred, labels=[0, 1, 2], target_names=['0', '1', '2']))

In [None]:
# Bare test accuracy, without a caption.
print(str(acc))

In [None]:
def oneHotToCategorical(y_hot):
    """Collapse a 2-D score / one-hot array to a list of class indices.

    Args:
        y_hot: 2-D array with one row per sample and one column per class.

    Returns:
        A list holding, for each row, the index of its largest entry
        (the first such index on ties).
    """
    n_rows, _n_cols = y_hot.shape  # unpacking rejects anything that is not 2-D
    return list(y_hot.argmax(axis=1))
def createConfusionMatrix(y_true_c,y_pred_c,classifier_name):
    """Plot a labelled confusion-matrix heatmap and save it as a PNG.

    Args:
        y_true_c: 2-D one-hot array of true classes, one row per sample.
        y_pred_c: 2-D array of per-class scores; the arg-max column of each
            row is taken as the predicted class.
        classifier_name: Name inserted into the plot title and file name.

    Returns:
        None. The figure is written to "<title>2.png" in the working
        directory, where <title> is "Confusion Matrix of <classifier_name>".

    Raises:
        KeyError: if an arg-max index is not one of 0, 1, 2.
    """
    mapping = {2:"Anti Govt",0:"Neutral",1:"Pro Govt"}
    # Display order follows the mapping's insertion order; the same list is
    # passed to confusion_matrix so rows/columns and tick labels agree.
    labels = list(mapping.values())
    y_true = [mapping[i] for i in oneHotToCategorical(y_true_c)]
    y_pred = [mapping[i] for i in oneHotToCategorical(y_pred_c)]
    cm = confusion_matrix(y_true, y_pred, labels=labels)
    title = "Confusion Matrix of "+ classifier_name
    fig, ax = plt.subplots()
    # fmt='d' prints the integer counts in full; the default format switches
    # to scientific notation for counts of three or more digits.
    sns.heatmap(cm, annot=True, fmt='d', ax=ax)
    ax.set_xlabel('Predicted labels')
    ax.set_ylabel('True labels')
    ax.set_title(title)
    ax.xaxis.set_ticklabels(labels)
    ax.yaxis.set_ticklabels(labels)
    fig.savefig(title+"2.png",bbox_inches='tight')
    return

In [None]:
# acc_train = history.history['accuracy']
# acc_val = history.history['val_accuracy']
# createGraph(acc_train,'Training Accuracy',acc_val,'Validation Accuracy','Epochs','Accuracy','Training and Validation Accuracy Using Vanilla NN with Label Smoothing')
# plt.plot(epochs, acc_train, 'g', label='Training accuracy')
# plt.plot(epochs, acc_val, 'b', label='Validation accuracy')
# plt.title('Training and Validation accuracy')
# plt.xlabel('Epochs')
# plt.ylabel('Accuracy')
# plt.legend()
# plt.show()

In [None]:
import matplotlib.pyplot as plt
# Per-epoch training and validation loss recorded by model.fit.
loss_train = history.history['loss']
loss_val = history.history['val_loss']
# Derived from the recorded history so it stays correct if the number of
# training epochs changes (previously hard-coded to 35 epochs).
epochs = range(1, len(loss_train) + 1)

# print(history.history)

In [None]:
def createGraph(acc_train,acc_train_label,acc_val,acc_val_label,x_label,y_label,title):
    """Plot two per-epoch curves on one chart, save it and show it.

    Args:
        acc_train: Sequence of values for the first curve (one per epoch).
        acc_train_label: Legend label of the first curve.
        acc_val: Sequence of values for the second curve (one per epoch).
        acc_val_label: Legend label of the second curve.
        x_label: Label of the x axis.
        y_label: Label of the y axis.
        title: Plot title; also the stem of the output file name.

    Returns:
        None. The chart is written to "<title>2.png" in the working directory.
    """
    # Each curve gets its own 1-based x axis, so curves of different lengths
    # can still be drawn together instead of raising a shape mismatch.
    x_train = list(range(1, len(acc_train) + 1))
    x_val = list(range(1, len(acc_val) + 1))
    plt.plot(x_train, acc_train, 'o-', label=acc_train_label)
    plt.plot(x_val, acc_val, 'o-', label=acc_val_label)
    plt.title(title)
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.legend()
    plt.grid(True)
    plt.savefig(title+'2.png',bbox_inches='tight')
    plt.show()

In [None]:
# Training vs. validation loss per epoch.
createGraph(
    acc_train=loss_train,
    acc_train_label='Training Loss',
    acc_val=loss_val,
    acc_val_label='Validation Loss',
    x_label='Epochs',
    y_label='Loss',
    title='Training and Validation Loss Using LSTM',
)

In [None]:
# Keras records the accuracy metric under 'acc' in older releases and under
# 'accuracy' in newer ones; use whichever key this run produced.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'
acc_train = history.history[acc_key]
acc_val = history.history['val_' + acc_key]
createGraph(acc_train,'Training Accuracy',acc_val,'Validation Accuracy','Epochs','Accuracy','Training and Validation Accuracy Using LSTM')

In [None]:
def oneHot(y_inp, num_classes=3):
    """One-hot encode a column of integer class ids.

    Args:
        y_inp: Sequence of rows; the first element of each row is the class
            id (converted with int()).
        num_classes: Number of columns in the result. Defaults to 3.

    Returns:
        Float array of shape (len(y_inp), num_classes) with a single 1 per
        row, in the column given by that row's class id.

    Raises:
        IndexError: if a class id is outside the valid column range.
    """
    class_ids = np.array([int(row[0]) for row in y_inp], dtype=int)
    y = np.zeros((len(y_inp), num_classes))
    # Set one cell per row: (row index, class id).
    y[np.arange(len(class_ids)), class_ids] = 1
    return y

In [None]:
# Test labels as an (n, 1) column array — the layout oneHot() indexes into.
y_t = pd.Series(Y_test)
yt = y_t.values[:, np.newaxis]
print(yt.shape)

In [None]:
# One-hot version of the true test labels for createConfusionMatrix().
y_one_hot = oneHot(y_inp=yt)

In [None]:
import seaborn as sns

# createConfusionMatrix() takes the arg-max over the columns of its second
# argument. The model ends in a single sigmoid unit, so its raw output has one
# column and the arg-max would be 0 for every sample (everything plotted as
# "Neutral"). A single-column output is therefore thresholded at 0.5 and
# one-hot encoded first; a multi-column output is passed through unchanged.
y_scores = model.predict(X_test)
if y_scores.ndim == 2 and y_scores.shape[1] == 1:
    y_scores = oneHot((y_scores > 0.5).astype(int))
createConfusionMatrix(y_one_hot, y_scores, "LSTM")

In [None]:
# Boolean mask marking the test samples whose true label is 0.
x = (Y_test == 0)

In [None]:
# Count the True entries of the mask, i.e. the test samples labelled 0.
c = sum(1 for flag in x if flag == True)
print(c)

In [None]:
# Confusion matrix of the thresholded predictions against the true test
# labels, drawn as a heatmap and saved next to the notebook.
y_pred = np.array(y_pred)
Y_test = np.array(Y_test)
mapping = {2:"Anti Govt",0:"Neutral",1:"Pro Govt"}
labels = [mapping[key] for key in mapping]
cm = confusion_matrix(Y_test, y_pred)
title = "Confusion Matrix of LSTM"
fig, ax = plt.subplots()
# The heatmap is drawn exactly once: a second sns.heatmap call on the same
# figure stacked a duplicate set of annotations and an extra colour bar.
# fmt='d' prints the integer counts in full instead of scientific notation.
sns.heatmap(cm, annot=True, fmt='d', ax=ax)
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.set_title(title)
# Tick labels are left numeric on purpose: `labels` follows the mapping's key
# order (2, 0, 1), whereas confusion_matrix without `labels=` orders rows and
# columns by sorted label value, so applying `labels` here would mislabel them.
fig.savefig(title+".png",bbox_inches='tight')
plt.show()