In [11]:
# !pip install keras-rectified-adam -q
# !pip install transformers -q
# !pip install tensorflow==2.1.0 -q
# !pip install keras==2.2.5 -q
# !pip install torch torchvision

In [1]:
import codecs
import glob
import os
import re

import numpy as np
import pandas as pd
import spacy
import unidecode
import tensorflow as tf
import torch

from tensorflow.keras import Sequential
from tensorflow.keras.layers import Activation, Input, Embedding, LSTM, Bidirectional, Dense, Dropout, SpatialDropout1D
from keras.utils.np_utils import to_categorical
from keras_preprocessing.sequence import pad_sequences
from keras_preprocessing.text import Tokenizer
from keras_radam import RAdam
from random import randint, sample
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from transformers import *

Using TensorFlow backend.
I0123 18:45:40.547617 139637616662336 file_utils.py:35] PyTorch version 1.3.1+cpu available.
I0123 18:45:40.548414 139637616662336 file_utils.py:48] TensorFlow version 2.1.0 available.


In [2]:
def spacy_cleaner(text):
    try:
        decoded = unidecode.unidecode(codecs.decode(text, 'unicode_escape'))
    except:
        decoded = unidecode.unidecode(text)
    apostrophe_handled = re.sub("’", "'", decoded)
    expanded = ' '.join(
        [contraction_mapping[t] if t in contraction_mapping else t for t in apostrophe_handled.split(" ")])
    parsed = nlp(expanded)
    final_tokens = []
    for t in parsed:
        if t.is_punct or t.is_space or t.like_num or t.like_url or str(t).startswith('@'):
            pass
        else:
            if t.lemma_ == '-PRON-':
                final_tokens.append(str(t))
            else:
                sc_removed = re.sub("[^a-zA-Z]", '', str(t.lemma_))
                if len(sc_removed) > 1:
                    final_tokens.append(sc_removed)
    joined = ' '.join(final_tokens)
    spell_corrected = re.sub(r'(.)\1+', r'\1\1', joined)
    return spell_corrected

In [3]:
path = '2017_English_final/Subtask_A/'
all_files = glob.glob(path + "/twitter*.txt")
frame = pd.DataFrame()
list_ = []
for file_ in all_files:
    df = pd.read_csv(file_, index_col=None, sep='\t', header=None, names=['id', 'sentiment', 'text', 'to_delete'])
    list_.append(df.iloc[:, :-1])
df = pd.concat(list_)

In [4]:
df = df.drop_duplicates()
df = df.drop(columns=['id'])
df = df.reset_index(drop=True)
df = df[df['sentiment'] != 'neutral']
df = df[df['sentiment'] == 'negative'].append(df[df['sentiment'] == 'positive'].sample(frac =.003), ignore_index=True)

df['token_length'] = [len(x.split(" ")) for x in df.text]
print(max(df.token_length))

print(df.sentiment.value_counts())

contraction_mapping = open('mapping.txt', 'r').read()
contraction_mapping = eval(contraction_mapping)

nlp = spacy.load('en')

df['clean_text'] = [spacy_cleaner(t) for t in df.text]

43
negative    7713
positive      59
Name: sentiment, dtype: int64


  This is separate from the ipykernel package so we can avoid doing imports until
  This is separate from the ipykernel package so we can avoid doing imports until
  This is separate from the ipykernel package so we can avoid doing imports until
  This is separate from the ipykernel package so we can avoid doing imports until


In [23]:
df = pd.read_csv('processed_sentiment.csv')

In [24]:
df['word_count'] = df['clean_text'].apply(lambda x: len(str(x).split(" ")))
df[['sentiment','clean_text','word_count']].sort_values('word_count').tail()

Unnamed: 0,sentiment,clean_text,word_count
17013,positive,I get to go see my endo tomorrow let just say ...,43
12508,positive,in Forbes rank Nestle as the th large public c...,110
11188,positive,Batman may of be the well man in our last enco...,367
15892,positive,Serena remain on top the young Williams bts Ve...,635
11608,positive,do you enjoy last night Life of Chris Brown ne...,832


In [25]:
from nltk.corpus import stopwords
stop = stopwords.words('english')

df['stopwords'] = df['clean_text'].apply(lambda x: len([x for x in x.split() if x in stop]))
df[['sentiment','clean_text','stopwords']].sort_values('stopwords').tail()

Unnamed: 0,sentiment,clean_text,stopwords
15262,positive,reply to this tweet with any challenge you may...,22
12508,positive,in Forbes rank Nestle as the th large public c...,29
11188,positive,Batman may of be the well man in our last enco...,138
15892,positive,Serena remain on top the young Williams bts Ve...,210
11608,positive,do you enjoy last night Life of Chris Brown ne...,277


In [8]:
common_words = pd.Series(' '.join(df['clean_text']).split()).value_counts()[:10].index.to_list()
common_words
# df['clean_text'] = df['clean_text'].apply(lambda x: " ".join(x for x in x.split() if x not in common_words))

['be', 'the', 'to', 'I', 'and', 'in', 'on', 'of', 'for', 'not']

In [27]:
tf1 = (df['clean_text'][11608:11609]).apply(lambda x: pd.value_counts(x.split(" "))).sum(axis = 0).reset_index()
tf1.columns = ['words','tf']
tf1

Unnamed: 0,words,tf
0,McGregor,38
1,Conor,37
2,positive,28
3,nd,13
4,night,11
5,he,11
6,not,11
7,UFC,10
8,neutral,10
9,win,9


In [28]:
for i,word in enumerate(tf1['words']):
  tf1.loc[i, 'idf'] = np.log(df.shape[0]/(len(df[df['clean_text'].str.contains(word)])))
tf1

Unnamed: 0,words,tf,idf
0,McGregor,38,6.274864
1,Conor,37,6.248196
2,positive,28,6.389274
3,nd,13,1.065673
4,night,11,2.231338
5,he,11,1.037560
6,not,11,1.951985
7,UFC,10,6.196902
8,neutral,10,8.276344
9,win,9,3.073437


In [29]:
tf1['tfidf'] = tf1['tf'] * tf1['idf']
tf1

Unnamed: 0,words,tf,idf,tfidf
0,McGregor,38,6.274864,238.444825
1,Conor,37,6.248196,231.183236
2,positive,28,6.389274,178.899677
3,nd,13,1.065673,13.853750
4,night,11,2.231338,24.544723
5,he,11,1.037560,11.413156
6,not,11,1.951985,21.471833
7,UFC,10,6.196902,61.969023
8,neutral,10,8.276344,82.763438
9,win,9,3.073437,27.660930


In [31]:
# dense = vectors.todense()
# tf_df = pd.DataFrame(dense.tolist(), columns=feature_names)

In [32]:
# roberta_tokenizer = RobertaTokenizer.from_pretrained('distilroberta-base')
# roberta_sequence = TFRobertaForSequenceClassification.from_pretrained('distilroberta-base')
# roberta_model = TFRobertaModel.from_pretrained('distilroberta-base')

# x_data = [roberta_tokenizer.encode(val, max_length=30, pad_to_max_length=True) for val in list(df['clean_text'])]

In [33]:
# inp = Input(shape=X[0].shape, dtype='int32')
# alb, _ = roberta_model(inp)
# lstm = LSTM(lstm_out, dropout=0.6, recurrent_dropout=0.6)(alb)
# dense_out = Dense(2, activation='softmax')(lstm)
# model = tf.keras.Model(inp, dense_out)
# # model.layers[1].layers[1] = Dropout(0.5)
# # model.layers[1].layers[2].activation = Activation('softmax')
# model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# model.summary()

# model.load_weights('model.h5')
# evaluatory_measures(model)

In [33]:
embed_dim = 300
lstm_out = 200
batch_size = 64
epochs = 10

tokenizer = Tokenizer(oov_token=1)
tokenizer.fit_on_texts(list(df['clean_text'].values))
x_data = tokenizer.texts_to_sequences(df['clean_text'].values)
x_data = pad_sequences(x_data, padding='post', maxlen=30, truncating='post')

In [34]:
y_data = df['sentiment'].values
encoder = LabelEncoder()
label = encoder.fit(y_data)
labels_lookup = label.transform(y_data)
print(labels_lookup)
negative_label_lookup = label.inverse_transform(labels_lookup)
negative_label_lookup = dict((x, y) for x, y in zip(negative_label_lookup, labels_lookup))
print(negative_label_lookup)
y_data = [negative_label_lookup[val] for val in y_data]

[0 0 0 ... 1 1 1]
{'negative': 0, 'positive': 1}


In [35]:
X_train, X_valid, Y_train, Y_valid = train_test_split(x_data, y_data, stratify=y_data, test_size=0.2, random_state=36)

def partition(data, n):
    division = len(data) / float(n)
    return np.array([data[int(round(division * i)): int(round(division * (i + 1)))] for i in range(n)])

positive_val = negative_label_lookup['positive']
negative_val = negative_label_lookup['negative']

train_data = zip(X_train, Y_train)
positive_data = [val for val in train_data if val[1] == positive_val]
train_data = zip(X_train, Y_train)
negative_data = [val for val in train_data if val[1] == negative_val]

In [36]:
def create_positive_batch(p_data, n_data, batch_size, pos_ex_per_batch):
    neg_ex_per_batch = int(len(n_data)/(batch_size - pos_ex_per_batch))
    data = partition(n_data, neg_ex_per_batch)
    # To create "neg_ex_per_batch" length of unique negative batches
    pos_batch_length = len(p_data)//pos_ex_per_batch
    # Since we have a loop running (n) times we need the outside loop to cater the index i.e.
    # i.e. outside loop runs 6 times and inside runs 10 times total positive_batch will be 60 but the batch size of loop will be 6
    batch = int(np.ceil(len(data)/pos_batch_length))
    # Counter for positive indexes.
    n_batch_i = 0
    for index in range(batch):
        # Array with indexes of length of negative data
        example_indexes = list(np.arange(len(p_data)))
        # Create batches of negative data and append each batch in each positive examples.
        for i in range(pos_batch_length):
            if n_batch_i >= len(data):
                break

            index_samp = sample(example_indexes, pos_ex_per_batch)
            for p_i in index_samp:
                data[n_batch_i].insert(randint(0, len(data[n_batch_i])), p_data[p_i])
                # Remove used indexes from the list (avoid repetition)
                example_indexes.remove(p_i)
            n_batch_i += 1
    return data

In [37]:
def create_negative_batch(p_data, n_data, batch_size, neg_ex_per_batch):
    pos_ex_per_batch = int(len(p_data)/(batch_size - neg_ex_per_batch))
    data = partition(p_data, pos_ex_per_batch)
    # To create "neg_ex_per_batch" length of unique negative batches
    neg_batch_length = len(n_data)//neg_ex_per_batch
    # Since we have a loop running (n) times we need the outside loop to cater the index i.e.
    # i.e. outside loop runs 6 times and inside runs 10 times total positive_batch will be 60 but the batch size of loop will be 6
    batch = int(np.ceil(len(data)/neg_batch_length))
    # Counter for positive indexes.
    p_batch_i = 0
    for index in range(batch):
        # Array with indexes of length of negative data
        example_indexes = list(np.arange(len(n_data)))
        # Create batches of negative data and append each batch in each positive examples.
        for i in range(neg_batch_length):
            if p_batch_i >= len(data):
                break

            index_samp = sample(example_indexes, neg_ex_per_batch)
            for n_i in index_samp:
                data[p_batch_i].insert(randint(0, len(data[p_batch_i])), n_data[n_i])
                # Remove used indexes from the list (avoid repetition)
                example_indexes.remove(n_i)
            p_batch_i += 1
    return data

In [38]:
neg_examples_per_batch = 10
pos_examples_per_batch = 10

data = create_positive_batch(positive_data, negative_data, batch_size, pos_examples_per_batch)

In [39]:
print('Positive values = {}'.format(len([val for val in data[-1] if val[1] == positive_val])))
print('Negative values = {}'.format(len([val for val in data[-1] if val[1] == negative_val])))
print('Total values = {}'.format(len(data[-1])))

Positive values = 10
Negative values = 54
Total values = 64


In [40]:
X, y = [], []

for element in data:
    for x_val, y_val in element:
        X.append(x_val)
        y.append(y_val)
X = np.array(X)
y = np.array(y)

Y = to_categorical(y)

In [48]:
from gensim.models import Word2Vec
from nltk import word_tokenize

sentences, sentiments = df['clean_text'].values, df['sentiment'].values
tokenized_words = [word_tokenize(sent) for sent in sentences]
model = Word2Vec(tokenized_words, size=300, window=8, min_count=1, negative=10)

vocab_size = len(model.wv.index2word) + 1

I0123 19:05:28.032369 139637616662336 word2vec.py:1588] collecting all words and their counts
I0123 19:05:28.033116 139637616662336 word2vec.py:1573] PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
I0123 19:05:28.061026 139637616662336 word2vec.py:1596] collected 15212 word types from a corpus of 152665 raw words and 7772 sentences
I0123 19:05:28.062661 139637616662336 word2vec.py:1647] Loading a fresh vocabulary
I0123 19:05:28.089599 139637616662336 word2vec.py:1671] effective_min_count=1 retains 15212 unique words (100% of original 15212, drops 0)
I0123 19:05:28.090431 139637616662336 word2vec.py:1677] effective_min_count=1 leaves 152665 word corpus (100% of original 152665, drops 0)
I0123 19:05:28.165688 139637616662336 word2vec.py:1736] deleting the raw counts dictionary of 15212 items
I0123 19:05:28.166849 139637616662336 word2vec.py:1739] sample=0.001 downsamples 50 most-common words
I0123 19:05:28.167459 139637616662336 word2vec.py:1742] downsampling leaves est

In [49]:
# word_sentiment = dict()
# for words, sentiment in zip(tokenized_words, sentiments):
#     for word in words:
# #         if word not in word_sentiment.keys():
#             word_sentiment.update({word: sentiment})

In [50]:
# with open('outfile.tsv', 'w', encoding='utf8') as file_vector:
#     with open('outfile.meta', 'w', encoding='utf8') as file_metadata:
#         for word, sentiment in word_sentiment.items():
#             if word in model.wv.index2word:
#                 file_metadata.write(sentiment + '-' + word + '\n')
#                 vector_row = '\t'.join(str(x) for x in model.wv[word])
#                 file_vector.write(vector_row + '\n')

In [51]:
# create a weight matrix for the Embedding layer from a loaded embedding
def get_weight_matrix(embedding, vocab_size):
    # define weight matrix dimensions with all 0
    weight_matrix = np.zeros((vocab_size, 300))
    # step vocab, store vectors using the Tokenizer's integer mapping
    for i, word in enumerate(embedding.wv.index2word):
        weight_matrix[i] = embedding.wv[word]
    return weight_matrix

embedding_vectors = get_weight_matrix(model, vocab_size)

In [52]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import confusion_matrix


def evaluatory_measures(model):
    # model.evaluate(X_valid, Y_valid)
    y_probs = model.predict(X_valid, verbose=0)
    y_true = Y_valid
#     y_pred = [int(round(x[0])) for x in y_probs]
    
#     y_true = [np.argmax(val, axis=None) for val in Y_valid]
    y_pred = [np.argmax(val, axis=None) for val in y_probs]
    # accuracy: (tp + tn) / (p + n)
    accuracy = accuracy_score(y_true, y_pred)
    print('Accuracy: %f' % accuracy)
    # precision tp / (tp + fp)
    precision = precision_score(y_true, y_pred)
    print('Precision: %f' % precision)
    # recall: tp / (tp + fn)
    recall = recall_score(y_true, y_pred)
    print('Recall: %f' % recall)
    # f1: 2 tp / (2 tp + fp + fn)
    f1 = f1_score(y_true, y_pred)
    print('F1 score: %f' % f1)
    # kappa
    kappa = cohen_kappa_score(y_true, y_pred)
    print('Cohens kappa: %f' % kappa)
    # ROC AUC
    auc = roc_auc_score(y_true, y_pred)
    print('ROC AUC: %f' % auc)
    # confusion matrix
    matrix = confusion_matrix(y_true, y_pred)
    print(matrix)

    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
    print('TN:{}, FP:{}, FN:{}, TP:{}'.format(tn, fp, fn, tp))

In [59]:
model = Sequential()
model.add(Input(shape=X[0].shape))
model.add(Embedding(vocab_size, embed_dim, weights=[embedding_vectors], trainable=False))
model.add(LSTM(lstm_out, dropout=0.6, recurrent_dropout=0.6))
model.add(Dense(2, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
class_weight = {0: 0.1,
                1: 0.6}
model.fit(X, Y, batch_size=batch_size, 
          epochs=3, class_weight=class_weight)
evaluatory_measures(model)

W0123 19:12:44.146194 139637616662336 data_adapter.py:1091] sample_weight modes were coerced from
  ...
    to  
  ['...']


Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_4 (Embedding)      (None, 30, 300)           4563900   
_________________________________________________________________
lstm_4 (LSTM)                (None, 200)               400800    
_________________________________________________________________
dense_4 (Dense)              (None, 2)                 402       
Total params: 4,965,102
Trainable params: 401,202
Non-trainable params: 4,563,900
_________________________________________________________________
Train on 7310 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
Accuracy: 0.010289
Precision: 0.007737
Recall: 1.000000
F1 score: 0.015355
Cohens kappa: 0.000040
ROC AUC: 0.501296
[[   4 1539]
 [   0   12]]
TN:4, FP:1539, FN:0, TP:12


In [60]:
class_weight = {0: 30,
                1: 0.05}
model.fit(X, Y, batch_size=batch_size, epochs=5, class_weight=class_weight)
evaluatory_measures(model)

In [43]:
class_weight = {0: 20,
                1: 0.01}
model.fit(X, Y, batch_size=batch_size, epochs=25, class_weight=class_weight)
evaluatory_measures(model)

W0121 15:00:25.766878 140453695219520 data_adapter.py:1091] sample_weight modes were coerced from
  ...
    to  
  ['...']


Train on 18547 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Accuracy: 0.790076
Precision: 0.996146
Recall: 0.792337
F1 score: 0.882629
Cohens kappa: -0.000278
ROC AUC: 0.496169
[[   3   12]
 [ 813 3102]]
TN:3, FP:12, FN:813, TP:3102


In [37]:
class_weight = {0: 20,
                1: 0.1}
model.fit(X, Y, batch_size=batch_size, epochs=15, class_weight=class_weight)
evaluatory_measures(model)

W0121 11:17:04.128931 140453695219520 data_adapter.py:1091] sample_weight modes were coerced from
  ...
    to  
  ['...']


Train on 18547 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Accuracy: 0.836896
Precision: 0.996663
Recall: 0.839080
F1 score: 0.911108
Cohens kappa: 0.004906
ROC AUC: 0.552874
[[   4   11]
 [ 630 3285]]
TN:4, FP:11, FN:630, TP:3285


In [18]:
model = Sequential()
model.add(Embedding(vocab_size, embed_dim, weights=[embedding_vectors], trainable=False))
model.add(LSTM(lstm_out, dropout=0.6, recurrent_dropout=0.6))
model.add(Dense(2, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
class_weight = {0: 6.0,
                1: 1.0}
model.fit(X, Y, batch_size=batch_size, epochs=5, class_weight=class_weight)
evaluatory_measures(model)

W0120 16:39:07.494138 140453695219520 data_adapter.py:1091] sample_weight modes were coerced from
  ...
    to  
  ['...']


Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, None, 300)         8041800   
_________________________________________________________________
bidirectional (Bidirectional (None, 400)               801600    
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 802       
Total params: 8,844,202
Trainable params: 802,402
Non-trainable params: 8,041,800
_________________________________________________________________
Train on 18552 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Accuracy: 0.930025
Precision: 0.995913
Recall: 0.933589
F1 score: 0.963744
Cohens kappa: -0.007270
ROC AUC: 0.466794
[[   0   15]
 [ 260 3655]]
TN:0, FP:15, FN:260, TP:3655


In [32]:
model = Sequential()
model.add(Embedding(vocab_size, embed_dim, weights=[embedding_vectors], trainable=False))
model.add(LSTM(lstm_out, dropout=0.6, recurrent_dropout=0.6))
model.add(Dense(2, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
class_weight = {0: 30.0,
                1: 1.0}
model.fit(X, Y, batch_size=batch_size, epochs=5, class_weight=class_weight)
evaluatory_measures(model)

W0120 15:00:42.146871 139930723264320 data_adapter.py:1091] sample_weight modes were coerced from
  ...
    to  
  ['...']


Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_9 (Embedding)      (None, None, 300)         8041800   
_________________________________________________________________
lstm_9 (LSTM)                (None, 200)               400800    
_________________________________________________________________
dense_9 (Dense)              (None, 2)                 402       
Total params: 8,443,002
Trainable params: 401,202
Non-trainable params: 8,041,800
_________________________________________________________________
Train on 18547 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Accuracy: 0.136641
Precision: 1.000000
Recall: 0.133333
F1 score: 0.235294
Cohens kappa: 0.001173
ROC AUC: 0.566667
[[  15    0]
 [3393  522]]
TN:15, FP:0, FN:3393, TP:522


In [34]:
model.fit(X, Y, batch_size=batch_size, epochs=10, class_weight=class_weight)
evaluatory_measures(model)

W0120 15:19:24.688891 139930723264320 data_adapter.py:1091] sample_weight modes were coerced from
  ...
    to  
  ['...']


Train on 18547 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Accuracy: 0.861578
Precision: 0.997051
Recall: 0.863602
F1 score: 0.925541
Cohens kappa: 0.010703
ROC AUC: 0.598467
[[   5   10]
 [ 534 3381]]
TN:5, FP:10, FN:534, TP:3381


In [26]:
model = Sequential()
model.add(Embedding(vocab_size, embed_dim, weights=[embedding_vectors], trainable=False))
model.add(LSTM(lstm_out, dropout=0.6, recurrent_dropout=0.6))
model.add(Dense(2, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
class_weight = {0: 6.0,
                1: 1.0}
model.fit(X, Y, batch_size=batch_size, epochs=5, class_weight=class_weight)
evaluatory_measures(model)

W0120 13:14:28.164659 139930723264320 data_adapter.py:1091] sample_weight modes were coerced from
  ...
    to  
  ['...']


Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_6 (Embedding)      (None, None, 300)         8041800   
_________________________________________________________________
lstm_6 (LSTM)                (None, 200)               400800    
_________________________________________________________________
dense_6 (Dense)              (None, 2)                 402       
Total params: 8,443,002
Trainable params: 401,202
Non-trainable params: 8,041,800
_________________________________________________________________
Train on 18547 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Accuracy: 0.620102
Precision: 0.997535
Recall: 0.620179
F1 score: 0.764845
Cohens kappa: 0.004388
ROC AUC: 0.610089
[[   9    6]
 [1487 2428]]
TN:9, FP:6, FN:1487, TP:2428
