In [1]:
from keras import backend as K
from keras.engine.topology import Layer
from keras import initializers, regularizers, constraints

from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score

from keras.models import Model
from keras.layers import Dense, Embedding, Input
from keras.layers import LSTM, Bidirectional, Dropout,Flatten,GRU

import keras.layers as layers
from keras.models import Model
from keras import backend as K 

import itertools 

import matplotlib.pyplot as plt

from keras.preprocessing import text, sequence

import numpy as np
import pandas as pd
from keras.callbacks import EarlyStopping, ModelCheckpoint
np.random.seed(7)

from keras.layers import TimeDistributed

import os, re, csv, math, codecs


Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
class Attention(Layer):
    def __init__(self, step_dim,
                 W_regularizer=None, b_regularizer=None,
                 W_constraint=None, b_constraint=None,
                 bias=True, **kwargs):
        self.supports_masking = True
        self.init = initializers.get('glorot_uniform')

        self.W_regularizer = regularizers.get(W_regularizer)
        self.b_regularizer = regularizers.get(b_regularizer)

        self.W_constraint = constraints.get(W_constraint)
        self.b_constraint = constraints.get(b_constraint)

        self.bias = bias
        self.step_dim = step_dim
        self.features_dim = 0
        super(Attention, self).__init__(**kwargs)

    def build(self, input_shape):
        assert len(input_shape) == 3

        self.W = self.add_weight((input_shape[-1],),
                                 initializer=self.init,
                                 name='{}_W'.format(self.name),
                                 regularizer=self.W_regularizer,
                                 constraint=self.W_constraint)
        self.features_dim = input_shape[-1]

        if self.bias:
            self.b = self.add_weight((input_shape[1],),
                                     initializer='zero',
                                     name='{}_b'.format(self.name),
                                     regularizer=self.b_regularizer,
                                     constraint=self.b_constraint)
        else:
            self.b = None

        self.built = True

    def compute_mask(self, input, input_mask=None):
        return None

    def call(self, x, mask=None):
        features_dim = self.features_dim
        step_dim = self.step_dim

        eij = K.reshape(K.dot(K.reshape(x, (-1, features_dim)),
                        K.reshape(self.W, (features_dim, 1))), (-1, step_dim))

        if self.bias:
            eij += self.b

        eij = K.tanh(eij)

        a = K.exp(eij)

        if mask is not None:
            a *= K.cast(mask, K.floatx())

        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        a = K.expand_dims(a)
        weighted_input = x * a
        return K.sum(weighted_input, axis=1)

    def compute_output_shape(self, input_shape):
        return input_shape[0],  self.features_dim



def BidGRU(maxlen, max_features, embed_size, embedding_matrix):
    inp = Input(shape=(maxlen, ))
    x = Embedding(max_features, embed_size, weights=[embedding_matrix],
                  trainable=False)(inp)
    x = Bidirectional(GRU(300, return_sequences=True, dropout=0.25,
                           recurrent_dropout=0.25))(x)
    x = Attention(maxlen)(x)
#    x = Flatten(x)
    x = Dense(256, activation="relu")(x)
    x = Dropout(0.25)(x)
    x = Dense(2, activation="sigmoid")(x)
    model = Model(inputs=inp, outputs=x)

    return model

def BidGRUNoAtt(maxlen, max_features, embed_size, embedding_matrix):
    inp = Input(shape=(maxlen, ))
    x = Embedding(max_features, embed_size, weights=[embedding_matrix],
                  trainable=False)(inp)
    x = Bidirectional(GRU(300,  dropout=0.25,
                           recurrent_dropout=0.25))(x)
    #x = Attention(maxlen)(x)
    #x = Flatten(x)
    x = Dense(256, activation="relu")(x)
    x = Dropout(0.25)(x)
    x = Dense(2, activation="sigmoid")(x)
    model = Model(inputs=inp, outputs=x)

    return model
  
def BidLSTMNoAtt(maxlen, max_features, embed_size, embedding_matrix):
    inp = Input(shape=(maxlen, ))
    x = Embedding(max_features, embed_size, weights=[embedding_matrix],
                  trainable=False)(inp)
    x = Bidirectional(LSTM(300,  dropout=0.25, return_sequences=True,
                           recurrent_dropout=0.25))(x)
    #x = Attention(maxlen)(x)
    #x = Flatten(x)
    x = TimeDistributed(Dense(256, activation="relu"))(x)
    x = TimeDistributed(Dropout(0.25))(x)
    x = Flatten()(x)
    x = Dense(2, activation="sigmoid")(x)
    model = Model(inputs=inp, outputs=x)

    return model

def BidLstm(maxlen, max_features, embed_size, embedding_matrix):
    inp = Input(shape=(maxlen, ))
    x = Embedding(max_features, embed_size, weights=[embedding_matrix],
                  trainable=False)(inp)
    x = Bidirectional(LSTM(300, return_sequences=True, dropout=0.25,
                           recurrent_dropout=0.25))(x)
    x = Attention(maxlen)(x)
#    x = Flatten(x)
    x = Dense(256, activation="relu")(x)
    x = Dropout(0.25)(x)
    x = Dense(2, activation="sigmoid")(x)
    model = Model(inputs=inp, outputs=x)

    return model


def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    
    plt.tight_layout()

def print_cm(y_test,y_pred):
    true_test_labels = ['negative','positive']
    cnf_matrix = confusion_matrix(y_test, y_pred)
    np.set_printoptions(precision=2)

    plt.figure()
    plot_confusion_matrix(cnf_matrix, classes=true_test_labels,
                      title='Confusion matrix, without normalization')

    # Plot normalized confusion matrix
    plt.figure()
    plot_confusion_matrix(cnf_matrix, classes=true_test_labels, normalize=True,
                      title='Normalized confusion matrix')

    plt.show()

def LstmCnn(maxlen, max_features, embed_size, embedding_matrix):
    inp = Input(shape=(maxlen, ))
    x = Embedding(max_features, embed_size, weights=[embedding_matrix],
                  trainable=False)(inp)
    x = Bidirectional(GRU(300, return_sequences=True, dropout=0.25,
                           recurrent_dropout=0.25))(x)
    x = Attention(maxlen)(x)
#    x = Flatten(x)
    x = Dense(256, activation="relu")(x)
    x = Dropout(0.25)(x)
    x = Dense(1, activation="sigmoid")(x)
    model = Model(inputs=inp, outputs=x)
    
    inp1 = Input(shape=(512,))
    x1 = Embedding(512, 512, weights=[embedding_matrix],
                  trainable=False)(inp1)
    x1=Conv1D(16, kernel_size=3, activation='elu', padding='same',
                             input_shape=(vector_size, 1))(x1)
    x1=Dense(512, activation='relu',input_shape=(vector_size, 1))(x1)
    x1=Dense(64, activation='relu')(x1)
    x1=Dense(8, activation='relu')(x1)
    x1=Flatten()(x1)
    x1=Dense(2, activation='softmax')(x1)
    
    model_cnn = Model(inputs=inp1, outputs=x1)

    model_cnn.add(Conv1D(16, kernel_size=3, activation='elu', padding='same',
                             input_shape=(vector_size, 1)))
    model_cnn.add(Dense(512, activation='relu',input_shape=(vector_size, 1)))
    #model.add(Dropout(0.2))
    model_cnn.add(Dense(64, activation='relu'))
    #model.add(Dropout(0.25))
    model_cnn.add(Dense(8, activation='relu'))
    #model.add(Dropout(0.25))
    model_cnn.add(Flatten())
    model_cnn.add(Dense(2, activation='softmax'))
    
    combined_model = Sequential()
    combined_model.add(Merge([model, model_cnn], mode='concat', concat_axis=1))

    return combined_model

def make_df(train_path, test_path, max_features, maxlen, list_classes, word_index):
    train = pd.read_csv(train_path)
    test = pd.read_csv(test_path)
    train = train.sample(frac=1)

    list_sentences_train = train["message"].fillna("unknown").values
    y = train[list_classes].values
    
    y_test = test[list_classes].values
    
    y=np.where(y == 'Bullish', 1.0, 0.0)
    y_test=np.where(y_test == 'Bullish', 1.0, 0.0)
    list_sentences_test = test["spans"].fillna("unknown").values

    tokenizer = text.Tokenizer(num_words=max_features)
    tokenizer.word_index = word_index
    #tokenizer.fit_on_texts(list(list_sentences_train))
    list_tokenized_train = tokenizer.texts_to_sequences(list_sentences_train)
    #print(list_tokenized_train[0])
    list_tokenized_test = tokenizer.texts_to_sequences(list_sentences_test)
    X_t = sequence.pad_sequences(list_tokenized_train, maxlen=maxlen)
    X_te = sequence.pad_sequences(list_tokenized_test, maxlen=maxlen)

    #word_index = tokenizer.word_index
    
    return X_t, X_te, y, y_test

def create_sequence(word_index, sent, maxlen):
    token = text.Tokenizer()
    token.word_index=word_index
    tokenized_text = token.texts_to_sequences(sent)
    X_text = sequence.pad_sequences(tokenized_text, maxlen=maxlen)
    return X_text

def make_glovevec(glovepath, max_features, embed_size):
    embedding_matrix = np.zeros((max_features+1, embed_size))
    f = open(glovepath, encoding="utf8")
    word_index = {}
    count=0
    for line in f:
        count+=1
        if count > max_features:
            break
        else:
            values = line.split()
            word_index[values[0]]=count
            #print(values)
            #word = ' '.join(values[:-embed_size])
            coefs = np.asarray(values[-embed_size:], dtype='float32')
            embedding_matrix[count]=coefs.reshape(-1)
        #print(embeddings_index[word])
    f.close()
    
    return embedding_matrix, word_index




In [3]:
!wget https://dl.fbaipublicfiles.com/fasttext/vectors-english/wiki-news-300d-1M.vec.zip

--2020-04-07 20:46:59--  https://dl.fbaipublicfiles.com/fasttext/vectors-english/wiki-news-300d-1M.vec.zip
Resolving dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)... 104.22.75.142, 104.22.74.142, 2606:4700:10::6816:4a8e, ...
Connecting to dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)|104.22.75.142|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 681808098 (650M) [application/zip]
Saving to: ‘wiki-news-300d-1M.vec.zip’


2020-04-07 20:48:03 (10.4 MB/s) - ‘wiki-news-300d-1M.vec.zip’ saved [681808098/681808098]



In [4]:
! unzip wiki-news-300d-1M.vec.zip

Archive:  wiki-news-300d-1M.vec.zip
  inflating: wiki-news-300d-1M.vec   


In [5]:
import nltk
from nltk.corpus import stopwords
nltk.download('stopwords')
from nltk.tokenize import RegexpTokenizer 
tokenizer = RegexpTokenizer(r'\w+')
stop_words = set(stopwords.words('english'))
stop_words.update(['.', ',', '"', "'", ':', ';', '(', ')', '[', ']', '{', '}'])

[nltk_data] Downloading package stopwords to
[nltk_data]     /home/users/kostadin.mishev/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [6]:
#load embeddings

from tqdm import tqdm
print('loading word embeddings...')
embeddings_index = {}
f = codecs.open('/home/users/kostadin.mishev/datasets/fasttext/wiki-news-300d-1M.vec', encoding='utf-8')
for line in tqdm(f):
    values = line.rstrip().rsplit(' ')
    word = values[0]
    coefs = np.asarray(values[1:], dtype='float32')
    embeddings_index[word] = coefs
f.close()
print('found %s word vectors' % len(embeddings_index))

677it [00:00, 6768.47it/s]

loading word embeddings...


999995it [02:28, 6736.08it/s]

found 999995 word vectors





In [8]:
#load 
test_df = pd.read_csv(r"/home/users/kostadin.mishev/phd/dataset/dev/dev.tsv", names=['id','sent','a','text'],sep='\t')
train_df = pd.read_csv(r"/home/users/kostadin.mishev/phd/dataset/train/train.tsv", names=['id','sent','a','text'],sep='\t')
test_df = test_df.fillna('_NA_')

print("num train: ", train_df.shape[0])
print("num test: ", test_df.shape[0])

label_names = ["sent"]
y_train = train_df[label_names].values
y_test = test_df[label_names].values

num train:  1748
num test:  438


In [9]:
from keras.preprocessing.text import Tokenizer
max_seq_len=64
MAX_NB_WORDS = 100000
raw_docs_train = train_df['text'].tolist()
raw_docs_test = test_df['text'].tolist() 

print("pre-processing train data...")
processed_docs_train = []
for doc in tqdm(raw_docs_train):
    tokens = tokenizer.tokenize(doc)
    filtered = [word for word in tokens if word not in stop_words]
    processed_docs_train.append(" ".join(filtered))
#end for

processed_docs_test = []
for doc in tqdm(raw_docs_test):
    tokens = tokenizer.tokenize(doc)
    filtered = [word for word in tokens if word not in stop_words]
    processed_docs_test.append(" ".join(filtered))
#end for

print("tokenizing input data...")
tokenizer = Tokenizer(num_words=MAX_NB_WORDS, lower=True, char_level=False)
tokenizer.fit_on_texts(processed_docs_train + processed_docs_test)  #leaky
word_seq_train = tokenizer.texts_to_sequences(processed_docs_train)
word_seq_test = tokenizer.texts_to_sequences(processed_docs_test)
word_index = tokenizer.word_index
print("dictionary size: ", len(word_index))


100%|██████████| 1748/1748 [00:00<00:00, 83224.29it/s]
100%|██████████| 438/438 [00:00<00:00, 88043.00it/s]

pre-processing train data...
tokenizing input data...
dictionary size:  5738





In [10]:
#pad sequences
word_seq_train = sequence.pad_sequences(word_seq_train, maxlen=max_seq_len)
word_seq_test = sequence.pad_sequences(word_seq_test, maxlen=max_seq_len)

In [11]:
#model parameters
num_filters = 64 
embed_dim = 300 
weight_decay = 1e-4

#embedding matrix
print('preparing embedding matrix...')
words_not_found = []
nb_words = min(MAX_NB_WORDS, len(word_index)+1)
embedding_matrix = np.zeros((nb_words, embed_dim))

for word, i in word_index.items():
    if i >= nb_words:
        continue
    embedding_vector = embeddings_index.get(word)
    if (embedding_vector is not None) and len(embedding_vector) > 0:
        # words not found in embedding index will be all-zeros.
        embedding_matrix[i] = embedding_vector
    else:
        words_not_found.append(word)
print('number of null word embeddings: %d' % np.sum(np.sum(embedding_matrix, axis=1) == 0))


print("sample words not found: ", np.random.choice(words_not_found, 10))

preparing embedding matrix...
number of null word embeddings: 931
sample words not found:  ['bilfinger' 'sal1v' 'breakingviews' 'sneed' 'cumerio' 'affectogenimap'
 'maritim' 'mothahir' 'hearst' 'cnc1v']


In [45]:
xtrain = []
for sentence in word_seq_train:
    sen = [embedding_matrix[w] for w in sentence]
    xtrain.append(sen)
xtrain = np.asarray(xtrain)

In [46]:
xtrain_mean = np.average(xtrain,axis=1)

In [33]:
xtrain = xtrain.reshape(xtrain.shape[0],xtrain.shape[1]*xtrain.shape[2])

In [34]:
xtrain.shape

(1748, 19200)

In [49]:
xtest = []
for sentence in word_seq_test:
    sen = [embedding_matrix[w] for w in sentence]
    xtest.append(sen)
xtest = np.asarray(xtest)
xtest_mean = np.average(xtest,axis=1)
xtest = xtest.reshape(xtest.shape[0],xtest.shape[1]*xtest.shape[2])

In [50]:
xtest_mean.shape

(438, 300)

In [24]:
y = train_df['sent'].values
y_test = test_df['sent'].values

In [30]:
y = y.reshape(1748,1)
y_test = y_test.reshape(y_test.shape[0],1)

In [56]:
from sklearn.svm import LinearSVC
from sklearn.svm import SVC # "Support vector classifier"
from sklearn.model_selection import GridSearchCV

from sklearn.metrics import confusion_matrix
from sklearn.metrics import matthews_corrcoef

for c in [0.0025,0.025,0.1,0.25,1,10,50,100,150,200,1000,2000,5000,10000,20000,30000,50000,100000]:
    print(c)
    model = SVC(kernel='linear', C=c, gamma=0.001)
    model.fit(xtrain_mean, y)
    Y_pred = model.predict(xtest_mean)
    tn, fp, fn, tp = confusion_matrix(y_test,Y_pred).ravel() 
    mcc = matthews_corrcoef(y_test, Y_pred)

    print(tp)
    print(tn)
    print(fp)
    print(fn)
    print(mcc)
    print()

0.0025


  y = column_or_1d(y, warn=True)


42
203
16
177
0.1751328248846775

0.025


  y = column_or_1d(y, warn=True)


42
203
16
177
0.1751328248846775

0.1


  y = column_or_1d(y, warn=True)


42
203
16
177
0.1751328248846775

0.25


  y = column_or_1d(y, warn=True)


42
203
16
177
0.1751328248846775

1


  y = column_or_1d(y, warn=True)


58
201
18
161
0.24115646544478436

10


  y = column_or_1d(y, warn=True)


134
184
35
85
0.46431819480091957

50


  y = column_or_1d(y, warn=True)


166
177
42
53
0.5669256408685777

100


  y = column_or_1d(y, warn=True)


172
172
47
47
0.5707762557077626

150


  y = column_or_1d(y, warn=True)


171
173
46
48
0.570800058879536

200


  y = column_or_1d(y, warn=True)


168
173
46
51
0.5572228726219256

1000


  y = column_or_1d(y, warn=True)


173
174
45
46
0.5844809791708456

2000


  y = column_or_1d(y, warn=True)


173
175
44
46
0.5890656607636812

5000


  y = column_or_1d(y, warn=True)


173
174
45
46
0.5844809791708456

10000


  y = column_or_1d(y, warn=True)


172
175
44
47
0.5845297326412703

20000


  y = column_or_1d(y, warn=True)


173
175
44
46
0.5890656607636812

30000


  y = column_or_1d(y, warn=True)


175
173
46
44
0.5890656607636812

50000


  y = column_or_1d(y, warn=True)


174
174
45
45
0.589041095890411

100000


  y = column_or_1d(y, warn=True)


174
174
45
45
0.589041095890411



In [54]:
import xgboost as xgb

#Train the XGboost Model for Classification
model1 = xgb.XGBClassifier()
model2 = xgb.XGBClassifier(n_estimators=10000, max_depth=256, learning_rate=0.01)

xgb_model = model1.fit(xtrain_mean, y)

from sklearn.metrics import confusion_matrix
from sklearn.metrics import matthews_corrcoef

Y_pred = xgb_model.predict(xtest_mean)



  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


In [55]:
tn, fp, fn, tp = confusion_matrix(y_test,Y_pred).ravel() 
mcc = matthews_corrcoef(y_test, Y_pred)

print(tp)
print(tn)
print(fp)
print(fn)
print(mcc)

162
167
52
57
0.5024140654787853


In [28]:
import keras
model_rnn = BidLstm(max_seq_len, nb_words, embed_dim, embedding_matrix)

model_rnn.compile(loss='binary_crossentropy', optimizer='adam',
              metrics=['accuracy'])
file_path = "./fasttext/FASTTEXT_BidLSTMNoAtt.hdf5"
ckpt = ModelCheckpoint(file_path, monitor='val_loss', verbose=1,
                       save_best_only=True, mode='min')
early = EarlyStopping(monitor="val_loss", mode="min", patience=15)

y_train_rnn = keras.utils.to_categorical(y_train, 2)
history = model_rnn.fit(word_seq_train, y_train_rnn, batch_size=15, epochs=50, validation_split=0.05, callbacks=[ckpt, early])


Train on 1660 samples, validate on 88 samples
Epoch 1/50

Epoch 00001: val_loss improved from inf to 0.63448, saving model to ./fasttext/FASTTEXT_BidLSTMNoAtt.hdf5
Epoch 2/50

Epoch 00002: val_loss improved from 0.63448 to 0.61865, saving model to ./fasttext/FASTTEXT_BidLSTMNoAtt.hdf5
Epoch 3/50

Epoch 00003: val_loss improved from 0.61865 to 0.50786, saving model to ./fasttext/FASTTEXT_BidLSTMNoAtt.hdf5
Epoch 4/50

Epoch 00004: val_loss improved from 0.50786 to 0.49227, saving model to ./fasttext/FASTTEXT_BidLSTMNoAtt.hdf5
Epoch 5/50

Epoch 00005: val_loss did not improve from 0.49227
Epoch 6/50

Epoch 00006: val_loss improved from 0.49227 to 0.45884, saving model to ./fasttext/FASTTEXT_BidLSTMNoAtt.hdf5
Epoch 7/50

Epoch 00007: val_loss improved from 0.45884 to 0.43184, saving model to ./fasttext/FASTTEXT_BidLSTMNoAtt.hdf5
Epoch 8/50

Epoch 00008: val_loss did not improve from 0.43184
Epoch 9/50

Epoch 00009: val_loss did not improve from 0.43184
Epoch 10/50

Epoch 00010: val_loss di

In [25]:
model_rnn.load_weights(file_path)

In [16]:
word_seq_train[0]

array([   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,   50,   32,   44, 1361, 1362,    7,  575,  253], dtype=int32)

In [26]:
y_pred = model_rnn.predict(word_seq_test)
y_pred_labels=np.argmax(y_pred,axis=1)

In [0]:
import pickle
with open("./results_BiLSTM+FastText+noAtt.pickle","wb") as f:
    pickle.dump(y_pred_labels,f)

In [27]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import matthews_corrcoef

print(accuracy_score(y_test, y_pred_labels))
print(precision_score(y_test, y_pred_labels))
print(recall_score(y_test, y_pred_labels))
print(f1_score(y_test, y_pred_labels))
print(matthews_corrcoef(y_test, y_pred_labels))

#FP = confusion_matrix(y_test, y_pred_labels).sum(axis=0) - np.diag(confusion_matrix)  
#FN = confusion_matrix(y_test, y_pred_labels).sum(axis=1) - np.diag(confusion_matrix)
#TP = np.diag(confusion_matrix(y_test, y_pred_labels))
#TN = confusion_matrix(y_test, y_pred_labels).values.sum() - (FP + FN + TP)

print(confusion_matrix(y_test, y_pred_labels))
#print(FN)
#print(TP)
#print(TN)

0.8333333333333334
0.8509615384615384
0.8082191780821918
0.82903981264637
0.6675092223130027
[[188  31]
 [ 42 177]]
