In [None]:
from itertools import product

import keras.backend as K
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from keras.layers import Dense
from keras.layers import Input
from keras.models import Model
from keras.models import Sequential
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import one_hot
from keras.utils.np_utils import to_categorical
from keras.utils.vis_utils import plot_model
from numpy import array
from numpy import asarray
from numpy import zeros
from sklearn import preprocessing
from sklearn.model_selection import StratifiedKFold
from sklearn.utils.class_weight import compute_class_weight

In [None]:
def getModel(input_dim=4, n_classes=4, hidden_units=(50, 20)):
  """Build the (uncompiled) feed-forward classifier used in this notebook.

  The network is a stack of fully connected tanh layers followed by a
  softmax output layer. With the default arguments it is identical to the
  original hard-coded architecture: 4 inputs -> 50 -> 20 -> 4 outputs.

  Args:
      input_dim: number of input features per sample.
      n_classes: number of output classes (width of the softmax layer).
      hidden_units: iterable with the size of each hidden tanh layer, in order.

  Returns:
      A keras ``Model`` mapping the input tensor to class probabilities.
      The caller is responsible for calling ``compile`` on it.
  """
  features = Input(shape=(input_dim,))

  # Chain the hidden layers: each one consumes the previous layer's output.
  hidden = features
  for units in hidden_units:
    hidden = Dense(units, activation='tanh')(hidden)

  probabilities = Dense(n_classes, activation='softmax')(hidden)

  return Model(inputs=features, outputs=probabilities)

In [None]:
def weighted_categorical_crossentropy(weights):
    """
    Build a categorical cross-entropy loss whose per-class terms are scaled by `weights`.

    Variables:
        weights: numpy array of shape (C,) where C is the number of classes

    Returns:
        A function loss(y_true, y_pred) that can be passed to model.compile.
        It reduces over the last axis only, so it yields one value per sample.

    Usage:
        weights = np.array([0.5,2,10]) # Class one at 0.5, class 2 twice the normal weights, class 3 10x.
        loss = weighted_categorical_crossentropy(weights)
        model.compile(loss=loss,optimizer='adam')
    """

    # Wrap the weights as a backend variable once; the closure below reuses it.
    class_weights = K.variable(weights)

    def loss(y_true, y_pred):
        # Renormalise so that the class probabilities of each sample sum to 1.
        normalised = y_pred / K.sum(y_pred, axis=-1, keepdims=True)
        # Keep values strictly inside (0, 1) so the log never produces NaN/Inf.
        clipped = K.clip(normalised, K.epsilon(), 1 - K.epsilon())
        # Weighted log-likelihood per class, then negated sum over the class axis.
        weighted_log_likelihood = y_true * K.log(clipped) * class_weights
        return -K.sum(weighted_log_likelihood, -1)

    return loss

In [None]:
# Location of the training split (relative to the notebook's working directory).
train_path = 'Training_set.csv'

# Load the CSV. The first line of the file is skipped and the column names
# below are used instead (presumably that first line is the file's own header).
col_names = ['triggerTitle','triggerChannelTitle','actionChannelTitle','actionTitle','title', 'desc', 'target']
train_final = pd.read_csv(
    train_path,
    skiprows=1,
    sep=',',
    names=col_names,
    encoding="ISO-8859-1",
)

# The 'title' and 'desc' columns are not used by the model; discard them.
train_final = train_final.drop(['title', 'desc'], axis=1)

In [None]:
# label_encoder object knows how to understand word labels.
label_encoder = preprocessing.LabelEncoder()

# Encode labels in column 'species'.
train_final['triggerTitle'] = label_encoder.fit_transform(train_final['triggerTitle'])
train_final['triggerChannelTitle'] = label_encoder.fit_transform(train_final['triggerChannelTitle'])
train_final['actionChannelTitle'] = label_encoder.fit_transform(train_final['actionChannelTitle'])
train_final['actionTitle'] = label_encoder.fit_transform(train_final['actionTitle'])

In [None]:
X_train = train_final.drop('target', axis=1)

y_train = train_final['target']

In [None]:
test_path = 'Test_set.csv'

# Read the dataset
col_names = ['triggerTitle','triggerChannelTitle','actionChannelTitle','actionTitle','title', 'desc', 'target']
test_final = pd.read_csv(test_path,skiprows=1,sep=';',names=col_names,encoding = "ISO-8859-1")

In [None]:
del test_final['title']
del test_final['desc']

# label_encoder object knows how to understand word labels.
label_encoder = preprocessing.LabelEncoder()

# Encode labels in column 'species'.
test_final['triggerTitle'] = label_encoder.fit_transform(test_final['triggerTitle'])
test_final['triggerChannelTitle'] = label_encoder.fit_transform(test_final['triggerChannelTitle'])
test_final['actionChannelTitle'] = label_encoder.fit_transform(test_final['actionChannelTitle'])
test_final['actionTitle'] = label_encoder.fit_transform(test_final['actionTitle'])

In [None]:
# Separate the test frame into labels and features:
# 'target' is the label column, every remaining column is a model input.
y_test = test_final['target']

X_test = test_final.drop(columns=['target'])

In [None]:
# One-hot encode the integer targets for the softmax / categorical cross-entropy setup.
# to_categorical infers the number of columns from the largest label it sees, so
# encoding the two splits independently could yield matrices of different widths
# if one split lacks the highest class. A shared class count keeps them aligned.
n_classes = int(max(np.max(y_train), np.max(y_test))) + 1

y_train = to_categorical(y_train, num_classes=n_classes)
y_test = to_categorical(y_test, num_classes=n_classes)

In [None]:
# STRATIFIED K-FOLD CROSS VALIDATION { 4-fold }

splits = 4

# Create StratifiedKFold object.
skf = StratifiedKFold(n_splits=splits, shuffle=True, random_state=1)

# StratifiedKFold and compute_class_weight need one integer label per sample,
# while the network is trained on one-hot rows. Derive both views here so the
# cell works whether y_train is still a label vector or already one-hot encoded.
y_train_array = np.asarray(y_train)
if y_train_array.ndim == 2:
    y_train_labels = y_train_array.argmax(axis=1)
    y_train_onehot = y_train_array
else:
    y_train_labels = y_train_array
    y_train_onehot = to_categorical(y_train_array)

class_weights_list = []   # class weights computed on each fold's training part
fold_accuracies = []      # validation accuracy of each fold, same order

# count is the 1-based number of the fold currently being processed.
for count, (train_index, test_index) in enumerate(skf.split(X_train, y_train_labels), start=1):
    x_train_fold, x_test_fold = X_train.iloc[train_index], X_train.iloc[test_index]
    y_train_fold, y_test_fold = y_train_onehot[train_index], y_train_onehot[test_index]

    # Compute the class weights from the integer labels of the training part.
    # Keyword arguments are used because recent scikit-learn releases no longer
    # accept `classes` positionally.
    fold_labels = y_train_labels[train_index]
    class_weights = compute_class_weight(
        class_weight='balanced',
        classes=np.unique(fold_labels),
        y=fold_labels,
    )

    class_weights_list.append(class_weights)

    ncce = weighted_categorical_crossentropy(weights=np.array(class_weights))

    # A fresh model per fold so that no weights leak between folds.
    model = getModel()

    model.compile(loss=ncce, optimizer='adam', metrics=['acc'])

    model.fit(x_train_fold, y_train_fold, batch_size=16, epochs=30, verbose=1)

    # evaluate returns [loss, acc] because 'acc' is the only compiled metric.
    score = model.evaluate(x_test_fold, y_test_fold, verbose=0)
    fold_accuracies.append(score[1])

    print("Accuracy Validation: %.2f%%" % (score[1]*100))

# Index (into class_weights_list) of the fold with the highest validation
# accuracy; the final training cells use it to pick the class weights.
best_class_weight = int(np.argmax(fold_accuracies))

In [None]:
# Rebuild the weighted loss for the final model from the class weights stored
# at position best_class_weight of class_weights_list.
selected_class_weights = np.array(class_weights_list[best_class_weight])
ncce = weighted_categorical_crossentropy(weights=selected_class_weights)

In [None]:
# Start the final training from a fresh network: getModel() builds a new Model
# on every call, so nothing is carried over from the model bound earlier.
model = getModel()

In [None]:
# Configure training: weighted cross-entropy loss, Adam optimizer, accuracy metric.
model.compile(
    loss=ncce,
    optimizer='adam',
    metrics=['acc'],
)

In [None]:
# Fit on the complete training set; the object returned by fit is kept in `history`.
history = model.fit(
    X_train,
    y_train,
    batch_size=16,
    epochs=30,
    verbose=1,
)

In [None]:
# Evaluate on the held-out test set without progress output.
# The result is stored in `score` and not displayed by this cell.
score = model.evaluate(
    X_test,
    y_test,
    verbose=0,
)

In [None]:
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

# Model outputs for every test sample, one row per sample.
y_pred = model.predict(X_test)

# Turn each output row into a one-hot row that marks only its highest-scoring
# class, so it can be compared with the one-hot y_test.
winning_class = np.argmax(y_pred, axis=1)
y_pred_clean = np.zeros_like(y_pred)
y_pred_clean[np.arange(len(y_pred)), winning_class] = 1

print(classification_report(y_test, y_pred_clean))