# This notebook experiments with finetuning deep neural networks

The following will be experimented:

- Number of neurons and Dense Layers
- Regularisation 
- Differnt Activation Functions
- Learning Rates and optimisers
- Batch Size

In [1]:
# Import all necessary packages

import ast
import nltk
import string
import numpy as np
import pandas as pd

from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize

from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputClassifier
from sklearn.metrics import accuracy_score, hamming_loss
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MultiLabelBinarizer, LabelEncoder

from keras.layers import Dropout
from keras.optimizers import Adam
from keras.layers import Embedding
from keras.models import Sequential
from keras.layers import Dense, LSTM
from keras.regularizers import l1, l2
from keras.optimizers import SGD, RMSprop
from keras_preprocessing.sequence import pad_sequences

from matplotlib import pyplot as plt

from gensim.models import Word2Vec

from prettytable import PrettyTable

2023-07-27 14:53:31.232485: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Data Preperation

In [3]:
# Load the data
df = pd.read_csv('Data_Preprocessed_SubGroup_Selected7.csv')
df['Classification Codes'] = df['Classification Codes'].apply(ast.literal_eval)
df.head()

Unnamed: 0,Context,Classification Codes
0,processor compil system receiv predict model r...,[G06N20/00]
1,system method secur cloudbas physiolog data pr...,"[A61B5/00, G16H10/60]"
2,collim detector base medic imag system medic i...,[A61B6/00]
3,insul pedicl access system relat method pedicl...,"[A61B5/00, A61B17/00]"
4,ingest event marker data framework ingest even...,"[A61B5/00, G16H10/60]"


In [4]:
# As most multi-label classifiers requres each class to have multiple instances.
# All codes which appear less than 5 times will be removed

for i in range(10):

    # Determine the codes to be removed
    unique_codes = df['Classification Codes'].explode().value_counts()
    codes_to_remove = unique_codes[unique_codes < 5 ].index.tolist()

    # Remove rows with empty 'Classification Codes' column
    df = df.dropna(subset=['Classification Codes'])

    # Remove codes in the list codes_to_remove
    df['CodeCount'] = df['Classification Codes'].apply(lambda codes: sum(code in codes_to_remove for code in codes))
    df = df[df['CodeCount'] == 0].drop(columns=['CodeCount'])

In [5]:
# Encode "Classification Codes" column as multi-labels
mlb = MultiLabelBinarizer()
y = mlb.fit_transform(df['Classification Codes'])

# Split the data into training(80%) and testing sets(20%)
X_train, X_test, y_train, y_test = train_test_split(df['Context'], y, test_size=0.2, random_state=42)

In [6]:
# Apply feature extraction using TfidfVectorizer
vectorizer = TfidfVectorizer()
X_train_vectorized = vectorizer.fit_transform(X_train)
X_test_vectorized = vectorizer.transform(X_test)

In [7]:
# Convert sparse matrices to dense NumPy arrays
X_train_dense = X_train_vectorized.toarray()
X_test_dense = X_test_vectorized.toarray()

# Addition of more Neurons and Dense Layers

The base model utilised, 2 layers with 64 and 32 neurons which achieved 0.3224

I will experiment with adding more neruons or layers to attempt to increase the accuracy.

## Increase the neruon size to 128 and 64

In [8]:
# Build model
model = Sequential()
model.add(Dense(128, activation='relu', input_shape=(X_train_dense.shape[1],)))
model.add(Dense(64, activation='relu'))
model.add(Dense(y.shape[1], activation='sigmoid'))  # Use sigmoid activation for multi-label classification

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train_dense, y_train, batch_size=32, epochs=10, validation_split=0.2)

# Make predictions
y_pred_proba = model.predict(X_test_dense)
y_pred = (y_pred_proba > 0.5).astype(int)

# Evaluate the model
hamming_loss_value = hamming_loss(y_test, y_pred)

2023-07-27 14:53:39.979346: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [9]:
# Get accuracy and validation accuracy
accuracy = history.history['accuracy']
val_accuracy = history.history['val_accuracy']

# Print the accuracy and validation accuracy for each epoch
for epoch in range(len(accuracy)):
    print(f"Epoch {epoch + 1} - Accuracy: {accuracy[epoch]:.4f}, Validation Accuracy: {val_accuracy[epoch]:.4f}")
    
# Print hamming loss
print("\nHamming Loss:", hamming_loss_value)

Epoch 1 - Accuracy: 0.6731, Validation Accuracy: 0.7147
Epoch 2 - Accuracy: 0.8745, Validation Accuracy: 0.7127
Epoch 3 - Accuracy: 0.9261, Validation Accuracy: 0.7085
Epoch 4 - Accuracy: 0.9379, Validation Accuracy: 0.7178
Epoch 5 - Accuracy: 0.9307, Validation Accuracy: 0.7116
Epoch 6 - Accuracy: 0.9400, Validation Accuracy: 0.7116
Epoch 7 - Accuracy: 0.9371, Validation Accuracy: 0.7106
Epoch 8 - Accuracy: 0.9351, Validation Accuracy: 0.7137
Epoch 9 - Accuracy: 0.9330, Validation Accuracy: 0.7096
Epoch 10 - Accuracy: 0.9302, Validation Accuracy: 0.7158

Hamming Loss: 0.09331607437044011


## Increase the neruon size to 256 and 128

In [10]:
# Build model
model = Sequential()
model.add(Dense(256, activation='relu', input_shape=(X_train_dense.shape[1],)))
model.add(Dense(128, activation='relu'))
model.add(Dense(y.shape[1], activation='sigmoid'))  # Use sigmoid activation for multi-label classification

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train_dense, y_train, batch_size=32, epochs=10, validation_split=0.2)

# Make predictions
y_pred_proba = model.predict(X_test_dense)
y_pred = (y_pred_proba > 0.5).astype(int)

# Evaluate the model
hamming_loss_value = hamming_loss(y_test, y_pred)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [11]:
# Get accuracy and validation accuracy
accuracy = history.history['accuracy']
val_accuracy = history.history['val_accuracy']

# Print the accuracy and validation accuracy for each epoch
for epoch in range(len(accuracy)):
    print(f"Epoch {epoch + 1} - Accuracy: {accuracy[epoch]:.4f}, Validation Accuracy: {val_accuracy[epoch]:.4f}")
    
# Print hamming loss
print("\nHamming Loss:", hamming_loss_value)

Epoch 1 - Accuracy: 0.6790, Validation Accuracy: 0.7116
Epoch 2 - Accuracy: 0.8818, Validation Accuracy: 0.7219
Epoch 3 - Accuracy: 0.9271, Validation Accuracy: 0.7209
Epoch 4 - Accuracy: 0.9325, Validation Accuracy: 0.7343
Epoch 5 - Accuracy: 0.9348, Validation Accuracy: 0.7106
Epoch 6 - Accuracy: 0.9402, Validation Accuracy: 0.7240
Epoch 7 - Accuracy: 0.9351, Validation Accuracy: 0.7127
Epoch 8 - Accuracy: 0.9420, Validation Accuracy: 0.7127
Epoch 9 - Accuracy: 0.9420, Validation Accuracy: 0.7209
Epoch 10 - Accuracy: 0.9433, Validation Accuracy: 0.7209

Hamming Loss: 0.09261002588844434


## Addition of another dense layer

In [12]:
# Build model
model = Sequential()
model.add(Dense(128, activation='relu', input_shape=(X_train_dense.shape[1],)))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(y.shape[1], activation='sigmoid'))

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train_dense, y_train, batch_size=32, epochs=10, validation_split=0.2)

# Make predictions
y_pred_proba = model.predict(X_test_dense)
y_pred = (y_pred_proba > 0.5).astype(int)

# Evaluate the model
hamming_loss_value = hamming_loss(y_test, y_pred)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [13]:
# Get accuracy and validation accuracy
accuracy = history.history['accuracy']
val_accuracy = history.history['val_accuracy']

# Print the accuracy and validation accuracy for each epoch
for epoch in range(len(accuracy)):
    print(f"Epoch {epoch + 1} - Accuracy: {accuracy[epoch]:.4f}, Validation Accuracy: {val_accuracy[epoch]:.4f}")
    
# Print hamming loss
print("\nHamming Loss:", hamming_loss_value)

Epoch 1 - Accuracy: 0.6512, Validation Accuracy: 0.7096
Epoch 2 - Accuracy: 0.8619, Validation Accuracy: 0.7147
Epoch 3 - Accuracy: 0.9150, Validation Accuracy: 0.7209
Epoch 4 - Accuracy: 0.9271, Validation Accuracy: 0.7209
Epoch 5 - Accuracy: 0.9338, Validation Accuracy: 0.7044
Epoch 6 - Accuracy: 0.9284, Validation Accuracy: 0.7044
Epoch 7 - Accuracy: 0.9317, Validation Accuracy: 0.6993
Epoch 8 - Accuracy: 0.9279, Validation Accuracy: 0.7085
Epoch 9 - Accuracy: 0.9258, Validation Accuracy: 0.7055
Epoch 10 - Accuracy: 0.9238, Validation Accuracy: 0.6982

Hamming Loss: 0.09861143798540833


## Summary

Overall after experimenting with neurons and dense layer, the optimal model consists of 2 dense layers, one with 64 neurons and one with 32 neurons.

# Regularisation

By using Dropout, the model is less likely to overfit the training data, as the neurons become less sensitive to the specific training examples and learn more robust features.

In [14]:
# Build model
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(X_train_dense.shape[1],)))
model.add(Dropout(0.3))  # Add a dropout layer after the first hidden layer with a dropout rate of 0.3 (30%)
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))  # Add another dropout layer after the second hidden layer with a dropout rate of 0.2 (20%)
model.add(Dense(y.shape[1], activation='sigmoid')) # Use sigmoid activation for multi-label classification

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train_dense, y_train, batch_size=32, epochs=10, validation_split=0.2)

# Make predictions
y_pred_proba = model.predict(X_test_dense)
y_pred = (y_pred_proba > 0.5).astype(int)

# Evaluate the model
hamming_loss_value = hamming_loss(y_test, y_pred)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [15]:
# Get accuracy and validation accuracy
accuracy = history.history['accuracy']
val_accuracy = history.history['val_accuracy']

# Print the accuracy and validation accuracy for each epoch
for epoch in range(len(accuracy)):
    print(f"Epoch {epoch + 1} - Accuracy: {accuracy[epoch]:.4f}, Validation Accuracy: {val_accuracy[epoch]:.4f}")
    
# Print hamming loss
print("\nHamming Loss:", hamming_loss_value)

Epoch 1 - Accuracy: 0.4317, Validation Accuracy: 0.6921
Epoch 2 - Accuracy: 0.6808, Validation Accuracy: 0.7281
Epoch 3 - Accuracy: 0.7602, Validation Accuracy: 0.7240
Epoch 4 - Accuracy: 0.8168, Validation Accuracy: 0.7394
Epoch 5 - Accuracy: 0.8555, Validation Accuracy: 0.7261
Epoch 6 - Accuracy: 0.8774, Validation Accuracy: 0.7333
Epoch 7 - Accuracy: 0.8849, Validation Accuracy: 0.7353
Epoch 8 - Accuracy: 0.8895, Validation Accuracy: 0.7333
Epoch 9 - Accuracy: 0.8941, Validation Accuracy: 0.7333
Epoch 10 - Accuracy: 0.9037, Validation Accuracy: 0.7415

Hamming Loss: 0.08943280771946341


## L1 Regularisation

In [16]:
# Build model
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(X_train_dense.shape[1],), kernel_regularizer=l1(0.01)))
model.add(Dense(32, activation='relu', kernel_regularizer=l1(0.01)))
model.add(Dense(y.shape[1], activation='sigmoid'))

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train_dense, y_train, batch_size=32, epochs=10, validation_split=0.2)

# Make predictions
y_pred_proba = model.predict(X_test_dense)
y_pred = (y_pred_proba > 0.5).astype(int)

# Evaluate the model
hamming_loss_value = hamming_loss(y_test, y_pred)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [17]:
# Get accuracy and validation accuracy
accuracy = history.history['accuracy']
val_accuracy = history.history['val_accuracy']

# Print the accuracy and validation accuracy for each epoch
for epoch in range(len(accuracy)):
    print(f"Epoch {epoch + 1} - Accuracy: {accuracy[epoch]:.4f}, Validation Accuracy: {val_accuracy[epoch]:.4f}")
    
# Print hamming loss
print("\nHamming Loss:", hamming_loss_value)

Epoch 1 - Accuracy: 0.5314, Validation Accuracy: 0.5829
Epoch 2 - Accuracy: 0.5438, Validation Accuracy: 0.5747
Epoch 3 - Accuracy: 0.5546, Validation Accuracy: 0.5778
Epoch 4 - Accuracy: 0.5677, Validation Accuracy: 0.5870
Epoch 5 - Accuracy: 0.5755, Validation Accuracy: 0.5850
Epoch 6 - Accuracy: 0.5814, Validation Accuracy: 0.5695
Epoch 7 - Accuracy: 0.5817, Validation Accuracy: 0.5860
Epoch 8 - Accuracy: 0.5863, Validation Accuracy: 0.5808
Epoch 9 - Accuracy: 0.5891, Validation Accuracy: 0.5726
Epoch 10 - Accuracy: 0.5832, Validation Accuracy: 0.5860

Hamming Loss: 0.12473523181925159


## L2 Regularosation

In [18]:
# Build model
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(X_train_dense.shape[1],), kernel_regularizer=l2(0.01)))
model.add(Dense(32, activation='relu', kernel_regularizer=l2(0.01)))
model.add(Dense(y.shape[1], activation='sigmoid'))

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train_dense, y_train, batch_size=32, epochs=10, validation_split=0.2)

# Make predictions
y_pred_proba = model.predict(X_test_dense)
y_pred = (y_pred_proba > 0.5).astype(int)

# Evaluate the model
hamming_loss_value = hamming_loss(y_test, y_pred)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [19]:
# Get accuracy and validation accuracy
accuracy = history.history['accuracy']
val_accuracy = history.history['val_accuracy']

# Print the accuracy and validation accuracy for each epoch
for epoch in range(len(accuracy)):
    print(f"Epoch {epoch + 1} - Accuracy: {accuracy[epoch]:.4f}, Validation Accuracy: {val_accuracy[epoch]:.4f}")
    
# Print hamming loss
print("\nHamming Loss:", hamming_loss_value)

Epoch 1 - Accuracy: 0.6690, Validation Accuracy: 0.7168
Epoch 2 - Accuracy: 0.7957, Validation Accuracy: 0.7147
Epoch 3 - Accuracy: 0.8150, Validation Accuracy: 0.7219
Epoch 4 - Accuracy: 0.8241, Validation Accuracy: 0.7158
Epoch 5 - Accuracy: 0.8266, Validation Accuracy: 0.7106
Epoch 6 - Accuracy: 0.8228, Validation Accuracy: 0.7034
Epoch 7 - Accuracy: 0.8326, Validation Accuracy: 0.7106
Epoch 8 - Accuracy: 0.8279, Validation Accuracy: 0.7137
Epoch 9 - Accuracy: 0.8300, Validation Accuracy: 0.7127
Epoch 10 - Accuracy: 0.8387, Validation Accuracy: 0.7096

Hamming Loss: 0.09366909861143799


# Summary

Use of dropout does increase the accuracy slightly and makes overfitting less likey, I will use this as both l1 and l2 reguarisation decreases the accuracy

# Experimenting with different activation functions

The following activation functions will be tested:
- relu
- elu
- selu
- tanh

In [20]:
# List of activation functions to test
activation_functions = ['relu', 'elu', 'selu', 'tanh']

for activation_func in activation_functions:
    print(f"Testing Activation Function: {activation_func}")

    # Build model with the current activation function
    model = Sequential()
    model.add(Dense(64, activation=activation_func, input_shape=(X_train_dense.shape[1],)))
    model.add(Dropout(0.3)) 
    model.add(Dense(32, activation=activation_func))
    model.add(Dropout(0.2))
    model.add(Dense(y.shape[1], activation='sigmoid'))
    
    # Compile the model
    model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

    # Train the model
    history = model.fit(X_train_dense, y_train, batch_size=32, epochs=10, validation_split=0.2)

    # Make predictions
    y_pred_proba = model.predict(X_test_dense)
    y_pred = (y_pred_proba > 0.5).astype(int)

    # Evaluate the model
    hamming_loss_value = hamming_loss(y_test, y_pred)

    # Get accuracy and validation accuracy
    accuracy = history.history['accuracy']
    val_accuracy = history.history['val_accuracy']

    # Print the accuracy and validation accuracy for each epoch
    for epoch in range(len(accuracy)):
        print(f"Epoch {epoch + 1} - Accuracy: {accuracy[epoch]:.4f}, Validation Accuracy: {val_accuracy[epoch]:.4f}")

    # Print hamming loss
    print("\nHamming Loss:", hamming_loss_value)

    print("\n-----------------------------\n")


Testing Activation Function: relu
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1 - Accuracy: 0.5077, Validation Accuracy: 0.6766
Epoch 2 - Accuracy: 0.6770, Validation Accuracy: 0.7199
Epoch 3 - Accuracy: 0.7609, Validation Accuracy: 0.7147
Epoch 4 - Accuracy: 0.8107, Validation Accuracy: 0.7147
Epoch 5 - Accuracy: 0.8442, Validation Accuracy: 0.7219
Epoch 6 - Accuracy: 0.8722, Validation Accuracy: 0.7199
Epoch 7 - Accuracy: 0.8846, Validation Accuracy: 0.7178
Epoch 8 - Accuracy: 0.8897, Validation Accuracy: 0.7209
Epoch 9 - Accuracy: 0.8941, Validation Accuracy: 0.7199
Epoch 10 - Accuracy: 0.8982, Validation Accuracy: 0.7250

Hamming Loss: 0.08872675923746765

-----------------------------

Testing Activation Function: elu
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1 - Accuracy: 0.5909, Validation Accuracy: 0.7374
Epoch 2 - Accuracy: 0.7615,

## Summary 

Based on the accuracy, relu proved to be the best activation function, followed by elu, selu and tanh

# Learning rates and Optimisers

The following learning rates will be experimented:
- 0.001
- 0.01
- 0.1

The following optimisers will be experimented:
- adam
- sdg
- rmsprop

In [21]:
# List of learning rates and optimizers to test
learning_rates = [0.001, 0.01, 0.1]
optimizers = ['adam', 'sgd', 'rmsprop']

for lr in learning_rates:
    for optimizer in optimizers:
        print(f"Testing Learning Rate: {lr}, Optimizer: {optimizer}")

        # Build model with the current learning rate and optimizer
        model = Sequential()
        model.add(Dense(64, activation=activation_func, input_shape=(X_train_dense.shape[1],)))
        model.add(Dropout(0.3)) 
        model.add(Dense(32, activation=activation_func))
        model.add(Dropout(0.2))
        model.add(Dense(y.shape[1], activation='sigmoid'))

        if optimizer == 'adam':
            optimizer = Adam(learning_rate=lr)
        elif optimizer == 'sgd':
            optimizer = SGD(learning_rate=lr)
        elif optimizer == 'rmsprop':
            optimizer = RMSprop(learning_rate=lr)

        # Compile the model with the selected optimizer
        model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

        # Train the model
        history = model.fit(X_train_dense, y_train, batch_size=32, epochs=10, validation_split=0.2)

        # Make predictions
        y_pred_proba = model.predict(X_test_dense)
        y_pred = (y_pred_proba > 0.5).astype(int)

        # Evaluate the model
        hamming_loss_value = hamming_loss(y_test, y_pred)

        # Get accuracy and validation accuracy
        accuracy = history.history['accuracy']
        val_accuracy = history.history['val_accuracy']

        # Print the accuracy and validation accuracy for each epoch
        for epoch in range(len(accuracy)):
            print(f"Epoch {epoch + 1} - Accuracy: {accuracy[epoch]:.4f}, Validation Accuracy: {val_accuracy[epoch]:.4f}")

        # Print hamming loss
        print("\nHamming Loss:", hamming_loss_value)

        print("\n-----------------------------\n")


Testing Learning Rate: 0.001, Optimizer: adam
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1 - Accuracy: 0.5523, Validation Accuracy: 0.7024
Epoch 2 - Accuracy: 0.7566, Validation Accuracy: 0.7281
Epoch 3 - Accuracy: 0.8326, Validation Accuracy: 0.7219
Epoch 4 - Accuracy: 0.8707, Validation Accuracy: 0.7333
Epoch 5 - Accuracy: 0.8921, Validation Accuracy: 0.7137
Epoch 6 - Accuracy: 0.9080, Validation Accuracy: 0.7302
Epoch 7 - Accuracy: 0.9158, Validation Accuracy: 0.7199
Epoch 8 - Accuracy: 0.9204, Validation Accuracy: 0.7230
Epoch 9 - Accuracy: 0.9194, Validation Accuracy: 0.7178
Epoch 10 - Accuracy: 0.9196, Validation Accuracy: 0.7209

Hamming Loss: 0.09178630265944929

-----------------------------

Testing Learning Rate: 0.001, Optimizer: sgd
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1 - Accuracy: 0.2924, Validation Accuracy: 0.4109
Epo

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1 - Accuracy: 0.4024, Validation Accuracy: 0.5726
Epoch 2 - Accuracy: 0.5312, Validation Accuracy: 0.6262
Epoch 3 - Accuracy: 0.5860, Validation Accuracy: 0.6385
Epoch 4 - Accuracy: 0.6262, Validation Accuracy: 0.6540
Epoch 5 - Accuracy: 0.6561, Validation Accuracy: 0.6632
Epoch 6 - Accuracy: 0.6726, Validation Accuracy: 0.6735
Epoch 7 - Accuracy: 0.6927, Validation Accuracy: 0.6941
Epoch 8 - Accuracy: 0.7117, Validation Accuracy: 0.7024
Epoch 9 - Accuracy: 0.7210, Validation Accuracy: 0.6952
Epoch 10 - Accuracy: 0.7202, Validation Accuracy: 0.7013

Hamming Loss: 0.09896446222640622

-----------------------------

Testing Learning Rate: 0.01, Optimizer: rmsprop
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1 - Accuracy: 0.5520, Validation Accuracy: 0.6571
Epoch 2 - Accuracy: 0.6734, Validation Accurac

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1 - Accuracy: 0.3539, Validation Accuracy: 0.1184
Epoch 2 - Accuracy: 0.4191, Validation Accuracy: 0.3079
Epoch 3 - Accuracy: 0.4387, Validation Accuracy: 0.4799
Epoch 4 - Accuracy: 0.4449, Validation Accuracy: 0.2729
Epoch 5 - Accuracy: 0.4657, Validation Accuracy: 0.5191
Epoch 6 - Accuracy: 0.4879, Validation Accuracy: 0.4655
Epoch 7 - Accuracy: 0.4820, Validation Accuracy: 0.4789
Epoch 8 - Accuracy: 0.4956, Validation Accuracy: 0.5129
Epoch 9 - Accuracy: 0.4760, Validation Accuracy: 0.4058
Epoch 10 - Accuracy: 0.4773, Validation Accuracy: 0.2678

Hamming Loss: 0.21440338903271358

-----------------------------



## Summary 

A learning rate of 0.001 and optimiser of rsmprop seems to be most prelevent

# Batch size

In [22]:
# List of batch sizes to test
batch_sizes = [16, 32, 64]

for batch_size in batch_sizes:
    print(f"Testing Batch Size: {batch_size}")

    # Build model with the best learning rate, optimizer, and batch size
    model = Sequential()
    model.add(Dense(64, activation=activation_func, input_shape=(X_train_dense.shape[1],)))
    model.add(Dropout(0.3)) 
    model.add(Dense(32, activation=activation_func))
    model.add(Dropout(0.2))
    model.add(Dense(y.shape[1], activation='sigmoid'))

    optimizer = RMSprop(learning_rate=0.001)

    # Compile the model with the selected optimizer
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

    # Train the model with the current batch size
    history = model.fit(X_train_dense, y_train, batch_size=batch_size, epochs=10, validation_split=0.2)

    # Make predictions
    y_pred_proba = model.predict(X_test_dense)
    y_pred = (y_pred_proba > 0.5).astype(int)

    # Evaluate the model
    hamming_loss_value = hamming_loss(y_test, y_pred)

    # Get accuracy and validation accuracy
    accuracy = history.history['accuracy']
    val_accuracy = history.history['val_accuracy']

    # Print the accuracy and validation accuracy for each epoch
    for epoch in range(len(accuracy)):
        print(f"Epoch {epoch + 1} - Accuracy: {accuracy[epoch]:.4f}, Validation Accuracy: {val_accuracy[epoch]:.4f}")

    # Print hamming loss
    print("\nHamming Loss:", hamming_loss_value)

    print("\n-----------------------------\n")


Testing Batch Size: 16
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1 - Accuracy: 0.6244, Validation Accuracy: 0.7116
Epoch 2 - Accuracy: 0.7553, Validation Accuracy: 0.7405
Epoch 3 - Accuracy: 0.8016, Validation Accuracy: 0.7549
Epoch 4 - Accuracy: 0.8333, Validation Accuracy: 0.7508
Epoch 5 - Accuracy: 0.8514, Validation Accuracy: 0.7415
Epoch 6 - Accuracy: 0.8789, Validation Accuracy: 0.7291
Epoch 7 - Accuracy: 0.8910, Validation Accuracy: 0.7333
Epoch 8 - Accuracy: 0.8944, Validation Accuracy: 0.7322
Epoch 9 - Accuracy: 0.8993, Validation Accuracy: 0.7178
Epoch 10 - Accuracy: 0.9047, Validation Accuracy: 0.7281

Hamming Loss: 0.089668157213462

-----------------------------

Testing Batch Size: 32
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1 - Accuracy: 0.5884, Validation Accuracy: 0.6807
Epoch 2 - Accuracy: 0.7509, Validation Accuracy: 0

## Summary 

A batch size of 16 resulted in the highest accuracy

# LSTM

Can be considerd, computational time is not feasible

In [23]:
model = Sequential()
model.add(Embedding(input_dim=len(vectorizer.vocabulary_), output_dim=128, input_length=X_train_dense.shape[1]))
model.add(LSTM(64, return_sequences=True))
model.add(LSTM(32))
model.add(Dense(y.shape[1], activation='sigmoid'))

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train_dense, y_train, batch_size=32, epochs=10, validation_split=0.2)

# Make predictions
y_pred_proba = model.predict(X_test_dense)
y_pred = (y_pred_proba > 0.5).astype(int)

# Evaluate the model
hamming_loss_value = hamming_loss(y_test, y_pred)

Epoch 1/10
  5/122 [>.............................] - ETA: 2:26:37 - loss: 0.6810 - accuracy: 0.1500

KeyboardInterrupt: 

# Conclusion

After preprocessing the following set up proved to be the best for this case.

- 2 Dense Layers (64, 32) and 2 Dropout layers (0.3, 0.2)
- Activation Function -> relu
- Learning Rate -> 0.001
- Optimiser -> RMSprop
- Batch Size -> 16
