# Multi-layer Perceptron Neural Network Training - JMUBEN Dataset

In [1]:
import pandas as pd
import numpy as np
import os, time, random
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from tqdm import tqdm

In [2]:
import warnings

# Suppress every warning category for the remainder of the session so that
# the training cells below produce clean progress output.
warnings.simplefilter("ignore")

In [3]:
# Input locations: everything for the JMUBEN dataset lives under DATASETS_FOLDER.
DATASETS_FOLDER = "./datasets"
PATH_JMUBEN = f"{DATASETS_FOLDER}/jmuben"
PATH_JMUBEN_GRAYSCALE = f"{PATH_JMUBEN}/grayscale"
PATH_JMUBEN_HARALICK_FEATURES = f"{PATH_JMUBEN}/haralick_features.csv"

# Output locations: metrics CSVs for the MLP experiments are written here.
PATH_EVALUATIONS = "./evaluations"
PATH_EVALUATIONS_MLP = f"{PATH_EVALUATIONS}/RNAs-MLP"

## 1. Preparing data

### 1.1. Opening dataframe

In [4]:
# Load the pre-computed Haralick feature table. The CSV's first, unnamed column
# is read back as a regular column called "Unnamed: 0" (see the preview below).
df_haralick_features = pd.read_csv(filepath_or_buffer=PATH_JMUBEN_HARALICK_FEATURES)
df_haralick_features.head(n=5)

Unnamed: 0,name,H1,H2,H3,H4,H5,H6,H7,H8,H9,H10,H11,H12,H13,target
0,1 (1).jpg,0.001586,93.192069,0.96045,1177.657526,0.291168,249.064033,4617.438034,7.718249,10.967639,0.000635,3.740615,-0.391,0.995172,Miner
1,1 (10000).jpg,0.002062,97.0083,0.939763,805.227808,0.26165,211.017844,3123.902932,7.249024,10.589788,0.000597,3.729357,-0.336786,0.98596,Miner
2,1 (10001).jpg,0.000928,226.929118,0.967796,3523.245958,0.252188,295.117979,13866.054714,8.235735,11.816346,0.000534,4.198673,-0.393656,0.996458,Miner
3,1 (10002).jpg,0.002441,112.890236,0.986898,4306.632913,0.327462,302.522615,17113.641414,8.173147,11.235603,0.000619,3.749459,-0.458315,0.998801,Miner
4,1 (10003).jpg,0.000959,218.905658,0.965931,3213.701018,0.246127,281.627154,12635.898413,8.181094,11.783294,0.00053,4.202501,-0.385315,0.995845,Miner


### 1.2. Removing "name" column

In [5]:
# The image file name identifies a row but is not a feature, so it is removed.
df_haralick_features = df_haralick_features.drop(columns=["name"])
df_haralick_features.head(n=5)

Unnamed: 0,H1,H2,H3,H4,H5,H6,H7,H8,H9,H10,H11,H12,H13,target
0,0.001586,93.192069,0.96045,1177.657526,0.291168,249.064033,4617.438034,7.718249,10.967639,0.000635,3.740615,-0.391,0.995172,Miner
1,0.002062,97.0083,0.939763,805.227808,0.26165,211.017844,3123.902932,7.249024,10.589788,0.000597,3.729357,-0.336786,0.98596,Miner
2,0.000928,226.929118,0.967796,3523.245958,0.252188,295.117979,13866.054714,8.235735,11.816346,0.000534,4.198673,-0.393656,0.996458,Miner
3,0.002441,112.890236,0.986898,4306.632913,0.327462,302.522615,17113.641414,8.173147,11.235603,0.000619,3.749459,-0.458315,0.998801,Miner
4,0.000959,218.905658,0.965931,3213.701018,0.246127,281.627154,12635.898413,8.181094,11.783294,0.00053,4.202501,-0.385315,0.995845,Miner


### 1.3. Defining X and Y attributes

In [6]:
# Y = target attribute (the class label to predict)
# X = features (the Haralick descriptors H1..H13)

Y = df_haralick_features["target"]

# Build X without mutating df_haralick_features, so this cell can be re-run
# safely. "Unnamed: 0" is the row index that was saved into the CSV; it is not
# a texture feature and would otherwise be fed to the models as a 14th input,
# letting them learn from the file's row order. errors="ignore" keeps the cell
# working if that column is already absent.
X = df_haralick_features.drop(columns=["target", "Unnamed: 0"], errors="ignore")

## 2. Defining Multi-layer Perceptron (MLP) architectures to train

### 2.1. MLP with one hidden layer

In [7]:
# Hidden-layer widths to try for the single-hidden-layer MLPs: 4 through 16 inclusive.
one_layer_architectures = list(range(4, 17))
one_layer_architectures

[4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]

### 2.2. MLP with two hidden layers

#### Geometric Pyramid Rule
Number of features = 13  
Number of classes = 5  

$N = \alpha \times \sqrt{\textrm{n. of features} \times \textrm{n. of classes}}$  
$0.5 \leq \alpha \leq 2$  
$4 \leq N \leq 16$

#### Defining number of architectures

In [8]:
# Draw four distinct totals of hidden neurons from the 4..16 range given by the
# geometric pyramid rule above. A dedicated, seeded generator makes the draw
# reproducible across kernel restarts without touching the global `random`
# state; random.sample already guarantees the values are unique.
NEURON_TOTALS_SEED = 42
number_of_neurons = random.Random(NEURON_TOTALS_SEED).sample(range(4, 17), 4)

# Kept so that any later code reading the old loop counter still finds it.
count = len(number_of_neurons)

In [9]:
# Show which totals of hidden neurons were drawn for the two-hidden-layer search.
print(f"Total of neurons: {number_of_neurons}")

Total of neurons: [15, 7, 13, 6]


In [10]:
# For every drawn total, enumerate each way of splitting it into two non-empty
# hidden layers (first, second) with first + second == total. dict.fromkeys
# drops repeated pairs while preserving first-seen order.
two_layers_architectures = list(
    dict.fromkeys(
        (first_layer, total - first_layer)
        for total in number_of_neurons
        for first_layer in range(1, total)
    )
)

In [11]:
# Report how many (first layer, second layer) pairs were generated, then list them.
print(f"Number of architectures: {len(two_layers_architectures)}")
print(f"Architectures: {two_layers_architectures}")

Number of architectures: 37
Architectures: [(1, 14), (2, 13), (3, 12), (4, 11), (5, 10), (6, 9), (7, 8), (8, 7), (9, 6), (10, 5), (11, 4), (12, 3), (13, 2), (14, 1), (1, 6), (2, 5), (3, 4), (4, 3), (5, 2), (6, 1), (1, 12), (2, 11), (3, 10), (4, 9), (5, 8), (6, 7), (7, 6), (8, 5), (9, 4), (10, 3), (11, 2), (12, 1), (1, 5), (2, 4), (3, 3), (4, 2), (5, 1)]


## 3. Training MLP architectures with Haralick Features

### 3.1. Defining function to calculate the model's total number of parameters

In [12]:
def calculate_total_params(model):
    """Return the total parameter count of a fitted model.

    Counts every entry of each weight matrix in ``model.coefs_`` and adds one
    extra parameter per column of each matrix (the bias of each unit that the
    matrix feeds).
    """
    weight_total = sum(weights.size for weights in model.coefs_)
    bias_total = sum(weights.shape[1] for weights in model.coefs_)
    return weight_total + bias_total

### 3.2. Defining function to calculate the GFLOPS consumed by the model

In [13]:
def calculate_gflops(model, training_time):
    """Return the GFLOPS figure reported for a fitted model.

    The operation count is twice the number of entries across all weight
    matrices in ``model.coefs_`` (presumably one multiply and one add per
    weight). That count is divided by ``training_time`` and scaled by 1e-9.

    NOTE(review): the numerator covers a single pass over the weights, not the
    work done over all samples and epochs, so this is a relative indicator
    rather than measured hardware throughput.

    Parameters
    ----------
    model : object exposing ``coefs_``, an iterable of arrays with ``.shape``.
    training_time : elapsed training time in seconds, used as the divisor.
    """
    operations_number = 2 * sum(np.prod(coef.shape) for coef in model.coefs_)
    return (operations_number / training_time) * 1e-9

### 3.3. Defining function to evaluate models

In [14]:
def evaluate_model(y_true, y_pred):
    """Score predictions against the true labels.

    Returns a 4-tuple ``(accuracy, f1, precision, recall)``. F1, precision and
    recall are computed with ``average="weighted"``.
    """
    weighted = {"average": "weighted"}
    return (
        accuracy_score(y_true, y_pred),
        f1_score(y_true, y_pred, **weighted),
        precision_score(y_true, y_pred, **weighted),
        recall_score(y_true, y_pred, **weighted),
    )

### 3.4. Defining function to save model metrics in a dataframe

In [15]:
def generate_dataframe(i, architecture, params, gflops, training_time, accuracy, f1, precision, recall, path, model_name):
    """Append one experiment's metrics as a row of ``{path}/metrics_{model_name}.csv``.

    The directory ``path`` is created if it does not exist. If the CSV already
    exists, its rows are kept and the new row is added after them; otherwise
    the file is created with a header and this single row.

    Parameters
    ----------
    i : experiment/round identifier, stored as a string.
    architecture : hidden-layer specification, stored via ``str()``.
    params, gflops, training_time : model size, GFLOPS figure, and fit time in seconds.
    accuracy, f1, precision, recall : test-set metrics to record.
    path : output directory.
    model_name : suffix used in the CSV file name.
    """
    # The "Test F1 Weightet" spelling is left untouched: it is the column name
    # already present in previously written CSV files.
    metrics = {
        "Experiment": str(i),
        "Architectures": str(architecture),
        "Total Params": params,
        "GFLOPS": gflops,
        "Training Time (sec)": training_time,
        "Test Accuracy": accuracy,
        "Test F1 Weightet": f1,
        "Test Precision Weighted": precision,
        "Test Recall Weighted": recall}

    df_new = pd.DataFrame([metrics])
    file_path = f"{path}/metrics_{model_name}.csv"

    os.makedirs(path, exist_ok=True)

    if os.path.exists(file_path):
        # DataFrame.append was removed in pandas 2.0; pd.concat is the
        # supported way to add rows and works on older versions as well.
        df = pd.concat([pd.read_csv(file_path), df_new], ignore_index=True)
    else:
        df = df_new

    df.to_csv(file_path, header=True, index=False)

### 3.5. Training MLP models

#### One hidden layer models

In [16]:
# Three rounds, each with its own 70/30 train/test split. Every one-hidden-layer
# width is trained and scored once per round, and each result is appended to
# the "mlp_1hl_experiments" metrics CSV.
# NOTE(review): the features are passed to the MLP unscaled; scikit-learn
# recommends standardising inputs for MLPs - confirm whether that is intended.
for i in range(1, 4):
    round_seed = 42 + i
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=round_seed)

    for architecture in tqdm(one_layer_architectures, desc=f"Round {i} - Training MLP models"):
        # Seeding the classifier fixes its weight initialisation, shuffling and
        # early-stopping validation split, so a re-run reproduces the metrics.
        mlp_classifier = MLPClassifier(
            hidden_layer_sizes=architecture,
            max_iter=300,
            early_stopping=True,
            random_state=round_seed,
        )

        # perf_counter is monotonic and high-resolution, unlike time.time,
        # which can jump if the system clock is adjusted mid-fit.
        beginning = time.perf_counter()
        mlp_classifier.fit(X_train, y_train)
        end = time.perf_counter()

        training_time = end - beginning

        params = calculate_total_params(mlp_classifier)
        gflops = calculate_gflops(mlp_classifier, training_time)

        y_pred = mlp_classifier.predict(X_test)

        accuracy, f1, precision, recall = evaluate_model(y_test, y_pred)
        generate_dataframe(i, architecture, params, gflops, training_time, accuracy, f1, precision, recall, PATH_EVALUATIONS_MLP, "mlp_1hl_experiments")

Round 1 - Training MLP models: 100%|████████████████████████████████████████████████████████████████████████████████| 13/13 [00:30<00:00,  2.37s/it]
Round 2 - Training MLP models: 100%|████████████████████████████████████████████████████████████████████████████████| 13/13 [00:25<00:00,  1.97s/it]
Round 3 - Training MLP models: 100%|████████████████████████████████████████████████████████████████████████████████| 13/13 [00:28<00:00,  2.21s/it]


#### Two hidden layers models

In [17]:
# Three rounds, each with its own 70/30 train/test split. Every two-hidden-layer
# (first, second) architecture is trained and scored once per round, and each
# result is appended to the "mlp_2hl_experiments" metrics CSV.
# NOTE(review): the features are passed to the MLP unscaled; scikit-learn
# recommends standardising inputs for MLPs - confirm whether that is intended.
for i in range(1, 4):
    round_seed = 42 + i
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=round_seed)

    for architecture in tqdm(two_layers_architectures, desc=f"Round {i} - Training MLP models"):
        # Seeding the classifier fixes its weight initialisation, shuffling and
        # early-stopping validation split, so a re-run reproduces the metrics.
        mlp_classifier = MLPClassifier(
            hidden_layer_sizes=architecture,
            max_iter=300,
            early_stopping=True,
            random_state=round_seed,
        )

        # perf_counter is monotonic and high-resolution, unlike time.time,
        # which can jump if the system clock is adjusted mid-fit.
        beginning = time.perf_counter()
        mlp_classifier.fit(X_train, y_train)
        end = time.perf_counter()

        training_time = end - beginning

        params = calculate_total_params(mlp_classifier)
        gflops = calculate_gflops(mlp_classifier, training_time)

        y_pred = mlp_classifier.predict(X_test)

        accuracy, f1, precision, recall = evaluate_model(y_test, y_pred)
        generate_dataframe(i, architecture, params, gflops, training_time, accuracy, f1, precision, recall, PATH_EVALUATIONS_MLP, "mlp_2hl_experiments")

Round 1 - Training MLP models: 100%|████████████████████████████████████████████████████████████████████████████████| 37/37 [01:47<00:00,  2.92s/it]
Round 2 - Training MLP models: 100%|████████████████████████████████████████████████████████████████████████████████| 37/37 [01:33<00:00,  2.53s/it]
Round 3 - Training MLP models: 100%|████████████████████████████████████████████████████████████████████████████████| 37/37 [01:44<00:00,  2.84s/it]
