## Problem (a)

In [1]:
import os
import itertools
import numpy as np
import pandas as pd

import sklearn
from sklearn.metrics import accuracy_score
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV

import warnings
warnings.filterwarnings("ignore")

### Load training set and development set

In [2]:
# Training split: features (X) and labels (Y). The files are parsed by
# read_csv_data as tab-separated text without a header row.
TRAIN_X_PATH = "./trnX-3d.csv"
TRAIN_Y_PATH = "./trnY-3d.csv"

# Development (validation) split, parsed the same way.
VALID_X_PATH = "./devX-3d.csv"
VALID_Y_PATH = "./devY-3d.csv"

def read_csv_data(file_path):
    """Read a tab-separated, header-less file into a NumPy array.

    Args:
        file_path: Path of the file to read.

    Returns:
        A new ``numpy.ndarray`` holding a copy of the parsed table
        (rows x columns). Note that this is an array, not a DataFrame.
    """
    frame = pd.read_csv(file_path, sep="\t", header=None)
    return np.array(frame.values)

def load_train_data(train_x_path, train_y_path):
    """Load the training features and labels.

    Args:
        train_x_path: Path of the feature file.
        train_y_path: Path of the label file.

    Returns:
        ``(train_x, train_y)``: the feature array as read, and the labels
        reshaped to a 1-D array with one entry per row. The reshape only
        succeeds when the label file has a single column.
    """
    features = read_csv_data(train_x_path)
    labels = read_csv_data(train_y_path)

    # Collapse the (n_rows, 1) label column into shape (n_rows,).
    flat_labels = labels.reshape((labels.shape[0],))
    return features, flat_labels

def load_valid_data(valid_x_path, valid_y_path):
    """Load the development (validation) features and labels.

    Args:
        valid_x_path: Path of the feature file.
        valid_y_path: Path of the label file.

    Returns:
        ``(valid_x, valid_y)``: the feature array as read, and the labels
        reshaped to a 1-D array with one entry per row. The reshape only
        succeeds when the label file has a single column.
    """
    valid_x = read_csv_data(valid_x_path)
    label_column = read_csv_data(valid_y_path)

    # Collapse the (n_rows, 1) label column into shape (n_rows,).
    valid_y = label_column.reshape((label_column.shape[0],))
    return valid_x, valid_y

# Read both splits from the paths configured above.
train_x, train_y = load_train_data(TRAIN_X_PATH, TRAIN_Y_PATH)
valid_x, valid_y = load_valid_data(VALID_X_PATH, VALID_Y_PATH)

# Sanity check: one line per split, showing (features shape) (labels shape).
print(train_x.shape, train_y.shape)
print(valid_x.shape, valid_y.shape)

(2430, 3) (2430,)
(270, 3) (270,)


### MLP Classifier Initialization and Training

In [6]:
# Baseline model: scikit-learn's default MLP settings; only the seed and the
# iteration cap are set explicitly.
mlp = MLPClassifier(random_state=123, max_iter=1000)   # set fixed random state to ensure reproducibility

mlp.fit(train_x, train_y)

predict_valid_y = mlp.predict(valid_x)
predict_train_y = mlp.predict(train_x)

valid_accuracy = accuracy_score(valid_y, predict_valid_y)
train_accuracy = accuracy_score(train_y, predict_train_y)

# Scale to percent first and let the format spec do the rounding. Rounding the
# fraction and multiplying afterwards can print float artefacts such as
# 82.89999999999999%.
print(f"Accuracy on development set : {valid_accuracy * 100:.1f}%")
print(f"Accuracy on training set    : {train_accuracy * 100:.1f}%")


Accuracy on development set : 83.3%
Accuracy on training set    : 82.5%


## Problem (b)
### Hyper-parameter tuning
May take about 60 minutes (depending on your CPU).

In [15]:
# Number of worker threads (max_workers) of the pool used by the search below.
NUM_WORKERS = 8
# True: evaluate the parameter combinations concurrently in a ThreadPoolExecutor
# (threads, despite the name). False: evaluate them one after another.
MULTI_PROC  = True

def train_and_evaluate(combination, train_x, train_y, valid_x, valid_y):
    """Fit one MLP configuration and score it on the validation data.

    Args:
        combination: 5-tuple ``(hidden_layer_sizes, activation, solver,
            learning_rate_init, batch_size)``.
        train_x, train_y: Data the classifier is fitted on.
        valid_x, valid_y: Data the accuracy is computed on.

    Returns:
        ``(score, combination, model)``: the validation accuracy, the
        combination exactly as passed in, and the fitted classifier.
    """
    layers, activation_fn, optimizer, initial_lr, minibatch = combination

    classifier = MLPClassifier(
        hidden_layer_sizes=layers,
        activation=activation_fn,
        solver=optimizer,
        learning_rate_init=initial_lr,
        batch_size=minibatch,
        random_state=123,   # same seed for every configuration
        max_iter=1000,
    )
    classifier.fit(train_x, train_y)

    validation_accuracy = accuracy_score(valid_y, classifier.predict(valid_x))
    return validation_accuracy, combination, classifier


# Hyper-parameter grid for the exhaustive search. Every architecture is written
# as a tuple: `(500)` is just the int 500, so the single-layer case needs the
# trailing comma.
parameter_space = {
    'hidden_layer_sizes': [(500,), (1000, 500), (1000, 500, 200)],
    'activation': ['tanh', 'relu'],
    'solver': ['sgd', 'adam'],
    'learning_rate_init': [0.01, 0.001, 0.0005],
    'batch_size': [64, 128, 256]
}
# Single-point grid with the best configuration reported by the full search;
# uncomment it to skip the long search.
# parameter_space = {
#     'hidden_layer_sizes': [(1000, 500)],
#     'activation': ['relu'],
#     'solver': ['adam'],
#     'learning_rate_init': [0.0005],
#     'batch_size': [128]
# }

# Order in which train_and_evaluate unpacks a combination.
PARAM_NAMES = ('hidden_layer_sizes', 'activation', 'solver', 'learning_rate_init', 'batch_size')

param_combinations = list(itertools.product(
    *(parameter_space[name] for name in PARAM_NAMES)
))


def _evaluate_combination(combination):
    """Fit and score one combination on the notebook's train/dev split."""
    return train_and_evaluate(combination, train_x, train_y, valid_x, valid_y)


def _select_best(results):
    """Return the first (score, combination, model) triple with the highest score.

    Returns None when `results` is empty. Only the current best triple is kept
    while iterating.
    """
    best = None
    for result in results:
        if best is None or result[0] > best[0]:
            best = result
    return best


if MULTI_PROC:
    from concurrent.futures import ThreadPoolExecutor

    with ThreadPoolExecutor(max_workers=NUM_WORKERS) as executor:
        # executor.map yields results in submission order, so ties between
        # equally accurate models are always resolved the same way. Iterating
        # with as_completed would make the winner depend on thread timing.
        best_result = _select_best(executor.map(_evaluate_combination, param_combinations))
else:
    # Same routine as the threaded path, so both paths train with
    # random_state=123 and max_iter=1000 and give the same answer.
    best_result = _select_best(map(_evaluate_combination, param_combinations))

if best_result is None:
    raise ValueError("parameter_space produced no parameter combinations")

best_score, best_combination, best_model = best_result
best_params = dict(zip(PARAM_NAMES, best_combination))

print("="*15, "Best Parameter", "="*15)
print('hidden_layer_sizes\t:', best_params['hidden_layer_sizes'])
print('activation\t\t:', best_params['activation'])
print('solver\t\t\t:', best_params['solver'])
print('learning_rate_init\t:', best_params['learning_rate_init'])
print('batch_size\t\t:', best_params['batch_size'])
print('Validation Accuracy\t:', str(round(best_score*100, 3)) + '%')


hidden_layer_sizes	: (1000, 500)
activation		: relu
solver			: adam
learning_rate_init	: 0.0005
batch_size		: 128
Validation Accuracy	: 86.667%


### Prediction for test data

In [16]:
SAVE_TO_CSV = True

# The classifier fitted during the hyper-parameter search is reused as-is; it
# is not re-created or re-trained in this cell.
best_mlp = best_model

test_x = read_csv_data("./tstX-3d.csv")
predict_test_y = best_mlp.predict(test_x)
print("Predition for test data: \n", predict_test_y)

if SAVE_TO_CSV:
    # One prediction per row, written without a header row or index column.
    pred_test_y_df = pd.DataFrame({"prediction": predict_test_y})
    pred_test_y_df.to_csv('tstY-3d.csv', header=None, index=False)



Predition for test data: 
 [0 1 0 0 1 1 1 1 1 0 1 1 1 0 1 1 1 1 0 0 0 1 1 0 1 1 1 1 1 1 0 0 0 1 0 1 0
 1 0 0 1 0 0 0 1 0 0 1 0 1 1 1 0 1 0 0 0 0 1 1 1 0 1 0 1 1 1 0 1 0 0 0 1 1
 0 1 1 0 1 0 1 1 0 1 1 1 1 0 0 0 0 1 0 0 1 0 1 1 0 0 1 1 0 0 1 0 1 1 0 1 0
 0 1 0 1 0 0 1 0 0 0 1 0 1 0 1 0 1 1 1 1 0 1 1 1 0 0 1 1 1 0 0 1 0 0 1 1 0
 1 1 0 1 1 1 1 1 1 0 0 1 1 1 1 0 0 0 0 0 1 1 0 0 0 0 1 1 1 0 1 1 0 1 1 1 0
 0 1 1 0 0 0 0 0 1 1 1 1 1 0 0 1 1 0 1 1 1 1 1 0 1 1 0 1 0 1 0 1 1 0 1 1 0
 0 1 0 1 1 1 1 0 1 0 0 1 0 0 1 1 1 0 1 1 1 1 1 1 0 1 0 1 0 1 1 0 0 1 1 1 1
 0 0 1 1 0 1 1 1 0 1 1 1 0 1 0 1 1 0 0 1 0 1 1 1 0 1 0 1 1 1 0 0 0 0 0 1 1
 0 0 1 1]
