In [4]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score, mean_squared_error
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_absolute_error
import numpy as np
import warnings
import copy
from sklearn.base import clone
import csv
from itertools import product
from sklearn.model_selection import GridSearchCV

In [5]:
# Select the dataset and read its semicolon-separated CSV (first row is the header).
# NOTE(review): the directory below is an absolute path on the author's machine;
# it must exist locally for this cell to run.
dataset = 'D5'
csv_path = '/home/mbdemoraes/Github/ieee-andescon-2024/datasets/' + dataset + '.csv'
data = pd.read_csv(csv_path, sep=';', header=0)

# Columns used as regression targets; every remaining column is a feature.
y_columns = ['COP', 'CWP', 'NPV']

# Split the frame into NumPy arrays: X holds the features, y the targets.
X = data.drop(columns=y_columns).values
y = data.loc[:, y_columns].values

# Rolling window size for each known dataset.
window_sizes = {'D1': 25, 'D2': 20, 'D3': 15, 'D4': 10, 'D5': 5}

# Fail fast on an unknown dataset name. The former if/elif chain had no fallback,
# so `window_size` stayed undefined (or, in a live kernel, silently kept the value
# left over from a previous run with another dataset).
if dataset not in window_sizes:
    raise ValueError(
        f'No rolling window size defined for dataset {dataset!r}; '
        f'expected one of {sorted(window_sizes)}'
    )
window_size = window_sizes[dataset]

In [6]:
# Hyper-parameter search space of each method.
# The key order inside each sub-dictionary fixes the position of every value
# in the combination tuples built below.
param_grid = dict(
    rf=dict(
        n_estimators=[10, 50, 100],
        max_depth=[None, 5, 10, 15],
        max_samples=[0.25, 0.5, 0.75, 1.0],
        max_features=[0.25, 0.5, 0.75, 1.0],
    ),
    knn=dict(
        n_neighbors=[1, 2, 3, 4, 5],
    ),
    mlp=dict(
        hidden_layer_sizes=[
            (10,), (10, 10), (50,), (100,), (150,),
            (50, 50), (100, 50), (100, 100), (150, 100),
        ],
        activation=['logistic', 'relu', 'tanh'],
        learning_rate_init=[0.0001, 0.001, 0.01],
        alpha=[0.00001, 0.0001],
        max_iter=[1000],
    ),
)

# Cartesian product of the candidate values: one tuple per hyper-parameter combination.
rf_param_combinations = [combo for combo in product(*param_grid['rf'].values())]
knn_param_combinations = [combo for combo in product(*param_grid['knn'].values())]
mlp_param_combinations = [combo for combo in product(*param_grid['mlp'].values())]

# Estimator instance for each method, keyed by the same short names used in param_grid.
models_names = dict(
    rf=RandomForestRegressor(),
    knn=KNeighborsRegressor(),
    mlp=MLPRegressor(),
)

def _empty_columns(*column_names):
    """Return a dict mapping each column name to its own new empty list."""
    return {column_name: [] for column_name in column_names}


# Column-oriented result stores, one per method: each key is a column name and
# each value the list that collects that column's entries.
results_rf = _empty_columns(
    'run', 'iteration',
    'n_estimators', 'max_depth', 'max_samples', 'max_features',
    'mae', 'rmse',
)
results_knn = _empty_columns(
    'run', 'iteration',
    'n_neighbors',
    'mae', 'rmse',
)
results_mlp = _empty_columns(
    'run', 'iteration',
    'hidden_layer_size_layer_1', 'hidden_layer_size_layer_2',
    'activation', 'learning_rate', 'l2_regularization',
    'mae', 'rmse',
)

In [7]:
# Evaluate every hyper-parameter combination of every method on successive windows:
# the first window only seeds the training set; each later window is predicted
# first and then appended to the training set.
def _record_result(model_name, run, iteration, params, mae, rmse):
    """Append one evaluation row to the results dictionary of ``model_name``.

    ``params`` is the hyper-parameter tuple in the key order of
    ``param_grid[model_name]``. Unknown model names are ignored.
    """
    if model_name == 'rf':
        store = results_rf
        row = {
            'n_estimators': params[0],
            'max_depth': params[1],
            'max_samples': params[2],
            'max_features': params[3],
        }
    elif model_name == 'knn':
        store = results_knn
        row = {'n_neighbors': params[0]}
    elif model_name == 'mlp':
        store = results_mlp
        layer_sizes = params[0]
        row = {
            'hidden_layer_size_layer_1': layer_sizes[0],
            # Single-layer networks have no second layer size to report
            'hidden_layer_size_layer_2': None if len(layer_sizes) == 1 else layer_sizes[1],
            'activation': params[1],
            'learning_rate': params[2],
            'l2_regularization': params[3],
        }
    else:
        return

    row.update({'run': run, 'iteration': iteration, 'mae': mae, 'rmse': rmse})
    for column, value in row.items():
        store[column].append(value)


with warnings.catch_warnings():
    # Silence every warning raised while this block runs
    warnings.filterwarnings("ignore")
    for model_name in models_names:
        param_combinations = list(product(*param_grid[model_name].values()))
        for run in range(1, 11):
            it_count = 1
            for i in range(0, len(X), window_size):
                if i != 0:
                    model = models_names[model_name]
                    # The current window is the unseen test data
                    X_test = X[i:i+window_size]
                    y_test = y[i:i+window_size]
                    for params in param_combinations:
                        hyper_params = dict(zip(param_grid[model_name].keys(), params))
                        model.set_params(**hyper_params)
                        print(f'Model {model_name}, run {run}, iteration {it_count}, window instances {i}-{i+window_size}, params {params}')

                        # Train on everything seen so far, then predict the current window
                        model.fit(X_train, y_train)
                        y_pred = model.predict(X_test)

                        # Error metrics: MAE, and RMSE as the square root of the MSE
                        mae = mean_absolute_error(y_test, y_pred)
                        mse = mean_squared_error(y_test, y_pred)
                        rmse = np.sqrt(mse)

                        _record_result(model_name, run, it_count, params, mae, rmse)

                    # Grow the training set with the window that was just evaluated
                    X_train = np.vstack([X_train, X_test])
                    y_train = np.vstack([y_train, y_test])
                else:
                    # First window: it only initialises the training set
                    X_train = copy.deepcopy(X[:window_size])
                    y_train = copy.deepcopy(y[:window_size])
                it_count += 1

Model rf, run 1, iteration 2, window instances 5-10, params (10, None, 0.25, 0.25)
Model rf, run 1, iteration 2, window instances 5-10, params (10, None, 0.25, 0.5)
Model rf, run 1, iteration 2, window instances 5-10, params (10, None, 0.25, 0.75)
Model rf, run 1, iteration 2, window instances 5-10, params (10, None, 0.25, 1.0)
Model rf, run 1, iteration 2, window instances 5-10, params (10, None, 0.5, 0.25)
Model rf, run 1, iteration 2, window instances 5-10, params (10, None, 0.5, 0.5)
Model rf, run 1, iteration 2, window instances 5-10, params (10, None, 0.5, 0.75)
Model rf, run 1, iteration 2, window instances 5-10, params (10, None, 0.5, 1.0)
Model rf, run 1, iteration 2, window instances 5-10, params (10, None, 0.75, 0.25)
Model rf, run 1, iteration 2, window instances 5-10, params (10, None, 0.75, 0.5)
Model rf, run 1, iteration 2, window instances 5-10, params (10, None, 0.75, 0.75)
Model rf, run 1, iteration 2, window instances 5-10, params (10, None, 0.75, 1.0)
Model rf, run 

In [8]:
# Write the results of each method to its own CSV file.
# Each output path is paired with the dictionary holding that method's results.
# Previously the loop read `results_rf` on every iteration, so the KNN and MLP
# files were filled with Random Forest results.
results_by_model = {'rf': results_rf, 'knn': results_knn, 'mlp': results_mlp}
paths = [f'results_{dataset}_{model_key}.csv' for model_key in results_by_model]

for csv_file_path, results in zip(paths, results_by_model.values()):
    # Column names become the header; the column lists are transposed into rows
    keys = list(results.keys())
    values = list(results.values())

    # Write the data to the CSV file (newline='' as the csv module requires)
    with open(csv_file_path, 'w', newline='') as csv_file:
        writer = csv.writer(csv_file)

        # Write the header row
        writer.writerow(keys)

        # Write the data rows: zip(*values) yields one tuple per recorded evaluation
        writer.writerows(zip(*values))

    print(csv_file_path + 'printed')

results_D5_rf.csvprinted
results_D5_knn.csvprinted
results_D5_mlp.csvprinted
