In [2]:
import os
import sys
sys.path.append("../")


In [None]:
from time import time
from src.utils_ import find_file, run_time, DataDB
from src.data_ import DataPrep
from src.models_ import instantiate_model, adam
from src.train_eval_ import train_model, evaluate_model
import pandas as pd

In [5]:
# Timestamp captured with time() before any of the work below starts.
start_time = time()

# One CSV per KP count; the integer prefix of each file name is that count.
files = [
    "6KP_48CLA.csv",
    "10KP_48CLA.csv",
    "14KP_48CLA.csv",
    "18KP_48CLA.csv",
    "22KP_48CLA.csv",
    "26KP_48CLA.csv",
]

# Resolve every file name through find_file, in list order. The "pc" argument
# is passed through unchanged; its meaning is defined by find_file.
kp_paths = [find_file(name, "pc") for name in files]
KP6_PATH, KP10_PATH, KP14_PATH, KP18_PATH, KP22_PATH, KP26_PATH = kp_paths

# KP count -> resolved CSV path, in the same order as `files`.
kp_dict = dict(zip((6, 10, 14, 18, 22, 26), kp_paths))

# Make sure the local results folder exists (no error if it already does).
os.makedirs("./results", exist_ok=True)

In [6]:
# Handle to the project's DataDB store; it is queried below for stored model parameters.
db = DataDB()

In [7]:
# Query the DataDB.MODEL_COLL collection with network="FullyConnectedNN".
# NOTE(review): presumably returns every matching record as an indexable
# sequence (the first element is taken later on) — confirm against DataDB.query_all.
params_list = db.query_all(DataDB.MODEL_COLL, network="FullyConnectedNN")

In [8]:
# Use the first record returned by the query as the hyperparameter source.
# Fail with an actionable message (same exception type as a bare index error)
# when the query matched nothing, instead of "list index out of range".
if not params_list:
    raise IndexError(
        "params_list is empty: the DataDB query for network='FullyConnectedNN' "
        "returned no records, so there are no parameters to train with."
    )
params = params_list[0]

In [None]:
# Loaded data and evaluation metrics for every run, keyed by KP count.
data_dict, res_dict = {}, {}

# The same stored hyperparameters are used for every KP count.
best_params = params['best_params']

for kp, csv_path in kp_dict.items():
    # Load the data for this KP count and keep it for later inspection.
    data_prep = DataPrep(csv_path=csv_path)
    data = data_prep.get_data()
    data_dict[kp] = data

    # Build a fresh network sized from the loaded data.
    model = instantiate_model(
        net_name="FullyConnectedNN",
        input_size=data['input_size'],
        output_size=data['output_size'],
        params=best_params,
    )

    # Train with Adam at the stored learning rate and epoch count.
    # NOTE(review): the test loader is also passed as the validation loader —
    # confirm this is intentional.
    optimizer = adam(model, learning_rate=best_params['learning_rate'])
    train_loss, val_loss = train_model(
        model=model,
        optimizer=optimizer,
        train_loader=data['train_loader'],
        val_loader=data['test_loader'],
        num_epochs=best_params['num_epochs'],
        loss_func_name='log_cosh',
    )

    # Evaluate on the test loader and record the metrics for this KP count.
    metrics_dict = evaluate_model(model, data['test_loader'], data['y_scaler'])
    res_dict[kp] = metrics_dict

In [None]:
# Print one summary line per KP count while tracking the lowest MSE seen.
# The strict "<" keeps the first KP count encountered when MSE values tie.
top_mse, top_kp = float('inf'), None
for kp, metrics in res_dict.items():
    print(f"{kp}KP RMSE:{metrics['rmse']:.4f} MSE:{metrics['mse']:.4f} R2:{metrics['r2']:.4f} MAE:{metrics['mae']:.4f}")
    if metrics['mse'] < top_mse:
        top_mse, top_kp = metrics['mse'], kp

In [36]:
# Metric columns to keep, in display order.
cols = ["mse", "rmse", "r2", "mae"]

# pd.DataFrame(res_dict) has one column per KP count; transpose so each KP
# count becomes a row, then select the metric columns.
metrics_by_kp = pd.DataFrame(res_dict).T
df = metrics_by_kp[cols]

In [None]:
# Display the metrics table (one row per KP count).
df

In [32]:
# Rank the (kp, metrics) pairs by ascending MSE; sorted() is stable, so ties
# keep their res_dict insertion order.
sorted_items = sorted(
    res_dict.items(),
    key=lambda kp_and_metrics: kp_and_metrics[1]['mse'],
)

In [None]:
# Persist the per-KP metrics table as CSV.
# DataFrame.to_csv does not create missing parent directories, so make sure
# the output folder exists first (no error if it is already there).
output_csv = "../kp_results/csv/optim_kp.csv"
os.makedirs(os.path.dirname(output_csv), exist_ok=True)
df.to_csv(output_csv)

In [None]:
# Display the (kp, metrics) pairs ordered from lowest to highest MSE.
sorted_items