In [8]:
import os
import pandas as pd

import yaml
from catboost import CatBoostRegressor
from sklearn.metrics import r2_score

import sys 
sys.path.append('/Users/cher/Documents/gases-sensors/tools/')
from utils import create_experiment_folder
from dataloader import CustomDatasetRegression

In [4]:
# Root directory handed to create_experiment_folder for each training run.
EXP_PATH = '/Users/cher/Documents/gases-sensors/catboost/experiments'

# Keyword arguments unpacked into CatBoostRegressor by train_catboost;
# the same mapping is dumped to config.yaml in the experiment folder.
ARGS = dict(
    iterations=100,
    learning_rate=0.1,
    depth=6,
    loss_function='RMSE',
    verbose=50,
)

# Values passed as `dynamic` to CustomDatasetRegression, one training pass each.
DYNAMICS = [
    'linear', 'linear_short',
    'steps_up', 'steps_up_pulse',
    'steps_down', 'steps_down_pulse',
]

# Sensor identifiers R1 .. R12; one model is trained per sensor.
SENSORS = [f'R{number}' for number in range(1, 13)]

# Values passed as `gas` to CustomDatasetRegression.
GASES = ['CO']

In [6]:
def train_catboost(
        path, 
        exp_path, 
        dynamics, 
        sensors, 
        gases, 
        metric
    ):
    """Train one CatBoost regressor per (dynamic, gas, sensor) and score it.

    For every combination a model is fitted on the 'trn' split, evaluated on
    the 'vld' split with ``metric``, and saved as a ``.cbm`` file. The
    module-level ``ARGS`` mapping supplies the model hyper-parameters and is
    also written to ``config.yaml`` in the run directory.

    Parameters
    ----------
    path : str
        Passed as ``path`` to ``CustomDatasetRegression``.
    exp_path : str
        Passed to ``create_experiment_folder``; the value that helper returns
        is used as the output directory for this run.
    dynamics : sequence
        Values passed as ``dynamic`` to the dataset; they become the row
        labels of each metric table.
    sensors : sequence
        Sensor identifiers passed to ``get_all_data``; one metric table and
        one CSV file is produced per sensor.
    gases : sequence
        Values passed as ``gas`` to the dataset; they become the column
        labels of each metric table.
    metric : callable
        Called as ``metric(y_valid, y_valid_pred)``; the result is stored in
        the metric table.

    Returns
    -------
    dict
        Maps each sensor to a ``pandas.DataFrame`` (index ``dynamics``,
        columns ``gases``) holding the validation metric values.
    """
    run_dir = create_experiment_folder(exp_path)
    with open(os.path.join(run_dir, 'config.yaml'), 'w') as file:
        yaml.dump(ARGS, file, default_flow_style=False)

    # Training is silent unless ARGS sets 'verbose' itself. Passing
    # verbose=False next to **ARGS raised "got multiple values for keyword
    # argument 'verbose'" whenever ARGS contained that key.
    model_params = {'verbose': False, **ARGS}

    metric_dict = {
        sensor: pd.DataFrame(index=dynamics, columns=gases)
        for sensor in sensors
    }

    for dynamic in dynamics:
        print(f'Dynamic {dynamic}')
        for gas in gases:
            # The datasets depend only on (dynamic, gas), so they are built
            # once and reused for every sensor.
            dataset_train = CustomDatasetRegression(
                path=path,
                dynamic=dynamic,
                gas=gas,
                step='trn',
                output_type='np'
            )
            dataset_valid = CustomDatasetRegression(
                path=path,
                dynamic=dynamic,
                gas=gas,
                step='vld',
                output_type='np'
            )
            for sensor in sensors:
                x_train, y_train = dataset_train.get_all_data(sensor, 'conc')
                x_valid, y_valid = dataset_valid.get_all_data(sensor, 'conc')

                model = CatBoostRegressor(**model_params)
                model.fit(x_train, y_train)

                y_valid_pred = model.predict(x_valid)
                metric_dict[sensor].loc[dynamic, gas] = metric(y_valid, y_valid_pred)

                # Sensor ids such as 'R1' already carry the 'R' prefix; only
                # add it when missing so files are not named 'cb_RR1_...'.
                sensor_tag = str(sensor)
                if not sensor_tag.startswith('R'):
                    sensor_tag = f'R{sensor_tag}'
                cb_model_name = f'cb_{sensor_tag}_{dynamic}_{gas}.cbm'
                model.save_model(os.path.join(run_dir, cb_model_name))

    for sensor, table in metric_dict.items():
        # Keep the index: it is the only place the dynamic of each row is
        # recorded in the saved file.
        table.to_csv(os.path.join(run_dir, f'{sensor}.csv'), index_label='dynamic')

    return metric_dict

In [None]:
# Run the full sweep: one model per (dynamic, gas, sensor), scored with R^2
# on the validation split. EXP_PATH is the experiments root defined in the
# configuration cell, reused here instead of repeating the literal path.
train_catboost(
    path='/Users/cher/Documents/gases-sensors/data/exp1_melted',
    exp_path=EXP_PATH, 
    dynamics=DYNAMICS, 
    sensors=SENSORS, 
    gases=GASES, 
    metric=r2_score
)