# Sensor Calibration

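This example walks you through creating a sensor calibration model using two different methods: an ordinary least squares (OLS) linear model and a random forest (RF). The two models are then compared in a target diagram.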

In [None]:
# NOTE(review): the wildcard import hides where names come from; in the cells
# visible here only `data_wrapper` is taken from it.
from src.data.data import *
# Session-wide data handler: the cells below load a test into it, prepare the
# modeling dataframes through it and archive the trained models on it.
data = data_wrapper()

## Load data

In [None]:
# Test to load, and the options to load it with.
# Options that are not given here use the defaults from config.yaml. Besides the
# two set below, the following can also be passed:
#   'frequency': '3Min', 'load_cached_API': True, 'store_cached_API': True
testname = "2019-03_EXT_UCD_URBAN_BACKGROUND_API"
options = dict(clean_na=True, clean_na_method='drop')
data.load_test(testname, options)

## Create a Linear Model

In [None]:
from src.models.model import model_wrapper

# Input model description for the linear (OLS) model.
# The dict is handed as-is to model_wrapper below.
model_description_ols = {
    "model_name": "OLS_UCD",
    "model_type": "OLS",
    "model_target": "ALPHASENSE",
    "data": {
        # Same test for both splits: device 5262 for training, 5565 for testing
        "train": {"2019-03_EXT_UCD_URBAN_BACKGROUND_API": {"devices": ["5262"],
                                                            "reference_device": "CITY_COUNCIL"}},
        "test": {"2019-03_EXT_UCD_URBAN_BACKGROUND_API": {"devices": ["5565"],
                                                           "reference_device": "CITY_COUNCIL"}},
        # presumably "REF" is the reference channel and "A"/"B"/"C" the
        # predictors -- TODO confirm against model_wrapper
        "features": {"REF": "NO2_CONV",
                     "A": "GB_2W",
                     "B": "GB_2A",
                     "C": "HUM"},
        # "frequency" used to be listed twice with the same value; a dict
        # literal keeps only one entry per key, so the duplicate was dropped.
        "data_options": {"frequency": "1Min",
                         "clean_na": True,
                         "clean_na_method": "drop",
                         "min_date": None,
                         # NOTE(review): this date predates the "2019-03" in the
                         # test name -- confirm it is the intended cut-off
                         "max_date": "2019-01-15"},
    },
    "hyperparameters": {"ratio_train": 0.75},
    "model_options": {"session_active_model": True,
                      "show_plots": True,
                      "export_model": False,
                      "export_model_file": False,
                      "extract_metrics": True},
}

# ---
# Init OLS model
ols_model = model_wrapper(model_description_ols, verbose = True)

# Prepare dataframe for modeling; the returned test name indexes the data below
test_name = data.prepare_dataframe_model(ols_model)

# Train model based on training dataset
ols_model.train()

# Get prediction for train device (first device listed for the training test)
device = ols_model.data['train'][test_name]['devices'][0]
prediction_name = device + '_' + ols_model.name
train_device = data.tests[test_name].devices[device]
prediction = ols_model.predict(train_device.readings, prediction_name)
# Combine it in readings. Assuming `readings` is a pandas object, combine_first
# returns a new object instead of updating in place, so the result has to be
# assigned back -- otherwise the prediction never reaches the readings.
train_device.readings = train_device.readings.combine_first(prediction)

# Archive model
if ols_model.options['session_active_model']:
    data.archive_model(ols_model)

# Extract metrics for the training dataset
if ols_model.options['extract_metrics']:
    metrics_model_ols = ols_model.extract_metrics('train')

## Create a Random Forest

In [None]:
from src.models.model import model_wrapper

# Input model description for the random forest (RF) model.
# The dict is handed as-is to model_wrapper below.
model_description_rf = {
    "model_name": "RF_UCD",
    "model_type": "RF",
    "model_target": "ALPHASENSE",
    "data": {
        # Same test for both splits: device 5262 for training, 5565 for testing
        "train": {"2019-03_EXT_UCD_URBAN_BACKGROUND_API": {"devices": ["5262"],
                                                            "reference_device": "CITY_COUNCIL"}},
        "test": {"2019-03_EXT_UCD_URBAN_BACKGROUND_API": {"devices": ["5565"],
                                                           "reference_device": "CITY_COUNCIL"}},
        # presumably "REF" is the reference channel and "A"/"B"/"C" the
        # predictors -- TODO confirm against model_wrapper
        "features": {"REF": "NO2_CONV",
                     "A": "GB_2W",
                     "B": "GB_2A",
                     "C": "HUM"},
        # NOTE(review): "target_raster" is set here next to "frequency", while
        # the OLS description only uses "frequency" -- confirm which key is read
        "data_options": {"target_raster": "1Min",
                         "clean_na": True,
                         "clean_na_method": "drop",
                         "min_date": None,
                         "frequency": "1Min",
                         # NOTE(review): this date predates the "2019-03" in the
                         # test name -- confirm it is the intended cut-off
                         "max_date": "2019-01-15"},
    },
    # NOTE(review): no random seed is given here; confirm whether model_wrapper
    # fixes one, otherwise results may differ between runs
    "hyperparameters": {"ratio_train": 0.75,
                        "min_samples_leaf": 2,
                        "max_features": None,
                        "n_estimators": 100,
                        "shuffle_split": True},
    "model_options": {"session_active_model": True,
                      "show_plots": True,
                      "export_model": False,
                      "export_model_file": False,
                      "extract_metrics": True},
}

# ---
# Init RF model
rf_model = model_wrapper(model_description_rf, verbose = True)

# Prepare dataframe for modeling; the returned test name indexes the data below
test_name = data.prepare_dataframe_model(rf_model)

# Train model based on training dataset
rf_model.train()

# Get prediction for train device (first device listed for the training test)
device = rf_model.data['train'][test_name]['devices'][0]
prediction_name = device + '_' + rf_model.name
train_device = data.tests[test_name].devices[device]
prediction = rf_model.predict(train_device.readings, prediction_name)
# Combine it in readings. Assuming `readings` is a pandas object, combine_first
# returns a new object instead of updating in place, so the result has to be
# assigned back -- otherwise the prediction never reaches the readings.
train_device.readings = train_device.readings.combine_first(prediction)

# Archive model
if rf_model.options['session_active_model']:
    data.archive_model(rf_model)

# Extract metrics for the training dataset
if rf_model.options['extract_metrics']:
    metrics_model_rf = rf_model.extract_metrics('train')

## Model comparison

In [None]:
from src.visualization.visualization import targetDiagram
%matplotlib inline
# Collect the metrics of both models, one entry per "<model name>_<dataset>"
models = {}

# Every entry gets the same group id: the value is never changed below
group = 0
for model in (ols_model, rf_model):
    for split in ('train', 'validation'):
        # Only datasets with metrics available on the model are included
        if split not in model.metrics.keys():
            continue
        label = model.name + '_' + split
        # No copy is made: the stored entry is the model's own metrics object
        models[label] = model.metrics[split]
        models[label]['group'] = group

targetDiagram(models, True, 'seaborn-talk')