# Calibration workflows

This notebook shows how to calibrate sensors from test data, and how to export and load various types of models. The main implementation is based on sklearn's models and follows the fit/predict/transform convention, so that the same structure can be applied to sensor data processing in general.

These workflows can later be used to process sensor data automatically through blueprints, simply by naming the metric to add (see processing_data.ipynb).

In [1]:
from scdata.test import Test
from scdata.device import Device
from scdata._config import config

# Set the output level and the framework on the scdata config object.
# NOTE(review): presumably 'DEBUG' enables the most detailed output and
# 'jupyterlab' selects notebook-friendly rendering - confirm against scdata docs
config.out_level='DEBUG'
config.framework='jupyterlab'

## Load your data

In [None]:
# Create the Test object used throughout the notebook.
# NOTE(review): 'EXAMPLE' is presumably the test name - confirm
test = Test('EXAMPLE')

In [None]:
# Add as many devices as needed. See understanding blueprints below for more info.
# Both devices share the same blueprint, source and start date; only the id differs.
for device_id in ('10751', '10752'):
    test.add_device(
        Device(
            blueprint='sc_21_station',
            descriptor={
                'source': 'api',
                'id': device_id,
                'min_date': '2020-05-05',
            },
        )
    )

In [None]:
# Only load() is run here; the create() call is left disabled.
# NOTE(review): presumably create() is needed once for a new test and load()
# is used for one that already exists - confirm
#test.create()
test.load()

## Create models

This section will go through creating some models that aim to correct one sensor's readings against a reference measurement. As mentioned above, this is entirely based on sklearn's package, so it will make extensive use of it.

### Linear model

In [None]:
# sklearn model tools
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor

# Extra tools
from scdata._config import config
from scdata.test.utils import normalise_vbls
from scdata.io import model_export, model_load
from scdata.utils import get_metrics

In [None]:
# Model inputs.
# Calibrate the temperature of one device against another device's temperature:
# EXT_TEMP of device 10751 is the ground truth, TEMP of device 10752 is the input
variables = {"measurand": {'10751': ['EXT_TEMP']},
             "inputs": {'10752': ['TEMP']}}
measurand = variables["measurand"]
inputs = variables["inputs"]

# Options: use the model defaults held in the config
options = config.model_def_opt
print(options)

# Prepare the data for the chosen variables
df, refn = test.prepare(measurand, inputs)
# (df can be processed further here if necessary)
labels, features = normalise_vbls(df, refn)

# Train / test split, driven by the options
train_X, test_X, train_y, test_y = train_test_split(
    features,
    labels,
    test_size=options['test_size'],
    shuffle=options['shuffle'],
)

# Create and fit the model (fit returns the fitted estimator itself)
model = LinearRegression().fit(train_X, train_y)

# Predict on both splits
train_yhat = model.predict(train_X)
test_yhat = model.predict(test_X)

# Diagnose each split
metrics = {
    'train': get_metrics(train_y, train_yhat),
    'test': get_metrics(test_y, test_yhat),
}

# Export the model together with its variables, options and metrics
model_export(
    name='LINEAR_TEMPERATURE',
    model=model,
    variables=variables,
    hyperparameters=None,
    options=options,
    metrics=metrics,
)

In [None]:
# Check the metrics: train split first, then test split, one per line
print(metrics['train'], metrics['test'], sep='\n')

In [None]:
# Describe the new TEMP_CORR metric: it is produced by the 'apply_regressor'
# process, which receives the fitted model plus its variables and options
metric = {
    'TEMP_CORR': {
        'process': 'apply_regressor',
        'kwargs': {
            'model': model,
            'variables': variables,
            'options': options,
        },
    }
}

# Register the metric on device 10752, then process only that metric
test.devices['10752'].add_metric(metric)
test.devices['10752'].process(metrics=metric)

#### Plot

In [None]:
# One trace per (device, channel) pair, all drawn on subplot 1:
# reference, corrected and raw temperature
traces = {
    idx: {'devices': device, 'channel': channel, 'subplot': 1}
    for idx, (device, channel) in enumerate(
        [('10751', 'EXT_TEMP'), ('10752', 'TEMP_CORR'), ('10752', 'TEMP')],
        start=1,
    )
}

options = {'frequency': '1H'}

# The trailing semicolon keeps the notebook from echoing the return value
test.ts_iplot(traces=traces, options=options);

In [None]:
# Reference temperature and corrected temperature, both on subplot 1
traces = {
    idx: {'devices': device, 'channel': channel, 'subplot': 1}
    for idx, (device, channel) in enumerate(
        [('10751', 'EXT_TEMP'), ('10752', 'TEMP_CORR')],
        start=1,
    )
}

options = {'frequency': '1H'}

# The trailing semicolon keeps the notebook from echoing the return value
test.scatter_plot(traces=traces, options=options);

### ML model

In [None]:
# Model inputs: same ground truth and input channels as the linear model
measurand = {'10751': ['EXT_TEMP']} # Ground truth
inputs = {'10752': ['TEMP']} # Input
variables = {"measurand": measurand, "inputs": inputs}

# Hyperparameters and options
hyperparameters = config.model_hyperparameters['rf']
# Work on a copy of the defaults: 'common_avg' is set below, and assigning it
# on config.model_def_opt directly would change the defaults seen by every
# later use of the config in this session
options = dict(config.model_def_opt)

# This averages the common channels into one, if any
options['common_avg'] = True

# Prepare the data, this time passing the options
df, refn = test.prepare(measurand, inputs, options)

# Do something else with df if necessary
labels, features = normalise_vbls(df, refn)

# Train test split
train_X, test_X, train_y, test_y = train_test_split(features, labels,
                                        test_size = options['test_size'],
                                        shuffle = options['shuffle'])

# Create model from the random forest hyperparameters held in the config
model = RandomForestRegressor(n_estimators = hyperparameters['n_estimators'],
                              min_samples_leaf = hyperparameters['min_samples_leaf'],
                              oob_score = hyperparameters['oob_score'],
                              max_features = hyperparameters['max_features'])

# Fit - predict
model.fit(train_X, train_y)
train_yhat = model.predict(train_X)
test_yhat = model.predict(test_X)

# Diagnose
metrics = {'train': get_metrics(train_y, train_yhat),
           'test': get_metrics(test_y, test_yhat)}

# Export the model together with its variables, hyperparameters, options and metrics
model_export(name = 'RF_TEMP', model = model, variables = variables,
             hyperparameters = hyperparameters, options = options,
             metrics = metrics)

In [None]:
# Describe the new TEMP_CORR_ML metric: it is produced by the 'apply_regressor'
# process, which receives the fitted model plus its variables and options
metric = {
    'TEMP_CORR_ML': {
        'process': 'apply_regressor',
        'kwargs': {
            'model': model,
            'variables': variables,
            'options': options,
        },
    }
}

# Register the metric on device 10752, then process only that metric
test.devices['10752'].add_metric(metric)
test.devices['10752'].process(metrics=metric)

## Compare

In [None]:
# One trace per (device, channel) pair, all drawn on subplot 1:
# reference, both corrected channels and the raw temperature
traces = {
    idx: {'devices': device, 'channel': channel, 'subplot': 1}
    for idx, (device, channel) in enumerate(
        [
            ('10751', 'EXT_TEMP'),
            ('10752', 'TEMP_CORR'),
            ('10752', 'TEMP_CORR_ML'),
            ('10752', 'TEMP'),
        ],
        start=1,
    )
}

options = {'frequency': '1H'}

# The trailing semicolon keeps the notebook from echoing the return value
test.ts_iplot(traces=traces, options=options);

In [None]:
# Two scatter subplots of EXT_TEMP (device 10751) against a corrected channel
# of device 10752: the linear correction on subplot 1, the ML one on subplot 2
traces = {1: {'devices': ['10751', '10752'],
              'channels': ['EXT_TEMP', 'TEMP_CORR'],
              'subplot': 1},
          2: {'devices': ['10751', '10752'],
              'channels': ['EXT_TEMP', 'TEMP_CORR_ML'],
              'subplot': 2}
         }

options = {'frequency': '1H'}

# Figure formatting. The x-axis label previously read 'Ground trugh (degC)';
# the typo is fixed because this text is shown on the plot
formatting = {'width': 25, 'height': 10,
              'ylabel': {1: 'Corrected temperature (degC)'},
              'title': 'Alphadelta / Avda Roma - Traffic',
              'xlabel': {1: 'Ground truth (degC)'},
              'fontsize': 12}

# The trailing semicolon keeps the notebook from echoing the return value
test.scatter_plot(traces = traces, options = options, formatting = formatting);