# Data processing

This example walks through the basics of processing data and adding metrics.

## Concepts

Devices in the framework contain _raw readings_, which are stored in the `device.readings` pandas dataframe. A list of the sensors' raw metrics can be shown with `device.sensors`.

Devices can also contain processed values, called _metrics_. These metrics are added by passing a callable function, and are then processed.

In [None]:
from scdata.test import Test
from scdata.device import Device
from scdata._config import config

# Set the framework's output level to DEBUG
# (presumably makes scdata report more detail while running - confirm).
config.out_level = 'DEBUG'

# Create the Test object for the example named 'EXAMPLE_0.7.0'.
test = Test('EXAMPLE_0.7.0')

In [None]:
# Load the test created above.
# NOTE(review): presumably this fetches the readings of every device in the test - see help(Test.load).
test.load()

## Process basics

In [None]:
## The readings for each device are accessible via `.readings`
## (devices are looked up in test.devices by key; '14627' is the device used throughout this example)
test.devices['14627'].readings

## Basic example calculation

In [None]:
# Short name for the readings dataframe of device '14627'.
# NOTE(review): presumably a reference rather than a copy, so columns added to df
# would also appear in the device's readings - confirm.
df = test.devices['14627'].readings

In [None]:
# Hand-computed column: weighted sum of two ADC channels (weights 8 and 25).
# The ADC_POLY metric defined further down uses the same channels and coefficients.
df['METRIC'] = 8 * df['ADC_48_1'] + 25 * df['ADC_48_3']

In [None]:
# Display the two input channels next to the derived METRIC column.
df[['ADC_48_1', 'ADC_48_3', 'METRIC']]

## Making it repeatable

In [None]:
## The sensors for each device are accessible via `.sensors`
## (these describe the raw readings, as opposed to the processed metrics)
test.devices['14627'].sensors

In [None]:
## The metrics for each device are accessible via `.metrics`
## (metrics are the processed values, as opposed to the raw sensor readings)
test.devices['14627'].metrics

In [None]:
# Print the built-in documentation of Test.process, which is called in the next cell.
help(Test.process)

In [None]:
## Process the metrics using the default arguments of Test.process
test.process()

Now we can see the processed metrics in `device.readings`:

In [None]:
# Print the column labels of every device's readings, one device per line.
for device in test.devices:
    readings = test.devices[device].readings
    print(readings.columns)

## Add metrics

In [None]:
# Print the built-in documentation of Device.add_metric, used below to register new metrics.
help(Device.add_metric)

In [None]:
# Print the built-in documentation of Device.process, used below to compute the registered metrics.
help(Device.process)

In [None]:
import scdata
# Print the documentation of the timeseries process module.
help(scdata.device.process.timeseries)
# Other process modules can be inspected the same way
# (NOTE(review): module names not verified against the installed scdata version):
# help(scdata.device.process.alphasense)
# help(scdata.device.process.regression)

### Basic polynomial

In [None]:
# Print the documentation of poly_ts, the process used by the ADC_POLY metric in the next cell.
help(scdata.device.process.timeseries.poly_ts)

In [None]:
# Describe the ADC_POLY metric: the 'poly_ts' process applied to two ADC
# channels with coefficients 8 and 25 (the same channels and weights as the
# hand-computed METRIC column above).
# The key is a plain string: the original f-string had no placeholders.
metric = {
    'ADC_POLY': {
        'process': 'poly_ts',
        'kwargs': {
            'channels': ['ADC_48_1', 'ADC_48_3'],
            'coefficients': [8, 25],
        },
    }
}

# Register the metric on the device, then process just that metric by name.
test.devices['14627'].add_metric(metric)
test.devices['14627'].process(lmetrics=['ADC_POLY'])

In [None]:
# Display the readings again to inspect the result of processing ADC_POLY.
test.devices['14627'].readings

In [None]:
# Traces to plot, keyed by trace number: the two raw ADC channels share
# subplot 1, while the derived ADC_POLY metric goes on subplot 2.
traces = {
    1: {'devices': '14627', 'channel': 'ADC_POLY', 'subplot': 2},
    2: {'devices': '14627', 'channel': 'ADC_48_1', 'subplot': 1},
    3: {'devices': '14627', 'channel': 'ADC_48_3', 'subplot': 1},
}

# Plot options and figure formatting handed to ts_uplot.
options = {'frequency': '1H'}
formatting = {'width': 800, 'height': 200, 'padding-bottom': 400}

test.ts_uplot(traces=traces, options=options, formatting=formatting)

### Basic smoothing

In [None]:
# Describe NOISE_A_SMOOTH: the 'rolling_avg' process over NOISE_A with a
# window size of 5.
# The key is a plain string: the original f-string had no placeholders.
metric = {
    'NOISE_A_SMOOTH': {
        'process': 'rolling_avg',
        'kwargs': {
            'name': ['NOISE_A'],
            'window_size': 5,
        },
    }
}

# Register the metric on the device, then process just that metric by name.
test.devices['14627'].add_metric(metric)
test.devices['14627'].process(lmetrics=['NOISE_A_SMOOTH'])

In [None]:
# Describe NOISE_A_SMOOTH_10: the 'rolling_avg' process over NOISE_A with a
# window size of 10.
# The key is a plain string: the original f-string had no placeholders.
metric = {
    'NOISE_A_SMOOTH_10': {
        'process': 'rolling_avg',
        'kwargs': {
            'name': ['NOISE_A'],
            'window_size': 10,
        },
    }
}

# Register the metric on the device, then process just that metric by name.
test.devices['14627'].add_metric(metric)
test.devices['14627'].process(lmetrics=['NOISE_A_SMOOTH_10'])

In [None]:
# Describe NOISE_A_SMOOTH_60: the 'rolling_avg' process over NOISE_A with a
# window size of 60.
# The key is a plain string: the original f-string had no placeholders.
metric = {
    'NOISE_A_SMOOTH_60': {
        'process': 'rolling_avg',
        'kwargs': {
            'name': ['NOISE_A'],
            'window_size': 60,
        },
    }
}

# Register the metric on the device, then process just that metric by name.
test.devices['14627'].add_metric(metric)
test.devices['14627'].process(lmetrics=['NOISE_A_SMOOTH_60'])

In [None]:
# List the column labels to check that the three NOISE_A_SMOOTH* metrics are present.
test.devices['14627'].readings.columns

In [None]:
# Overlay the raw NOISE_A channel and its three rolling averages
# (window sizes 5, 10 and 60) on a single subplot for comparison.
traces = {
    1: {'devices': '14627', 'channel': 'NOISE_A', 'subplot': 1},
    2: {'devices': '14627', 'channel': 'NOISE_A_SMOOTH', 'subplot': 1},
    3: {'devices': '14627', 'channel': 'NOISE_A_SMOOTH_10', 'subplot': 1},
    4: {'devices': '14627', 'channel': 'NOISE_A_SMOOTH_60', 'subplot': 1},
}

# NOTE(review): '.2H' would be 0.2 hours (12 minutes) if it is parsed as a
# pandas offset string - confirm how ts_uplot interprets 'frequency'.
options = {'frequency': '.2H'}
formatting = {'width': 800, 'height': 400}

test.ts_uplot(traces=traces, options=options, formatting=formatting)

## Reprocessing

When adding a new metric, one can either process only the added metric, as above, or reprocess the whole test with `test.process()`.

If processing takes too long, only the newly added metrics can be processed with `test.process(only_new = True)`.

In [None]:
# Print the documentation of Test.process again; its only_new argument is used below.
help(Test.process)

In [None]:
# Print the documentation of clean_ts, the process used by the PM_1_CLEAN metric in the next cell.
help(scdata.device.process.timeseries.clean_ts)

In [None]:
# Describe PM_1_CLEAN: the 'clean_ts' process over PM_1 with limits [0, 1000]
# and a window size of 3.
# The key is a plain string: the original f-string had no placeholders.
metric = {
    'PM_1_CLEAN': {
        'process': 'clean_ts',
        'kwargs': {
            'name': 'PM_1',
            'limits': [0, 1000],
            'window_size': 3,
        },
    }
}

# Register the metric on device '14602', then process only the newly added
# metrics across the whole test.
test.devices['14602'].add_metric(metric)
test.process(only_new=True)

In [None]:
# Plot the raw PM_1 channel and its cleaned version on the same subplot.
traces = {
    1: {'devices': '14602', 'channel': 'PM_1', 'subplot': 1},
    2: {'devices': '14602', 'channel': 'PM_1_CLEAN', 'subplot': 1},
}

options = {'frequency': '1H'}

test.ts_uplot(traces=traces, options=options)

In [None]:
# Describe SCD30_CO2_SMOOTH: the 'rolling_avg' process over SCD30_CO2 with a
# window size of 10.
# The key is a plain string: the original f-string had no placeholders.
metric = {
    'SCD30_CO2_SMOOTH': {
        'process': 'rolling_avg',
        'kwargs': {
            'name': ['SCD30_CO2'],
            'window_size': 10,
        },
    }
}

# Register the metric, then process only the new metrics of this one device.
test.devices['14627'].add_metric(metric)
test.devices['14627'].process(only_new=True)

In [None]:
# Plot the raw SCD30_CO2 channel and its rolling average on the same subplot.
traces = {
    1: {'devices': '14627', 'channel': 'SCD30_CO2', 'subplot': 1},
    2: {'devices': '14627', 'channel': 'SCD30_CO2_SMOOTH', 'subplot': 1},
}

options = {'frequency': '1Min'}

test.ts_uplot(traces=traces, options=options)