# Load data, plot, and export
This example loads data from the SmartCitizen API, makes some plots, and exports the data in CSV format.

## Initialise framework

In [1]:
# Reload imported modules automatically before each cell runs, so edits to the
# library source are picked up without restarting the kernel
%load_ext autoreload
%autoreload 2

In [3]:
from scdata.data import Data
# Single container object for this notebook: tests are loaded into it and
# later accessed through `data.tests`
data = Data()

## Define the devices

The sensor readings of each device (an SCK, a station, or other) are stored in a test. That test needs a name.

Devices also have predefined or custom blueprints, which can be found in `src/data/interim/blueprints.yaml`.


In [4]:
# Name of the test that will hold the downloaded data
testname = 'EXAMPLEU'

# Ids of the sensors to download, and the blueprint applied to each of them
devices = ['10604']
blueprint = 'sck_21'

# Options used when loading the data: frequency, date window and
# how missing values are cleaned
options = dict(
    frequency='5Min',
    min_date='2020-01-07',
    max_date='2020-01-25',
    clean_na="drop",
)

# Other example for options: 1-minute frequency, with no explicit date
# limits and no `clean_na` value
# options = dict(
#     frequency='1Min',
#     min_date=None,
#     max_date=None,
#     clean_na=None,
# )

In [5]:
# Add devices to list. You don't need to touch anything here
from scdata.data.device import Device
# Build one Device per requested sensor id, each using the shared blueprint
# and a descriptor that marks the API as its source. A comprehension is used
# so that no loop variable is left behind in the kernel namespace.
tdevices = [
    Device(blueprint = blueprint, descriptor = {'id': device_id, 'source': 'api'})
    for device_id in devices
]

## Load the data

The test defined above, with all its devices, is stored in an overarching data structure conveniently called `Data`, which exposes methods to load, export and process tests and devices. Loading also creates a folder in the `data/processed` directory, where the test information is stored and some device data is cached to speed things up the next time you load the test.

In [6]:
# We load the data based on the input above. The call returns the full name
# under which the test is registered; the notebook shows it as the cell output.
data.load_devices(testname, tdevices, options = options)

[32m[SUCCESS]: [0mAdd device 10604 OK
Creating new test
[32m[SUCCESS]: [0mTest update Finished
Processing files
Test 2020-04_INT_EXAMPLEU path: /Users/macoscar/Documents/04_Projects/02_FabLab/01_SmartCitizen/01_Repositories/DataAnalysis/smartcitizen-data/data/processed/2020/04/2020-04_INT_EXAMPLEU
[32m[SUCCESS]: [0mTest creation finished
Loading test 2020-04_INT_EXAMPLEU
Using options: {'cached_data_margin': 1, 'load_cached_api': True, 'store_cached_api': True, 'clean_na': 'drop', 'frequency': '5Min', 'min_date': '2020-01-07', 'max_date': '2020-01-25'}
---------------------------
Loading device 10604
Device 10604 is located at 41.396867, 2.194351
Device 10604 timezone is Europe/Madrid
Device 10604 has last reading at 2020-01-20T11:50:19Z
Checking if we can load cached data
[31m[ERROR]: [0mFile not found
Downloading device from API
First reading requested: 2020-01-07 01:00:00+01:00
Last reading requested: 2020-01-25 01:00:00+01:00
Requesting up to max available date in the API 2

'2020-04_INT_EXAMPLEU'

In [7]:
# Run the processing step on the loaded test; the value it returns is shown
# as the cell output
data.process()

---------------------------
Processing device 10604
Processing CCS811_ECO2_CLEAN
Loading clean_ts from scdata.process.formula
Processing CCS811_VOCS_CLEAN
Loading clean_ts from scdata.process.formula
Processing PM_10_CLEAN
Loading clean_ts from scdata.process.formula
Processing PM_1_CLEAN
Loading clean_ts from scdata.process.formula
Processing PM_25_CLEAN
Loading clean_ts from scdata.process.formula
[32m[SUCCESS]: [0mTest 2020-04_INT_EXAMPLEU processed OK


True

## Data structure
Here we show how the data is structured in the data object.

First the tests:

In [None]:
# Names of every test currently held by the `data` object
tests = [*data.tests.keys()]
tests

Then, the devices inside a test

In [None]:
# Identifiers of the devices that belong to the first test
devices = [*data.tests[tests[0]].devices.keys()]
devices

Then the data inside the devices

In [None]:
# Preview the first five rows of readings for the first device of the first test
first_device = data.tests[tests[0]].devices[devices[0]]
first_device.readings.head(5)

## Export the whole test
This will export all the devices in a test in csv file format, in the specified path

In [None]:
# Export only the first test here. To export every test, iterate over `tests`
# with a for loop and call `data.export` once per test name.
testname = tests[0]
data.export(
    testname,
    path = '/path/to/folder',
    forced_overwrite = True,
)

## Export one device
This will export one device in csv format, in the specified path

In [None]:
# Export a single device, looked up by its id inside the first test
data.tests[tests[0]].devices['10604'].export(path = '/path/to/folder', forced_overwrite = True)

## Explore data

Make some plots of the data and, optionally, export them. To export a plot, set `file_name` and `export_path` in the plot `options`.

In [None]:
# NOTE(review): every other import in this notebook comes from the `scdata`
# package; confirm that this `src.` module path is still valid
from src.visualization.visualization import Plot

All the devices in the test with name `testname`:

In [None]:
# One trace per PM channel, each on its own subplot, for all devices in the test
pm_channels = ("PM_1", "PM_25", "PM_10")
traces = {
    str(position): {"device": 'all', "channel": channel, "subplot": position}
    for position, channel in enumerate(pm_channels, start=1)
}

# Display/export switches and the data options applied before plotting
plot_options = {
    "show_plot": True,
    "export_path": None,
    "file_name": None,
    "frequency": '3Min',
    "clean_na": False,
    "clean_na_method": '',
    "max_date": None,
    "min_date": None,
}

# Labels, ranges and figure settings; the integer keys are the subplot numbers
plot_formatting = {
    "xlabel": "Date",
    "ylabel": {1: "PM1 (ug/m3)", 2: "PM2.5 (ug/m3)", 3: "PM10 (ug/m3)"},
    "yrange": {1: [0, 50], 2: [0, 50], 3: [0, 50]},
    "title": "PM Sensor tests",
    "sharex": True,
    "grid": True,
    "height": 20,
    "width": 15,
    "style": "seaborn-whitegrid",
}

plot_description = {
    "type": "timeseries",
    "library": "matplotlib",
    "data": {"test": testname, "traces": traces},
    "options": plot_options,
    "formatting": plot_formatting,
}

plot = Plot(plot_description)
plot.plot(data)
# Uncomment below to export the plot
# plot.export()

Only one device, with different metrics:

In [None]:
# One trace per PM channel, each on its own subplot, for device '10604' only
pm_channels = ("PM_1", "PM_25", "PM_10")
traces = {
    str(position): {"device": '10604', "channel": channel, "subplot": position}
    for position, channel in enumerate(pm_channels, start=1)
}

# Display/export switches and the data options applied before plotting
plot_options = {
    "show_plot": True,
    "export_path": None,
    "file_name": None,
    "frequency": '3Min',
    "clean_na": False,
    "clean_na_method": '',
    "max_date": None,
    "min_date": None,
}

# Labels, ranges and figure settings; the integer keys are the subplot numbers
plot_formatting = {
    "xlabel": "Time (-)",
    "ylabel": {1: "PM1 (ug/m3)", 2: "PM2.5 (ug/m3)", 3: "PM10 (ug/m3)"},
    "yrange": {1: [0, 50], 2: [0, 50], 3: [0, 50]},
    "title": "PM Sensor tests",
    "sharex": True,
    "grid": True,
    "height": 20,
    "width": 15,
    "style": "seaborn-whitegrid",
}

plot_description = {
    "type": "timeseries",
    "library": "matplotlib",
    "data": {"test": testname, "traces": traces},
    "options": plot_options,
    "formatting": plot_formatting,
}

plot = Plot(plot_description)
plot.plot(data)
# Uncomment below to export the plot
# plot.export()