In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
# General
import warnings                                  
warnings.filterwarnings('ignore')
from IPython.display import display, HTML
import math
from os.path import join
import sys
# Plots
import matplotlib.pyplot as plot
import matplotlib.colors
from matplotlib import gridspec
%matplotlib inline
import pprint
pp = pprint.PrettyPrinter(indent=4)
# SAF: star-import the project data module and build the data wrapper used by the rest of the notebook.
# NOTE(review): later cells use `pd`, `yaml`, `re` and `datetime` without importing them explicitly;
# presumably they arrive through this star import - confirm.
from src.data.data import *
data = data_wrapper()

# Put the tools directory on sys.path before importing `backup` below
# (presumably that is where the `backup` module lives - confirm).
cmd_subfolder = data.toolsDirectory
print ('Comand subfolder')
print (cmd_subfolder)

if cmd_subfolder not in sys.path:
    sys.path.insert(0, cmd_subfolder)
    
# Star-import the backup module, create the S3 handler and call get_objects() on it.
from backup import *
s3handler = S3handler()
s3handler.get_objects()

In [None]:
# Both inventory files sit in the 'deliveries' folder of the inventory directory
deliveries_dir = join(data.inventoryDirectory, 'deliveries')
deliveries_path = join(deliveries_dir, 'deliveries.yaml')
inventory_path = join(deliveries_dir, 'inventory.csv')

# Fetch each file through the S3 handler, deliveries first
for remote_file in (deliveries_path, inventory_path):
    s3handler.download(remote_file)

In [None]:
# Deliveries registry: parsed with the safe YAML loader
with open(deliveries_path, 'r') as yaml_file:
    print (f'Loading deliveries file from: {deliveries_path}')
    deliveries = yaml.safe_load(yaml_file)

# Inventory table: read the CSV, then index it by its 'time' column
with open(inventory_path, 'r') as csv_file:
    print (f'Loading inventory file from: {inventory_path}')
    inventory_raw = pd.read_csv(csv_file)
    inventory = inventory_raw.set_index('time')

In [None]:
# Show the loaded inventory table (last expression of the cell, so it is rendered as the cell output)
inventory

In [None]:
# INPUT DATA
# Delivery (client) and batch to report on; both are matched against the inventory table
client = 'ISGLOBAL'
batch = 4
type_file = None
# Percentage of points to be considered NG sensor
limit_errors = 3
# Multiplier for std_dev (sigma) - Normal distribution (99.73%)
limit_confidence_sigma = 3
# t-student confidence level (%)
t_confidence_level = 99
# Use average dispersion or instantaneous
use_instantatenous_dispersion = False
# Min/max date for the analysis
min_date = None
max_date = None
# Resampling frequency applied to every channel
raster = '10Min'
# In case there is a device with lower amount of channels, ignore the missing channels and keep going
ignore_missing_channels = True
# Smooth channels (rolling mean over `smooth_number` samples)
smooth_channels = True
smooth_number = 5
# Set options below or use defaults from config.yaml
options = {
    'clean_na': True,
    'clean_na_method': 'drop',
    'load_cached_API': True,
}


def _kit_ids(platform_urls):
    """Strip the platform URL prefix from every entry, leaving only the kit id."""
    return [re.sub('https://smartcitizen.me/kits/', '', url) for url in platform_urls]


## Get kits from inventory and put it in a comprehensible structure
is_client = inventory['delivery'] == client
batch_rows = inventory.loc[is_client & (inventory['batch'] == batch)]

KITS_IDS = _kit_ids(batch_rows['platform_url'].values)
test_names = list(set(batch_rows['test']))

# One entry per test: delivered kits, their validation dates and a placeholder for the results.
# NOTE(review): unlike KITS_IDS and test_names, the per-test selection below does not filter on
# `batch` - confirm whether kits of other batches sharing a test are meant to be included.
data_st = dict()
for test in test_names:
    delivered_rows = inventory.loc[is_client & (inventory['test'] == test) & (inventory['delivered'] == 'y')]
    data_st[test] = {
        'dispersion_df': pd.DataFrame(),
        'kits': _kit_ids(delivered_rows['platform_url'].values),
        'min_date': list(set(delivered_rows['min_validation_date'].dropna().values)),
        'max_date': list(set(delivered_rows['max_validation_date'].dropna().values)),
    }
print (data_st)

In [None]:
# Load every test that holds devices of the selected client batch, using the options defined above
for test in test_names:
    data.load_test(test, options)

In [None]:
# Report header: title plus delivery, batch and device count
for header_line in (
    '<h2>Delivery report</h2>',
    '<h3>Information</h3>',
    '<p><strong>Delivery:</strong> {}</p>'.format(client),
    '<p><strong>Batch:</strong> {}</p>'.format(round(batch)),
    '<p><strong>Total number of devices tested:</strong> {}</p>'.format(len(KITS_IDS)),
):
    display(HTML(header_line))

# Rows of the inventory that belong to the reported client batch
batch_mask = (inventory['delivery'] == client) & (inventory['batch'] == batch)
summary_columns = ['platform_name', 'platform_url', 'tested','delivered']

# One summary per test: device counts followed by the table of delivered devices
for test in test_names:
    test_mask = inventory['test'] == test
    delivered_devices = int((test_mask & batch_mask).sum())
    tested_in_test = int(test_mask.sum())
    display(HTML('<div style="page-break-before: always"> </div>'))
    display(HTML(f'- {delivered_devices} devices tested in a group of {tested_in_test}. Batch summary:'))
    display(inventory.loc[test_mask & batch_mask & (inventory['delivered'] == 'y'), summary_columns])

display(HTML('<p><strong>Test author(s):</strong><br> {}</p><hr>'.format('Óscar González - Victor Barberán')))

### Test Explanation

The devices are co-located for a period of at least 3 days in an **indoor** environment. Devices that show an abnormal behaviour are analysed and replaced if necessary.

#### Conditions

- Indoor environment
- No controlled temperature/humidity setup

#### Additional comments

- The test batch average is taken as the reference for the batch dispersion limits
- The confidence intervals used are those of the normal distribution (sample sizes > 30) and of Student's t-distribution (sample sizes < 30).
- The individual sensor components integrated in the Smart Citizen hardware have their own accuracies and dispersions, for which Smart Citizen cannot assume any liability other than trying to work with the most appropriate selection. The tests we perform aim to identify, and take responsibility for, any failures in the sensors and their integration within the Smart Citizen hardware. For more information, please check the <a href="https://docs.smartcitizen.me">official documentation</a> and the datasheets of each of the sensors in the [sensors part](https://docs.smartcitizen.me/Components/Urban%20Sensor%20Board/).

In [None]:
# Imported here so the error handler below works on a fresh kernel: the only explicit
# `import traceback` in the notebook sits in a later cell.
import traceback

# Channels compared across the devices of each test
list_channels =  ['PRESS', 'CCS811_ECO2', 'EXT_PM_10', 'NOISE_A', 'TEMP', 'CCS811_VOCS', 'HUM', 'EXT_PM_1', 'LIGHT', 'EXT_PM_25']
min_date = None
max_date = None

warning_displayed = False


def _validation_limit(date_value, timezone):
    """Return an inventory validation date as a timestamp expressed in `timezone`.

    Values without time-zone information are taken as UTC; values that already
    carry a time zone are converted directly.
    """
    limit = pd.to_datetime(date_value)
    if limit.tzinfo is None:
        limit = limit.tz_localize('UTC')
    return limit.tz_convert(timezone)


for test in test_names:
    print (f'Calculating test:\n {test}')
    kits_delivered = data_st[test]['kits']
    test_devices = data.tests[test].devices
    kits_in_test = list(test_devices.keys())

    print (f'Kits delivered from this test:\n {kits_delivered}')
    print (f'All kits tested in this test:\n {kits_in_test}')

    dispersion_df = pd.DataFrame()
    # NOTE(review): if the devices report different locations, an arbitrary one is picked
    location_test = list(set([test_devices[device].location for device in kits_in_test]))[0]
    print (f'Test location:\n {location_test}')

    # A delivered kit that was not loaded with this test gets a warning instead of a KeyError
    kits_available = list()
    for device in kits_delivered:
        if device in kits_in_test:
            kits_available.append(device)
        else:
            display(HTML('<p>WARNING: Device {} has no data loaded in test {}</p>'.format(device, test)))
            warning_displayed = True

    for channel in list_channels:
        columns = list()

        for device in kits_available:
            readings = test_devices[device].readings
            if channel in readings.columns and len(readings.loc[:,channel]) > 0:
                # Important to resample and bfill for unmatching measures
                channel_new = readings[channel].resample(raster).bfill()
                if smooth_channels:
                    channel_new = channel_new.rolling(window=smooth_number).mean()
                dispersion_df[channel + '-' + device] = channel_new
                columns.append(channel + '-' + device)
            else:
                display(HTML('<p>WARNING: Device {} does not contain {}</p>'.format(device, channel)))
                warning_displayed = True

        # Calculate Metrics (row-wise, so they do not depend on the trimming done below)
        dispersion_df[channel + '_AVG'] = dispersion_df.loc[:,columns].mean(skipna=True, axis = 1)
        dispersion_df[channel + '_STD'] = dispersion_df.loc[:,columns].std(skipna=True, axis = 1)

    # Localise the index once, after every column has been aligned on it. Doing this inside the
    # channel loop made the index tz-aware after the first channel, which a tz-naive series of a
    # later channel may not be able to align with.
    try:
        if dispersion_df.index.tzinfo is None:
            dispersion_df.index = dispersion_df.index.tz_localize('UTC').tz_convert(location_test)
    except Exception:
        # Best effort, as before: report the problem and carry on with the index unchanged
        traceback.print_exc()

    # Trim dataset to min and max dates (normally these tests are carried out with _minutes_ of differences)
    # NOTE(review): when the inventory holds several distinct dates for a test, only the first
    # element of an unordered set is used.
    if data_st[test]['min_date'] != []:
        dispersion_df = dispersion_df[dispersion_df.index > _validation_limit(data_st[test]['min_date'][0], location_test)]
    if data_st[test]['max_date'] != []:
        dispersion_df = dispersion_df[dispersion_df.index < _validation_limit(data_st[test]['max_date'][0], location_test)]

    data_st[test]['dispersion_df'] = dispersion_df

    if data_st[test]['min_date'] != []: print (f"Test min validation date:\n {data_st[test]['min_date'][0]}")
    if data_st[test]['max_date'] != []: print (f"Test max validation date:\n {data_st[test]['max_date'][0]}")

## Plots

In [None]:
# Number of subplots
import matplotlib.cm as cm

# 'seaborn-white' was renamed to 'seaborn-v0_8-white' in newer matplotlib releases;
# use whichever name this installation lists as available.
plot.style.use('seaborn-white' if 'seaborn-white' in plot.style.available else 'seaborn-v0_8-white')

number_of_subplots = len(list_channels)
cols = 2 if number_of_subplots % 2 == 0 else 3
rows = int(math.ceil(number_of_subplots / cols))

PAGE_BREAK = '<div style="page-break-before: always"> </div>'


def _device_color(position, total):
    """Pick a viridis colour for the device at `position` out of `total` delivered kits."""
    palette = cm.viridis.colors
    # Clamp the index: rounding could otherwise land one past the end of the palette
    return palette[min(round(position * len(palette) / total), len(palette) - 1)]


def _channel_stats(frame, channel):
    """Return the average series of a channel and the half-width of its confidence band."""
    average = frame[channel + '_AVG']
    band = limit_confidence_sigma * frame[channel + '_STD'].mean()
    return average, band


def _draw_scatter(ax, frame, channel, kits):
    """Scatter every device against the test average, with the identity and limit lines."""
    average, band = _channel_stats(frame, channel)

    for position, device in enumerate(kits):
        column = channel + '-' + device
        # Devices without this channel have no column in the dispersion frame
        if column not in frame.columns:
            continue
        ax.scatter(average, frame[column], label = device, alpha = 0.3,
                   color = _device_color(position, len(kits)))

    # Series.min()/max() skip NaN; the builtins return NaN when the series starts with NaN
    # (e.g. the first samples of a rolling mean), which made these lines disappear.
    low, high = average.min(), average.max()
    ax.plot([low, high], [low, high], 'r', label = 'AVG', alpha = 0.9, linewidth = 1.5)
    ax.plot([low, high], [low - band, high - band], 'g', label = 'AVG ± σSTD', alpha = 0.8, linewidth = 1.5)
    ax.plot([low, high], [low + band, high + band], 'g', alpha = 0.8, linewidth = 1.5)

    ax.set_xlabel('Reference (avg. of test)')
    ax.set_ylabel('Individual device (-)')


def _draw_time_series(ax, frame, channel, kits):
    """Plot every device over time together with the test average and its limit lines."""
    average, band = _channel_stats(frame, channel)

    for position, device in enumerate(kits):
        column = channel + '-' + device
        # Devices without this channel have no column in the dispersion frame
        if column not in frame.columns:
            continue
        ax.plot(frame.index, frame[column], label = device, alpha = 0.3,
                color = _device_color(position, len(kits)))

    # The average is drawn once, so the legend carries a single 'AVG' entry
    ax.plot(frame.index, average, 'r', label = 'AVG', alpha = 0.9, linewidth = 1.5)
    ax.plot(frame.index, average - band, 'g', label = 'AVG ± σSTD', alpha = 0.8, linewidth = 1.5)
    ax.plot(frame.index, average + band, 'g', alpha = 0.8, linewidth = 1.5)

    # The x axis of this plot is the time index, not the reference average
    ax.set_xlabel('Date')
    ax.set_ylabel('Individual device (-)')


def _draw_grid(draw_channel, frame, kits):
    """Create one figure with a subplot per channel, fill each with `draw_channel` and show it."""
    gs = gridspec.GridSpec(rows, cols, wspace=0.9, hspace=0.4)
    fig = plot.figure(figsize=(cols*10,rows*5))

    for n, channel in enumerate(list_channels):
        ax = fig.add_subplot(gs[n])
        draw_channel(ax, frame, channel, kits)
        ax.legend(bbox_to_anchor=(1, 0.4), fancybox=True, loc='center left', ncol = 2)
        ax.set_title(f"Dispersion analysis for {channel} sensor - STD = {round(frame[channel + '_STD'].mean(), 2)}")
        ax.grid(True)
    plot.show()


for test_number, test in enumerate(test_names, start=1):
    frame = data_st[test]['dispersion_df']
    kits = data_st[test]['kits']

    display(HTML(PAGE_BREAK))
    display(HTML(f'<h4>Test number: {test_number} of {len(test_names)}</h4>'))
    display(HTML('<h5>Scatter plots</h5>'))
    # Scatter plots
    _draw_grid(_draw_scatter, frame, kits)

    display(HTML(PAGE_BREAK))
    display(HTML('<h5>Time Series plots</h5>'))
    # Time series plot
    _draw_grid(_draw_time_series, frame, kits)

## Additional Comment

- FD94: Only published battery - 10615. Shows signs of water corrosion - Replaced full kit with a new one (except the PM sensor)

<div style="text-align:center">
<img src="https://i.imgur.com/f02F9mD.jpg" width=600px>
</div>

In [None]:
# Create report

In [None]:
import subprocess
import traceback


# Report name: two-digit year and two-digit month, followed by client and batch.
# The current time is read once so both parts come from the same instant.
now = datetime.now()
filename = '{}{:02d}_{}_{}'.format(str(now.year)[-2:], now.month, client, round(batch))
print (f'Creating delivery report for {filename}')

# Convert this notebook to HTML with the project's nbconvert configuration and template
conversion_status = subprocess.call(['jupyter', 'nbconvert', 'delivery_report.ipynb',
                 '--config', 'sc_nbconvert_config.py',  '--sc_Preprocessor.expression="show_only_output"',
                 '--to', 'html',  '--TemplateExporter.template_file=./templates/full_sc',
                 '--output-dir=../data/reports/deliveries', f'--output={filename}.html'])

# Only open the report when the conversion succeeded; a failure is reported instead of ignored.
# NOTE(review): `open` is the macOS launcher - presumably this notebook is run on macOS; confirm.
if conversion_status == 0:
    subprocess.call(['open', f'../data/reports/deliveries/{filename}.html'])
else:
    print (f'Report conversion failed (nbconvert exit code {conversion_status}); the report was not opened')

In [None]:
# Upload it

In [None]:
# Imported locally so this cell does not depend on another cell for the error handler
import traceback

# Same setup as at the top of the notebook, repeated so this cell can be run on its own:
# make the tools directory importable and create a fresh S3 handler.
cmd_subfolder = data.toolsDirectory
print ('Comand subfolder')
print (cmd_subfolder)

if cmd_subfolder not in sys.path:
    sys.path.insert(0, cmd_subfolder)

from backup import *
s3handler = S3handler()
s3handler.get_objects()

filehtml = join(data.dataDirectory, f'reports/deliveries/{filename}.html')
filepdf = join(data.dataDirectory, f'reports/deliveries/{filename}.pdf')

# Expiration passed to the upload, in seconds (14 days)
report_expiration = 14*24*3600

# Each file is uploaded independently so a failure on one does not skip the other.
# Failures are reported and the notebook keeps running (best effort, as before).
for report_file in (filehtml, filepdf):
    try:
        s3handler.upload(report_file, expiration=report_expiration)
    except Exception:
        traceback.print_exc()

In [None]:
# Update deliveries: record the report name under this client's batch entry.
# A missing client or batch key raises KeyError, so nothing is written or uploaded in that case.
deliveries[client]['batches'][batch]['report'] = filename
with open(deliveries_path, 'w') as deliveries_yaml:
    # The file is being written here, so the message says "to" rather than "from"
    print (f'Saving deliveries file to: {deliveries_path}')
    yaml.dump(deliveries, deliveries_yaml)

# Upload the updated deliveries file through the S3 handler
s3handler.upload(deliveries_path)