# Exploring the UTx000 Extension Beacon Data
(Known as BPEACE2 in the [GH repo](https://github.com/intelligent-environments-lab/utx000))

In [2]:
# Silence every warning for the rest of the session so cell output stays readable.
# NOTE(review): with no category given, this filter also hides pandas deprecation
# warnings - consider narrowing it to specific categories.
import warnings
warnings.filterwarnings('ignore')

# Post-Calibration
With access to higher-grade instruments, we start to look at the possibility of post-calibrating the sensors to get more reliable results.

## Package Import

In [3]:
import os
import sys
sys.path.append('../')

from src.features import build_features
from src.visualization import visualize
from src.reports import make_report

import pandas as pd
import numpy as np

from datetime import datetime, timedelta

import math
import statsmodels.api as sm

import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.dates as mdates
from matplotlib.colors import ListedColormap, LinearSegmentedColormap

# Data Import
The functions below import data from the various sources.

## Beacons
We have to grab the raw data from the beacons during the various calibration events. We can borrow from the ```process_beacon``` function in the ```make_dataset.py``` source file.

In [15]:
def _import_and_merge(csv_dir):
    '''
    Reads every csv file in one sensor directory and combines them into a single dataframe

    Inputs:
        csv_dir: string path of the directory that holds the daily csv files

    Returns a dataframe of 2-minute means indexed by Timestamp, or None if the directory
    does not exist or none of its csv files could be read
    '''
    if not os.path.isdir(csv_dir):
        return None

    df_list = []
    # sorted so that the import order (and the printed log) is the same on every run
    for file in sorted(os.listdir(csv_dir)):
        if not file.endswith('.csv'):
            continue # stray non-data files are not worth attempting

        try:
            # reading in raw data (csv for one day at a time) and collecting it for a single concat
            day_df = pd.read_csv(f'{csv_dir}/{file}',index_col='Timestamp',parse_dates=True)
            df_list.append(day_df)
        except Exception:
            # some files have header issues - they are reported and left out rather than stopping the import
            print(f'\t\tIssue encountered while importing {csv_dir}/{file}, skipping...')

    if len(df_list) == 0:
        return None # pd.concat raises on an empty list

    # '2min' is the non-deprecated spelling of the '2T' offset alias
    return pd.concat(df_list).resample('2min').mean()


def process_beacon(beacon_list=np.arange(0,51,1),start_time=datetime(2020,12,28),end_time=datetime(2020,12,29),beacons_folder='../data/raw/bpeace2/beacon'):
    '''
    Combines data from all sensors on specified beacons

    Inputs:
        beacon_list: iterable of beacon numbers to import
        start_time: datetime of the first timestamp to keep (inclusive)
        end_time: datetime of the last timestamp to keep (inclusive)
        beacons_folder: string path of the directory that holds one B## folder per beacon

    Returns a dataframe indexed by Timestamp that holds the 2-minute means of the adafruit and
    sensirion readings for every beacon that had readable data, with a Beacon column identifying
    the device. The dataframe is empty if no beacon had readable data.
    '''
    beacon_frames = [] # one dataframe per beacon, concatenated once at the end

    print('Processing beacon data...\n\tReading for beacon:')
    for beacon in beacon_list:

        # correcting the number since the values <10 have leading zero in directory
        number = f'{beacon:02}'
        print(f'\t{number}')

        beacon_folder = f'{beacons_folder}/B{number}'

        # Python3 Sensors
        # ---------------
        py3_df = _import_and_merge(f'{beacon_folder}/adafruit')
        if py3_df is None:
            # missing folder, no csv files, or nothing importable
            print('\t\tNo readable adafruit data - skipping beacon')
            continue

        # Changing NO2 readings on beacons without NO2 readings to CO (wiring issues - see Hagen)
        if int(number) > 27:
            print('\t\tNo NO2 sensor - removing values')

            # raw values are copied so the assignment never depends on column-label alignment
            py3_df[['CO','T_CO','RH_CO']] = py3_df[['NO2','T_NO2','RH_NO2']].to_numpy()
            py3_df[['NO2','T_NO2','RH_NO2']] = np.nan

        py3_df['CO'] /= 1000 # converting ppb measurements to ppm

        # Python2 Sensors
        # ---------------
        py2_df = _import_and_merge(f'{beacon_folder}/sensirion')

        if py2_df is None:
            print('\t\tNo readable sensirion data - using adafruit data only')
            beacon_df = py3_df
        else:
            # merging python2 and 3 sensor dataframes
            beacon_df = py3_df.merge(right=py2_df,left_index=True,right_index=True,how='outer')

        # getting relevant data only; the copy keeps the column assignment below off of a slice
        beacon_df = beacon_df.loc[start_time:end_time].copy()

        beacon_df['Beacon'] = beacon
        beacon_frames.append(beacon_df)

    if len(beacon_frames) == 0:
        return pd.DataFrame()

    return pd.concat(beacon_frames)

## APS
There is previous code that we can adapt to import APS data.

In [19]:
def process_aps(file, data_dir='../data/calibration'):
    '''
    Imports APS particle count data and adds cumulative PM columns

    Inputs:
        file: string holding the name of the reference data file
        data_dir: string path of the directory that holds the file

    Returns a dataframe indexed by Timestamp that holds the size-bin columns plus PM_C_1, PM_C_2p5, and PM_C_10
    '''
    # the first six rows of the export are skipped so that the column header is read from the seventh
    raw_data = pd.read_csv(os.path.join(data_dir,file),skiprows=6)
    df = raw_data.drop(['Sample #','Aerodynamic Diameter'],axis=1)

    # combining the date and time columns into one timestamp in a single vectorized parse
    df['Timestamp'] = pd.to_datetime(df['Date'] + ' ' + df['Start Time'],format='%m/%d/%y %H:%M:%S')
    df = df.set_index(['Timestamp'])

    # keeping the first 54 columns (date, time, and size bins) and then dropping the two text columns
    # NOTE(review): assumes any columns past the 54th are not size bins - confirm against the export format
    df = df.iloc[:,:54]
    df = df.drop(['Date','Start Time'],axis=1)

    for column in df.columns:
        df[column] = pd.to_numeric(df[column])

    # The sums are taken from a snapshot of the bin columns so that a PM column added earlier in
    # the loop can never be counted in a later sum when the file has fewer bins than a cutoff.
    # The first 10 bins run from <0.523 to 0.965; the 23 and 42 bin cutoffs presumably correspond
    # to 2.5 and 10 um - TODO confirm against the bin edges.
    # NOTE(review): the factor of 1000 is carried over unchanged - confirm the intended units.
    bins = df.copy()
    for label, n_bins in (('PM_C_1',10),('PM_C_2p5',23),('PM_C_10',42)):
        df[label] = bins.iloc[:,:n_bins].sum(axis=1)*1000

    return df

## Licor
CO2 data is gathered in a simple, two-column format.

## CAPS

## NO

## Background 2
Beacon data from the second background calibration event

In [17]:
# Combine the raw data from the listed beacons, limited to 11:10-12:20 on 2020-12-28
beacon_b2 = process_beacon(beacon_list=[1, 4, 5, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 25, 26, 28, 29, 30, 32, 34, 36, 40, 44, 46, 48],
                           start_time=datetime(2020,12,28,11,10,0), end_time=datetime(2020,12,28,12,20,0))

Processing beacon data...
	Reading for beacon:
	01
	04
		Issue encountered while importing ../data/raw/bpeace2/beacon/B04/adafruit/b04_2020-03-22.csv, skipping...
	05
	09
	10
	12
		Issue encountered while importing ../data/raw/bpeace2/beacon/B12/adafruit/b12_2020-12-28.csv, skipping...
	13
	14
		Issue encountered while importing ../data/raw/bpeace2/beacon/B14/sensirion/b14_2020-03-22.csv, skipping...
	15
	16
	17
	18
	19
	21
	22
	23
	24
	25
	26
	28
		No NO2 sensor - removing values
	29
		No NO2 sensor - removing values
	30
		No NO2 sensor - removing values
	32
		No NO2 sensor - removing values
	34
		No NO2 sensor - removing values
	36
		No NO2 sensor - removing values
	40
		No NO2 sensor - removing values
	44
		No NO2 sensor - removing values
	46
		No NO2 sensor - removing values
	48
		No NO2 sensor - removing values


In [18]:
# Preview the first rows of the combined beacon data
beacon_b2.head()

Unnamed: 0_level_0,TVOC,eCO2,Lux,Visible,Infrared,NO2,T_NO2,RH_NO2,CO,T_CO,...,PM_N_0p5,PM_N_1,PM_N_2p5,PM_N_4,PM_N_10,PM_C_1,PM_C_2p5,PM_C_4,PM_C_10,Beacon
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-12-28 11:10:00,,,,,,,,,,,...,,,,,,,,,,1
2020-12-28 11:12:00,,,,,,,,,,,...,,,,,,,,,,1
2020-12-28 11:14:00,,,,,,,,,,,...,,,,,,,,,,1
2020-12-28 11:16:00,,,,,,,,,,,...,,,,,,,,,,1
2020-12-28 11:18:00,0.0,400.0,390.54576,4777812.5,72.9,4018.325,24.0,43.0,1363.75925,24.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1


In [24]:
# Import the APS particle counts from the pm_count_12282020.csv file and preview the first rows
aps_b2 = process_aps(file="pm_count_12282020.csv")
aps_b2.head()

Unnamed: 0_level_0,<0.523,0.542,0.583,0.626,0.673,0.723,0.777,0.835,0.898,0.965,...,12.86,13.82,14.86,15.96,17.15,18.43,19.81,PM_C_1,PM_C_2p5,PM_C_10
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-12-28 11:08:00,2888,245,322,429,436,438,384,272,232,154,...,0,0,0,0,0,0,0,5800000,6361000,6396000
2020-12-28 11:08:30,2808,234,327,421,464,442,365,291,217,152,...,0,0,0,0,0,0,0,5721000,6288000,6328000
2020-12-28 11:09:00,2820,236,324,402,450,408,376,298,232,173,...,0,0,0,0,0,0,0,5719000,6284000,6322000
2020-12-28 11:09:30,2908,229,333,401,493,413,380,322,256,190,...,0,0,0,0,0,0,0,5925000,6502000,6548000
2020-12-28 11:10:00,2860,224,323,344,450,438,349,265,221,168,...,0,1,0,0,0,0,0,5642000,6224000,6276000
