In [1]:
import plotly.graph_objects as go
import plotly
import plotly.io as pio
import pandas as pd
import numpy as np
import io
import os
import requests
from itertools import accumulate
import datetime
from scipy.stats import pearsonr
from scipy.stats import spearmanr

In [2]:
def get_url(location_name, timeout=30):
    '''Download one location's CSV from the RadWatch DoseNet server.

    Parameters
    ----------
    location_name : str
        File stem on the server (for example ``'etch_roof_adc'``); the
        ``'.csv'`` suffix is appended here.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).

    Returns
    -------
    pandas.DataFrame
        The CSV body parsed with ``pd.read_csv``.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status code.
    requests.RequestException
        On connection failure or timeout.
    '''
    url = 'https://radwatch.berkeley.edu/test/dosenet/' + location_name + '.csv'

    # Browser-like request headers.
    # NOTE(review): presumably the server rejects default client agents - confirm.
    header = {
      "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.75 Safari/537.36",
      "X-Requested-With": "XMLHttpRequest"
    }

    response = requests.get(url, headers=header, timeout=timeout)
    # Fail loudly on 404/500 instead of handing an HTML error page to read_csv.
    response.raise_for_status()

    raw_cpm_data = pd.read_csv(io.StringIO(response.text))
    return raw_cpm_data

In [3]:
def compare_length(dataset1, dataset2):
    '''Return the bounds of the value range shared by two collections.

    Parameters
    ----------
    dataset1, dataset2 : iterable of comparable values
        For example two columns of unix timestamps.

    Returns
    -------
    tuple
        ``(fmin, fmax)`` where ``fmin`` is the larger of the two minima and
        ``fmax`` is the smaller of the two maxima.  When the two ranges do
        not overlap, ``fmin`` is greater than ``fmax``.

    Raises
    ------
    ValueError
        If either collection is empty (raised by the built-in ``min``/``max``).
    '''
    # The shared window starts at the later start and ends at the earlier end.
    fmin = max(min(dataset1), min(dataset2))
    fmax = min(max(dataset1), max(dataset2))
    return fmin, fmax

In [4]:
# Download the Exploratorium ADC (CO2) sensor log as a DataFrame.
explor_data = get_url(location_name='exploratorium_adc')

In [5]:
# Display the downloaded frame for a quick visual check of columns and values.
explor_data

Unnamed: 0,deviceTime_utc,deviceTime_local,deviceTime_unix,co2_ppm,noise,error_flag
0,2020-02-25 02:06:29+00:00,2020-02-24 18:06:29-08:00,1.582596e+09,763.73,1.38,
1,2020-02-25 02:01:29+00:00,2020-02-24 18:01:29-08:00,1.582596e+09,720.23,1.41,
2,2020-02-25 01:56:34+00:00,2020-02-24 17:56:34-08:00,1.582596e+09,749.06,1.40,
3,2020-02-25 01:56:29+00:00,2020-02-24 17:56:29-08:00,1.582596e+09,749.06,1.40,
4,2020-02-25 01:51:34+00:00,2020-02-24 17:51:34-08:00,1.582595e+09,750.13,1.38,
...,...,...,...,...,...,...
123373,2020-06-16 23:41:22+0000,2020-06-16 16:41:22-0700,1.592351e+09,625.92,1.32,0.0
123374,2020-06-16 23:46:22+0000,2020-06-16 16:46:22-0700,1.592351e+09,626.40,1.34,0.0
123375,2020-06-16 23:51:22+0000,2020-06-16 16:51:22-0700,1.592351e+09,630.16,1.33,0.0
123376,2020-06-16 23:56:22+0000,2020-06-16 16:56:22-0700,1.592352e+09,624.89,1.35,0.0


In [6]:
# Reorder the rows in place by the UTC timestamp column.
explor_data.sort_values(by='deviceTime_utc', inplace=True)

In [7]:
# Index labels of the rows whose CO2 reading is 4000 ppm or higher.
indexNames = explor_data.loc[explor_data['co2_ppm'] >= 4000].index

In [8]:
# Remove the rows selected in indexNames from the frame, in place.
explor_data.drop(index=indexNames, inplace=True)

In [9]:
# Load the BEACON reference export; the path is relative to the working directory.
beacon = pd.read_csv(filepath_or_buffer='testing_explor_co2.csv')

In [10]:
# Display the BEACON frame for a quick visual check of columns and values.
beacon

Unnamed: 0,local_timestamp,datetime,node_id,pm_pct_fs,epoch,julian_day,CO2_QC_level,PM_QC_level,CO2_ppm,PM_ug/m3
0,2019-10-06 00:00:00,2019-10-06 07:00:00,48,1.039950,1.570345e+09,279.291667,1a,2,454.0,14.299968
1,2019-10-06 01:00:00,2019-10-06 08:00:00,48,1.306160,1.570349e+09,279.333333,1a,2,461.9,19.366831
2,2019-10-06 02:00:00,2019-10-06 09:00:00,48,0.604510,1.570352e+09,279.375000,1a,2,439.6,6.178931
3,2019-10-06 03:00:00,2019-10-06 10:00:00,48,0.459710,1.570356e+09,279.416667,1a,2,439.8,3.364179
4,2019-10-06 04:00:00,2019-10-06 11:00:00,48,0.497530,1.570360e+09,279.458333,1a,2,437.1,4.067568
...,...,...,...,...,...,...,...,...,...,...
6997,2020-07-28 06:00:00,2020-07-28 13:00:00,48,1.104736,1.595941e+09,210.541667,1a,1a,410.4,15.769889
6998,2020-07-28 07:00:00,2020-07-28 14:00:00,48,1.201766,1.595945e+09,210.583333,1a,1a,411.5,17.603759
6999,2020-07-28 08:00:00,2020-07-28 15:00:00,48,1.131931,1.595948e+09,210.625000,1a,1a,409.1,16.283887
7000,2020-07-28 09:00:00,2020-07-28 16:00:00,48,0.609216,1.595952e+09,210.666667,1a,1a,408.7,6.404558


In [11]:
# NOTE: everything below is one triple-quoted string expression, so none of the
# plotting code inside it runs; the cell's only effect is to display the
# string's repr. The text is a disabled line chart of explor_data vs. beacon CO2.
"""fig = go.Figure()
fig.add_scatter(
        x = explor_data['deviceTime_local'],
        y = explor_data['co2_ppm'],
        name = 'explor')
fig.add_scatter(
        x = beacon['local_timestamp'],
        y = beacon['CO2_ppm'],
        name = 'beacon')
fig.update_layout(plot_bgcolor='white',width=1000, height=450, title = 'Line chart')
fig.update_yaxes(title="air quality",titlefont=dict(color='black', size=20),
                showgrid=False,tickcolor='black',
                tickfont=dict(color='black', size=16))
fig.update_xaxes(title="Time (local) ",titlefont=dict(color='black', size=20),
                linecolor='black',tickfont=dict(color='black',size=12))
fig.update_layout(legend_orientation="h",
                legend=dict(x=0,y=-.2, font=dict(size=13)))
fig.update_layout(barmode='group')
    
fig"""

'fig = go.Figure()\nfig.add_scatter(\n        x = explor_data[\'deviceTime_local\'],\n        y = explor_data[\'co2_ppm\'],\n        name = \'explor\')\nfig.add_scatter(\n        x = beacon[\'local_timestamp\'],\n        y = beacon[\'CO2_ppm\'],\n        name = \'beacon\')\nfig.update_layout(plot_bgcolor=\'white\',width=1000, height=450, title = \'Line chart\')\nfig.update_yaxes(title="air quality",titlefont=dict(color=\'black\', size=20),\n                showgrid=False,tickcolor=\'black\',\n                tickfont=dict(color=\'black\', size=16))\nfig.update_xaxes(title="Time (local) ",titlefont=dict(color=\'black\', size=20),\n                linecolor=\'black\',tickfont=dict(color=\'black\',size=12))\nfig.update_layout(legend_orientation="h",\n                legend=dict(x=0,y=-.2, font=dict(size=13)))\nfig.update_layout(barmode=\'group\')\n    \nfig'

In [12]:
# Download the Etcheverry roof ADC (CO2) sensor log as a DataFrame.
etch_roof = get_url(location_name='etch_roof_adc')

In [13]:
# Display the raw download for a quick visual check of columns and values.
etch_roof

Unnamed: 0,deviceTime_utc,deviceTime_local,deviceTime_unix,co2_ppm,noise,error_flag
0,2020-02-24 23:15:40+00:00,2020-02-24 15:15:40-08:00,1582586140,489.59,3.28,
1,2020-02-24 23:10:40+00:00,2020-02-24 15:10:40-08:00,1582585840,504.69,3.32,
2,2020-02-24 23:05:40+00:00,2020-02-24 15:05:40-08:00,1582585540,502.81,3.27,
3,2020-02-24 23:00:45+00:00,2020-02-24 15:00:45-08:00,1582585245,508.80,3.18,
4,2020-02-24 23:00:40+00:00,2020-02-24 15:00:40-08:00,1582585240,508.80,3.18,
...,...,...,...,...,...,...
145835,2017-11-17 19:00:49+00:00,2017-11-17 11:00:49-08:00,1510945249,563.10,3.04,
145836,2017-11-17 18:55:49+00:00,2017-11-17 10:55:49-08:00,1510944949,563.72,3.04,
145837,2017-11-17 18:50:49+00:00,2017-11-17 10:50:49-08:00,1510944649,572.51,2.98,
145838,2017-11-17 18:45:49+00:00,2017-11-17 10:45:49-08:00,1510944349,580.67,2.91,


In [14]:
# Display a copy sorted by UTC timestamp. The result is not assigned and
# inplace is not set, so etch_roof itself keeps its original row order.
etch_roof.sort_values(by='deviceTime_utc')

Unnamed: 0,deviceTime_utc,deviceTime_local,deviceTime_unix,co2_ppm,noise,error_flag
145839,2017-11-17 18:40:49+00:00,2017-11-17 10:40:49-08:00,1510944049,544.68,2.99,
145838,2017-11-17 18:45:49+00:00,2017-11-17 10:45:49-08:00,1510944349,580.67,2.91,
145837,2017-11-17 18:50:49+00:00,2017-11-17 10:50:49-08:00,1510944649,572.51,2.98,
145836,2017-11-17 18:55:49+00:00,2017-11-17 10:55:49-08:00,1510944949,563.72,3.04,
145835,2017-11-17 19:00:49+00:00,2017-11-17 11:00:49-08:00,1510945249,563.10,3.04,
...,...,...,...,...,...,...
4,2020-02-24 23:00:40+00:00,2020-02-24 15:00:40-08:00,1582585240,508.80,3.18,
3,2020-02-24 23:00:45+00:00,2020-02-24 15:00:45-08:00,1582585245,508.80,3.18,
2,2020-02-24 23:05:40+00:00,2020-02-24 15:05:40-08:00,1582585540,502.81,3.27,
1,2020-02-24 23:10:40+00:00,2020-02-24 15:10:40-08:00,1582585840,504.69,3.32,


In [15]:
# Load the BEACON export for the roof node; the path is relative to the working directory.
etch_roof_beacon = pd.read_csv(filepath_or_buffer='etch_roof_beacon.csv')

In [16]:
# Index labels of the rows whose CO2 value equals -999.
# NOTE(review): -999 looks like a missing-value sentinel - confirm against the BEACON docs.
indexNames = etch_roof_beacon.loc[etch_roof_beacon['CO2_ppm'] == -999].index

In [17]:
# Remove the rows selected in indexNames from the frame, in place.
etch_roof_beacon.drop(index=indexNames, inplace=True)

In [18]:
# Drop the listed columns in place. Re-running this cell raises KeyError
# because the columns are already gone.
etch_roof_beacon.drop(
    columns=[
        'local_timestamp',
        'datetime',
        'node_id',
        'pm_pct_fs',
        'julian_day',
        'CO2_QC_level',
        "PM_QC_level",
        'PM_ug/m3',
    ],
    inplace=True,
)

In [19]:
# Drop the listed columns in place. Re-running this cell raises KeyError
# because the columns are already gone.
etch_roof.drop(
    columns=['deviceTime_utc', 'deviceTime_local', 'error_flag', 'noise'],
    inplace=True,
)

In [20]:
# Discard every row that still contains a missing value, in place.
etch_roof_beacon.dropna(axis='index', inplace=True)

In [21]:
# Discard every row that still contains a missing value, in place.
etch_roof.dropna(axis='index', inplace=True)

In [22]:
# Display the cleaned BEACON frame.
etch_roof_beacon

Unnamed: 0,epoch,CO2_ppm
0,1.510906e+09,410.6
1,1.510909e+09,411.0
2,1.510913e+09,411.3
3,1.510916e+09,408.7
4,1.510920e+09,408.2
...,...,...
12146,1.566047e+09,408.9
12147,1.566050e+09,409.1
12149,1.566061e+09,413.3
12151,1.568657e+09,457.3


In [23]:
# Display the cleaned DoseNet frame.
etch_roof

Unnamed: 0,deviceTime_unix,co2_ppm
0,1582586140,489.59
1,1582585840,504.69
2,1582585540,502.81
3,1582585245,508.80
4,1582585240,508.80
...,...,...
145835,1510945249,563.10
145836,1510944949,563.72
145837,1510944649,572.51
145838,1510944349,580.67


In [24]:
# Pull the two timestamp columns. Despite the *_list names, both are
# pandas Series at this point.
d_unix_list, f_unix_list = etch_roof['deviceTime_unix'], etch_roof_beacon['epoch']

In [25]:
# Find the time window covered by both sources, then keep only the
# timestamps that fall inside it (bounds inclusive).
fmin, fmax = compare_length(d_unix_list, f_unix_list)
d_unix_list = [stamp for stamp in d_unix_list if fmin <= stamp <= fmax]
f_unix_list = [stamp for stamp in f_unix_list if fmin <= stamp <= fmax]

In [26]:
# Keep only the rows whose timestamp survived the overlap filter.
etch_roof = etch_roof.loc[etch_roof['deviceTime_unix'].isin(d_unix_list)]
etch_roof_beacon = etch_roof_beacon.loc[etch_roof_beacon['epoch'].isin(f_unix_list)]

In [27]:
# Convert epoch seconds to datetimes and put them in a new first column.
# Despite its name, 'unix_time' holds datetime values, not raw epoch numbers.
# Re-running this cell raises ValueError because the column already exists.
d_list = pd.to_datetime(etch_roof['deviceTime_unix'], unit='s')
etch_roof.insert(0, 'unix_time', d_list)

In [28]:
# Convert epoch seconds to datetimes and put them in a new first column.
# Despite its name, 'unix_time' holds datetime values, not raw epoch numbers.
# Re-running this cell raises ValueError because the column already exists.
f_list = pd.to_datetime(etch_roof_beacon['epoch'], unit='s')
etch_roof_beacon.insert(0, 'unix_time', f_list)

In [29]:
# Renumber both frames from 0 after filtering; the old index labels are discarded.
etch_roof, etch_roof_beacon = (
    frame.reset_index(drop=True) for frame in (etch_roof, etch_roof_beacon)
)

In [30]:
# Append a 'date_time' column to each frame. 'unix_time' was already built with
# pd.to_datetime, so this column repeats the same timestamps.
etch_roof = etch_roof.assign(
    date_time=pd.to_datetime(etch_roof['unix_time'], unit='s')
)
etch_roof_beacon = etch_roof_beacon.assign(
    date_time=pd.to_datetime(etch_roof_beacon['unix_time'], unit='s')
)

In [31]:
# Display the DoseNet frame with its new datetime columns.
etch_roof

Unnamed: 0,unix_time,deviceTime_unix,co2_ppm,date_time
0,2019-09-09 18:55:40,1568055340,465.90,2019-09-09 18:55:40
1,2019-09-09 18:50:40,1568055040,464.63,2019-09-09 18:50:40
2,2019-09-09 18:45:40,1568054740,465.52,2019-09-09 18:45:40
3,2019-09-09 18:40:40,1568054440,461.59,2019-09-09 18:40:40
4,2019-09-09 18:35:40,1568054140,460.69,2019-09-09 18:35:40
...,...,...,...,...
145476,2017-11-17 19:00:49,1510945249,563.10,2017-11-17 19:00:49
145477,2017-11-17 18:55:49,1510944949,563.72,2017-11-17 18:55:49
145478,2017-11-17 18:50:49,1510944649,572.51,2017-11-17 18:50:49
145479,2017-11-17 18:45:49,1510944349,580.67,2017-11-17 18:45:49


In [32]:
# Display the BEACON frame with its new datetime columns.
etch_roof_beacon

Unnamed: 0,unix_time,epoch,CO2_ppm,date_time
0,2017-11-17 19:00:00,1.510945e+09,420.9,2017-11-17 19:00:00
1,2017-11-17 20:00:00,1.510949e+09,421.9,2017-11-17 20:00:00
2,2017-11-17 21:00:00,1.510952e+09,421.7,2017-11-17 21:00:00
3,2017-11-17 23:00:00,1.510960e+09,419.0,2017-11-17 23:00:00
4,2017-11-18 00:00:00,1.510963e+09,413.7,2017-11-18 00:00:00
...,...,...,...,...
9277,2019-08-17 13:00:00,1.566047e+09,408.9,2019-08-17 13:00:00
9278,2019-08-17 14:00:00,1.566050e+09,409.1,2019-08-17 14:00:00
9279,2019-08-17 17:00:00,1.566061e+09,413.3,2019-08-17 17:00:00
9280,2019-09-16 18:00:00,1.568657e+09,457.3,2019-09-16 18:00:00


In [33]:
# Collapse each source to one mean CO2 value per calendar day, keyed by
# 'date_time'. Days with no readings come out as NaN.
# 'D' is the documented daily offset alias; newer pandas deprecates lowercase 'd'.
etch_roof = etch_roof.resample('D', on='date_time')['co2_ppm'].mean().reset_index()
etch_roof_beacon = etch_roof_beacon.resample('D', on='date_time')['CO2_ppm'].mean().reset_index()

In [34]:
# Display the daily-mean DoseNet frame.
etch_roof

Unnamed: 0,date_time,co2_ppm
0,2017-11-17,420.369048
1,2017-11-18,457.382635
2,2017-11-19,462.949514
3,2017-11-20,624.208259
4,2017-11-21,661.599110
...,...,...
657,2019-09-05,574.697292
658,2019-09-06,585.114236
659,2019-09-07,588.940868
660,2019-09-08,577.558658


In [35]:
# Display the daily-mean BEACON frame; the NaN rows are days with no readings.
etch_roof_beacon

Unnamed: 0,date_time,CO2_ppm
0,2017-11-17,420.875000
1,2017-11-18,421.370000
2,2017-11-19,437.280952
3,2017-11-20,445.422727
4,2017-11-21,452.352632
...,...,...
664,2019-09-12,
665,2019-09-13,
666,2019-09-14,
667,2019-09-15,


In [36]:
# Pair the two daily series by date. The join key is named explicitly so the
# result does not depend on which column names the two frames happen to share
# ('co2_ppm' and 'CO2_ppm' differ only by case and must stay separate columns).
all_data = pd.merge(etch_roof, etch_roof_beacon, on='date_time', how='inner')

In [37]:
# Keep only the days on which both sources have a value.
all_data.dropna(axis='index', inplace=True)

In [51]:
# The correlation coefficients shown in the title are computed here, so this
# cell does not depend on a later cell having been run first (that ordering
# raises NameError on Restart & Run All).
pcorr, _pearson_p = pearsonr(all_data['co2_ppm'], all_data['CO2_ppm'])
spcorr, _spearman_p = spearmanr(all_data['co2_ppm'], all_data['CO2_ppm'])
pcorr = round(pcorr, 3)
spcorr = round(spcorr, 3)

# Scatter of daily means: BEACON on x, DoseNet on y.
fig = go.Figure()
fig.add_scatter(
        x = all_data['CO2_ppm'],
        y = all_data['co2_ppm'], mode = 'markers')

fig.update_layout(plot_bgcolor='white',width=1000, height=450, title = 'Beacon VS Dosenet Daily Averages, Pearson r = %s, Spearman r = %s'%(pcorr, spcorr))
# Axis title fonts are set through title.font; the older `titlefont` shortcut is deprecated.
fig.update_yaxes(title=dict(text="Dosenet Results", font=dict(color='black', size=20)),
                showgrid=False,tickcolor='black',
                tickfont=dict(color='black', size=16))
fig.update_xaxes(title=dict(text="BEACON Data ", font=dict(color='black', size=20)),
                linecolor='black',tickfont=dict(color='black',size=12))
fig.update_layout(legend_orientation="h",
                legend=dict(x=0,y=-.2, font=dict(size=13)))
fig.update_layout(barmode='group')

fig


In [50]:
# Correlate the two daily-mean series. Only the coefficient is kept, rounded to
# three decimals; the p-value is discarded into `_`.
pcorr, _ = pearsonr(all_data['co2_ppm'], all_data['CO2_ppm'])
pcorr = round(pcorr, 3)
spcorr, _ = spearmanr(all_data['co2_ppm'], all_data['CO2_ppm'])
spcorr = round(spcorr, 3)
print('Pearson r: %s' %pcorr, 'Spearman r: %s' %spcorr, sep='\n')

Pearson r: 0.23
Spearman r: 0.277


In [48]:
# Side-by-side histograms of the daily means from the two sources.
fig2 = go.Figure()
fig2.add_trace(go.Histogram(
        x = all_data['CO2_ppm'], name = 'beacon'))
# Legend label fixed: it previously read 'doesnet'.
fig2.add_trace(go.Histogram(
        x = all_data['co2_ppm'], name = 'dosenet'))

fig2.update_layout(plot_bgcolor='white',width=1000, height=450, title = 'Beacon VS Dosenet Histogram')
# Axis title fonts are set through title.font; the older `titlefont` shortcut is deprecated.
fig2.update_yaxes(title=dict(text="Counts", font=dict(color='black', size=20)),
                showgrid=False,tickcolor='black',
                tickfont=dict(color='black', size=16))
fig2.update_xaxes(title=dict(text="PPM", font=dict(color='black', size=20)),
                linecolor='black',tickfont=dict(color='black',size=12))
fig2.update_layout(legend_orientation="h",
                legend=dict(x=0,y=-.2, font=dict(size=13)))
# 'group' draws the two histograms next to each other rather than stacked.
fig2.update_layout(barmode='group')

fig2

In [47]:
# Overall mean of the DoseNet daily averages.
print(all_data['co2_ppm'].mean())

529.4049006784105


In [52]:
# Print the kernel's current working directory; relative file paths resolve against it.
# NOTE(review): the saved output exposes a local user path - consider clearing it before sharing.
print(os.getcwd())

C:\Users\Richie Woo\Documents\python-scripts


In [58]:
# Switch the working directory to the output folder so that later relative
# writes land there.
# NOTE(review): this is a hard-coded absolute path on one user's Windows machine,
# so the notebook fails anywhere else. Consider a configurable output directory
# and passing full paths to the writers instead of calling chdir.
os.chdir(r'C:\Users\Richie Woo\Documents\python-scripts\processed_data\co2_graphs')

In [60]:
# Export both figures as standalone HTML files into the current working directory.
fig2.write_html(file='etch_histogram.html')
fig.write_html(file='etch_scatter.html')