In [44]:
from datetime import datetime, timedelta, time
import pandas as pd
import math, pickle
from numpy import mean, std
import matplotlib.pyplot as plt
# stdev 

In [69]:
# NOTE: holidays are not handled. is_lective_day() only checks the weekday,
# so a holiday that falls on Monday-Friday is still treated as lective
# (e.g. 2-21: Carnaval).

def to_datetime(t, f="%Y-%m-%d %H:%M"):
    """
    Turn a string into a datetime object.

    INPUT:
        t: str to parse; any non-string value is handed back untouched
        f: str, strptime format of `t` (default "%Y-%m-%d %H:%M")
    OUTPUT:
        datetime parsed from `t`, or `t` itself when it is not a string
    """
    if not isinstance(t, str):
        return t
    return datetime.strptime(t, f)


def to_text(t, f="%Y-%m-%d %H:%M"):
    """
    Format a datetime as a string.

    INPUT:
        t: datetime (anything exposing `strftime`)
        f: str, strftime format (default "%Y-%m-%d %H:%M")
    OUTPUT:
        str, `t` rendered with format `f`
    """
    text = t.strftime(f)
    return text


def to_unix_time(t):
    """
    Convert a datetime to unix time.

    INPUT:
        t: datetime
    OUTPUT:
        the value of `t.timestamp()`
    """
    seconds = t.timestamp()
    return seconds


def next_days(s, n=1):
    """
    Endless generator that walks forward from `s` in steps of `n` days.

    INPUT:
        s: datetime, or a string accepted by to_datetime()
        n: number of days between two consecutive values
    OUTPUT:
        yields s + n days, s + 2n days, ... (`s` itself is never yielded)
    """
    current = to_datetime(s)
    stride = timedelta(days=n)
    while True:
        current = current + stride
        yield current


def is_lective_day(now):
    """
    Tell whether the datetime `now` falls on a lective weekday.

    Only the weekday is checked (Monday=0 ... Friday=4);
    holidays are not taken into account.
    """
    weekday = now.weekday()
    return weekday >= 0 and weekday < 5


def is_lective_time(now, s_h=8, s_m=0, e_h=14, e_m=40):
    """
    Tell whether the time of day of `now` lies inside the lective window.

    The window goes from (s_h, s_m) to (e_h, e_m) and both ends are
    included. The defaults describe 8:00 to 14:40. Only the time part
    of `now` is looked at; the date is ignored.

    INPUT:
        now: datetime
        s_h, s_m: int, start hour and start minute
        e_h, e_m: int, end hour and end minute
    OUTPUT:
        True or False
    """
    opens, closes = time(s_h, s_m), time(e_h, e_m)
    clock = now.time()
    if clock < opens:
        return False
    return clock <= closes


def is_lective(now):
    """
    Tell whether the datetime `now` is lective: a lective weekday
    (see is_lective_day) inside the default lective hours
    (see is_lective_time).

    INPUT:
        now: datetime
    OUTPUT:
        True or False
    """
    if not is_lective_day(now):
        return False
    return is_lective_time(now)


def get_lective_dates(start, end, step=1):
    """
    Given a start and an end (both dates),
    return the list of lective dates between them (both included),
    walking from `start` in steps of `step` days.

    For instance,
        start = '2020-08-17'
        end   = '2020-08-24'
        step  = 2
    Returns ['2020-08-17', '2020-08-19', '2020-08-21']
    ('2020-08-23' is visited too, but it is a Sunday).

    INPUT:
        start, end: str beginning with a date of the form "%Y-%m-%d";
                    only the first 10 characters are used, so full
                    timestamps such as "2020-08-17 08:00" are accepted
        step: int, greater than 0
    OUTPUT:
        a list of strings ("%Y-%m-%d") that represents lective dates
    RAISES:
        AssertionError if `end` is before `start` or `step` is not positive
    """
    f = "%Y-%m-%d"
    start = to_datetime(start[:10], f)
    end = to_datetime(end[:10], f)

    # Validate the parsed dates, not the raw strings: comparing strings is
    # lexicographic, takes the time-of-day suffix into account and gives the
    # wrong answer for dates that are not zero padded.
    assert end >= start and step > 0

    dates = []
    now = start
    gen = next_days(start, step)
    while now <= end:
        if is_lective_day(now):
            dates.append(to_text(now, f))
        now = next(gen)
    return dates


def get_lective_data(path):
    """
    Read the CSV file at `path` and keep only the lective rows.

    The first 16 characters of column 0 of every row are parsed with
    to_datetime() (default format "%Y-%m-%d %H:%M"); a row is kept
    when is_lective() accepts that moment.

    INPUT:
        path: path of a UTF-8 encoded CSV file
    OUTPUT:
        labels: list with the column names of the file
        data: list with the kept rows (rows of `df.values`)
    """
    with open(path, 'r', encoding="UTF-8") as f:
        df = pd.read_csv(f)

    labels = list(df.columns)
    data = [row for row in df.values if is_lective(to_datetime(row[0][:16]))]
    return labels, data


def plot_and_save(X, Y, stdev, xlabel, ylabel, filename, standard=None):
    """
    Draw Y against X with a shaded band of +/- stdev around it and
    save the figure.

    INPUT:
        X, Y: sequences with the x positions and the values
        stdev: sequence indexed like Y, half-width of the shaded band
        xlabel, ylabel: axis labels
        filename: passed to plt.savefig
        standard: optional reference value; when it is not None a red
                  horizontal line is drawn at that height from X[0] to X[-1]
    OUTPUT:
        None (the current figure is cleared after saving)
    """
    figure = plt.gcf()
    figure.set_size_inches(15, 7)

    plt.plot(X, Y, 'o-')
    if standard is not None:
        plt.hlines(standard, X[0], X[-1], color='r')

    # Lower and upper edges of the band around the line.
    lower = [y - stdev[k] for k, y in enumerate(Y)]
    upper = [y + stdev[k] for k, y in enumerate(Y)]
    plt.fill_between(X, lower, upper, alpha=0.1)

    plt.xlabel(xlabel)
    plt.ylabel(ylabel)

    plt.savefig(filename)
    plt.clf()
    
    
def plot_all(means, standards, filenames, stdevs=None):
    """
    Plot and save one figure per measurement found in `means`.

    The file name of each figure is built from the label: 'DAY ' plus
    its first word, followed (only when that word is 'PM') by its second
    word without dots, e.g. 'PM 2.5 in ug/m3 ...' -> 'DAY PM25'.

    INPUT:
        means: dict {label: {date: value}}
        standards: indexable by label; the reference value handed to
                   plot_and_save() for that label (None draws no line)
        filenames: not used; kept so that existing calls keep working
        stdevs: optional dict shaped like `means` with the standard
                deviation of every date. When omitted the shaded band
                has zero width.
    OUTPUT:
        None
    """
    for key, daily in means.items():
        # x tick text: the date without its first six characters
        # (for "2020-02-05" this leaves "2-05").
        X = [date[6:] for date in daily]
        Y = list(daily.values())

        # plot_and_save() needs one deviation per point. The original call
        # left this argument out, which shifted all the following ones.
        if stdevs is None:
            spread = [0] * len(Y)
        else:
            spread = [stdevs[key][date] for date in daily]

        words = key.split()
        filename = 'DAY ' + words[0] + (words[1].replace('.', '') if words[0] == 'PM' else '')
        plot_and_save(X, Y, spread, 'Date', key, filename, standards[key])
        
def get_mean(data, labels):
    """
    Placeholder, meant to return a list of means for every day.

    INPUT:
        data: {'timestamp':[], x:[], y:[], ...}
        labels: not described by the original notes

    NOTE: not implemented. The function has no statements besides this
    docstring, so calling it returns None.
    """

def find_hora_dia(s, horaris, dies_lectius):
    """
    Locate the timetable slot and the weekday of the timestamp `s`.

    NOTE: a later cell of this notebook defines find_hora_dia again;
    that version returns None instead of (-1, -1) when nothing matches.

    INPUT:
        s: str, timestamp whose first 16 characters follow "%Y-%m-%d %H:%M"
        horaris: sequence of (start, end) pairs of datetime.time;
                 both ends are included and the first matching slot wins
        dies_lectius: collection of accepted weekday numbers (Monday=0)
    OUTPUT:
        (slot index, weekday) on a match, (-1, -1) otherwise
    """
    moment = to_datetime(s[:16])
    clock, weekday = moment.time(), moment.weekday()
    if weekday not in dies_lectius:
        return -1, -1
    for slot, (opens, closes) in enumerate(horaris):
        if opens <= clock <= closes:
            return slot, weekday
    return -1, -1


def get_time_table(data, horaris, dies_lectius, column=1):
    """
    Group the readings of one column by weekday and timetable slot.

    INPUT:
        data: iterable of rows; row[0] is the timestamp string handed to
              find_hora_dia() and row[column] is the reading to store
        horaris: sequence of (start, end) pairs of datetime.time
        dies_lectius: iterable of accepted weekday numbers (Monday=0)
        column: index of the reading inside each row (default 1, the
                column the original code read and commented as humidity)
    OUTPUT:
        time_table, a list of lists of lists:
        time_table[d][h] holds the readings taken on the d-th entry of
        `dies_lectius` during the h-th slot of `horaris`
    """
    day_numbers = list(dies_lectius)
    # Row of the table for each weekday number, so `dies_lectius` does not
    # have to be exactly 0..n-1.
    day_row = {day: position for position, day in enumerate(day_numbers)}

    time_table = [[[] for _ in horaris] for _ in day_numbers]

    for row in data:
        found = find_hora_dia(row[0], horaris, dies_lectius)
        # Both versions of find_hora_dia() in this notebook are supported:
        # one reports "no slot" as None, the other one as (-1, -1).
        if found is None:
            continue
        hora, dia = found
        if hora == -1 or dia == -1:
            # Skip this row only; the rows after it may still match.
            continue
        time_table[day_row[dia]][hora].append(row[column])

    return time_table

In [47]:
# CSV file analysed by the rest of the notebook.
PATH = "2020_2to3.csv"
# labels: column names of the file; data: only the rows taken in lective time.
labels, data = get_lective_data(PATH)
# Lective dates from the first kept row to the last one (column 0 is the
# timestamp). Assumes the rows are in chronological order - TODO confirm.
dates = get_lective_dates(data[0][0], data[-1][0])

In [49]:
# Mean and standard deviation of every measurement, day by day.
# Column 0 of each row is the timestamp, so the measurements start at column 1.
means = {label: {date: [] for date in dates} for label in labels[1:]}
stdev = {label: {date: [] for date in dates} for label in labels[1:]}

# First pass: means[label][day] collects the readings of that day (NaN skipped).
for row in data:
    day = row[0][:10]
    for column, label in enumerate(means, start=1):
        reading = row[column]
        if not math.isnan(reading):
            means[label][day].append(reading)

# Second pass: replace every list of readings with its mean and keep the
# standard deviation of the same readings in `stdev`.
for label in means:
    for day in dates:
        readings = means[label][day]
        means[label][day] = mean(readings)
        stdev[label][day] = std(readings)
means

{'humidity in % (SHT31 - Humidity)': {'2020-02-05': 35.456875,
  '2020-02-06': 42.36057644110276,
  '2020-02-07': 47.40356608478803,
  '2020-02-10': 47.019070351758785,
  '2020-02-11': 50.822330827067674,
  '2020-02-12': 52.881077694235586,
  '2020-02-13': 51.671175000000005,
  '2020-02-14': 54.787580174927115,
  '2020-02-17': 55.64457286432161,
  '2020-02-18': 44.77129999999999,
  '2020-02-19': 42.888825,
  '2020-02-20': 44.349373433583956,
  '2020-02-21': 43.97379396984925,
  '2020-02-24': 42.18912280701754,
  '2020-02-25': 49.01445,
  '2020-02-26': 39.318200000000004,
  '2020-02-27': 42.38399999999999,
  '2020-02-28': 48.69205,
  '2020-03-02': 39.619475,
  '2020-03-03': 34.79685039370079,
  '2020-03-04': 39.85438596491228,
  '2020-03-05': 44.481654135338346,
  '2020-03-06': 34.830100250626565,
  '2020-03-09': 42.6421,
  '2020-03-10': 37.096040100250626,
  '2020-03-11': 47.431027568922296,
  '2020-03-12': 49.03609022556391,
  '2020-03-13': 49.11846153846154},
 'air temperature in ºC 

In [73]:
# Group the daily means into consecutive windows of 8 days.
# Each window keeps the list of daily means that fall inside it;
# the lists are not averaged in this cell.

means_day = means

now = "2020-02-05"
end = "2020-03-13"
f = "%Y-%m-%d"
gen8 = next_days(datetime.strptime(now, f), 8)

# First day of every window. The dates are "%Y-%m-%d" strings, so comparing
# them as text follows the calendar order.
tmp_dates = []
while now <= end:
    tmp_dates.append(now)
    now = next(gen8).strftime(f)

means_8days = {key: {date: [] for date in tmp_dates} for key in means}

i = j = 0
for label, daily in means.items():
    for date, value in daily.items():
        following = i + 1
        # Move on to the next window once `date` reaches its first day.
        if following < len(tmp_dates) and tmp_dates[following] <= date:
            i = following
        means_8days[label][tmp_dates[i]].append(value)
    i = 0
means_8days

In [70]:
# to plot the mean graphics (with standard deviation) with standard line (if there are any)

# Measurements to plot (keys of `means` / `stdev`).
keys = [
    'eCO2 in ppm (AMS CCS811 - eCO2)',
    'TVOC in ppb (AMS CCS811 - TVOC)',
    'PM 1 in ug/m3 (PMS5003_AVG-PM1)',
    'PM 10 in ug/m3 (PMS5003_AVG-PM10)',
    'PM 2.5 in ug/m3 (PMS5003_AVG-PM2.5)'
]

# Reference value drawn as a horizontal line for each key (None: no line).
standards = [
    1000,
    150, # 0.3/2 * 1000 -> very good limit
    None,
    50,
    25
]

# Short name of each key, used in the file name of its figure.
names = [
    'eCO2',
    'TVOC',
    'PM1',
    'PM10',
    'PM25'
]


for key, standard, name in zip(keys, standards, names):
    X = []
    Y = []
    # Do not call this list `std`: that name is the numpy function imported
    # at the top of the notebook and used by the cell that computes the
    # daily means, which would stop working when re-run after this one.
    day_stdev = []
    for date in means[key].keys():
        X.append(date[6:])
        Y.append(means[key][date])
        day_stdev.append(stdev[key][date])
    filename = 'DAY ' + name + ' (with standard)'
    plot_and_save(X, Y, day_stdev, 'Date', key, filename, standard)

<Figure size 1080x504 with 0 Axes>

In [101]:
# to get the time_table means
def find_hora_dia(s, horaris, dies_lectius):
    """
    Locate the timetable slot and the weekday of the timestamp `s`.

    NOTE: this definition replaces the find_hora_dia defined earlier in
    the notebook, which returns (-1, -1) instead of None when nothing
    matches.

    INPUT:
        s: str, timestamp whose first 16 characters follow "%Y-%m-%d %H:%M"
        horaris: sequence of (start, end) pairs of datetime.time;
                 both ends are included and the first matching slot wins
        dies_lectius: collection of accepted weekday numbers (Monday=0)
    OUTPUT:
        (slot index, weekday) on a match, None otherwise
    """
    moment = to_datetime(s[:16])
    clock, weekday = moment.time(), moment.weekday()
    if weekday not in dies_lectius:
        return None
    for slot, (opens, closes) in enumerate(horaris):
        if opens <= clock <= closes:
            return slot, weekday
    return None

# Measurements that get a timetable.
keys = [
    'eCO2 in ppm (AMS CCS811 - eCO2)',
    'TVOC in ppb (AMS CCS811 - TVOC)',
    'PM 1 in ug/m3 (PMS5003_AVG-PM1)',
    'PM 10 in ug/m3 (PMS5003_AVG-PM10)',
    'PM 2.5 in ug/m3 (PMS5003_AVG-PM2.5)'
]

# Timetable slots as (start, end) pairs.
# Nothing is scheduled from 10:15 to 10:45 nor from 13:00 to 13:10.
horaris = [
    (time(8, 0), time(8, 45)),
    (time(8, 45), time(9, 30)),
    (time(9, 30), time(10, 15)),
    (time(10, 45), time(11, 30)),
    (time(11, 30), time(12, 15)),
    (time(12, 15), time(13, 0)),
    (time(13, 10), time(13, 55)),
    (time(13, 55), time(14, 40)),
]

# Weekday numbers from Monday (0) to Friday (4).
dies_lectius = [0, 1, 2, 3, 4]

# time_table[key][slot][weekday] starts as an empty list of readings;
# every cell is a list of its own.
time_table = {
    key: [[[] for _ in dies_lectius] for _ in horaris]
    for key in keys
}

time_table

{'eCO2 in ppm (AMS CCS811 - eCO2)': [[[], [], [], [], []],
  [[], [], [], [], []],
  [[], [], [], [], []],
  [[], [], [], [], []],
  [[], [], [], [], []],
  [[], [], [], [], []],
  [[], [], [], [], []],
  [[], [], [], [], []]],
 'TVOC in ppb (AMS CCS811 - TVOC)': [[[], [], [], [], []],
  [[], [], [], [], []],
  [[], [], [], [], []],
  [[], [], [], [], []],
  [[], [], [], [], []],
  [[], [], [], [], []],
  [[], [], [], [], []],
  [[], [], [], [], []]],
 'PM 1 in ug/m3 (PMS5003_AVG-PM1)': [[[], [], [], [], []],
  [[], [], [], [], []],
  [[], [], [], [], []],
  [[], [], [], [], []],
  [[], [], [], [], []],
  [[], [], [], [], []],
  [[], [], [], [], []],
  [[], [], [], [], []]],
 'PM 10 in ug/m3 (PMS5003_AVG-PM10)': [[[], [], [], [], []],
  [[], [], [], [], []],
  [[], [], [], [], []],
  [[], [], [], [], []],
  [[], [], [], [], []],
  [[], [], [], [], []],
  [[], [], [], [], []],
  [[], [], [], [], []]],
 'PM 2.5 in ug/m3 (PMS5003_AVG-PM2.5)': [[[], [], [], [], []],
  [[], [], [], [], []],

In [103]:
# Fill the timetable with the readings of every lective slot.

# Position of each measurement inside a row, looked up by column name
# (`labels` holds the column names of the rows in `data`). This replaces
# hard-coded column numbers, which silently read the wrong column as soon
# as the layout of the CSV changes.
columns = [labels.index(key) for key in keys]

for row in data:
    pack = find_hora_dia(row[0], horaris, dies_lectius)
    # print('date:', row[0], 'pack', pack)
    if pack is None:
        # Outside every timetable slot (or not a lective weekday).
        continue
    hora, dia = pack
    for key, column in zip(keys, columns):
        reading = row[column]
        if not math.isnan(reading):
            time_table[key][hora][dia].append(reading)
time_table

{'eCO2 in ppm (AMS CCS811 - eCO2)': [[[744.0,
    744.0,
    752.0,
    749.0,
    761.0,
    761.0,
    770.0,
    795.0,
    770.0,
    783.0,
    793.0,
    783.0,
    802.0,
    791.0,
    791.0,
    802.0,
    798.0,
    785.0,
    785.0,
    798.0,
    776.0,
    787.0,
    798.0,
    785.0,
    785.0,
    752.0,
    770.0,
    770.0,
    770.0,
    761.0,
    761.0,
    761.0,
    752.0,
    763.0,
    752.0,
    752.0,
    748.0,
    748.0,
    748.0,
    748.0,
    748.0,
    748.0,
    737.0,
    735.0,
    735.0,
    735.0,
    1093.0,
    1032.0,
    1044.0,
    1056.0,
    1056.0,
    1068.0,
    1056.0,
    1068.0,
    1081.0,
    1093.0,
    1113.0,
    1118.0,
    1134.0,
    1150.0,
    1163.0,
    1184.0,
    1184.0,
    1184.0,
    1181.0,
    1181.0,
    1198.0,
    1195.0,
    1212.0,
    1212.0,
    1212.0,
    1212.0,
    1231.0,
    1231.0,
    1231.0,
    1245.0,
    1245.0,
    1245.0,
    1313.0,
    1299.0,
    1302.0,
    1285.0,
    1285.0,
    1274.0,
   

In [105]:
# Replace, in place, the list of readings of every (slot, weekday) cell
# with the mean of those readings.
for key in keys:
    table = time_table[key]
    for slot in range(len(horaris)):
        cells = table[slot]
        for weekday in range(len(dies_lectius)):
            cells[weekday] = mean(cells[weekday])
time_table

{'eCO2 in ppm (AMS CCS811 - eCO2)': [[1062.8942731277532,
   1190.3508771929824,
   1525.1106194690265,
   1258.261992619926,
   1225.345864661654],
  [1106.3899082568807,
   1113.2981651376147,
   1409.5156950672647,
   1282.0981132075472,
   1265.8868778280544],
  [1007.0675675675676,
   1072.099099099099,
   1249.0452488687783,
   1177.314393939394,
   1100.9772727272727],
  [933.7955555555556,
   1034.9144144144145,
   979.4444444444445,
   1051.6531365313654,
   997.4559585492228],
  [891.8045454545454,
   895.45197740113,
   901.6541353383459,
   1122.6654135338347,
   922.2487046632124],
  [860.131221719457,
   772.2134831460675,
   867.2528301886792,
   981.996282527881,
   807.7747747747748],
  [883.7300884955753,
   757.4756756756757,
   934.7728937728938,
   864.6802973977696,
   960.4254385964912],
  [715.5739910313902,
   765.4215246636771,
   899.2621722846442,
   786.4719101123595,
   856.8603603603603]],
 'TVOC in ppb (AMS CCS811 - TVOC)': [[105.77973568281938,
   143.1

In [106]:
# Round, in place, every (slot, weekday) value of the timetable to 1 decimal.
for key in keys:
    table = time_table[key]
    for slot in range(len(horaris)):
        cells = table[slot]
        for weekday in range(len(dies_lectius)):
            cells[weekday] = round(cells[weekday], 1)

In [107]:
# Display the table, indexed as time_table[key][slot][weekday]
# (values are rounded once the averaging and rounding cells have run).
time_table

{'eCO2 in ppm (AMS CCS811 - eCO2)': [[1062.9, 1190.4, 1525.1, 1258.3, 1225.3],
  [1106.4, 1113.3, 1409.5, 1282.1, 1265.9],
  [1007.1, 1072.1, 1249.0, 1177.3, 1101.0],
  [933.8, 1034.9, 979.4, 1051.7, 997.5],
  [891.8, 895.5, 901.7, 1122.7, 922.2],
  [860.1, 772.2, 867.3, 982.0, 807.8],
  [883.7, 757.5, 934.8, 864.7, 960.4],
  [715.6, 765.4, 899.3, 786.5, 856.9]],
 'TVOC in ppb (AMS CCS811 - TVOC)': [[105.8, 143.1, 288.9, 196.9, 130.5],
  [113.8, 124.1, 210.5, 172.9, 149.3],
  [99.8, 108.5, 143.6, 141.3, 112.8],
  [90.7, 96.6, 88.4, 108.5, 91.1],
  [81.2, 75.1, 75.9, 136.7, 79.0],
  [70.2, 56.3, 70.7, 92.5, 61.7],
  [73.2, 54.0, 82.2, 70.2, 84.9],
  [47.6, 55.2, 76.0, 58.4, 69.5]],
 'PM 1 in ug/m3 (PMS5003_AVG-PM1)': [[14.7, 10.9, 7.9, 10.4, 10.0],
  [16.0, 8.1, 4.5, 10.1, 10.5],
  [24.2, 12.6, 9.8, 9.6, 12.1],
  [9.4, 13.6, 11.4, 9.4, 14.4],
  [11.6, 14.7, 7.1, 17.3, 10.6],
  [11.8, 10.8, 11.7, 20.9, 7.7],
  [13.2, 7.8, 6.3, 18.2, 12.3],
  [9.9, 7.5, 5.0, 18.1, 9.3]],
 'PM 10 in ug/m3 