In [None]:
# imports
import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as plticker
import matplotlib.ticker as ticker
import matplotlib.dates as mdates
from datetime import datetime
import sys
import seaborn as sns
import calplot
import csv

In [None]:
# Progress marker printed when this cell runs, before any data is read.
print("Starting data validation...")

In [None]:
# Set the Agg backend's path chunk size. Per the matplotlib rcParams
# documentation a non-zero value lets very long lines be rendered in pieces,
# which avoids rendering failures on large data sets.
import matplotlib as mpl
mpl.rcParams.update({'agg.path.chunksize': 10000})

In [None]:
print("Reading data files")
path_plus = "data/plus/"
path_regular = "data/regular/"

# glob.glob returns matches in arbitrary (filesystem-dependent) order.
# Sorting keeps the load order -- and therefore the order in which devices
# are later drawn and listed in the legend -- the same on every run.
files_plus = sorted(glob.glob(path_plus + "*.csv"))
files_regular = sorted(glob.glob(path_regular + "*.csv"))

In [None]:
# Load every discovered CSV into its own DataFrame. Each list holds one
# frame per file, in the same order as the corresponding file list.
df_plus = [pd.read_csv(csv_path) for csv_path in files_plus]
df_regular = [pd.read_csv(csv_path) for csv_path in files_regular]

In [None]:
# Outside temperature / humidity reference data, read from fixed CSV paths.
actual_temp_file = "data/actual_temp.csv"
actual_humidity_file = "data/actual_humidity.csv"

df_actual_temp, df_actual_humidity = (
    pd.read_csv(csv_path) for csv_path in (actual_temp_file, actual_humidity_file)
)

In [None]:
def plot_chart(df, start_date, end_date, fig, ax, target_column='temperature', label="NONE"):
    """Draw one frame's readings for a date range as a line on ``ax``.

    The ``date_time`` column is parsed to timestamps, rows outside
    ``[start_date, end_date]`` (both bounds inclusive) are dropped, and the
    remaining ``target_column`` values are handed to ``sns.lineplot`` with the
    calendar date of each reading as the x value.

    The caller's frame is not modified. The timestamps are attached to a
    temporary series instead of overwriting ``df['date_time']`` and
    ``df.index``; replacing the index made later integer lookups on the same
    frame (``df['rpi_id'][0]``) depend on pandas' deprecated positional
    fallback. The column is therefore parsed again on every call.

    Parameters
    ----------
    df : pandas.DataFrame
        Readings containing a ``date_time`` column and ``target_column``.
    start_date, end_date : str or datetime-like
        Inclusive bounds compared against the parsed timestamps.
    fig : object
        Not used by this function; kept so existing calls keep working.
    ax : matplotlib axes
        Axes passed on to ``sns.lineplot``.
    target_column : str
        Name of the column plotted on the y axis.
    label : str
        Legend label passed on to ``sns.lineplot``.
    """
    timestamps = pd.DatetimeIndex(pd.to_datetime(df['date_time']))
    # Same shape the plot received before: values indexed by timestamp and
    # named after the column, but built without altering ``df``.
    readings = pd.Series(df[target_column].to_numpy(), index=timestamps, name=target_column)

    in_range = (timestamps >= start_date) & (timestamps <= end_date)
    readings = readings[in_range]
    sns.lineplot(x=readings.index.date, y=readings, ax=ax, label=label)

In [None]:
# Device ids included in the plots.
# (Use `target_rpi_id = [30]` to look at device 30 on its own.)
target_rpi_id = [20, 21, 37, 39, 41, 45, 50]

# Legend text per device, keyed by the device id as a string.
label_dict = {
    str(device_id): "Device #{}: {}".format(device_id, location)
    for device_id, location in (
        (20, "House B - Bedroom"),
        (21, "House B - Kitchen"),
        (22, "House C - Room B"),
        (23, "House C - Room A"),
        (30, "House C - Room A"),
        (37, "House A - Guest Room"),
        (39, "House A - Kitchen"),
        (41, "House A - Guest Room"),
        (45, "House A - Living Room"),
        (50, "House A - Master Bedroom"),
    )
}

# Plotted period and the spacing, in days, between x-axis ticks.
# Alternative preset: '2022-5-31 00:00:00' to '2022-6-30 23:59:59' with day_interval = 5.
start_date, end_date = '2021-9-15 00:00:00', '2021-12-31 23:59:59'
day_interval = 15

# Font size used for the legend, axis labels and tick labels.
text_size = 16

# Measurements to chart (use ['humidity'] for humidity only).
target_columns = ['temperature', 'humidity']

In [None]:
# One figure per measurement: a line for every selected device plus the
# outside reference series, saved as output/4/validate_<measurement>.png.
output_path = 'output/4/'

# Reference frame, legend label and y-axis label for each measurement.
# The y label used to be overwritten with 'Relative Humidity (%)' for every
# figure, which mislabelled the temperature chart.
reference_by_column = {
    'temperature': (df_actual_temp, 'Actual Outside Temperature', 'Degree Celsius'),
    'humidity': (df_actual_humidity, 'Actual Outside Humidity', 'Relative Humidity (%)'),
}

for target_column in target_columns:
    fig, ax = plt.subplots(figsize=(18, 12))
    ax.set_title(target_column.capitalize(), fontsize=20)

    for df in df_plus + df_regular:
        if df.empty:
            # A file without rows has no device id to read and nothing to plot.
            continue
        # Positional lookup of the first row, independent of the frame's index type.
        rpi_id = df['rpi_id'].iloc[0]
        if rpi_id in target_rpi_id:
            label = label_dict.get(str(rpi_id))
            plot_chart(df, start_date, end_date, fig, ax, target_column, label)

    # Measurements without a reference entry are labelled with their column name.
    y_label = target_column
    if target_column in reference_by_column:
        reference, reference_label, y_label = reference_by_column[target_column]
        # Filter on a date-indexed copy so the reference frame itself keeps its index.
        reference = reference.set_index(pd.DatetimeIndex(pd.to_datetime(reference['Date'])))
        date_mask = (reference.index >= start_date) & (reference.index <= end_date)
        df_ranged = reference.loc[date_mask]
        sns.lineplot(x=df_ranged.index.date, y=df_ranged['Avg'], ax=ax,
                     label=reference_label, linewidth=4)

    ax.legend(fontsize=text_size)
    ax.set_ylabel(y_label, fontsize=text_size)
    ax.set_xlabel(' ', fontsize=text_size)
    ax.tick_params(axis='both', labelsize=text_size)

    # One labelled tick every `day_interval` days.
    ax.xaxis.set_major_locator(mdates.DayLocator(interval=day_interval))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
    fig.autofmt_xdate()

    fig.tight_layout()
    file_name = output_path + 'validate_' + target_column + '.png'
    fig.savefig(file_name, dpi=300)
    # Release the figure before the next measurement is drawn.
    plt.close(fig)


In [None]:
def plot_day_chart(df, start_date, end_date, fig, ax, target_column='light', label="NONE"):
    """Draw one frame's readings against hour of day as a line on ``ax``.

    The ``date_time`` column is parsed to timestamps, rows outside
    ``[start_date, end_date]`` (both bounds inclusive) are dropped, and the
    remaining ``target_column`` values are handed to ``sns.lineplot`` with the
    hour component (0-23) of each timestamp as the x value. Readings from
    different days that share an hour therefore land on the same x position.

    The caller's frame is not modified. The timestamps are attached to a
    temporary series instead of overwriting ``df['date_time']`` and
    ``df.index``; replacing the index made later integer lookups on the same
    frame (``df['rpi_id'][0]``) depend on pandas' deprecated positional
    fallback. The column is therefore parsed again on every call.

    Parameters
    ----------
    df : pandas.DataFrame
        Readings containing a ``date_time`` column and ``target_column``.
    start_date, end_date : str or datetime-like
        Inclusive bounds compared against the parsed timestamps.
    fig : object
        Not used by this function; kept so existing calls keep working.
    ax : matplotlib axes
        Axes passed on to ``sns.lineplot``.
    target_column : str
        Name of the column plotted on the y axis.
    label : str
        Legend label passed on to ``sns.lineplot``.
    """
    timestamps = pd.DatetimeIndex(pd.to_datetime(df['date_time']))
    # Values indexed by timestamp and named after the column, built without
    # altering ``df``.
    readings = pd.Series(df[target_column].to_numpy(), index=timestamps, name=target_column)

    in_range = (timestamps >= start_date) & (timestamps <= end_date)
    readings = readings[in_range]
    sns.lineplot(x=readings.index.hour, y=readings, ax=ax, label=label)


In [None]:
# Hour-of-day chart: one measurement for every selected device, with the
# hour of each reading on the x axis.
# NOTE(review): the active window ends at 01:00 on 2022-01-16, so readings
# from early on the 16th share x positions with the same hours of the 15th,
# while the title says "over 24 hours". The commented-out end_date covers
# exactly one day -- confirm which is intended.
start_date = '2022-1-15 00:00:00'
# end_date = '2022-1-15 23:59:59'
end_date = '2022-1-16 1:00:00'

text_size = 16

target_column = 'light'
# target_column = 'humidity'

# target_rpi_id = [30]
target_rpi_id = [20, 21, 37, 39, 41, 45, 50]

# Y-axis label per measurement, so switching target_column above does not
# leave the axis labelled 'lux'. Unknown columns fall back to their name.
y_labels = {
    'light': 'lux',
    'temperature': 'Degree Celsius',
    'humidity': 'Relative Humidity (%)',
}

fig, ax = plt.subplots(figsize=(18, 12))
ax.set_title(target_column.capitalize() + ' readings over 24 hours', fontsize=20)

for df in df_plus + df_regular:
    if df.empty:
        # A file without rows has no device id to read and nothing to plot.
        continue
    # Positional lookup of the first row, independent of the frame's index type.
    rpi_id = df['rpi_id'].iloc[0]
    if rpi_id in target_rpi_id:
        label = label_dict.get(str(rpi_id))
        plot_day_chart(df, start_date, end_date, fig, ax, target_column, label)

output_path = 'output/4/'
# NOTE(review): the period charts earlier in this notebook are saved as
# validate_<column>.png in the same folder, so choosing 'temperature' or
# 'humidity' here would overwrite that figure.
file_name = output_path + 'validate_' + target_column + '.png'

ax.legend(fontsize=text_size)
ax.set_xlabel('Hours', fontsize=text_size)
ax.set_ylabel(y_labels.get(target_column, target_column), fontsize=text_size)
ax.tick_params(axis='both', labelsize=text_size)

# fig.autofmt_xdate() is intentionally not called: the x axis holds integer
# hours, not dates, so date-style rotated tick labels do not apply.
fig.tight_layout()
fig.savefig(file_name, dpi=300)
plt.close(fig)