# REDUCEDHEATCARB sanity check

In [None]:
import pandas as pd
import numpy as np

rhc_file_path='reducedheatcarb_raw_measurements.parquet'
# usually, two decimals suffice for displaying DataFrames (NB internally, precision may be higher)
pd.options.display.precision = 2

import sys
sys.path.append('../data/')
sys.path.append('../view/')

%load_ext autoreload


%matplotlib inline
%matplotlib widget
import pylab as plt
import itertools
from plotter import Plot
from tqdm.notebook import tqdm

In [None]:
# Lookup from a unit key (e.g. the 'degC' in a property name such as
# 'temp_in__degC') to the matplotlib mathtext string used to label it.
units_to_mathtext = {
    'ppm' : r'$ppm$',
    'kWh' : r'$kWh$',
    'm3' : r'$m^{3}$',
    'degC' : r'$°C$',
    'W' : r'$W$',
    'V' : r'$V$',
    '0' : r'$[-]$',
    'bool': r'$0 = False; 1 = True$',
    # W per square metre: `\cdot` needs a separator before `m` (otherwise
    # mathtext sees the unknown command `\cdotm`) and the exponent is -2.
    'W_m_2' : r'$W\cdot m^{-2}$',
}

# Second name for the very same dict object, kept so that code using
# `property_types` continues to work.
property_types = units_to_mathtext

In [None]:
%%time
# Attempt to read the Parquet file
try:
    df = pd.read_parquet(
        rhc_file_path, 
        engine='pyarrow',
        use_nullable_dtypes=True
        )
    print("File was successfully read without specifying compression codec.")
except Exception as e:
    print(f"Error reading file: {e}")


In [None]:
# Print a concise summary of the DataFrame that was just read from the Parquet file.
df.info()

In [None]:
# Show the DataFrame as the cell output.
df

## Initial exploration: size, ids, start & stop times per id

In [None]:
# Print a concise summary of the DataFrame as it is before the de-duplication cell below.
df.info()

In [None]:
# Row count before the de-duplication cell below has run.
df.shape[0]

In [None]:
%%time
# deduplicate the measurements
df = df.reset_index().drop_duplicates().set_index(['id', 'source_category', 'source_type', 'timestamp', 'property']).sort_index()

In [None]:
# Row count after de-duplication; compare with the count shown before it.
df.shape[0]

In [None]:
# List the distinct values of the 'id' index level.
list(df.index.unique(level='id'))

In [None]:
# Number of distinct values in the 'id' index level.
len(df.index.unique(level='id'))

In [None]:
%%time
df.reset_index().groupby(['id', 'source_type'])['timestamp'].agg(['min', 'max'])

In [None]:
# Number of non-missing entries in the 'value' column.
df['value'].count()

In [None]:
# Check that no duplicate measurements are left. DataFrame.duplicated()
# compares column values only and ignores the index, so on the indexed frame
# it would flag any two rows that merely share the same value. Turning the
# index levels back into columns makes the check compare complete rows,
# mirroring how the de-duplication itself was done.
df.reset_index().duplicated().any()

In [None]:
# Print a concise summary of the de-duplicated, re-indexed DataFrame.
df.info()

In [None]:
# Show descriptive statistics for the DataFrame's columns.
df.describe()

In [None]:
# Show the de-duplicated DataFrame as the cell output.
df

In [None]:
# List the distinct values of the 'source_category' index level.
list(df.index.unique(level='source_category'))

In [None]:
# List the distinct values of the 'source_type' index level.
list(df.index.unique(level='source_type'))

In [None]:
# List the distinct values of the 'property' index level.
list(df.index.unique(level='property'))

In [None]:
# Number of rows per source type.
df.groupby(
    ['source_type']
).size()

In [None]:
# Number of rows per (source_category, source_type, property) combination.
df.groupby(
    [
        'source_category',
        'source_type',
        'property',
    ]
).size()

## Plot data


In [None]:
# Plot the 'temp_in__degC' property, restricted to id 412715.
Plot.nfh_measurements_plot(
    df,
    properties=['temp_in__degC'],
    ids=[412715],
)


In [None]:
# Plot the 'temp_in__degC' property without an ids filter.
Plot.nfh_measurements_plot(
    df,
    properties=['temp_in__degC'],
)

In [None]:
# Plot the 'battery_voltage__V' and 'co2__ppm' properties without an ids filter.
Plot.nfh_measurements_plot(
    df,
    properties=[
        'battery_voltage__V',
        'co2__ppm',
    ],
)

In [None]:
# Plot the measurements of source type 'enelogic' without an ids filter.
# Single-id variants, kept for quick switching:
# Plot.nfh_measurements_plot(df, source_types = ['enelogic'], ids = [412715])
# Plot.nfh_measurements_plot(df, source_types = ['enelogic'], ids = [404873])

Plot.nfh_measurements_plot(
    df,
    source_types=['enelogic'],
)


In [None]:
# Plot the measurements of source type 'twomes-p1-reader-firmware' without an ids filter.
# Single-id variants, kept for quick switching:
# Plot.nfh_measurements_plot(df, source_types = ['twomes-p1-reader-firmware'], ids = [412715])
# Plot.nfh_measurements_plot(df, source_types = ['twomes-p1-reader-firmware'], ids = [404873])

Plot.nfh_measurements_plot(
    df,
    source_types=['twomes-p1-reader-firmware'],
)


In [None]:
# Plot both the 'enelogic' and 'twomes-p1-reader-firmware' source types,
# with units=['kWh'], restricted to id 412715.
Plot.nfh_measurements_plot(
    df,
    source_types=[
        'enelogic',
        'twomes-p1-reader-firmware',
    ],
    units=['kWh'],
    ids=[412715],
)
# Other variants, kept for quick switching:
# Plot.nfh_measurements_plot(df, source_types = ['enelogic', 'twomes-p1-reader-firmware'], ids = [404873])
# Plot.nfh_measurements_plot(df, source_types = ['enelogic', 'twomes-p1-reader-firmware'])

In [None]:
# Plot the measurements of source type
# 'twomes-co2-occupancy-scd41-m5coreink-firmware', restricted to id 412715.
Plot.nfh_measurements_plot(
    df,
    source_types=['twomes-co2-occupancy-scd41-m5coreink-firmware'],
    ids=[412715],
)
# Other variants, kept for quick switching:
# Plot.nfh_measurements_plot(df, source_types = ['twomes-co2-occupancy-scd41-m5coreink-firmware'], ids = [404873])
# Plot.nfh_measurements_plot(df, source_types = ['twomes-co2-occupancy-scd41-m5coreink-firmware'])

In [None]:
# Plot four selected properties of source type
# 'twomes-co2-occupancy-scd41-m5coreink-firmware', restricted to id 456638.
Plot.nfh_measurements_plot(
    df,
    source_types=['twomes-co2-occupancy-scd41-m5coreink-firmware'],
    properties=[
        'co2__ppm',
        'onboarded__p',
        'occupancy__p',
        'temp_in__degC',
    ],
    ids=[456638],
)
# Other variants, kept for quick switching:
# Plot.nfh_measurements_plot(df, source_types = ['twomes-co2-occupancy-scd41-m5coreink-firmware'], ids = [404873])
# Plot.nfh_measurements_plot(df, source_types = ['twomes-co2-occupancy-scd41-m5coreink-firmware'])

In [None]:
# Plot the 'temp_in__degC' property without an ids filter
# (same call as an earlier cell in this section).
Plot.nfh_measurements_plot(
    df,
    properties=['temp_in__degC'],
)

In [None]:
# Plot the 'co2__ppm' property without an ids filter.
Plot.nfh_measurements_plot(
    df,
    properties=['co2__ppm'],
)


In [None]:
# Plot with only an ids filter (id 412715); no source type or property selection is passed.
Plot.nfh_measurements_plot(
    df,
    ids=[412715],
)
# Other variants, kept for quick switching:
# Plot.nfh_measurements_plot(df, ids = [404873])
# Plot.nfh_measurements_plot(df)