In [None]:
import pandas as pd

In [None]:
# lookup CSV listing every meter together with its meter model type
info_path = 'all_headers.csv'

# CSV holding the raw meter readings to process
data_path = 'save_data_col.csv'

In [None]:
# start from empty frames so nothing from an earlier run is carried over
df, result_df = pd.DataFrame(), pd.DataFrame()

In [None]:
# read the raw meter readings
df = pd.read_csv(filepath_or_buffer=data_path, encoding='utf-8')

In [None]:
# load the meter guide (one row per meter, including its meter model)
info_df = pd.read_csv(info_path, encoding='utf-8')

# remove extra columns; `columns=` already selects the axis, so no `axis=1`,
# and a list (not a set) keeps the labels in a defined order
info_df = info_df.drop(columns=['header1', 'header2'])

# swap spaces for underscores in the meter names (plain-text replace, not a
# regex); these names are later compared against `meter_name` in the readings
info_df['meter_name'] = info_df['meter_name'].str.replace(' ', '_', regex=False)

In [None]:
# parse the timestamps and discard the total watt hour column, which plays
# no part in these calculations
df = (
    df.assign(datetime=pd.to_datetime(df['datetime']))
      .drop(columns='total_watt_hour')
)

In [None]:
# keep only the readings taken on Sept 7, 8 and 9 (2025)
keep_dates = [
    pd.Timestamp(day).date()
    for day in ('2025-09-07', '2025-09-08', '2025-09-09')
]
df = df.loc[df['datetime'].dt.date.isin(keep_dates)]

In [None]:
# order the rows by meter, then chronologically within each meter
df = df.sort_values(['meter_name', 'datetime'])

In [None]:
# set of all meters of EPM7000 model; their averages are divided by 1000
# further down to express them in kw
# PQM2 meter models are already in kw, so they are left out of this set
# na=False: a meter whose model is blank counts as "not EPM7000" instead of
# putting NaN in the mask, which would make the boolean selection raise
model_check = set(
    info_df.loc[
        info_df['meter_model'].str.contains('EPM7000', na=False),
        'meter_name',
    ]
)

In [None]:
# for every meter, average the 3 phase watt total inside each 15 minute
# window, then express the EPM7000 meters' averages in kw (divide by 1000)

# tag each row with the start of the 15 minute window it falls in
df['interval'] = df['datetime'].dt.floor('15min')

# mean reading per (meter, window), flattened back into ordinary columns
interval_means = df.groupby(['meter_name', 'interval'])['3_phase_watt_total'].mean()
result_df = interval_means.reset_index()

# kw column: averages of meters listed in model_check are divided by 1000,
# every other meter's average is carried over unchanged
is_epm7000 = result_df['meter_name'].isin(model_check)
result_df['kw'] = result_df['3_phase_watt_total'].where(
    ~is_epm7000,
    result_df['3_phase_watt_total'] / 1000,
)

In [None]:
# final layout: the window start becomes 'datetime', the raw 3 phase column
# is removed, and the columns are put in datetime / meter_name / kw order
result_df = (
    result_df
    .rename(columns={'interval': 'datetime'})
    .drop(columns='3_phase_watt_total')
    .loc[:, ['datetime', 'meter_name', 'kw']]
)

In [None]:
# save the per-interval kw table without the row index
result_df.to_csv(path_or_buf='sept07-09_kw.csv', index=False)

In [None]:
# preview the first 20 rows
print(result_df.iloc[:20])

In [None]:
# print the frame's text representation
print(str(result_df))

In [None]:
# Compare Brian's data to Aurora's data

In [9]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import numpy as np

In [2]:
# read both CSVs and convert each datetime column to a datetime type
brian = (
    pd.read_csv('sept07-09_kw.csv', encoding='utf-8')
    .assign(datetime=lambda frame: pd.to_datetime(frame['datetime']))
)
aurora = (
    pd.read_csv('blue_pillar_data.csv', encoding='utf-8')
    .assign(datetime=lambda frame: pd.to_datetime(frame['datetime']))
)

In [14]:
# start from an empty frame before merging
merged_df = pd.DataFrame(data=None)

In [15]:
# outer-merge the two frames so each (meter_name, datetime) row holds the
# columns of both sources; rows found in only one source are kept
merged_df = brian.merge(aurora, how='outer', on=['meter_name', 'datetime'])

# normalise the column names: lower case, spaces turned into underscores
merged_df.columns = merged_df.columns.str.lower().str.replace(' ', '_')

# every distinct meter name appearing in the merged data
meters = pd.unique(merged_df['meter_name'])

In [16]:
# save the merged table (the row index is written as the first column)
merged_df.to_csv('merged_brian_aurora.csv', index=True)

In [17]:
def _classify_readings(values):
    """Label one meter's readings as 'missing', 'zeros' or 'ok'.

    'missing' - there is no non-null reading at all
    'zeros'   - every non-null reading is exactly 0
    'ok'      - at least one non-null reading is non-zero
    """
    present = values.dropna()
    if present.empty:
        return 'missing'
    # compare element-wise: `values.all() == 0` would be true as soon as a
    # single reading is zero, flagging healthy meters as 'zeros'
    if (present == 0).all():
        return 'zeros'
    return 'ok'


# function to get the meters info/comparison
def get_comparison_info(meters, merged_df, min_correlation=0.95, max_avg_pct_diff=10):
    """Summarise, per meter, how Brian's kw compares with Blue Pillar's kw.

    Parameters
    ----------
    meters : iterable
        Meter names to report on; they become the index of the result.
    merged_df : pandas.DataFrame
        Must contain the columns 'meter_name', 'datetime', 'kw' and
        'blue_pillar_kw'.
    min_correlation : float, default 0.95
        A meter matches only if the correlation between the two kw series
        is strictly greater than this.
    max_avg_pct_diff : float, default 10
        A meter matches only if the average absolute percent difference is
        strictly smaller than this.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'meter_name' with string columns:
        'brians' and 'bluepillar' - 'ok', 'zeros' or 'missing';
        'match' - 'yes', 'no (r=..., diff=...%)', 'no data' (no timestamp has
        both values) or 'n/a' (at least one source is not 'ok').
    """
    # create data frame to hold information; 'match' is created up front so
    # the column exists even when `meters` is empty
    info_df = pd.DataFrame({
        'meter_name': meters,
        'brians': '',
        'bluepillar': '',
        'match': '',
    })

    # make meter_name the index
    info_df = info_df.set_index('meter_name')

    for meter in meters:
        meter_data = merged_df[merged_df['meter_name'] == meter].sort_values('datetime')

        # check validity of each source's kw data for this meter
        brians_status = _classify_readings(meter_data['kw'])
        bluepillar_status = _classify_readings(meter_data['blue_pillar_kw'])
        info_df.loc[meter, 'brians'] = brians_status
        info_df.loc[meter, 'bluepillar'] = bluepillar_status

        # only compare when both sources have usable data
        if brians_status != 'ok' or bluepillar_status != 'ok':
            info_df.loc[meter, 'match'] = 'n/a'
            continue

        # keep rows ONLY if BOTH columns have non-null values
        valid_data = meter_data.dropna(subset=['kw', 'blue_pillar_kw'])
        if valid_data.empty:
            info_df.loc[meter, 'match'] = 'no data'
            continue

        correlation = valid_data['kw'].corr(valid_data['blue_pillar_kw'])

        # absolute difference between the two measurements
        difference = (valid_data['kw'] - valid_data['blue_pillar_kw']).abs()

        # percent difference relative to the magnitude of Brian's kw:
        # abs() keeps the percentage non-negative when kw is negative, and
        # zeros become NaN so those rows are skipped rather than dividing by 0
        reference = valid_data['kw'].abs().replace(0, np.nan)
        avg_pct_diff = (difference / reference * 100).mean()

        # "close enough" requires BOTH a high correlation (r = 1.0 is a
        # perfect positive correlation) AND a small average percent difference
        if correlation > min_correlation and avg_pct_diff < max_avg_pct_diff:
            info_df.loc[meter, 'match'] = 'yes'
        else:
            info_df.loc[meter, 'match'] = f'no (r={correlation:.2f}, diff={avg_pct_diff:.1f}%)'

    return info_df

In [18]:
# build the per-meter comparison table and save it, keeping the meter_name
# index as the first column of the CSV
info_df = get_comparison_info(meters=meters, merged_df=merged_df)
info_df.to_csv('brian_bp_comparison_info.csv')

In [None]:
# function to create pdf of the plots
def create_plots_to_pdf(meters, filename, data=None):
    """Write one page per meter to a PDF, plotting 'kw' against 'blue_pillar_kw'.

    Parameters
    ----------
    meters : iterable
        Meter names to plot, one page each, in the given order.
    filename : str
        Path of the PDF file to create.
    data : pandas.DataFrame, optional
        Frame with 'meter_name', 'datetime', 'kw' and 'blue_pillar_kw'
        columns. When omitted, the notebook-level `merged_df` is used so
        existing two-argument calls keep working.
    """
    if data is None:
        data = merged_df

    with PdfPages(filename) as pdf:
        for meter in meters:
            meter_data = data[data['meter_name'] == meter].sort_values('datetime')

            # get figure object (entire window) and axes object (plot area)
            fig, ax = plt.subplots(figsize=(14, 6))
            try:
                # alpha is transparency of the line
                ax.plot(meter_data['datetime'], meter_data['kw'], label='kw', alpha=0.7)
                ax.plot(meter_data['datetime'], meter_data['blue_pillar_kw'], label='blue_pillar_kw', alpha=0.7)

                ax.set_xlabel('datetime')
                ax.set_ylabel('kw')
                ax.set_title(f'{meter}')
                ax.legend()
                ax.grid(True, alpha=0.3)
                # lay out this specific figure rather than the implicit current one
                fig.tight_layout()

                # Save current plot to pdf
                pdf.savefig(fig)
            finally:
                # always release the figure, even if plotting or saving failed
                plt.close(fig)

In [None]:
# write the per-meter comparison plots to a single PDF
create_plots_to_pdf(meters=meters, filename='aurora_vs_curr.pdf')