In [8]:
import pandas as pd

# Load and scale data for different bins
def load_and_scale(file_path, scale_factor):
    """Read one CSV and return it with ``HP_kWh`` scaled and ``timestamp`` parsed.

    Parameters
    ----------
    file_path : path of the CSV to read; it must contain ``HP_kWh`` and
        ``timestamp`` columns.
    scale_factor : number every ``HP_kWh`` value is multiplied by.

    Returns
    -------
    pandas.DataFrame
        All columns of the CSV in their original order, with ``HP_kWh``
        multiplied by ``scale_factor`` and ``timestamp`` converted with
        ``pd.to_datetime``.
    """
    return pd.read_csv(file_path).assign(
        HP_kWh=lambda frame: frame['HP_kWh'] * scale_factor,
        timestamp=lambda frame: pd.to_datetime(frame['timestamp']),
    )

# Load the data
#
# Each frame is produced by load_and_scale(csv_path, scale_factor); the frames
# are grouped by region and unpacked into one variable per bin.

# Borough-level bins.
# NOTE(review): the KPB_* factors (3458 / 603 / 18) are identical to the
# Kenai_* factors further down, although Nikiski and Soldotna are loaded as
# separate communities - confirm the KPB factors are the intended ones.
dffnsbbin1, dffnsbbin2, dffnsbbin3 = [
    load_and_scale(*spec)
    for spec in [('FNSB_Bin1.csv', 23016), ('FNSB_Bin2.csv', 1263), ('FNSB_Bin3.csv', 14)]
]
dfmatbin1, dfmatbin2, dfmatbin3 = [
    load_and_scale(*spec)
    for spec in [('Mat_Su_Bin1.csv', 31691), ('Mat_Su_Bin2.csv', 22), ('Mat_Su_Bin3.csv', 2)]
]
dfkenbin1, dfkenbin2, dfkenbin3 = [
    load_and_scale(*spec)
    for spec in [('KPB_Bin1.csv', 3458), ('KPB_Bin2.csv', 603), ('KPB_Bin3.csv', 18)]
]
dfancbin1, dfancbin2, dfancbin3 = [
    load_and_scale(*spec)
    for spec in [('ANC_Bin1.csv', 14134), ('ANC_Bin2.csv', 472), ('ANC_Bin3.csv', 48)]
]
dfdenbin1, dfdenbin2 = [
    load_and_scale(*spec)
    for spec in [('Denali_Bin1.csv', 2401), ('Denali_Bin2.csv', 2)]
]

# Community-level bins.
dfsal1, dfsal2, dfsal3 = [
    load_and_scale(*spec)
    for spec in [('Salcha_bin1.csv', 275), ('Salcha_bin2.csv', 183), ('Salcha_bin3.csv', 18)]
]
dffbk1, dffbk2, dffbk3 = [
    load_and_scale(*spec)
    for spec in [('FBK_bin1.csv', 15840), ('FBK_bin2.csv', 853), ('FBK_bin3.csv', 10)]
]
dfnp1, dfnp2, dfnp3 = [
    load_and_scale(*spec)
    for spec in [('NP_bin1.csv', 6329), ('NP_bin2.csv', 778), ('NP_bin3.csv', 7)]
]
dfken1, dfken2, dfken3 = [
    load_and_scale(*spec)
    for spec in [('Kenai_bin1.csv', 3458), ('Kenai_bin2.csv', 603), ('Kenai_bin3.csv', 18)]
]
dfwas1, dfwas2, dfwas3 = [
    load_and_scale(*spec)
    for spec in [('Wasilla_bin1.csv', 21322), ('Wasilla_bin2.csv', 16), ('Wasilla_bin3.csv', 2)]
]
dfnik1, dfnik2, dfnik3 = [
    load_and_scale(*spec)
    for spec in [('Nikiski_bin1.csv', 386), ('Nikiski_bin2.csv', 156), ('Nikiski_bin3.csv', 12)]
]
dfsold1, dfsold2, dfsold3 = [
    load_and_scale(*spec)
    for spec in [('Soldotna_bin1.csv', 4653), ('Soldotna_bin2.csv', 437), ('Soldotna_bin3.csv', 9)]
]
dfpalm1, dfpalm2, dfpalm3 = [
    load_and_scale(*spec)
    for spec in [('Palmer_bin1.csv', 9656), ('Palmer_bin2.csv', 115), ('Palmer_bin3.csv', 7)]
]
dfhoust1, dfhoust2, dfhoust3 = [
    load_and_scale(*spec)
    for spec in [('Houston_bin1.csv', 503), ('Houston_bin2.csv', 87), ('Houston_bin3.csv', 7)]
]

# Anchorage and Denali again, this time for the community-level total.
# NOTE(review): these paths differ from the borough-level ones above only in
# letter case ('ANC_bin1.csv' vs 'ANC_Bin1.csv'); on a case-insensitive file
# system both resolve to the same file - confirm that is intended.
dfanc_bin1, dfanc_bin2, dfanc_bin3 = [
    load_and_scale(*spec)
    for spec in [('ANC_bin1.csv', 14134), ('ANC_bin2.csv', 472), ('ANC_bin3.csv', 48)]
]
dfden_bin1, dfden_bin2 = [
    load_and_scale(*spec)
    for spec in [('Denali_bin1.csv', 2401), ('Denali_bin2.csv', 2)]
]

# Function to merge dataframes without averaging
def merge_dataframes(dfs):
    """Inner-join the ``HP_kWh`` columns of several frames on ``timestamp``.

    NOTE: this helper is also defined in the ``In [9]`` cell; keep the two
    definitions in sync or define it once.

    Parameters
    ----------
    dfs : sequence of pandas.DataFrame
        Each frame must have ``timestamp`` and ``HP_kWh`` columns. A list with
        a single frame is accepted and simply yields that frame's column
        renamed to ``HP_kWh_1``.

    Returns
    -------
    pandas.DataFrame
        Columns ``timestamp``, ``HP_kWh_1``, ``HP_kWh_2``, ... in the order of
        ``dfs``. Only timestamps present in every input frame are kept
        (``pd.merge`` default inner join). The inputs are not modified.

    Raises
    ------
    IndexError
        If ``dfs`` is empty.
    """
    # Rename each HP_kWh column *before* merging so no suffix handling or
    # post-merge in-place rename is needed, whatever the number of frames.
    df_merged = dfs[0][['timestamp', 'HP_kWh']].rename(columns={'HP_kWh': 'HP_kWh_1'})
    for i, df in enumerate(dfs[1:], start=2):
        df_merged = pd.merge(
            df_merged,
            df[['timestamp', 'HP_kWh']].rename(columns={'HP_kWh': f'HP_kWh_{i}'}),
            on='timestamp',
        )
    return df_merged

# Borough-level total per timestamp.
# The series are aligned on their `timestamp` values instead of on row
# position, so a file whose rows are ordered differently (or that lacks some
# hours) can no longer be added to the wrong hour or be mislabelled with
# another file's timestamps. skipna=False keeps the behaviour of a chained
# `+`: a value missing from any input makes that hour's total NaN.
borough_bin_frames = [
    dffnsbbin1, dffnsbbin2, dffnsbbin3,
    dfmatbin1, dfmatbin2, dfmatbin3,
    dfkenbin1, dfkenbin2, dfkenbin3,
    dfancbin1, dfancbin2, dfancbin3,
    dfdenbin1, dfdenbin2,
]
dfBin3 = (
    pd.concat(
        [frame.set_index('timestamp')['HP_kWh'] for frame in borough_bin_frames],
        axis=1,
    )
    .sum(axis=1, skipna=False)
    .rename('Total_HP_kWh_b3')
    .rename_axis('timestamp')
    .reset_index()
)

# Join the bins of each community on timestamp (one HP_kWh_<n> column per bin).
# Nothing is averaged here; the columns are summed below.
dfsal = merge_dataframes([dfsal1, dfsal2, dfsal3])
dffbk = merge_dataframes([dffbk1, dffbk2, dffbk3])
dfnp = merge_dataframes([dfnp1, dfnp2, dfnp3])
dfken = merge_dataframes([dfken1, dfken2, dfken3])
dfwas = merge_dataframes([dfwas1, dfwas2, dfwas3])
dfpalm = merge_dataframes([dfpalm1, dfpalm2, dfpalm3])
dfsold = merge_dataframes([dfsold1, dfsold2, dfsold3])
dfhoust = merge_dataframes([dfhoust1, dfhoust2, dfhoust3])
dfnik = merge_dataframes([dfnik1, dfnik2, dfnik3])
dfanc = merge_dataframes([dfanc_bin1, dfanc_bin2, dfanc_bin3])
dfden = merge_dataframes([dfden_bin1, dfden_bin2])

# Community-level total per timestamp: sum the bins inside each community,
# then sum the communities, again aligned on `timestamp`.
community_frames = [
    dfsal, dffbk, dfnp, dfken, dfwas, dfpalm,
    dfnik, dfhoust, dfsold, dfanc, dfden,
]
dfc3 = (
    pd.concat(
        [
            frame.set_index('timestamp').filter(like='HP_kWh').sum(axis=1)
            for frame in community_frames
        ],
        axis=1,
    )
    .sum(axis=1, skipna=False)
    .rename('Total_HP_kWh')
    .rename_axis('timestamp')
    .reset_index()
)

# Compare the borough-level total with the community-level total hour by hour.
# 'variance' is the absolute difference; 'percent_difference' expresses it
# relative to the mean of the two totals.
merged_df = pd.merge(dfBin3[['timestamp', 'Total_HP_kWh_b3']], dfc3, on='timestamp')
merged_df['variance'] = (merged_df['Total_HP_kWh_b3'] - merged_df['Total_HP_kWh']).abs()
merged_df['percent_difference'] = merged_df['variance'] / merged_df[['Total_HP_kWh_b3', 'Total_HP_kWh']].mean(axis=1) * 100

# Five hours with the largest relative difference.
top_percent_differences = merged_df.sort_values(by='percent_difference', ascending=False).head(5)
# Five hours with the largest absolute difference (computed, not printed here).
top_variances = merged_df.sort_values(by='variance', ascending=False).head(5)

# Display the results
print("Top 5 percent differences:")
print(top_percent_differences[['timestamp', 'Total_HP_kWh_b3', 'Total_HP_kWh', 'variance', 'percent_difference']])



Top 5 percent differences:
               timestamp  Total_HP_kWh_b3  Total_HP_kWh      variance  \
3639 2018-06-01 15:30:00     12552.155217   2080.889858  10471.265360   
3640 2018-06-01 16:30:00     32786.078230   5435.259245  27350.818984   
5103 2018-08-01 15:30:00     32786.078230   5435.259245  27350.818984   
3641 2018-06-01 17:30:00     32786.078230   5435.259245  27350.818984   
5102 2018-08-01 14:30:00     32786.078230   5435.259245  27350.818984   

      percent_difference  
3639          143.118063  
3640          143.118063  
5103          143.118063  
3641          143.118063  
5102          143.118063  


In [9]:
import pandas as pd

# Load and scale data for different bins
def load_and_scale(file_path, scale_factor):
    """Read one CSV and return it with ``HP_kWh`` scaled and ``timestamp`` parsed.

    Parameters
    ----------
    file_path : path of the CSV to read; it must contain ``HP_kWh`` and
        ``timestamp`` columns.
    scale_factor : number every ``HP_kWh`` value is multiplied by.

    Returns
    -------
    pandas.DataFrame
        All columns of the CSV in their original order, with ``HP_kWh``
        multiplied by ``scale_factor`` and ``timestamp`` converted with
        ``pd.to_datetime``.
    """
    return pd.read_csv(file_path).assign(
        HP_kWh=lambda frame: frame['HP_kWh'] * scale_factor,
        timestamp=lambda frame: pd.to_datetime(frame['timestamp']),
    )

# Load the data
#
# Each frame is produced by load_and_scale(csv_path, scale_factor); the frames
# are grouped by region and unpacked into one variable per bin.

# Borough-level bins.
# NOTE(review): the KPB_* factors (3458 / 603 / 18) are identical to the
# Kenai_* factors further down, although Nikiski and Soldotna are loaded as
# separate communities - confirm the KPB factors are the intended ones.
dffnsbbin1, dffnsbbin2, dffnsbbin3 = [
    load_and_scale(*spec)
    for spec in [('FNSB_Bin1.csv', 23016), ('FNSB_Bin2.csv', 1263), ('FNSB_Bin3.csv', 14)]
]
dfmatbin1, dfmatbin2, dfmatbin3 = [
    load_and_scale(*spec)
    for spec in [('Mat_Su_Bin1.csv', 31691), ('Mat_Su_Bin2.csv', 22), ('Mat_Su_Bin3.csv', 2)]
]
dfkenbin1, dfkenbin2, dfkenbin3 = [
    load_and_scale(*spec)
    for spec in [('KPB_Bin1.csv', 3458), ('KPB_Bin2.csv', 603), ('KPB_Bin3.csv', 18)]
]
dfancbin1, dfancbin2, dfancbin3 = [
    load_and_scale(*spec)
    for spec in [('ANC_Bin1.csv', 14134), ('ANC_Bin2.csv', 472), ('ANC_Bin3.csv', 48)]
]
dfdenbin1, dfdenbin2 = [
    load_and_scale(*spec)
    for spec in [('Denali_Bin1.csv', 2401), ('Denali_Bin2.csv', 2)]
]

# Community-level bins.
dfsal1, dfsal2, dfsal3 = [
    load_and_scale(*spec)
    for spec in [('Salcha_bin1.csv', 275), ('Salcha_bin2.csv', 183), ('Salcha_bin3.csv', 18)]
]
dffbk1, dffbk2, dffbk3 = [
    load_and_scale(*spec)
    for spec in [('FBK_bin1.csv', 15840), ('FBK_bin2.csv', 853), ('FBK_bin3.csv', 10)]
]
dfnp1, dfnp2, dfnp3 = [
    load_and_scale(*spec)
    for spec in [('NP_bin1.csv', 6329), ('NP_bin2.csv', 778), ('NP_bin3.csv', 7)]
]
dfken1, dfken2, dfken3 = [
    load_and_scale(*spec)
    for spec in [('Kenai_bin1.csv', 3458), ('Kenai_bin2.csv', 603), ('Kenai_bin3.csv', 18)]
]
dfwas1, dfwas2, dfwas3 = [
    load_and_scale(*spec)
    for spec in [('Wasilla_bin1.csv', 21322), ('Wasilla_bin2.csv', 16), ('Wasilla_bin3.csv', 2)]
]
dfnik1, dfnik2, dfnik3 = [
    load_and_scale(*spec)
    for spec in [('Nikiski_bin1.csv', 386), ('Nikiski_bin2.csv', 156), ('Nikiski_bin3.csv', 12)]
]
dfsold1, dfsold2, dfsold3 = [
    load_and_scale(*spec)
    for spec in [('Soldotna_bin1.csv', 4653), ('Soldotna_bin2.csv', 437), ('Soldotna_bin3.csv', 9)]
]
dfpalm1, dfpalm2, dfpalm3 = [
    load_and_scale(*spec)
    for spec in [('Palmer_bin1.csv', 9656), ('Palmer_bin2.csv', 115), ('Palmer_bin3.csv', 7)]
]
dfhoust1, dfhoust2, dfhoust3 = [
    load_and_scale(*spec)
    for spec in [('Houston_bin1.csv', 503), ('Houston_bin2.csv', 87), ('Houston_bin3.csv', 7)]
]

# Anchorage and Denali again, this time for the community-level total.
# NOTE(review): these paths differ from the borough-level ones above only in
# letter case ('ANC_bin1.csv' vs 'ANC_Bin1.csv'); on a case-insensitive file
# system both resolve to the same file - confirm that is intended.
dfanc_bin1, dfanc_bin2, dfanc_bin3 = [
    load_and_scale(*spec)
    for spec in [('ANC_bin1.csv', 14134), ('ANC_bin2.csv', 472), ('ANC_bin3.csv', 48)]
]
dfden_bin1, dfden_bin2 = [
    load_and_scale(*spec)
    for spec in [('Denali_bin1.csv', 2401), ('Denali_bin2.csv', 2)]
]

# Function to merge dataframes without averaging
def merge_dataframes(dfs):
    """Inner-join the ``HP_kWh`` columns of several frames on ``timestamp``.

    NOTE: this helper is also defined in the ``In [8]`` cell; keep the two
    definitions in sync or define it once.

    Parameters
    ----------
    dfs : sequence of pandas.DataFrame
        Each frame must have ``timestamp`` and ``HP_kWh`` columns. A list with
        a single frame is accepted and simply yields that frame's column
        renamed to ``HP_kWh_1``.

    Returns
    -------
    pandas.DataFrame
        Columns ``timestamp``, ``HP_kWh_1``, ``HP_kWh_2``, ... in the order of
        ``dfs``. Only timestamps present in every input frame are kept
        (``pd.merge`` default inner join). The inputs are not modified.

    Raises
    ------
    IndexError
        If ``dfs`` is empty.
    """
    # Rename each HP_kWh column *before* merging so no suffix handling or
    # post-merge in-place rename is needed, whatever the number of frames.
    df_merged = dfs[0][['timestamp', 'HP_kWh']].rename(columns={'HP_kWh': 'HP_kWh_1'})
    for i, df in enumerate(dfs[1:], start=2):
        df_merged = pd.merge(
            df_merged,
            df[['timestamp', 'HP_kWh']].rename(columns={'HP_kWh': f'HP_kWh_{i}'}),
            on='timestamp',
        )
    return df_merged

# Borough-level total: add the scaled HP_kWh series of every borough bin,
# left to right, and label the rows with FNSB bin 1's timestamps.
borough_series = [
    frame['HP_kWh']
    for frame in (
        dffnsbbin1, dffnsbbin2, dffnsbbin3,
        dfmatbin1, dfmatbin2, dfmatbin3,
        dfkenbin1, dfkenbin2, dfkenbin3,
        dfancbin1, dfancbin2, dfancbin3,
        dfdenbin1, dfdenbin2,
    )
]
dfBin3 = dffnsbbin1[['timestamp']].copy()
dfBin3['Total_HP_kWh_b3'] = sum(borough_series[1:], borough_series[0])

# Join the bins of each community on timestamp (one HP_kWh_<n> column per bin).
# Nothing is averaged here; the columns are summed below.
dfsal = merge_dataframes([dfsal1, dfsal2, dfsal3])
dffbk = merge_dataframes([dffbk1, dffbk2, dffbk3])
dfnp = merge_dataframes([dfnp1, dfnp2, dfnp3])
dfken = merge_dataframes([dfken1, dfken2, dfken3])
dfwas = merge_dataframes([dfwas1, dfwas2, dfwas3])
dfpalm = merge_dataframes([dfpalm1, dfpalm2, dfpalm3])
dfsold = merge_dataframes([dfsold1, dfsold2, dfsold3])
dfhoust = merge_dataframes([dfhoust1, dfhoust2, dfhoust3])
dfnik = merge_dataframes([dfnik1, dfnik2, dfnik3])
dfanc = merge_dataframes([dfanc_bin1, dfanc_bin2, dfanc_bin3])
dfden = merge_dataframes([dfden_bin1, dfden_bin2])

# Community-level total: sum the bins inside each community, then add the
# communities left to right; rows are labelled with Salcha's timestamps.
community_totals = [
    frame.filter(like='HP_kWh').sum(axis=1)
    for frame in (
        dfsal, dffbk, dfnp, dfken, dfwas, dfpalm,
        dfnik, dfhoust, dfsold, dfanc, dfden,
    )
]
dfc3 = dfsal[['timestamp']].copy()
dfc3['Total_HP_kWh'] = sum(community_totals[1:], community_totals[0])

# Grand totals over all rows of each frame.
total_HP_kWh_b3 = dfBin3['Total_HP_kWh_b3'].sum()
total_HP_kWh_c3 = dfc3['Total_HP_kWh'].sum()

# 'variance' is the absolute difference of the two grand totals;
# 'percent_difference' expresses it relative to their mean.
variance = abs(total_HP_kWh_b3 - total_HP_kWh_c3)
percent_difference = (variance / ((total_HP_kWh_b3 + total_HP_kWh_c3) / 2)) * 100

# Display the results (one line per value).
print(
    f"Total HP_kWh in dfBin3: {total_HP_kWh_b3}",
    f"Total HP_kWh in dfc3: {total_HP_kWh_c3}",
    f"Variance: {variance}",
    f"Percent Difference: {percent_difference:.2f}%",
    sep="\n",
)


Total HP_kWh in dfBin3: 30271887619.78201
Total HP_kWh in dfc3: 13633021149.963104
Variance: 16638866469.818905
Percent Difference: 75.80%
