In [10]:
import pandas as pd

# Load and scale data for different bins
def load_and_scale(file_path, scale_factor):
    """Read a CSV into a DataFrame and multiply its ``HP_kWh`` column by a factor.

    Parameters
    ----------
    file_path
        Whatever ``pandas.read_csv`` accepts as its first argument; it is
        passed through unchanged.
    scale_factor
        Multiplier applied to every value in the ``HP_kWh`` column.

    Returns
    -------
    pandas.DataFrame
        The loaded frame with ``HP_kWh`` scaled. No other column is touched.

    Raises
    ------
    KeyError
        If the loaded data has no ``HP_kWh`` column.
    """
    frame = pd.read_csv(file_path)
    frame['HP_kWh'] = frame['HP_kWh'] * scale_factor
    return frame

# C3 dataframes: one CSV per community bin. Each group below pairs the bin
# files of one community with the hard-coded multipliers applied to their
# HP_kWh column, in bin order. (The notebook does not state what the
# multipliers represent — presumably a per-bin count; TODO confirm.)
dfsal1, dfsal2, dfsal3 = map(
    load_and_scale,
    ('Salcha_bin1.csv', 'Salcha_bin2.csv', 'Salcha_bin3.csv'),
    (275, 183, 18),
)
dffbk1, dffbk2, dffbk3 = map(
    load_and_scale,
    ('FBK_bin1.csv', 'FBK_bin2.csv', 'FBK_bin3.csv'),
    (15840, 853, 10),
)
dfnp1, dfnp2, dfnp3 = map(
    load_and_scale,
    ('NP_bin1.csv', 'NP_bin2.csv', 'NP_bin3.csv'),
    (6329, 778, 7),
)
# NOTE(review): these three Kenai factors sum to 4079, while the Kenai C1
# factor applied further down in this cell is 4077 — confirm which is right.
dfken1, dfken2, dfken3 = map(
    load_and_scale,
    ('Kenai_bin1.csv', 'Kenai_bin2.csv', 'Kenai_bin3.csv'),
    (3458, 603, 18),
)
dfwas1, dfwas2, dfwas3 = map(
    load_and_scale,
    ('Wasilla_bin1.csv', 'Wasilla_bin2.csv', 'Wasilla_bin3.csv'),
    (21322, 16, 2),
)
dfnik1, dfnik2, dfnik3 = map(
    load_and_scale,
    ('Nikiski_bin1.csv', 'Nikiski_bin2.csv', 'Nikiski_bin3.csv'),
    (386, 156, 12),
)
dfsold1, dfsold2, dfsold3 = map(
    load_and_scale,
    ('Soldotna_bin1.csv', 'Soldotna_bin2.csv', 'Soldotna_bin3.csv'),
    (4653, 437, 9),
)
dfpalm1, dfpalm2, dfpalm3 = map(
    load_and_scale,
    ('Palmer_bin1.csv', 'Palmer_bin2.csv', 'Palmer_bin3.csv'),
    (9656, 115, 7),
)
dfhoust1, dfhoust2, dfhoust3 = map(
    load_and_scale,
    ('Houston_bin1.csv', 'Houston_bin2.csv', 'Houston_bin3.csv'),
    (503, 87, 7),
)
dfanc_bin1, dfanc_bin2, dfanc_bin3 = map(
    load_and_scale,
    ('ANC_bin1.csv', 'ANC_bin2.csv', 'ANC_bin3.csv'),
    (14134, 472, 48),
)
# Denali has only two bin files.
dfden_bin1, dfden_bin2 = map(
    load_and_scale,
    ('Denali_bin1.csv', 'Denali_bin2.csv'),
    (2401, 2),
)


# Function to merge dataframes without averaging
def merge_dataframes(dfs):
    """Inner-join the ``HP_kWh`` columns of several dataframes on ``timestamp``.

    Each input contributes one column named ``HP_kWh_<k>``, where ``k`` is the
    1-based position of that frame in ``dfs``. Values are kept side by side;
    nothing is summed or averaged here.

    Parameters
    ----------
    dfs : sequence of pandas.DataFrame
        Frames that each have ``timestamp`` and ``HP_kWh`` columns. Any other
        columns are ignored, and the inputs are not modified. A single frame
        is accepted and simply yields ``timestamp, HP_kWh_1``.

    Returns
    -------
    pandas.DataFrame
        Columns ``timestamp, HP_kWh_1, ..., HP_kWh_n``. The join is an inner
        merge, so only timestamps present in every frame are kept.

    Raises
    ------
    IndexError
        If ``dfs`` is empty.
    KeyError
        If a frame lacks the ``timestamp`` or ``HP_kWh`` column.
    """
    if len(dfs) == 0:
        raise IndexError("merge_dataframes needs at least one dataframe")

    df_merged = None
    for i, df in enumerate(dfs, start=1):
        # Rename before merging so every frame gets its positional suffix the
        # same way, including when only one frame is supplied.
        part = df[['timestamp', 'HP_kWh']].rename(columns={'HP_kWh': f'HP_kWh_{i}'})
        df_merged = part if df_merged is None else pd.merge(df_merged, part, on='timestamp')
    return df_merged

# Merge dataframes for each set: one frame per community, holding that
# community's bin columns side by side (HP_kWh_1, HP_kWh_2, ...).
dfsal = merge_dataframes([dfsal1, dfsal2, dfsal3])
dffbk = merge_dataframes([dffbk1, dffbk2, dffbk3])
dfnp = merge_dataframes([dfnp1, dfnp2, dfnp3])
dfken = merge_dataframes([dfken1, dfken2, dfken3])
dfwas = merge_dataframes([dfwas1, dfwas2, dfwas3])
dfpalm = merge_dataframes([dfpalm1, dfpalm2, dfpalm3])
dfsold = merge_dataframes([dfsold1, dfsold2, dfsold3])
dfhoust = merge_dataframes([dfhoust1, dfhoust2, dfhoust3])
dfnik = merge_dataframes([dfnik1, dfnik2, dfnik3])
dfanc = merge_dataframes([dfanc_bin1, dfanc_bin2, dfanc_bin3])
dfden = merge_dataframes([dfden_bin1, dfden_bin2])

# Combining HP_kWh columns: sum each community's bin columns per row, then add
# the communities together in the order listed.
# NOTE(review): these additions line up on the frames' row index, not on the
# timestamp value, and the timestamp column is taken from Salcha alone. That
# is only right if every merged frame has the same timestamps in the same
# order — confirm.
c3_total = None
for community in (dfsal, dffbk, dfnp, dfken, dfwas, dfpalm, dfnik, dfhoust, dfsold, dfanc, dfden):
    community_total = community.filter(like='HP_kWh').sum(axis=1)
    c3_total = community_total if c3_total is None else c3_total + community_total
dfc3 = dfsal[['timestamp']].copy()
dfc3['Total_HP_kWh'] = c3_total

# C1 dataframes: one CSV per community, its HP_kWh column scaled by a single
# hard-coded factor.
df_salcha = load_and_scale('Salcha_C1.csv', 476)
df_fbx = load_and_scale('Fairbanks_C1.csv', 16703)
df_np = load_and_scale('North Pole_C1.csv', 7114)
# NOTE(review): the Kenai bin factors used for C3 above (3458 + 603 + 18) add
# up to 4079, not 4077; for every other community the C1 factor equals the sum
# of its bin factors. Confirm which value is intended.
df_ken = load_and_scale('Kenai_C1.csv', 4077)
df_was = load_and_scale('Wasilla_C1.csv', 21340)
df_nik = load_and_scale('Nikiski_C1.csv', 554)
df_palm = load_and_scale('Palmer_C1.csv', 9778)
df_houst = load_and_scale('Houston_C1.csv', 597)
df_sol = load_and_scale('Soldotna_C1.csv', 5099)
df_anc = load_and_scale('ANC_B1.csv', 14654)
df_den = load_and_scale('Denali_B1.csv', 2403)

# Add the scaled C1 series together, row by row.
# Assuming timestamp is aligned and exists in all datasets (same row-position
# caveat as for the C3 total above).
c1_total = None
for c1_frame in (df_salcha, df_fbx, df_np, df_ken, df_nik, df_sol,
                 df_palm, df_houst, df_was, df_anc, df_den):
    c1_total = c1_frame['HP_kWh'] if c1_total is None else c1_total + c1_frame['HP_kWh']
df_C1 = df_salcha[['timestamp']].copy()
df_C1['HP_kWh_c1'] = c1_total

# Merging and calculating variances
merged_df = df_C1.merge(dfc3, on='timestamp', suffixes=('_c1', '_c3'))

# Check columns to ensure correct names
print("Columns in merged_df:", merged_df.columns)

# 'variance' here is the absolute gap between the two totals at a timestamp;
# 'percent_difference' is that gap relative to the mean of the two totals.
merged_df['variance'] = merged_df['HP_kWh_c1'].sub(merged_df['Total_HP_kWh']).abs()
pair_mean = merged_df[['HP_kWh_c1', 'Total_HP_kWh']].mean(axis=1)
merged_df['percent_difference'] = merged_df['variance'].div(pair_mean).mul(100)

# Sorting and selecting top variances (five largest percent differences)
top_variances = merged_df.sort_values(by='percent_difference', ascending=False).iloc[:5]

# Display the results
print(top_variances.loc[:, ['timestamp', 'HP_kWh_c1', 'Total_HP_kWh', 'variance', 'percent_difference']])


Columns in merged_df: Index(['timestamp', 'HP_kWh_c1', 'Total_HP_kWh'], dtype='object')
                timestamp      HP_kWh_c1   Total_HP_kWh       variance  \
3729  2018-06-05 09:30:00  393375.445665  219008.532297  174366.913368   
3717  2018-06-04 21:30:00  326844.457914  182938.319321  143906.138594   
5823  2018-08-31 15:30:00  492196.394562  276565.285451  215631.109111   
3728  2018-06-05 08:30:00  457894.873501  257814.650840  200080.222661   
3730  2018-06-05 10:30:00  204164.159131  114954.363371   89209.795760   

      percent_difference  
3729           56.946922  
3717           56.457827  
5823           56.098298  
3728           55.911013  
3730           55.910133  


In [12]:
import pandas as pd

# Load and scale data for different bins
def load_and_scale(file_path, scale_factor):
    """Read a CSV into a DataFrame and multiply its ``HP_kWh`` column by a factor.

    NOTE(review): this repeats the ``load_and_scale`` definition from the
    earlier cell of this notebook; the later definition is the one in effect
    once this cell has run.

    Parameters
    ----------
    file_path
        Whatever ``pandas.read_csv`` accepts as its first argument; it is
        passed through unchanged.
    scale_factor
        Multiplier applied to every value in the ``HP_kWh`` column.

    Returns
    -------
    pandas.DataFrame
        The loaded frame with ``HP_kWh`` scaled. No other column is touched.

    Raises
    ------
    KeyError
        If the loaded data has no ``HP_kWh`` column.
    """
    frame = pd.read_csv(file_path)
    frame['HP_kWh'] = frame['HP_kWh'] * scale_factor
    return frame

# Load and scale data for all bins: one CSV per community bin. Each group
# below pairs the bin files of one community with the hard-coded multipliers
# applied to their HP_kWh column, in bin order. (The notebook does not state
# what the multipliers represent — presumably a per-bin count; TODO confirm.)
# NOTE(review): same files and factors as the loading block in the earlier cell.
dfsal1, dfsal2, dfsal3 = map(
    load_and_scale,
    ('Salcha_bin1.csv', 'Salcha_bin2.csv', 'Salcha_bin3.csv'),
    (275, 183, 18),
)
dffbk1, dffbk2, dffbk3 = map(
    load_and_scale,
    ('FBK_bin1.csv', 'FBK_bin2.csv', 'FBK_bin3.csv'),
    (15840, 853, 10),
)
dfnp1, dfnp2, dfnp3 = map(
    load_and_scale,
    ('NP_bin1.csv', 'NP_bin2.csv', 'NP_bin3.csv'),
    (6329, 778, 7),
)
# NOTE(review): these three Kenai factors sum to 4079, while the Kenai C1
# factor applied further down in this cell is 4077 — confirm which is right.
dfken1, dfken2, dfken3 = map(
    load_and_scale,
    ('Kenai_bin1.csv', 'Kenai_bin2.csv', 'Kenai_bin3.csv'),
    (3458, 603, 18),
)
dfwas1, dfwas2, dfwas3 = map(
    load_and_scale,
    ('Wasilla_bin1.csv', 'Wasilla_bin2.csv', 'Wasilla_bin3.csv'),
    (21322, 16, 2),
)
dfnik1, dfnik2, dfnik3 = map(
    load_and_scale,
    ('Nikiski_bin1.csv', 'Nikiski_bin2.csv', 'Nikiski_bin3.csv'),
    (386, 156, 12),
)
dfsold1, dfsold2, dfsold3 = map(
    load_and_scale,
    ('Soldotna_bin1.csv', 'Soldotna_bin2.csv', 'Soldotna_bin3.csv'),
    (4653, 437, 9),
)
dfpalm1, dfpalm2, dfpalm3 = map(
    load_and_scale,
    ('Palmer_bin1.csv', 'Palmer_bin2.csv', 'Palmer_bin3.csv'),
    (9656, 115, 7),
)
dfhoust1, dfhoust2, dfhoust3 = map(
    load_and_scale,
    ('Houston_bin1.csv', 'Houston_bin2.csv', 'Houston_bin3.csv'),
    (503, 87, 7),
)
dfanc_bin1, dfanc_bin2, dfanc_bin3 = map(
    load_and_scale,
    ('ANC_bin1.csv', 'ANC_bin2.csv', 'ANC_bin3.csv'),
    (14134, 472, 48),
)
# Denali has only two bin files.
dfden_bin1, dfden_bin2 = map(
    load_and_scale,
    ('Denali_bin1.csv', 'Denali_bin2.csv'),
    (2401, 2),
)

# Function to merge dataframes without averaging
def merge_dataframes(dfs):
    """Inner-join the ``HP_kWh`` columns of several dataframes on ``timestamp``.

    Each input contributes one column named ``HP_kWh_<k>``, where ``k`` is the
    1-based position of that frame in ``dfs``. Values are kept side by side;
    nothing is summed or averaged here.

    Parameters
    ----------
    dfs : sequence of pandas.DataFrame
        Frames that each have ``timestamp`` and ``HP_kWh`` columns. Any other
        columns are ignored, and the inputs are not modified. A single frame
        is accepted and simply yields ``timestamp, HP_kWh_1``.

    Returns
    -------
    pandas.DataFrame
        Columns ``timestamp, HP_kWh_1, ..., HP_kWh_n``. The join is an inner
        merge, so only timestamps present in every frame are kept.

    Raises
    ------
    IndexError
        If ``dfs`` is empty.
    KeyError
        If a frame lacks the ``timestamp`` or ``HP_kWh`` column.
    """
    if len(dfs) == 0:
        raise IndexError("merge_dataframes needs at least one dataframe")

    df_merged = None
    for i, df in enumerate(dfs, start=1):
        # Rename before merging so every frame gets its positional suffix the
        # same way, including when only one frame is supplied.
        part = df[['timestamp', 'HP_kWh']].rename(columns={'HP_kWh': f'HP_kWh_{i}'})
        df_merged = part if df_merged is None else pd.merge(df_merged, part, on='timestamp')
    return df_merged

# Merge dataframes for each set: one frame per community, holding that
# community's bin columns side by side (HP_kWh_1, HP_kWh_2, ...).
dfsal = merge_dataframes([dfsal1, dfsal2, dfsal3])
dffbk = merge_dataframes([dffbk1, dffbk2, dffbk3])
dfnp = merge_dataframes([dfnp1, dfnp2, dfnp3])
dfken = merge_dataframes([dfken1, dfken2, dfken3])
dfwas = merge_dataframes([dfwas1, dfwas2, dfwas3])
dfpalm = merge_dataframes([dfpalm1, dfpalm2, dfpalm3])
dfsold = merge_dataframes([dfsold1, dfsold2, dfsold3])
dfhoust = merge_dataframes([dfhoust1, dfhoust2, dfhoust3])
dfnik = merge_dataframes([dfnik1, dfnik2, dfnik3])
dfanc = merge_dataframes([dfanc_bin1, dfanc_bin2, dfanc_bin3])
dfden = merge_dataframes([dfden_bin1, dfden_bin2])

# Combining HP_kWh columns: sum each community's bin columns per row, then add
# the communities together in the order listed.
# NOTE(review): these additions line up on the frames' row index, not on the
# timestamp value, and the timestamp column is taken from Salcha alone. That
# is only right if every merged frame has the same timestamps in the same
# order — confirm.
c3_total = None
for community in (dfsal, dffbk, dfnp, dfken, dfwas, dfpalm, dfnik, dfhoust, dfsold, dfanc, dfden):
    community_total = community.filter(like='HP_kWh').sum(axis=1)
    c3_total = community_total if c3_total is None else c3_total + community_total
dfc3 = dfsal[['timestamp']].copy()
dfc3['Total_HP_kWh'] = c3_total

# C1 dataframes: one CSV per community, its HP_kWh column scaled by a single
# hard-coded factor.
df_salcha = load_and_scale('Salcha_C1.csv', 476)
df_fbx = load_and_scale('Fairbanks_C1.csv', 16703)
df_np = load_and_scale('North Pole_C1.csv', 7114)
# NOTE(review): the Kenai bin factors used for C3 above (3458 + 603 + 18) add
# up to 4079, not 4077; for every other community the C1 factor equals the sum
# of its bin factors. Confirm which value is intended.
df_ken = load_and_scale('Kenai_C1.csv', 4077)
df_was = load_and_scale('Wasilla_C1.csv', 21340)
df_nik = load_and_scale('Nikiski_C1.csv', 554)
df_palm = load_and_scale('Palmer_C1.csv', 9778)
df_houst = load_and_scale('Houston_C1.csv', 597)
df_sol = load_and_scale('Soldotna_C1.csv', 5099)
df_anc = load_and_scale('ANC_B1.csv', 14654)
df_den = load_and_scale('Denali_B1.csv', 2403)

# Add the scaled C1 series together, row by row.
# Assuming timestamp is aligned and exists in all datasets (same row-position
# caveat as for the C3 total above).
c1_total = None
for c1_frame in (df_salcha, df_fbx, df_np, df_ken, df_nik, df_sol,
                 df_palm, df_houst, df_was, df_anc, df_den):
    c1_total = c1_frame['HP_kWh'] if c1_total is None else c1_total + c1_frame['HP_kWh']
df_C1 = df_salcha[['timestamp']].copy()
df_C1['HP_kWh_c1'] = c1_total

# Grand totals over every row of each scenario.
total_HP_kWh_c1 = df_C1['HP_kWh_c1'].sum()
total_HP_kWh_c3 = dfc3['Total_HP_kWh'].sum()

# 'variance' here is the absolute gap between the two grand totals;
# 'percent_difference' is that gap relative to the mean of the two totals.
variance = abs(total_HP_kWh_c1 - total_HP_kWh_c3)
totals_mean = (total_HP_kWh_c1 + total_HP_kWh_c3) / 2
percent_difference = (variance / totals_mean) * 100

# Display the results
print(f"Total HP_kWh in C1: {total_HP_kWh_c1}")
print(f"Total HP_kWh in C3: {total_HP_kWh_c3}")
print(f"Variance: {variance}")
print(f"Percent Difference: {percent_difference:.2f}%")


Total HP_kWh in C1: 14344500283.49129
Total HP_kWh in C3: 13633021149.963104
Variance: 711479133.5281849
Percent Difference: 5.09%
