In [1]:
import pandas as pd

# Load and scale data for different bins
def load_and_scale(file_path, scale_factor):
    """Read a CSV and return it with its 'HP_kWh' column multiplied by a factor.

    Parameters
    ----------
    file_path : anything accepted by ``pd.read_csv`` (it is passed straight through)
    scale_factor : number that every 'HP_kWh' value is multiplied by

    Returns
    -------
    pandas.DataFrame
        All columns of the CSV, in file order, with 'HP_kWh' scaled.

    Raises
    ------
    KeyError
        If the CSV has no 'HP_kWh' column.
    """
    return pd.read_csv(file_path).assign(
        HP_kWh=lambda frame: frame['HP_kWh'] * scale_factor
    )

# One (file, factor) pair per bin; files are read in the order listed and each
# frame's HP_kWh column is multiplied by the factor next to it.
# NOTE(review): the factors look like per-bin building counts -- confirm.
dfsal1, dfsal2, dfsal3 = (
    load_and_scale(path, factor)
    for path, factor in (
        ('Salcha_bin1.csv', 275),
        ('Salcha_bin2.csv', 183),
        ('Salcha_bin3.csv', 18),
    )
)
dffbk1, dffbk2, dffbk3 = (
    load_and_scale(path, factor)
    for path, factor in (
        ('FBK_bin1.csv', 15840),
        ('FBK_bin2.csv', 853),
        ('FBK_bin3.csv', 10),
    )
)
dfnp1, dfnp2, dfnp3 = (
    load_and_scale(path, factor)
    for path, factor in (
        ('NP_bin1.csv', 6329),
        ('NP_bin2.csv', 778),
        ('NP_bin3.csv', 7),
    )
)

# Function to merge dataframes without averaging
def merge_dataframes(dfs):
    """Inner-join the 'HP_kWh' columns of several frames on 'timestamp'.

    Only the 'timestamp' and 'HP_kWh' columns of each input are used. The
    i-th frame's values (1-based) end up in a column named ``HP_kWh_<i>``.
    Values are kept side by side, not summed or averaged.

    Parameters
    ----------
    dfs : sequence of pandas.DataFrame
        One or more frames, each with 'timestamp' and 'HP_kWh' columns.

    Returns
    -------
    pandas.DataFrame
        Columns ``timestamp, HP_kWh_1, ..., HP_kWh_n``. Because the join is
        ``pd.merge``'s default inner join, a timestamp missing from any input
        is absent from the result. The inputs are not modified.

    Raises
    ------
    ValueError
        If ``dfs`` is empty.
    KeyError
        If an input lacks 'timestamp' or 'HP_kWh'.
    """
    if len(dfs) == 0:
        raise ValueError("merge_dataframes needs at least one dataframe")

    df_merged = None
    for i, df in enumerate(dfs, start=1):
        # Rename before joining so every frame is handled the same way; this
        # removes the special-cased first pair (which failed for a single
        # frame) and the in-place rename after each merge.
        part = df[['timestamp', 'HP_kWh']].rename(columns={'HP_kWh': f'HP_kWh_{i}'})
        if df_merged is None:
            df_merged = part
        else:
            df_merged = pd.merge(df_merged, part, on='timestamp')
    return df_merged

# Merge dataframes for each set
dfsal = merge_dataframes([dfsal1, dfsal2, dfsal3])
dffbk = merge_dataframes([dffbk1, dffbk2, dffbk3])
dfnp = merge_dataframes([dfnp1, dfnp2, dfnp3])

# Combining HP_kWh columns
# Each regional frame is the result of its own inner join, so the three may not
# share the same rows. Adding their sums directly pairs rows by index label,
# which is only correct if all three happen to line up. Reduce each region to a
# (timestamp, HP_kWh) total and join those on timestamp instead.
region_totals = [
    pd.DataFrame({
        'timestamp': region['timestamp'],
        'HP_kWh': region.iloc[:, 1:].sum(axis=1),  # every column after timestamp is a bin
    })
    for region in (dfsal, dffbk, dfnp)
]
c3_by_region = merge_dataframes(region_totals)
assert not c3_by_region.empty, "no timestamp is common to all three regions"

dfc3 = pd.DataFrame({
    'timestamp': c3_by_region['timestamp'],
    'Total_HP_kWh': (
        c3_by_region['HP_kWh_1'] + c3_by_region['HP_kWh_2'] + c3_by_region['HP_kWh_3']
    ),
})

# C1 inputs: one file per region, scaled with the same helper used for the bins.
# NOTE(review): the factors look like per-region building counts -- confirm.
df_salcha = load_and_scale('Salcha_C1.csv', 476)
df_fbx = load_and_scale('Fairbanks_C1.csv', 16703)
df_np = load_and_scale('North Pole_C1.csv', 7114)

# Join the three regions on timestamp rather than assuming the files share the
# same row order and length; a plain `+` pairs rows by index label and would
# silently mix different timestamps (or yield NaN) if they do not line up.
c1_by_region = merge_dataframes([df_salcha, df_fbx, df_np])
assert not c1_by_region.empty, "no timestamp is common to all three C1 files"

df_C1 = pd.DataFrame({
    'timestamp': c1_by_region['timestamp'],
    'HP_kWh_c1': (
        c1_by_region['HP_kWh_1'] + c1_by_region['HP_kWh_2'] + c1_by_region['HP_kWh_3']
    ),
})

# Pair the C1 and C3 totals by timestamp, then derive two comparison columns:
#   variance           -> absolute difference between the two totals
#   percent_difference -> that difference relative to the mean of the two, in percent
merged_df = df_C1.merge(dfc3, on='timestamp', suffixes=('_c1', '_c3')).assign(
    variance=lambda d: (d['HP_kWh_c1'] - d['Total_HP_kWh']).abs(),
    percent_difference=lambda d: (
        d['variance'] / d[['HP_kWh_c1', 'Total_HP_kWh']].mean(axis=1) * 100
    ),
)

# The five rows with the largest percent difference
ranked = merged_df.sort_values(by='percent_difference', ascending=False)
top_variances = ranked.head(5)

# Display the results
report_columns = ['timestamp', 'HP_kWh_c1', 'Total_HP_kWh', 'variance', 'percent_difference']
print(top_variances[report_columns])


               timestamp      HP_kWh_c1   Total_HP_kWh      variance  \
192  2018-01-09 00:30:00  141450.515759  130282.390043  11168.125716   
193  2018-01-09 01:30:00  141450.515759  130282.390043  11168.125716   
215  2018-01-09 23:30:00  138894.140726  128152.693408  10741.447318   
194  2018-01-09 02:30:00  138005.317836  127451.861273  10553.456563   
556  2018-01-24 04:30:00  274635.376527  253819.338002  20816.038525   

     percent_difference  
192            8.219929  
193            8.219929  
215            8.044617  
194            7.951156  
556            7.878078  


In [3]:
import pandas as pd

# Load and scale data for different bins
def load_and_scale(file_path, scale_factor):
    """Read a CSV and return it with its 'HP_kWh' column multiplied by a factor.

    Parameters
    ----------
    file_path : anything accepted by ``pd.read_csv`` (it is passed straight through)
    scale_factor : number that every 'HP_kWh' value is multiplied by

    Returns
    -------
    pandas.DataFrame
        All columns of the CSV, in file order, with 'HP_kWh' scaled.

    Raises
    ------
    KeyError
        If the CSV has no 'HP_kWh' column.
    """
    return pd.read_csv(file_path).assign(
        HP_kWh=lambda frame: frame['HP_kWh'] * scale_factor
    )

# One (file, factor) pair per bin; files are read in the order listed and each
# frame's HP_kWh column is multiplied by the factor next to it.
# NOTE(review): the factors look like per-bin building counts -- confirm.
dfsal1, dfsal2, dfsal3 = (
    load_and_scale(path, factor)
    for path, factor in (
        ('Salcha_bin1.csv', 275),
        ('Salcha_bin2.csv', 183),
        ('Salcha_bin3.csv', 18),
    )
)
dffbk1, dffbk2, dffbk3 = (
    load_and_scale(path, factor)
    for path, factor in (
        ('FBK_bin1.csv', 15840),
        ('FBK_bin2.csv', 853),
        ('FBK_bin3.csv', 10),
    )
)
dfnp1, dfnp2, dfnp3 = (
    load_and_scale(path, factor)
    for path, factor in (
        ('NP_bin1.csv', 6329),
        ('NP_bin2.csv', 778),
        ('NP_bin3.csv', 7),
    )
)

# Function to merge dataframes without averaging
def merge_dataframes(dfs):
    """Inner-join the 'HP_kWh' columns of several frames on 'timestamp'.

    Only the 'timestamp' and 'HP_kWh' columns of each input are used. The
    i-th frame's values (1-based) end up in a column named ``HP_kWh_<i>``.
    Values are kept side by side, not summed or averaged.

    Parameters
    ----------
    dfs : sequence of pandas.DataFrame
        One or more frames, each with 'timestamp' and 'HP_kWh' columns.

    Returns
    -------
    pandas.DataFrame
        Columns ``timestamp, HP_kWh_1, ..., HP_kWh_n``. Because the join is
        ``pd.merge``'s default inner join, a timestamp missing from any input
        is absent from the result. The inputs are not modified.

    Raises
    ------
    ValueError
        If ``dfs`` is empty.
    KeyError
        If an input lacks 'timestamp' or 'HP_kWh'.
    """
    if len(dfs) == 0:
        raise ValueError("merge_dataframes needs at least one dataframe")

    df_merged = None
    for i, df in enumerate(dfs, start=1):
        # Rename before joining so every frame is handled the same way; this
        # removes the special-cased first pair (which failed for a single
        # frame) and the in-place rename after each merge.
        part = df[['timestamp', 'HP_kWh']].rename(columns={'HP_kWh': f'HP_kWh_{i}'})
        if df_merged is None:
            df_merged = part
        else:
            df_merged = pd.merge(df_merged, part, on='timestamp')
    return df_merged

# Merge dataframes for each set
dfsal = merge_dataframes([dfsal1, dfsal2, dfsal3])
dffbk = merge_dataframes([dffbk1, dffbk2, dffbk3])
dfnp = merge_dataframes([dfnp1, dfnp2, dfnp3])

# Combining HP_kWh columns
# Each regional frame is the result of its own inner join, so the three may not
# share the same rows. Adding their sums directly pairs rows by index label,
# which is only correct if all three happen to line up. Reduce each region to a
# (timestamp, HP_kWh) total and join those on timestamp instead.
region_totals = [
    pd.DataFrame({
        'timestamp': region['timestamp'],
        'HP_kWh': region.iloc[:, 1:].sum(axis=1),  # every column after timestamp is a bin
    })
    for region in (dfsal, dffbk, dfnp)
]
c3_by_region = merge_dataframes(region_totals)
assert not c3_by_region.empty, "no timestamp is common to all three regions"

dfc3 = pd.DataFrame({
    'timestamp': c3_by_region['timestamp'],
    'Total_HP_kWh': (
        c3_by_region['HP_kWh_1'] + c3_by_region['HP_kWh_2'] + c3_by_region['HP_kWh_3']
    ),
})

# C1 inputs: one file per region, scaled with the same helper used for the bins.
# NOTE(review): the factors look like per-region building counts -- confirm.
df_salcha = load_and_scale('Salcha_C1.csv', 476)
df_fbx = load_and_scale('Fairbanks_C1.csv', 16703)
df_np = load_and_scale('North Pole_C1.csv', 7114)

# Join the three regions on timestamp rather than assuming the files share the
# same row order and length; a plain `+` pairs rows by index label and would
# silently mix different timestamps (or yield NaN) if they do not line up.
c1_by_region = merge_dataframes([df_salcha, df_fbx, df_np])
assert not c1_by_region.empty, "no timestamp is common to all three C1 files"

df_C1 = pd.DataFrame({
    'timestamp': c1_by_region['timestamp'],
    'HP_kWh': (
        c1_by_region['HP_kWh_1'] + c1_by_region['HP_kWh_2'] + c1_by_region['HP_kWh_3']
    ),
})

# Totals over all rows of each scenario
total_HP_kWh_c1, total_HP_kWh_c3 = df_C1['HP_kWh'].sum(), dfc3['Total_HP_kWh'].sum()

# "variance" here is the absolute gap between the two totals; the percent
# difference expresses that gap relative to the midpoint of the two totals.
variance = abs(total_HP_kWh_c1 - total_HP_kWh_c3)
midpoint = (total_HP_kWh_c1 + total_HP_kWh_c3) / 2
percent_difference = (variance / midpoint) * 100

# Display the results
print(f"Total HP_kWh in C1: {total_HP_kWh_c1}")
print(f"Total HP_kWh in C3: {total_HP_kWh_c3}")
print(f"Variance: {variance}")
print(f"Percent Difference: {percent_difference:.2f}%")


Total HP_kWh in C1: 1972762933.4505243
Total HP_kWh in C3: 1945444370.6714609
Variance: 27318562.779063463
Percent Difference: 1.39%
