In [2]:
import pandas as pd

# Load and scale data for different bins
def load_and_scale(file_path, scale_factor):
    df = pd.read_csv(file_path)
    df['HP_kWh'] *= scale_factor
    return df

# Load the data
dffnsbbin1 = load_and_scale('FNSB_Bin1.csv', 23016)
dffnsbbin2 = load_and_scale('FNSB_Bin2.csv', 1263)
dffnsbbin3 = load_and_scale('FNSB_Bin3.csv', 14)
dfsal1 = load_and_scale('Salcha_bin1.csv', 275)
dfsal2 = load_and_scale('Salcha_bin2.csv', 183)
dfsal3 = load_and_scale('Salcha_bin3.csv', 18)
dffbk1 = load_and_scale('FBK_bin1.csv', 15840)
dffbk2 = load_and_scale('FBK_bin2.csv', 853)
dffbk3 = load_and_scale('FBK_bin3.csv', 10)
dfnp1 = load_and_scale('NP_bin1.csv', 6329)
dfnp2 = load_and_scale('NP_bin2.csv', 778)
dfnp3 = load_and_scale('NP_bin3.csv', 7)

# Function to merge dataframes without averaging
def merge_dataframes(dfs):
    df_merged = pd.merge(dfs[0][['timestamp', 'HP_kWh']], dfs[1][['timestamp', 'HP_kWh']], on='timestamp', suffixes=('_1', '_2'))
    for i, df in enumerate(dfs[2:], start=3):  # start=3 because we already have _1 and _2
        df_merged = pd.merge(df_merged, df[['timestamp', 'HP_kWh']], on='timestamp')
        df_merged.rename(columns={'HP_kWh': f'HP_kWh_{i}'}, inplace=True)  # Correctly rename the last merged column
    return df_merged



# Merge FNSB bins without averaging (just summing the columns)
dfnsb = merge_dataframes([dffnsbbin1, dffnsbbin2, dffnsbbin3])
dfnsb['Total_HP_kWh_b3'] = dfnsb[[f'HP_kWh_{i}' for i in range(1, 4)]].sum(axis=1)

# Merge and average for each set of Salcha, Fairbanks, and North Pole
dfsal = merge_dataframes([dfsal1, dfsal2, dfsal3])
dffbk = merge_dataframes([dffbk1, dffbk2, dffbk3])
dfnp = merge_dataframes([dfnp1, dfnp2, dfnp3])

# Combining averages
dfc3 = pd.DataFrame()
dfc3['timestamp'] = dfsal['timestamp']
dfc3['Total_HP_kWh'] = dfsal.iloc[:, 1:].sum(axis=1) + dffbk.iloc[:, 1:].sum(axis=1) + dfnp.iloc[:, 1:].sum(axis=1)

# Merging FNSB total with combined averages
merged_df = pd.merge(dfnsb[['timestamp', 'Total_HP_kWh_b3']], dfc3, on='timestamp')
merged_df['variance'] = (merged_df['Total_HP_kWh_b3'] - merged_df['Total_HP_kWh']).abs()
merged_df['percent_difference'] = merged_df['variance'] / merged_df[['Total_HP_kWh_b3', 'Total_HP_kWh']].mean(axis=1) * 100

# Sorting and selecting top variances
top_variances = merged_df.sort_values(by='variance', ascending=False).head(5)

# Display the results
print(top_variances[['timestamp', 'Total_HP_kWh_b3', 'Total_HP_kWh', 'variance', 'percent_difference']])



Empty DataFrame
Columns: [timestamp, Total_HP_kWh_b3, Total_HP_kWh, variance, percent_difference]
Index: []


In [8]:
import pandas as pd

# Load and scale data for different bins
def load_and_scale(file_path, scale_factor):
    df = pd.read_csv(file_path)
    df['HP_kWh'] *= scale_factor
    return df

# Load the data
dffnsbbin1 = load_and_scale('FNSB_Bin1.csv', 23016)
dffnsbbin2 = load_and_scale('FNSB_Bin2.csv', 1263)
dffnsbbin3 = load_and_scale('FNSB_Bin3.csv', 14)
dfsal1 = load_and_scale('Salcha_bin1.csv', 275)
dfsal2 = load_and_scale('Salcha_bin2.csv', 183)
dfsal3 = load_and_scale('Salcha_bin3.csv', 18)
dffbk1 = load_and_scale('FBK_bin1.csv', 15840)
dffbk2 = load_and_scale('FBK_bin2.csv', 853)
dffbk3 = load_and_scale('FBK_bin3.csv', 10)
dfnp1 = load_and_scale('NP_bin1.csv', 6329)
dfnp2 = load_and_scale('NP_bin2.csv', 778)
dfnp3 = load_and_scale('NP_bin3.csv', 7)

# Function to merge dataframes without averaging
def merge_dataframes(dfs):
    df_merged = pd.merge(dfs[0], dfs[1], on='timestamp', suffixes=('_1', '_2'))
    for i, df in enumerate(dfs[2:], start=3):
        df_merged = pd.merge(df_merged, df[['timestamp', 'HP_kWh']], on='timestamp')
        df_merged.rename(columns={'HP_kWh': f'HP_kWh_{i}'}, inplace=True)

    return df_merged

# Function to merge dataframes and calculate average
def merge_dataframes(dfs):
    df_merged = pd.merge(dfs[0][['timestamp', 'HP_kWh']], dfs[1][['timestamp', 'HP_kWh']], on='timestamp', suffixes=('_1', '_2'))
    for i, df in enumerate(dfs[2:], start=3):  # start=3 because we already have _1 and _2
        df_merged = pd.merge(df_merged, df[['timestamp', 'HP_kWh']], on='timestamp')
        df_merged.rename(columns={'HP_kWh': f'HP_kWh_{i}'}, inplace=True)  # Correctly rename the last merged column
    return df_merged


# Merge for FNSB bins without averaging
dfnsb = merge_dataframes([dffnsbbin1, dffnsbbin2, dffnsbbin3])

# Calculate total HP_kWh for each merged dataframe
total_HP_kWh_b3 = dfnsb[['HP_kWh_1', 'HP_kWh_2', 'HP_kWh_3']].sum(axis=1).sum()

# Merge and average for each set of Salcha, Fairbanks, and North Pole
dfsal = merge_dataframes([dfsal1, dfsal2, dfsal3])
dffbk = merge_dataframes([dffbk1, dffbk2, dffbk3])
dfnp = merge_dataframes([dfnp1, dfnp2, dfnp3])

# Combine the averages of Salcha, Fairbanks, and North Pole
dfc3 = pd.DataFrame()
dfc3['timestamp'] = dfsal['timestamp']
dfc3['Total_HP_kWh'] = dfsal.iloc[:, 1:].sum(axis=1) + dffbk.iloc[:, 1:].sum(axis=1) + dfnp.iloc[:, 1:].sum(axis=1)

total_HP_kWh_c3 = dfc3['Total_HP_kWh'].sum()

variance = abs(total_HP_kWh_b3 - total_HP_kWh_c3)
percent_difference = (variance / ((total_HP_kWh_b3 + total_HP_kWh_c3) / 2)) * 100

# Display the results
print(f"Total HP_kWh in dfnsb: {total_HP_kWh_b3}")
print(f"Total HP_kWh in dfc3: {total_HP_kWh_c3}")
print(f"Variance: {variance}")
print(f"Percent Difference: {percent_difference:.2f}%")


Total HP_kWh in dfnsb: 6398057239.171097
Total HP_kWh in dfc3: 71463865.70687741
Variance: 6326593373.464219
Percent Difference: 195.58%


In [4]:
import pandas as pd

# Load and scale data for different bins
def load_and_scale(file_path, scale_factor):
    df = pd.read_csv(file_path)
    df['HP_kWh'] *= scale_factor
    df['timestamp'] = pd.to_datetime(df['timestamp'])  # Ensure timestamp is in datetime format
    return df

# Load the data
dffnsbbin1 = load_and_scale('FNSB_Bin1.csv', 23016)
dffnsbbin2 = load_and_scale('FNSB_Bin2.csv', 1263)
dffnsbbin3 = load_and_scale('FNSB_Bin3.csv', 14)
dfsal1 = load_and_scale('Salcha_bin1.csv', 275)
dfsal2 = load_and_scale('Salcha_bin2.csv', 183)
dfsal3 = load_and_scale('Salcha_bin3.csv', 18)
dffbk1 = load_and_scale('FBK_bin1.csv', 15840)
dffbk2 = load_and_scale('FBK_bin2.csv', 853)
dffbk3 = load_and_scale('FBK_bin3.csv', 10)
dfnp1 = load_and_scale('NP_bin1.csv', 6329)
dfnp2 = load_and_scale('NP_bin2.csv', 778)
dfnp3 = load_and_scale('NP_bin3.csv', 7)

# Function to merge dataframes without averaging
def merge_dataframes(dfs):
    df_merged = pd.merge(dfs[0][['timestamp', 'HP_kWh']], dfs[1][['timestamp', 'HP_kWh']], on='timestamp', suffixes=('_1', '_2'))
    for i, df in enumerate(dfs[2:], start=3):  # start=3 because we already have _1 and _2
        df_merged = pd.merge(df_merged, df[['timestamp', 'HP_kWh']], on='timestamp')
        df_merged.rename(columns={'HP_kWh': f'HP_kWh_{i}'}, inplace=True)  # Correctly rename the last merged column
    return df_merged

# Merge FNSB bins without averaging (just summing the columns)
dfnsb = merge_dataframes([dffnsbbin1, dffnsbbin2, dffnsbbin3])
dfnsb['Total_HP_kWh_b3'] = dfnsb[[f'HP_kWh_{i}' for i in range(1, 4)]].sum(axis=1)

# Merge and average for each set of Salcha, Fairbanks, and North Pole
dfsal = merge_dataframes([dfsal1, dfsal2, dfsal3])
dffbk = merge_dataframes([dffbk1, dffbk2, dffbk3])
dfnp = merge_dataframes([dfnp1, dfnp2, dfnp3])

# Combining averages
dfc3 = pd.DataFrame()
dfc3['timestamp'] = dfsal['timestamp']
dfc3['Total_HP_kWh'] = dfsal.iloc[:, 1:].sum(axis=1) + dffbk.iloc[:, 1:].sum(axis=1) + dfnp.iloc[:, 1:].sum(axis=1)

# Merging FNSB total with combined averages
merged_df = pd.merge(dfnsb[['timestamp', 'Total_HP_kWh_b3']], dfc3, on='timestamp', how='inner')
merged_df['variance'] = (merged_df['Total_HP_kWh_b3'] - merged_df['Total_HP_kWh']).abs()
merged_df['percent_difference'] = merged_df['variance'] / merged_df[['Total_HP_kWh_b3', 'Total_HP_kWh']].mean(axis=1) * 100

# Sorting and selecting top percent differences
top_percent_differences = merged_df.sort_values(by='percent_difference', ascending=False).head(5)

# Display the results
print(top_percent_differences[['timestamp', 'Total_HP_kWh_b3', 'Total_HP_kWh', 'variance', 'percent_difference']])


               timestamp  Total_HP_kWh_b3   Total_HP_kWh      variance  \
1439 2018-03-01 23:30:00     2.754826e+06  424512.487579  2.330314e+06   
1438 2018-03-01 22:30:00     2.716071e+06  423562.588732  2.292509e+06   
1437 2018-03-01 21:30:00     2.615492e+06  414893.577823  2.200598e+06   
8136 2018-12-06 00:30:00     2.545466e+06  407913.362682  2.137553e+06   
1711 2018-03-13 07:30:00     1.705185e+06  273602.386172  1.431582e+06   

      percent_difference  
1439          146.591096  
1438          146.036688  
1437          145.235540  
8136          144.753000  
1711          144.692914  


In [6]:
import pandas as pd

# Load and scale data for different bins
def load_and_scale(file_path, scale_factor):
    df = pd.read_csv(file_path)
    df['HP_kWh'] *= scale_factor
    df['timestamp'] = pd.to_datetime(df['timestamp'])  # Ensure timestamp is in datetime format
    return df

# Load the data
dffnsbbin1 = load_and_scale('FNSB_Bin1.csv', 23016)
dffnsbbin2 = load_and_scale('FNSB_Bin2.csv', 1263)
dffnsbbin3 = load_and_scale('FNSB_Bin3.csv', 14)
dfsal1 = load_and_scale('Salcha_bin1.csv', 275)
dfsal2 = load_and_scale('Salcha_bin2.csv', 183)
dfsal3 = load_and_scale('Salcha_bin3.csv', 18)
dffbk1 = load_and_scale('FBK_bin1.csv', 15840)
dffbk2 = load_and_scale('FBK_bin2.csv', 853)
dffbk3 = load_and_scale('FBK_bin3.csv', 10)
dfnp1 = load_and_scale('NP_bin1.csv', 6329)
dfnp2 = load_and_scale('NP_bin2.csv', 778)
dfnp3 = load_and_scale('NP_bin3.csv', 7)

# Function to merge dataframes without averaging
def merge_dataframes(dfs):
    df_merged = pd.merge(dfs[0], dfs[1], on='timestamp', suffixes=('_1', '_2'))
    for i, df in enumerate(dfs[2:], start=3):
        df_merged = pd.merge(df_merged, df[['timestamp', 'HP_kWh']], on='timestamp')
        df_merged.rename(columns={'HP_kWh': f'HP_kWh_{i}'}, inplace=True)
    return df_merged

# Merge for FNSB bins without averaging
dfnsb = merge_dataframes([dffnsbbin1, dffnsbbin2, dffnsbbin3])

# Calculate total HP_kWh for each merged dataframe
dfnsb['Total_HP_kWh_b3'] = dfnsb[['HP_kWh_1', 'HP_kWh_2', 'HP_kWh_3']].sum(axis=1)
total_HP_kWh_b3 = dfnsb['Total_HP_kWh_b3'].sum()

# Merge and average for each set of Salcha, Fairbanks, and North Pole
dfsal = merge_dataframes([dfsal1, dfsal2, dfsal3])
dffbk = merge_dataframes([dffbk1, dffbk2, dffbk3])
dfnp = merge_dataframes([dfnp1, dfnp2, dfnp3])

# Combine the totals of Salcha, Fairbanks, and North Pole
dfc3 = pd.DataFrame()
dfc3['timestamp'] = dfsal['timestamp']
dfc3['Total_HP_kWh'] = dfsal.filter(like='HP_kWh').sum(axis=1) + dffbk.filter(like='HP_kWh').sum(axis=1) + dfnp.filter(like='HP_kWh').sum(axis=1)
total_HP_kWh_c3 = dfc3['Total_HP_kWh'].sum()

# Calculate variance and percent difference
variance = abs(total_HP_kWh_b3 - total_HP_kWh_c3)
percent_difference = (variance / ((total_HP_kWh_b3 + total_HP_kWh_c3) / 2)) * 100

# Display the results
print(f"Total HP_kWh in dfnsb: {total_HP_kWh_b3}")
print(f"Total HP_kWh in dfc3: {total_HP_kWh_c3}")
print(f"Variance: {variance}")
print(f"Percent Difference: {percent_difference:.2f}%")


Total HP_kWh in dfnsb: 6398057239.171097
Total HP_kWh in dfc3: 1945444370.6714609
Variance: 4452612868.499636
Percent Difference: 106.73%
