In [171]:
import pandas as pd

# Read both validation workbooks. header=[0, 1] makes pandas use the first two
# rows of each sheet as a two-level (MultiIndex) column header, so every column
# label below is a (level_0, level_1) tuple.
df1 = pd.read_excel("novolex_validation_data.xlsx", header=[0, 1])
df2 = pd.read_excel("rwc_validation_data.xlsx", header=[0, 1])

# Target column order: start from df1's columns, in df1's order.
df1_cols = list(df1.columns)
df2_cols = list(df2.columns)
combined_cols = list(df1_cols)

# Slot each df2-only column in right after the last column already present
# that shares its top-level label; if no such column exists, append at the end.
for new_col in df2_cols:
    if new_col in combined_cols:
        continue
    same_group = [existing for existing in combined_cols if existing[0] == new_col[0]]
    position = (
        combined_cols.index(same_group[-1]) + 1 if same_group else len(combined_cols)
    )
    combined_cols.insert(position, new_col)

# Stack the rows of both frames (fresh 0..n-1 index), then select the columns
# in the combined order built above.
merged_df = pd.concat([df1, df2], ignore_index=True, sort=False)[combined_cols]


In [172]:
# Show the column index of merged_df (two-level tuples) as the cell output.
merged_df.columns

MultiIndex([('Unnamed: 0_level_0',                  'Remarks/Key notes'),
            (    'Billing Period',                      'Billing Date '),
            (    'Billing Period',                              'Month'),
            (    'Billing Period',                               'From'),
            (    'Billing Period',                                 'To'),
            (    'Billing Period',                         'No of Days'),
            (               'kWh',                          'Day \nkWh'),
            (               'kWh',                         'Night\nkWh'),
            (               'kWh',                           'Off-Peak'),
            (               'kWh',                     'Super-Off-Peak'),
            (               'kWh',                           'Only kWh'),
            (               'kWh',                          'Total kWh'),
            (               'kWh',                       ' kWh per day'),
            (               'kWh',    

In [173]:
def _dmy_to_mdy_text(frame, key):
    """Parse column `key` of `frame` as day/month/4-digit-year and overwrite it
    with month/day/2-digit-year strings."""
    parsed = pd.to_datetime(frame[key], format='%d/%m/%Y')
    frame[key] = parsed.dt.strftime('%m/%d/%y')


# NOTE: the trailing space in 'Billing Date ' is part of the column label.
_dmy_to_mdy_text(merged_df, ('Billing Period', 'Billing Date '))

# 'Month' is formatted straight through the .dt accessor with no parsing step,
# so the column must already hold datetime values when this cell runs.
month_key = ('Billing Period', 'Month')
merged_df[month_key] = merged_df[month_key].dt.strftime('%b-%y')

_dmy_to_mdy_text(merged_df, ('Billing Period', 'From'))
_dmy_to_mdy_text(merged_df, ('Billing Period', 'To'))

In [174]:
# Coerce ('kWh', 'kWh %') to a numeric dtype; values that cannot be parsed
# become NaN. No rows are dropped: merged_df is updated in place and then
# copied whole into df_cleaned.
kwh_pct_key = ('kWh', 'kWh %')
merged_df[kwh_pct_key] = pd.to_numeric(merged_df[kwh_pct_key], errors='coerce')
df_cleaned = merged_df.copy()


In [175]:
# Columns that are multiplied by 100 and then rounded, mapped to the number of
# decimals kept (presumably fraction -> percent; confirm against the workbook).
# NOTE: this rescales df_cleaned in place, so running the cell twice
# multiplies these columns by 100 twice.
scaled_decimals = {
    ('kWh', 'kWh %'): 0,
    ('Taxes', 'Alabama State Taxes\n$.1'): 2,
    ('Taxes', 'City of Cullman Tax \n$.1'): 2,
    ('Taxes', 'Total tax\n%'): 2,
}
for column_key, decimals in scaled_decimals.items():
    df_cleaned[column_key] = (df_cleaned[column_key] * 100).round(decimals)

# Columns that are only rounded, mapped to the number of decimals kept.
rounded_decimals = {
    ('$ Amount', 'Blended rate\n$/kWh'): 3,
    ('$ Amount', 'Blended rate\n$/kWh\n(With VAT)'): 2,
    ('$ Amount', 'Blended rate\n$/kWh\n(Without VAT)'): 2,
}
for column_key, decimals in rounded_decimals.items():
    df_cleaned[column_key] = df_cleaned[column_key].round(decimals)




 

In [176]:
# Show df_cleaned as the cell output for visual inspection.
df_cleaned

Unnamed: 0_level_0,Unnamed: 0_level_0,Billing Period,Billing Period,Billing Period,Billing Period,Billing Period,kWh,kWh,kWh,kWh,...,Taxes,$ Amount,$ Amount,$ Amount,$ Amount,$ Amount,$ Amount,$ Amount,$ Amount,$ Amount
Unnamed: 0_level_1,Remarks/Key notes,Billing Date,Month,From,To,No of Days,Day \nkWh,Night\nkWh,Off-Peak,Super-Off-Peak,...,City of Cullman Tax \n$.1,Generation/Retail\n $ amount,Total\n $ amount\n(With VAT),Total $ amount\n(Without VAT),Total $ amount,Generation/Retail rate\n$/kWh,Blended rate\n$/kWh\n(With VAT),Blended rate\n$/kWh\n(Without VAT),Distribution\n $ amount,Blended rate\n$/kWh
0,,03/06/24,Mar-24,02/01/24,02/29/24,28,242413.4,72594.89,,,...,,,69080.24,57575.87,,0,0.22,0.18,,
1,,04/06/24,Apr-24,03/01/24,03/31/24,30,245223.8,75892.39,,,...,,,70568.17,58815.81,,0,0.22,0.18,,
2,,05/14/24,May-24,04/01/24,04/30/24,29,272376.2,83089.7,,,...,,,73901.95,61584.95,,0,0.21,0.17,,
3,,06/13/24,Jun-24,05/01/24,05/31/24,30,296960.7,93978.1,,,...,,,81385.67,67830.39,,0,0.21,0.17,,
4,,07/06/24,Jul-24,06/01/24,06/30/24,29,311678.0,98229.2,,,...,,,85087.9,70906.58,,0,0.21,0.17,,
5,,08/06/24,Aug-24,07/01/24,07/31/24,30,365071.2,113220.5,,,...,,,98361.69,81977.08,,0,0.21,0.17,,
6,,09/08/24,Sep-24,08/01/24,08/31/24,30,306021.5,94871.0,,,...,,,82795.98,69005.65,,0,0.21,0.17,,
7,,10/06/24,Oct-24,09/01/24,09/30/24,29,292707.3,87842.5,,,...,,,80306.98,66931.48,,0,0.21,0.18,,
8,,11/12/24,Nov-24,10/01/24,10/31/24,30,270283.8,76057.39,,,...,,,82986.14,69173.11,,0,0.24,0.2,,
9,,12/06/24,Dec-24,11/01/24,11/30/24,29,244718.9,68130.5,,,...,,,75295.09,62763.92,,0,0.24,0.2,,


In [177]:
# Write df_cleaned to an Excel workbook at a path relative to the working
# directory. No index argument is passed, so pandas' default applies
# (the row index is written as well).
df_cleaned.to_excel("combined_validation_data.xlsx")

In [None]:
Before processing : Billing Dates String:
['04/30/2024', '01/30/2024', '02/29/2024', '03/30/2024', '06/30/2024', '12/30/2023', '05/30/2024', '07/30/2024', '09/30/2023', '11/30/2023', '10/30/2023', '08/30/2024']
After Processing --------Converted Billing Dates String: 
['4/30/24', '1/30/24', '2/29/24', '3/30/24', '6/30/24', '12/30/23', '5/30/24', '7/30/24', '9/30/23', '11/30/23', '10/30/23', '8/30/24']
After sorting ---- Converted Billing Dates: 
[datetime.datetime(2023, 9, 30, 0, 0), datetime.datetime(2023, 10, 30, 0, 0), datetime.datetime(2023, 11, 30, 0, 0), datetime.datetime(2023, 12, 30, 0, 0), datetime.datetime(2024, 1, 30, 0, 0), datetime.datetime(2024, 2, 29, 0, 0), datetime.datetime(2024, 3, 30, 0, 0), datetime.datetime(2024, 4, 30, 0, 0), datetime.datetime(2024, 5, 30, 0, 0), datetime.datetime(2024, 6, 30, 0, 0), datetime.datetime(2024, 7, 30, 0, 0), datetime.datetime(2024, 8, 30, 0, 0)]