In [17]:
import pandas as pd

# Backfill legality check: a backfilled value is kept only when none of the
# neighbour columns (D-3 ~ D+3) is 0 or missing; otherwise it is reset to 0
# in a new 'verified_backfilled_MM_Wh' column.

# Step 1: Load data
df = pd.read_csv('Combined_data_backfilled.csv', parse_dates=['dateTimeStmp'])

# Step 2: Define columns to check (D-3 ~ D+3, excluding center)
cols_to_check = ['D-3', 'D-2', 'D-1', 'D+1', 'D+2', 'D+3']

# Fail early with a readable message if the file lacks a column used below
# (a plain column lookup would also raise KeyError, but name only one column).
required_cols = ['raw_MM_Wh', 'backfilled_MM_Wh', *cols_to_check]
missing_cols = [col for col in required_cols if col not in df.columns]
if missing_cols:
    raise KeyError(f"Missing expected columns in input data: {missing_cols}")

# Step 3: Identify rows that were backfilled (raw_MM_Wh == 0 and backfilled_MM_Wh ≠ 0)
backfilled_rows = df['raw_MM_Wh'] == 0
# NaN != 0 evaluates to True in pandas, so also require a non-missing value;
# otherwise a row with no backfilled value at all would count as backfilled.
predicted_rows = df['backfilled_MM_Wh'].notna() & (df['backfilled_MM_Wh'] != 0)
# A neighbour is unusable when it is 0 OR missing. NaN == 0 is False, so NaN
# must be tested explicitly or it silently passes the check.
# NOTE(review): this treats NaN neighbours the same as 0 — confirm that matches
# how the upstream backfill step encodes missing readings.
neighbor_unusable = (df[cols_to_check].isna() | (df[cols_to_check] == 0)).any(axis=1)
invalid_mask = backfilled_rows & predicted_rows & neighbor_unusable

# Step 4: Create a new verified column
# Start from the backfilled values, then zero out the ones that failed the check.
df['verified_backfilled_MM_Wh'] = df['backfilled_MM_Wh']
df.loc[invalid_mask, 'verified_backfilled_MM_Wh'] = 0

# Step 5: Summary report
total_backfilled = (backfilled_rows & predicted_rows).sum()
invalid_count = invalid_mask.sum()
valid_count = total_backfilled - invalid_count

print("✅ Step 4 Backfill Legality Check Summary:")
print(f"Total backfilled rows: {total_backfilled}")
print(f"Invalid backfilled rows (have 0 in D-3 ~ D+3): {invalid_count}")
print(f"Valid backfilled rows: {valid_count}")

✅ Step 4 Backfill Legality Check Summary:
Total backfilled rows: 543
Invalid backfilled rows (have 0 in D-3 ~ D+3): 0
Valid backfilled rows: 543


In [None]:
# Step 6 (optional): Write the frame, including the verified column, to disk.
# The row index is omitted so the file contains only the data columns.
verified_csv_path = 'backfilled_data_verified.csv'
df.to_csv(verified_csv_path, index=False)
print(f"📁 Saved verified backfilled data to '{verified_csv_path}'")