In [103]:
import pandas as pd
from openpyxl import load_workbook
from openpyxl.utils import get_column_letter
from copy import copy
from openpyxl.cell.cell import MergedCell

In [104]:
# === CONFIGURATION ===
date_str = "8th March"
target_date = pd.to_datetime("2024-03-8")

# === Load Source Data ===
source_file = "03 March  25 Gross Gen.xlsx"
summary_df = pd.read_excel(source_file, sheet_name="Summary", header=None)


In [105]:
summary_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,,,,,,,,,,,
1,,,Thika power limited,,,,,ISSUE DATE: 1-Mar-24,,,
2,,,,,,,,REV: 0,,,
3,,,,,,,,DOC NO: 12,,,
4,GROSS GENERATION SUMMARY ( MMU ),,,,,,,,,,


In [106]:
print(f"Total rows in summary_df: {len(summary_df)}")


Total rows in summary_df: 50


In [107]:
# Extract data section and clean it
data_df = summary_df.iloc[9:].copy()

# Reset index
data_df = data_df.reset_index(drop=True)

# Rename columns manually
data_df.columns = [
    "DATE", "DG SET .1", "DG SET .2", "DG SET .3", "DG SET .4", 
    "DG SET .5", "STG", "DAILY TOTAL MWH", "PLANT GROSS", "ENG GROSS", "STG2"
]

# Add a new parsed date column (but don’t drop rows!)
data_df["DATE_PARSED"] = pd.to_datetime(data_df["DATE"], errors='coerce')



In [108]:
def extract_monthly_mwh(data_df, total_label="Total MWH", mwh_column_name="DAILY TOTAL MWH"):
    # Search all columns for a match (case-insensitive, trims spaces)
    total_row = data_df[data_df.apply(
        lambda row: row.astype(str).str.strip().str.lower().str.contains(total_label.lower()).any(), axis=1
    )]

    if total_row.empty:
        print(f"No row found containing '{total_label}'")
        return None 

    # Get value from the correct column
    value = total_row[mwh_column_name].values[0] if mwh_column_name in total_row.columns else None
    print(f"Monthly MWH from row labeled '{total_label}': {value}")
    return value

extract_monthly_mwh(data_df)

Monthly MWH from row labeled 'Total MWH': 8001.872000000076


8001.872000000076

In [109]:
data_df.head()

Unnamed: 0,DATE,DG SET .1,DG SET .2,DG SET .3,DG SET .4,DG SET .5,STG,DAILY TOTAL MWH,PLANT GROSS,ENG GROSS,STG2,DATE_PARSED
0,2024-03-01 00:00:00,43.712,42.944,85.696,85.688,56.256,0.0,314.296,314296.0,314296.0,0.0,2024-03-01
1,2024-03-02 00:00:00,64.128,27.552,34.112,29.432,67.296,0.0,222.52,222520.0,222520.0,0.0,2024-03-02
2,2024-03-03 00:00:00,84.16,50.24,50.464,82.688,49.888,0.0,317.44,317440.0,317440.0,0.0,2024-03-03
3,2024-03-04 00:00:00,91.296,60.736,56.992,85.688,56.608,0.0,351.32,351320.0,351320.0,0.0,2024-03-04
4,2024-03-05 00:00:00,71.84,69.568,116.608,114.744,78.016,18.9,469.676,469676.0,450776.0,18900.0,2024-03-05


In [110]:
data_df.tail(15)

Unnamed: 0,DATE,DG SET .1,DG SET .2,DG SET .3,DG SET .4,DG SET .5,STG,DAILY TOTAL MWH,PLANT GROSS,ENG GROSS,STG2,DATE_PARSED
26,2024-03-27 00:00:00,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,2024-03-27
27,2024-03-28 00:00:00,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,2024-03-28
28,2024-03-29 00:00:00,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,2024-03-29
29,2024-03-30 00:00:00,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,2024-03-30
30,2024-03-31 00:00:00,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,2024-03-31
31,Total MWH,1700.832,1427.616,1824.576,1514.992,1304.256,229.6,8001.872,8001872.0,7772272.0,229600.0,NaT
32,,,,,,,,,,,,NaT
33,,,,,,,,,,,,NaT
34,,Verified by:,,,,,,,,,,NaT
35,,,,,,,,,,,,NaT


In [111]:
data_df = data_df[pd.to_datetime(data_df["DATE"], errors='coerce').notna()]
data_df["DATE"] = pd.to_datetime(data_df["DATE"])


In [112]:
data_df.head()

Unnamed: 0,DATE,DG SET .1,DG SET .2,DG SET .3,DG SET .4,DG SET .5,STG,DAILY TOTAL MWH,PLANT GROSS,ENG GROSS,STG2,DATE_PARSED
0,2024-03-01,43.712,42.944,85.696,85.688,56.256,0.0,314.296,314296.0,314296.0,0.0,2024-03-01
1,2024-03-02,64.128,27.552,34.112,29.432,67.296,0.0,222.52,222520.0,222520.0,0.0,2024-03-02
2,2024-03-03,84.16,50.24,50.464,82.688,49.888,0.0,317.44,317440.0,317440.0,0.0,2024-03-03
3,2024-03-04,91.296,60.736,56.992,85.688,56.608,0.0,351.32,351320.0,351320.0,0.0,2024-03-04
4,2024-03-05,71.84,69.568,116.608,114.744,78.016,18.9,469.676,469676.0,450776.0,18900.0,2024-03-05


In [113]:
# Find the MWH value for the date
daily_row = data_df[data_df["DATE"] == target_date]
mwh_value = daily_row["DAILY TOTAL MWH"].values[0] if not daily_row.empty else None


In [114]:
def extract_monthly_mwh(data_df, total_label="Total MWH", mwh_column_name="DAILY TOTAL MWH"):
    # Search each row for a cell that exactly (or nearly) contains "Total MWH"
    total_row = data_df[
        data_df.apply(lambda row: row.astype(str).str.strip().str.lower().eq(total_label.lower()).any(), axis=1)
    ]

    if total_row.empty:
        print(f"No row found containing '{total_label}'")
        return None 

    # Extract the value from the MWH column if it exists
    if mwh_column_name in total_row.columns:
        value = total_row[mwh_column_name].values[0]
        print(f"Monthly MWH from row labeled '{total_label}': {value}")
        return value
    else:
        print(f"'{mwh_column_name}' not found in the columns.")
        return None



In [115]:
extract_monthly_mwh(data_df)

No row found containing 'Total MWH'


In [116]:
print(data_df.tail(10))  # Show last 10 rows


         DATE DG SET .1 DG SET .2 DG SET .3 DG SET .4 DG SET .5 STG  \
21 2024-03-22         0         0         0         0         0   0   
22 2024-03-23         0         0         0         0         0   0   
23 2024-03-24         0         0         0         0         0   0   
24 2024-03-25         0         0         0         0         0   0   
25 2024-03-26         0         0         0         0         0   0   
26 2024-03-27         0         0         0         0         0   0   
27 2024-03-28         0         0         0         0         0   0   
28 2024-03-29         0         0         0         0         0   0   
29 2024-03-30         0         0         0         0         0   0   
30 2024-03-31         0         0         0         0         0   0   

   DAILY TOTAL MWH PLANT GROSS ENG GROSS STG2 DATE_PARSED  
21               0           0         0    0  2024-03-22  
22               0           0         0    0  2024-03-23  
23               0           0        

In [117]:
# === Write to Daily Report ===
if mwh_value is not None:
    report_file = "Daily production report March 2025.xlsx"
    wb = load_workbook(report_file)
    
     # Check if a sheet with the given date_str exists
    if date_str in wb.sheetnames:
        # If the sheet exists, update the value in cell B8
        ws = wb[date_str]
        ws["B8"] = mwh_value
        wb.save("Daily production report March 2025.xlsx")
        print(f"Inserted {mwh_value} into sheet '{date_str}' cell B8.")
    
    # If the sheet doesn't exist, create a new sheet with the date_str name
    else:
        # Find an existing sheet to duplicate (use the first sheet here as an example)
        sheet_to_duplicate = wb.worksheets[2]  # You can adjust this if you want a specific sheet
        
        # Create a new sheet by copying the content of the original sheet
        new_sheet = wb.copy_worksheet(sheet_to_duplicate)

        
        
        # Rename the new sheet to the current date_str
        new_sheet.title = date_str

        for row in sheet_to_duplicate.iter_rows():
            for cell in row:
            # Skip MergedCells that are not the actual top-left anchor
                if isinstance(cell, MergedCell):
                    continue

                new_cell = new_sheet.cell(row=cell.row, column=cell.column)
                                        
                # Copy styles
                if cell.has_style:
                    new_cell.font = copy(cell.font)
                    new_cell.border = copy(cell.border)
                    new_cell.fill = copy(cell.fill)
                    new_cell.number_format = copy(cell.number_format)
                    new_cell.protection = copy(cell.protection)
                    new_cell.alignment = copy(cell.alignment)

            
                # Copy only formulas or static labels (no user-filled values)
                if cell.data_type == 'f':
                    new_cell.value = f"={cell.value}"
                elif isinstance(cell.value, str) and cell.value.strip() != "":
                    new_cell.value = cell.value  # Copy headers/static text
                else:
                    new_cell.value = None  # Clear user-entered numbers or blanks

        # Copy merged cell ranges
        for merged_range in sheet_to_duplicate.merged_cells.ranges:
            new_sheet.merge_cells(str(merged_range))
        # Copy column widths
        for col in sheet_to_duplicate.column_dimensions:
            new_sheet.column_dimensions[col].width = sheet_to_duplicate.column_dimensions[col].width

        # Copy row heights
        for row_dim in sheet_to_duplicate.row_dimensions:
            new_sheet.row_dimensions[row_dim].height = sheet_to_duplicate.row_dimensions[row_dim].height

        #Rename cell B3 with correct name
        new_sheet["B3"].value = f"{date_str} 2025"


        
        # Assign the new sheet to `ws` so we can insert value below
        ws = new_sheet
        print(f"New sheet '{date_str}' has been created.")
        
   

    # Insert the value into B8, regardless of whether the sheet existed or was just created
    ws["B8"] = mwh_value

    # Insert the monthly total MWH into B9
    monthly_mwh = extract_monthly_mwh(data_df)  # Call your earlier function here
    if monthly_mwh is not None:
        ws["B9"] = monthly_mwh
        print(f"Inserted monthly MWH value {monthly_mwh} into cell B9.")
    else:
        print("Monthly MWH value not found. Skipping B9 insertion.")

    # Step: Extract Annual MWH from Gross Gen Summary 2025
    try:
        gross_wb = load_workbook("Gross Gen. Summary 2025.xlsx", data_only=True)
        gross_ws = gross_wb.active  # Or specify by name if needed: gross_wb["SomeSheetName"]
        annual_mwh_value = gross_ws["H17"].value
        print(f"Annual MWH value extracted from H17: {annual_mwh_value}")
    except Exception as e:
        annual_mwh_value = None
        print(f"Error extracting annual MWH value: {e}")

    # Step: Add to B10 in the Daily Report
    if annual_mwh_value is not None:
        ws["B10"] = annual_mwh_value
        print(f"Inserted annual MWH value {annual_mwh_value} into cell B10.")
    else:
        print("Annual MWH value not found. Skipping B10 insertion.")


    wb.save("Daily production report March 2025 - updated.xlsx")

    print(f"Inserted {mwh_value} into sheet '{date_str}' cell B8.")
else:
    print("No data found for that date.")

New sheet '8th March' has been created.
No row found containing 'Total MWH'
Monthly MWH value not found. Skipping B9 insertion.
Annual MWH value extracted from H17: 32133.028000000064
Inserted annual MWH value 32133.028000000064 into cell B10.
Inserted 388.46399999993446 into sheet '8th March' cell B8.


In [118]:
# Step: Extract Annual MWH from Gross Gen Summary 2025
try:
    gross_wb = load_workbook("Gross Gen. Summary 2025.xlsx", data_only=True)
    gross_ws = gross_wb.active  # Or specify by name if needed: gross_wb["SomeSheetName"]
    annual_mwh_value = gross_ws["H17"].value
    print(f"Annual MWH value extracted from H17: {annual_mwh_value}")
except Exception as e:
    annual_mwh_value = None
    print(f"Error extracting annual MWH value: {e}")

# Step: Add to B10 in the Daily Report
if annual_mwh_value is not None:
    ws["B10"] = annual_mwh_value
    print(f"Inserted annual MWH value {annual_mwh_value} into cell B10.")
else:
    print("Annual MWH value not found. Skipping B10 insertion.")


Annual MWH value extracted from H17: 32133.028000000064
Inserted annual MWH value 32133.028000000064 into cell B10.
