# All OpEx IT Expenses - 2026 Budget Summary
For each target site sheet, this notebook finds the row whose Column B label is 'All OpEx IT Expenses' and reads that row's 2026 budget value from Column H. The per-site values are then summarised and exported to CSV.

In [2]:
import pandas as pd
import os
from pathlib import Path

# Configuration
# Name of the workbook to analyse; the folder it lives in is resolved below.
excel_filename = "2026 BRM Budget Template - 7.07.2025_Offline.xlsx"

# Candidate locations, checked in order - the first one that holds the file wins.
# The Documents/Downloads entries are derived from the current user's home
# directory instead of a hard-coded account name, so the notebook works for
# whoever runs it. as_posix() keeps forward slashes, matching the other entries.
user_home = Path.home()
possible_paths = [
    f"data/raw/{excel_filename}",  # Known correct location - prioritize this
    f"../data/raw/{excel_filename}",  # Parent data folder
    excel_filename,  # Current directory
    (user_home / "Documents" / excel_filename).as_posix(),  # Documents
    (user_home / "Downloads" / excel_filename).as_posix(),  # Downloads
]

# isfile (rather than exists) so a directory that happens to share the name is
# never mistaken for the workbook.
excel_path = next(
    (candidate for candidate in possible_paths if os.path.isfile(candidate)),
    None,
)

if excel_path is None:
    # Show the absolute form of every location tried so the user can see where
    # the relative entries resolve from the current working directory.
    print(f"❌ Excel file '{excel_filename}' not found in any of these locations:")
    for candidate in possible_paths:
        print(f"   - {os.path.abspath(candidate)}")
    print("\nPlease:")
    print("1. Check the filename is correct")
    print("2. Place the file in one of the above locations, or")
    print("3. Update the excel_filename variable with the correct path")
    raise FileNotFoundError(f"Excel file not found: {excel_filename}")

print(f"✅ Found Excel file at: {excel_path}")

# Load the Excel file. `xls` stays open because later cells parse sheets from it.
try:
    xls = pd.ExcelFile(excel_path)
    print(f"📊 Successfully loaded Excel file with {len(xls.sheet_names)} sheets")
except Exception as e:
    # Report the problem in the notebook output, then let the error propagate.
    print(f"❌ Error loading Excel file: {e}")
    raise

# List of target worksheets to analyse
target_sheets = [
    "224 - Gillingham", "227 - Stonehouse", "230 - Isai", "231 - Blois & Cergy",
    "234 - Izmir Plant", "235 - Beval", "247 - Rzeszow", "249 - Krakow",
    "576 - Dusseldorf", "126 - Warwick", "128 - Buckinghamshire", "149 - Izmir"
]

print(f"🎯 Target sheets to analyze: {len(target_sheets)}")

# Split the targets into those present in the workbook and those that are not,
# preserving the order of target_sheets in both lists.
workbook_sheets = set(xls.sheet_names)
available_target_sheets = [sheet for sheet in target_sheets if sheet in workbook_sheets]
missing_sheets = [sheet for sheet in target_sheets if sheet not in workbook_sheets]

print(f"✅ Available target sheets: {len(available_target_sheets)}")
if missing_sheets:
    print(f"⚠️  Missing sheets: {missing_sheets}")

✅ Found Excel file at: ../data/raw/2026 BRM Budget Template - 7.07.2025_Offline.xlsx
📊 Successfully loaded Excel file with 46 sheets
🎯 Target sheets to analyze: 12
✅ Available target sheets: 11
⚠️  Missing sheets: ['576 - Dusseldorf']
📊 Successfully loaded Excel file with 46 sheets
🎯 Target sheets to analyze: 12
✅ Available target sheets: 11
⚠️  Missing sheets: ['576 - Dusseldorf']


In [3]:
# Extract values where Column B == "All OpEx IT Expenses" and Column H is the 2026 Budget
#
# Inputs (from the loading cell): `xls`, `available_target_sheets`, `target_sheets`.
# Outputs: `opex_specific_rows` (list of per-site dicts), `processing_log`,
# `opex_all_summary` / `total_budget` (only when at least one row was extracted)
# and `opex_display`, which is rendered as this cell's output.
TARGET_LABEL = "All OpEx IT Expenses"
LABEL_COL_IDX = 1    # Column B, as a 0-based position
BUDGET_COL_IDX = 7   # Column H, as a 0-based position
BUDGET_COL_NAME = "2026 Budget (All OpEx IT Expenses)"

opex_specific_rows = []
processing_log = []

print(f"🔍 Processing sheets for '{TARGET_LABEL}' data...\n")

for sheet in available_target_sheets:
    try:
        print(f"Processing: {sheet}")
        df = xls.parse(sheet)

        # Columns are addressed by position only, so the sheet must reach column H.
        if df.shape[1] <= BUDGET_COL_IDX:
            processing_log.append(f"❌ {sheet}: Not enough columns (only {df.shape[1]})")
            continue

        # Look for the target row (labels compared as stripped strings)
        labels = df.iloc[:, LABEL_COL_IDX].astype(str).str.strip()
        matches = df[labels == TARGET_LABEL]

        if matches.empty:
            processing_log.append(f"⚠️  {sheet}: '{TARGET_LABEL}' row not found")
            continue

        if len(matches) > 1:
            processing_log.append(f"⚠️  {sheet}: Multiple matches found, using first one")

        # Extract the budget value from column H of the first matching row
        raw_val = matches.iloc[0, BUDGET_COL_IDX]
        budget_val = pd.to_numeric(raw_val, errors='coerce')

        if pd.isna(budget_val):
            # Non-numeric or empty cells are recorded as 0; the warning here and the
            # untouched "Raw Value" below keep that substitution traceable.
            processing_log.append(f"⚠️  {sheet}: Invalid/missing budget value in column H")
            budget_val = 0

        opex_specific_rows.append({
            "Site": sheet,
            BUDGET_COL_NAME: budget_val,
            "Raw Value": raw_val  # Keep original for debugging
        })

        processing_log.append(f"✅ {sheet}: ${budget_val:,.0f}")

    except Exception as e:
        # Best effort: one unreadable sheet is logged and must not stop the others.
        processing_log.append(f"❌ {sheet}: Error processing - {str(e)}")

# Display processing log
print("\n📋 Processing Summary:")
for log_entry in processing_log:
    print(log_entry)

# Create and format results
if opex_specific_rows:
    opex_all_summary = pd.DataFrame(opex_specific_rows)

    # Calculate total
    total_budget = opex_all_summary[BUDGET_COL_NAME].sum()

    # Display copy: budget rendered as whole-dollar text, debugging column removed
    opex_display = opex_all_summary.drop(columns=['Raw Value']).copy()
    opex_display[BUDGET_COL_NAME] = opex_display[BUDGET_COL_NAME].apply(
        lambda x: f"${int(round(x)):,}" if pd.notna(x) else "$0"
    )

    print(f"\n💰 Total 2026 OpEx IT Budget: ${total_budget:,.0f}")
    print(f"📊 Successfully processed {len(opex_specific_rows)} out of {len(target_sheets)} target sheets")
else:
    print("\n❌ No data extracted. Please check the file structure and sheet names.")
    opex_display = pd.DataFrame()  # Empty frame so the cell still has a result

# Jupyter only renders the last *top-level* expression of a cell; an expression
# nested inside the if/else above is evaluated and silently discarded.
opex_display

🔍 Processing sheets for 'All OpEx IT Expenses' data...

Processing: 224 - Gillingham
Processing: 227 - Stonehouse
Processing: 230 - Isai
Processing: 231 - Blois & Cergy
Processing: 234 - Izmir Plant
Processing: 235 - Beval
Processing: 247 - Rzeszow
Processing: 249 - Krakow
Processing: 126 - Warwick
Processing: 128 - Buckinghamshire
Processing: 149 - Izmir

📋 Processing Summary:
✅ 224 - Gillingham: $0
✅ 227 - Stonehouse: $0
✅ 230 - Isai: $0
✅ 231 - Blois & Cergy: $0
✅ 234 - Izmir Plant: $0
✅ 235 - Beval: $0
✅ 247 - Rzeszow: $0
✅ 249 - Krakow: $0
✅ 126 - Warwick: $4,633,077
✅ 128 - Buckinghamshire: $0
✅ 149 - Izmir: $0

💰 Total 2026 OpEx IT Budget: $4,633,077
📊 Successfully processed 11 out of 12 target sheets


In [4]:
# Additional Analysis and Export
# Relies on `opex_specific_rows` and `opex_all_summary` produced by the extraction cell.
if not opex_specific_rows:
    print("No data available for analysis.")
else:
    budget_col = '2026 Budget (All OpEx IT Expenses)'

    print("📈 Detailed Analysis:")
    print("=" * 50)

    # Largest budgets first
    sorted_summary = opex_all_summary.sort_values(budget_col, ascending=False)
    top_site = sorted_summary.iloc[0]
    budgets = sorted_summary[budget_col]

    print(f"🏆 Highest Budget Site: {top_site['Site']} - ${top_site[budget_col]:,.0f}")
    print(f"💰 Average Budget per Site: ${budgets.mean():,.0f}")
    print(f"📊 Median Budget: ${budgets.median():,.0f}")

    # Sites whose recorded budget is exactly zero (missing values were stored as 0 upstream)
    zero_budget_sites = sorted_summary.loc[budgets == 0, 'Site'].tolist()
    if zero_budget_sites:
        print(f"⚠️  Sites with zero/missing budgets: {zero_budget_sites}")

    # Write the numeric (unformatted) summary to CSV, creating the folder if needed
    output_path = "data/processed/opex_it_expenses_2026_summary.csv"
    try:
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        export_df = opex_all_summary.drop(columns=['Raw Value']).copy()
        export_df.to_csv(output_path, index=False)
        print(f"\n💾 Results exported to: {output_path}")
    except Exception as export_error:
        # A failed export is reported but does not abort the rest of the cell
        print(f"❌ Export failed: {export_error}")

    # One aligned line per site, in sorted order
    print("\n📋 All Sites (sorted by budget):")
    for _, site_row in sorted_summary.iterrows():
        print(f"   {site_row['Site']:<25} ${site_row[budget_col]:>10,.0f}")

📈 Detailed Analysis:
🏆 Highest Budget Site: 126 - Warwick - $4,633,077
💰 Average Budget per Site: $421,189
📊 Median Budget: $0
⚠️  Sites with zero/missing budgets: ['227 - Stonehouse', '224 - Gillingham', '230 - Isai', '231 - Blois & Cergy', '235 - Beval', '234 - Izmir Plant', '247 - Rzeszow', '249 - Krakow', '128 - Buckinghamshire', '149 - Izmir']

💾 Results exported to: data/processed/opex_it_expenses_2026_summary.csv

📋 All Sites (sorted by budget):
   126 - Warwick             $ 4,633,077
   227 - Stonehouse          $         0
   224 - Gillingham          $         0
   230 - Isai                $         0
   231 - Blois & Cergy       $         0
   235 - Beval               $         0
   234 - Izmir Plant         $         0
   247 - Rzeszow             $         0
   249 - Krakow              $         0
   128 - Buckinghamshire     $         0
   149 - Izmir               $         0


In [5]:
# 🔧 Troubleshooting: Explore Excel File Structure
# Set EXPLORE_STRUCTURE to True and re-run this cell to investigate the workbook
# layout. The exploration code is real (not commented-out) code behind a flag, so
# it stays syntax-checked and can be switched on without editing it by hand.
# Requires `xls` and `available_target_sheets` from the loading cell.
EXPLORE_STRUCTURE = False

if EXPLORE_STRUCTURE:
    print("📋 All available sheets in the workbook:")
    for i, sheet_name in enumerate(xls.sheet_names, 1):
        print(f"   {i:2d}. {sheet_name}")

    # Example: Look at the first few rows of a specific sheet to understand structure
    sample_sheet = available_target_sheets[0] if available_target_sheets else xls.sheet_names[0]
    print(f"\n🔍 Sample data from '{sample_sheet}' (first 10 rows, columns A-J):")
    sample_df = xls.parse(sample_sheet)
    print(sample_df.iloc[:10, :10].to_string())

    # Look for rows containing "OpEx" to understand variations
    print(f"\n🔍 All rows containing 'OpEx' in column B of '{sample_sheet}':")
    opex_rows = sample_df[sample_df.iloc[:, 1].astype(str).str.contains('OpEx', case=False, na=False)]
    if not opex_rows.empty:
        print(opex_rows.iloc[:, :5].to_string())
    else:
        print("   No rows found containing 'OpEx'")
else:
    print("💡 Set EXPLORE_STRUCTURE = True above to explore the Excel file structure if needed.")

💡 Uncomment the code above to explore the Excel file structure if needed.


In [6]:
# 📁 Verify CSV Export Location
# Reports where the notebook is running, where the exported CSV is expected to be,
# and either echoes the CSV back or lists whatever CSV files sit in the current folder.
import os

csv_path = "data/processed/opex_it_expenses_2026_summary.csv"
abs_csv_path = os.path.abspath(csv_path)

print(f"📂 Current working directory: {os.getcwd()}")
print(f"📄 Expected CSV path: {abs_csv_path}")

csv_found = os.path.exists(csv_path)
print(f"📋 CSV file exists: {csv_found}")

if not csv_found:
    print("❌ CSV file not found at expected location")
    # Fall back to listing any CSV files next to the notebook
    csv_files = [entry for entry in os.listdir('.') if entry.endswith('.csv')]
    if not csv_files:
        print("No CSV files found in current directory")
    else:
        print(f"📁 CSV files in current directory: {csv_files}")
else:
    # Round-trip check: load the exported file and print every row
    import pandas as pd
    exported_df = pd.read_csv(csv_path)
    print(f"\n📊 Exported CSV content ({len(exported_df)} rows):")
    print(exported_df.to_string(index=False))

📂 Current working directory: c:\Users\cglynn\myPython\esol_2025\notebooks
📄 Expected CSV path: c:\Users\cglynn\myPython\esol_2025\notebooks\data\processed\opex_it_expenses_2026_summary.csv
📋 CSV file exists: True

📊 Exported CSV content (11 rows):
                 Site  2026 Budget (All OpEx IT Expenses)
     224 - Gillingham                        0.000000e+00
     227 - Stonehouse                        0.000000e+00
           230 - Isai                        0.000000e+00
  231 - Blois & Cergy                        0.000000e+00
    234 - Izmir Plant                        0.000000e+00
          235 - Beval                        0.000000e+00
        247 - Rzeszow                        0.000000e+00
         249 - Krakow                        0.000000e+00
        126 - Warwick                        4.633077e+06
128 - Buckinghamshire                        0.000000e+00
          149 - Izmir                        0.000000e+00
