# In [2]:
import pandas as pd
import numpy as np
import re
from collections import defaultdict

# === [Extract] ===
# Read one worksheet of the workbook into a raw DataFrame.
file_path = "DELICIOUS_FISH"
sheet_name = "Centros de custo 2024"
df_raw = pd.read_excel(io=file_path, sheet_name=sheet_name)

# The 'TOTAL' column is taken to be the right-most column of the sheet.
total_col = df_raw.columns[-1]

# Accumulators for the per-tanque pass below: `tank_data` is keyed by the
# tanque header text, `current_tank` holds the header of the block being read
# (None until the first header row is seen).
tank_data, current_tank = {}, None

# === [Transform: Step 1 - Extract Variables Per Tanque] ===
# Walk the sheet top to bottom. A row whose first cell contains both
# "TILÁPIA SETOR" and "TANQUE" opens a new tanque block; every row from that
# point on (the header row included) may contribute one
# {variable name: TOTAL value} entry to that tanque, until the next header
# row replaces `current_tank`.

# Compiled once instead of on every row; strips the "TANQUE <n>" fragment.
_tanque_suffix_re = re.compile(r'TANQUE \d+', flags=re.IGNORECASE)

for idx, row in df_raw.iterrows():
    first_cell = str(row.iloc[0]) if pd.notna(row.iloc[0]) else ""

    # Detect new tanque block
    if "TILÁPIA SETOR" in first_cell and "TANQUE" in first_cell:
        current_tank = first_cell.strip()
        tank_data[current_tank] = {}

    # Rows above the first tanque header belong to no block.
    if not current_tank:
        continue

    var_name = row.iloc[3]  # variable name in column 4 (index 3)
    if pd.isna(var_name):
        continue

    total_value = row[total_col]
    if pd.isna(total_value):
        continue

    # Only real numbers count as a cost. `bool` is a subclass of `int`, so it
    # is rejected explicitly (np.bool_ is already not an np.number).
    if isinstance(total_value, (bool, np.bool_)):
        continue
    if not isinstance(total_value, (int, float, np.number)):
        continue

    # Remove TANQUE suffix from variable. The cell is coerced to text first
    # (as is done for the first cell above) so that a numeric or otherwise
    # non-string label does not make the regex raise TypeError.
    clean_var = _tanque_suffix_re.sub('', str(var_name)).strip()
    tank_data[current_tank][clean_var] = total_value

# Convert to initial wide-format DataFrame: the outer keys of `tank_data`
# become the rows and the inner keys become the columns.
df_wide = pd.DataFrame.from_dict(tank_data, orient='index')
# Move the tanque header out of the index into a regular column named 'Tanque'.
df_wide = (
    df_wide
    .reset_index()
    .rename(columns={'index': 'Tanque'})
)

# === [Transform: Step 2 - Normalize Variable Names Across Tanques] ===
# Group the columns of df_wide after the first one by their name with any
# "TANQUE <n>" fragment removed and surrounding whitespace trimmed.
variable_map = defaultdict(list)
for original_name in df_wide.columns[1:]:
    stripped_name = re.sub(r'TANQUE \d+', '', original_name, flags=re.IGNORECASE)
    variable_map[stripped_name.strip()].append(original_name)

# Collapse every group into a single column by summing its members row by
# row (missing values are skipped), keeping 'Tanque' as the first column.
summed_columns = {
    base_var: df_wide[members].sum(axis=1, skipna=True)
    for base_var, members in variable_map.items()
}
df_normalized = pd.DataFrame({'Tanque': df_wide['Tanque'], **summed_columns})

# === [Load] ===
# Final DataFrame `df_normalized` is ready for export or analysis


# Output: FileNotFoundError: [Errno 2] No such file or directory: 'DELICIOUS_FISH'