In [1]:
# Interactive Development Notebook - Algoritmo GD Project
# Load real project data and keep DataFrames in memory for development

import sys
import os
from pathlib import Path
import pandas as pd
import numpy as np
import logging
from datetime import datetime, timedelta
from typing import Dict, List, Any, Optional, Tuple
import warnings

# Add project root to path so we can import from src/
project_root = Path.cwd()
# Compare against the exact entry that gets inserted. The previous check looked
# for the literal 'src', which is never in sys.path, so every re-run of this
# cell prepended another duplicate of the project root.
project_root_entry = str(project_root)
if project_root_entry not in sys.path:
    sys.path.insert(0, project_root_entry)

# Banner printed once at the top of the notebook run.
print("🚀 Interactive Development Environment - Algoritmo GD Project")
print("=" * 70)

# =============================================================================
# 1. IMPORT PROJECT MODULES AND CONFIGURATION
# =============================================================================

# The project imports are guarded so that an ImportError is reported together
# with a hint about the working directory before it is re-raised; the cell
# still stops on failure because of the bare `raise`.
try:
    # Import project configuration and modules
    from src.config import CONFIG, PROJECT_NAME
    from src.models import DescansosDataModel
    from base_data_project.utils import create_components
    from base_data_project.log_config import setup_logger
    from base_data_project.storage.containers import CSVDataContainer, DBDataContainer
    
    print("✅ Project modules imported successfully")
    print(f"📁 Project: {PROJECT_NAME}")
    print(f"🗂️  Root directory: {project_root}")
    
except ImportError as e:
    print(f"❌ Error importing project modules: {e}")
    print("Make sure you're running this notebook from the project root directory")
    raise

# Configure logging: one logger named after the project, at INFO level.
logger = setup_logger(PROJECT_NAME, log_level=logging.INFO)

# =============================================================================
# 2. CONFIGURATION AND EXTERNAL DATA SETUP
# =============================================================================

print("\n📋 Setting up configuration and external data...")

# Data source toggle: True reads from the database, False reads from CSV files.
use_db = True

# Fallback run parameters, used only when CONFIG has no 'external_call_data' entry.
default_external_call_data = dict(
    current_process_id=249652,
    api_proc_id=999,
    wfm_proc_id=249652,
    wfm_user='WFM',
    start_date='2025-01-01',
    end_date='2025-12-31',
    wfm_proc_colab=None,
)
external_call_data = CONFIG.get('external_call_data', default_external_call_data)

# Echo the effective settings so the run is self-describing.
data_source_label = 'Database' if use_db else 'CSV files'
print(f"📊 Data source: {data_source_label}")
print(f"📅 Date range: {external_call_data['start_date']} to {external_call_data['end_date']}")
print(f"🔢 Process ID: {external_call_data['current_process_id']}")

# =============================================================================
# 3. INITIALIZE DATA MANAGER AND COMPONENTS
# =============================================================================

print("\n🔧 Initializing data manager and components...")

try:
    # Build the managers through the project's factory; tracking is disabled
    # for development and the project name is passed explicitly.
    component_options = dict(
        use_db=use_db,
        no_tracking=True,
        config=CONFIG,
        project_name=PROJECT_NAME,
    )
    data_manager, process_manager = create_components(**component_options)
    print("✅ Data manager created successfully")

except Exception as exc:
    print(f"❌ Error creating data manager: {exc}")
    raise

# =============================================================================
# 4. LOAD PROJECT DATA INTO MEMORY
# =============================================================================

print("\n📊 Loading project data into memory...")

# Pick the container class that matches the configured data source; both
# classes are constructed with the same keyword arguments.
container_class = DBDataContainer if use_db else CSVDataContainer
data_container = container_class(project_name=PROJECT_NAME, config=CONFIG)

# The data model receives the run parameters and the container chosen above.
data_model = DescansosDataModel(
    project_name=PROJECT_NAME,
    external_data=external_call_data,
    data_container=data_container,
)

print("✅ Data model initialized")

# Zero-based index into posto_id_list of the position loaded in Stage 2.
# It is clamped below, so a list with a single position falls back to that
# entry instead of raising IndexError.
POSTO_INDEX = 1

# Context manager for data manager connection
with data_manager:
    
    # =============================================================================
    # 4.1 LOAD PROCESS DATA (Stage 1)
    # =============================================================================
    
    print("\n🔄 Stage 1: Loading process data...")
    
    try:
        # Get entities to load from configuration
        entities_dict = CONFIG.get('available_entities_processing', {})
        
        success = data_model.load_process_data(data_manager, entities_dict)
        
        if success:
            print("✅ Process data loaded successfully")
            print(f"   📋 Valid employees: {len(data_model.auxiliary_data.get('valid_emp', []))} records")
            print(f"   🏢 Unit ID: {data_model.auxiliary_data.get('unit_id')}")
            print(f"   🏭 Section ID: {data_model.auxiliary_data.get('secao_id')}")
            print(f"   👤 Position IDs: {data_model.auxiliary_data.get('posto_id_list')}")
        else:
            print("❌ Failed to load process data")
            
    except Exception as e:
        # Stage 1 errors are reported and logged; Stage 2 still runs and simply
        # finds no positions if nothing was loaded.
        print(f"❌ Error in Stage 1: {e}")
        logger.error(f"Stage 1 error: {e}", exc_info=True)
    
    # =============================================================================
    # 4.2 LOAD DETAILED DATA FOR EACH POSITION (Stage 2)
    # =============================================================================
    
    print("\n🔄 Stage 2: Loading detailed data for positions...")
    
    posto_id_list = data_model.auxiliary_data.get('posto_id_list', [])

    if posto_id_list:
        # Process one position as an example (change POSTO_INDEX to pick another).
        # The original hard-coded [1] raised IndexError, outside the try block,
        # whenever the process had exactly one position.
        posto_index = min(POSTO_INDEX, len(posto_id_list) - 1)
        posto_id = posto_id_list[posto_index]
        print(f"📍 Processing position ID: {posto_id}")
        
        try:
            # Load colaborador info
            success = data_model.load_colaborador_info(data_manager, posto_id)
            if success:
                print(f"   ✅ Colaborador info loaded")
                df_colaborador = data_model.raw_data.get('df_colaborador')
                if df_colaborador is not None:
                    print(f"      📊 {len(df_colaborador)} employee records")
            
            # Load estimativas info  
            success = data_model.load_estimativas_info(
                data_manager, 
                posto_id, 
                external_call_data['start_date'], 
                external_call_data['end_date']
            )
            if success:
                print(f"   ✅ Estimativas info loaded")
                df_estimativas = data_model.raw_data.get('df_estimativas')
                if df_estimativas is not None:
                    print(f"      📈 {len(df_estimativas)} estimate records")
            
            # Load calendario info
            success = data_model.load_calendario_info(
                data_manager,
                external_call_data['current_process_id'],
                posto_id,
                external_call_data['start_date'],
                external_call_data['end_date']
            )
            if success:
                print(f"   ✅ Calendario info loaded")
                df_calendario = data_model.raw_data.get('df_calendario')
                if df_calendario is not None:
                    print(f"      📅 Calendar matrix: {df_calendario.shape}")
            
        except Exception as e:
            print(f"   ❌ Error loading data for position {posto_id}: {e}")
            logger.error(f"Position {posto_id} error: {e}", exc_info=True)
    else:
        # Make the skipped stage visible instead of ending the cell silently.
        print("   ⚠️ No position IDs available - skipping Stage 2")
    


🚀 Interactive Development Environment - Algoritmo GD Project
2025-06-26 12:47:59,349 |     INFO | Logger initialized for algoritmo_GD
✅ Project modules imported successfully
📁 Project: algoritmo_GD
🗂️  Root directory: c:\Users\joao.soares\Documents\GitHub\algortimo-gd

📋 Setting up configuration and external data...
📊 Data source: Database
📅 Date range: 2025-01-01 to 2025-12-31
🔢 Process ID: 249730

🔧 Initializing data manager and components...
Creating components for project: algoritmo_GD
2025-06-26 12:48:00,674 |     INFO | Data manager for 'db' not registered, trying built-in managers
2025-06-26 12:48:00,675 |     INFO | Initialized BaseDataManager
✅ Data manager created successfully

📊 Loading project data into memory...
2025-06-26 12:48:00,676 |     INFO | Initialized DBDataContainer
2025-06-26 12:48:00,677 |     INFO | Initializing database data container with URL: oracle+cx_oracle://JOAO_SOARES:***REDACTED***@10.175.28.20:1523/?service_name=WFM_ALCAMPO_TST01
2025-06-26 12:48:02,4

In [2]:
with data_manager:
    # Stage 3 (section 4.3): run the three transformations and print debug
    # snapshots of the inputs before and after.
    print("\n🔄 Stage 3: Performing data transformations...")

    try:
        divider = "--------------------------------"

        # Inputs as they look before any transformation runs
        print(divider)
        print("Pre-tratamento")
        for aux_key in ('df_granularidade', 'df_faixa_horario', 'df_feriados', 'df_estrutura_wfm'):
            print(f"{aux_key}: {data_model.auxiliary_data[aux_key]}")
        print(f"df_estimativas_raw: {data_model.raw_data['df_estimativas']}")
        print(divider)

        # Run the transformations in order and announce each one that reports success
        transformation_steps = (
            ('load_estimativas_transformations', "   ✅ Estimativas transformations completed"),
            ('load_colaborador_transformations', "   ✅ Colaborador transformations completed"),
            ('load_calendario_transformations', "   ✅ Calendario transformations completed"),
        )
        for method_name, done_message in transformation_steps:
            success = getattr(data_model, method_name)()
            if success:
                print(done_message)

        # Keep a copy of the calendar matrix as matriz2_bk before func_inicializa runs
        data_model.medium_data['matriz2_bk'] = data_model.raw_data['df_calendario'].copy()

        matriz2_bk = data_model.medium_data['matriz2_bk']
        print(f"\n🔍 Debug matriz2_bk before func_inicializa:")
        print(f"   Shape: {matriz2_bk.shape}")
        print(f"   First few rows:\n{matriz2_bk.head()}")
        print(f"   Columns: {matriz2_bk.columns.tolist()}")

        # Same inputs again, after the transformations
        print(divider)
        print("Pos-tratamento")
        print(f"df_granularidade: {data_model.auxiliary_data['df_granularidade']}")
        print(f"df_estimativas_raw: {data_model.raw_data['df_estimativas']}")
        print(divider)

        print("\n🔍 Debug raw_data['df_estimativas'] before func_inicializa:")
        df_est = data_model.raw_data['df_estimativas']
        print(f"   Shape: {df_est.shape}")
        print(f"   Columns: {df_est.columns.tolist()}")
        print(f"   First few rows:\n{df_est.head()}")
    except Exception as exc:
        # Report and log; the cell keeps going so the notebook stays usable
        print(f"   ❌ Error in transformations: {exc}")
        logger.error(f"Transformation error: {exc}", exc_info=True)

print("\n🎉 Data loading completed!")

2025-06-26 12:48:34,662 |     INFO | Connected to database: oracle+cx_oracle://JOAO_SOARES:***REDACTED***@10.175.28.20:1523/?service_name=WFM_ALCAMPO_TST01

🔄 Stage 3: Performing data transformations...
--------------------------------
Pre-tratamento
df_granularidade:       fk_unidade     unidade  fk_secao               secao  fk_tipo_posto  \
0          01015  HP LEGANES  10150184  PF MOST-Pescaderia            153   
1          01015  HP LEGANES  10150184  PF MOST-Pescaderia            153   
2          01015  HP LEGANES  10150184  PF MOST-Pescaderia            153   
3          01015  HP LEGANES  10150184  PF MOST-Pescaderia            153   
4          01015  HP LEGANES  10150184  PF MOST-Pescaderia            153   
...          ...         ...       ...                 ...            ...   
23720      01015  HP LEGANES  10150184  PF MOST-Pescaderia            153   
23721      01015  HP LEGANES  10150184  PF MOST-Pescaderia            153   
23722      01015  HP LEGANES  10150184 

  output_final = output_final.fillna(0)
  matriz_ma[non_date_columns] = matriz_ma[non_date_columns].fillna(0)


2025-06-26 12:48:40,433 |     INFO | DEBUG: Final matrix shape=(16, 731)
2025-06-26 12:48:40,433 |     INFO | DEBUG: Successfully stored df_calendario
   ✅ Calendario transformations completed

🔍 Debug matriz2_bk before func_inicializa:
   Shape: (16, 731)
   First few rows:
          0           1           2           3           4           5    \
0         Dia  2025-01-01  2025-01-01  2025-01-02  2025-01-02  2025-01-03   
1       TURNO           M           T           M           T           M   
2    TIPO_DIA           -           -           -           -           -   
3  0005016794           -           -           -           -           -   
4  0005037932           -           -           -           -           -   

          6           7           8           9    ...         721  \
0  2025-01-03  2025-01-04  2025-01-04  2025-01-05  ...  2025-12-27   
1           T           M           T           M  ...           M   
2           -           -           -           -  

In [3]:
# Dump the two raw frames in plain-text form: estimates first, then employees.
for raw_key in ('df_estimativas', 'df_colaborador'):
    print(data_model.raw_data[raw_key])

          data  media_turno  max_turno  min_turno  sd_turno turno  \
0   2025-01-01          0.0        0.0        0.0       0.0     M   
1   2025-01-02          0.0        0.0        0.0       0.0     M   
2   2025-01-03          0.0        0.0        0.0       0.0     M   
3   2025-01-04          0.0        0.0        0.0       0.0     M   
4   2025-01-05          0.0        0.0        0.0       0.0     M   
..         ...          ...        ...        ...       ...   ...   
725 2025-12-27          0.0        0.0        0.0       0.0     T   
726 2025-12-28          0.0        0.0        0.0       0.0     T   
727 2025-12-29          0.0        0.0        0.0       0.0     T   
728 2025-12-30          0.0        0.0        0.0       0.0     T   
729 2025-12-31          0.0        0.0        0.0       0.0     T   

    fk_tipo_posto    data_turno  
0            None  2025-01-01_M  
1            None  2025-01-02_M  
2            None  2025-01-03_M  
3            None  2025-01-04_M  
4

In [4]:
with data_manager:
    try:
        # Arguments for func_inicializa: the run's date range plus the
        # 'df_festivos' and 'df_closed_days' auxiliary entries (None if absent).
        inicializa_kwargs = dict(
            start_date=external_call_data['start_date'],
            end_date=external_call_data['end_date'],
            fer=data_model.auxiliary_data.get('df_festivos'),
            closed_days=data_model.auxiliary_data.get('df_closed_days'),
        )
        success = data_model.func_inicializa(**inicializa_kwargs)

        if success:
            print("   ✅ func_inicializa completed")

            # Show what func_inicializa left in medium_data['df_estimativas']
            print("\n🔍 Debug medium_data['df_estimativas'] after func_inicializa:")
            df_est = data_model.medium_data['df_estimativas']
            print(f"   Shape: {df_est.shape}")
            print(f"   Columns: {df_est.columns.tolist()}")
            print(f"   First few rows:\n{df_est.head()}")

    except Exception as exc:
        # Report and log; the cell keeps going so the notebook stays usable
        print(f"   ❌ Error in transformations: {exc}")
        logger.error(f"Transformation error: {exc}", exc_info=True)

print("\n🎉 Data loading completed!")


2025-06-26 12:48:40,496 |     INFO | Connected to database: oracle+cx_oracle://JOAO_SOARES:***REDACTED***@10.175.28.20:1523/?service_name=WFM_ALCAMPO_TST01
2025-06-26 12:48:40,498 |     INFO | === Debug matrizB_og (df_estimativas) ===
2025-06-26 12:48:40,500 |     INFO | Shape: (730, 8)
2025-06-26 12:48:40,501 |     INFO | Columns: ['data', 'media_turno', 'max_turno', 'min_turno', 'sd_turno', 'turno', 'fk_tipo_posto', 'data_turno']
2025-06-26 12:48:40,507 |     INFO | First few rows:
        data  media_turno  max_turno  min_turno  sd_turno turno fk_tipo_posto  \
0 2025-01-01          0.0        0.0        0.0       0.0     M          None   
1 2025-01-02          0.0        0.0        0.0       0.0     M          None   
2 2025-01-03          0.0        0.0        0.0       0.0     M          None   
3 2025-01-04          0.0        0.0        0.0       0.0     M          None   
4 2025-01-05          0.0        0.0        0.0       0.0     M          None   

     data_turno  
0  2025

  matrizB_ini.loc[matrizB_ini['data'].isin(special_dates), 'min_turno'] = matrizB_ini['max_turno']
  mask_friday = (matrizB_ini['data'].isin(friday_dates)) & (matrizB_ini['turno'] == 'M')


2025-06-26 12:48:40,883 |     INFO | Columns in matrizA_og after processing: ['fk_colaborador', 'unidade', 'secao', 'posto', 'convenio', 'nome', 'matricula', 'min_dia_trab', 'max_dia_trab', 'tipo_turno', 'seq_turno', 't_total', 'l_total', 'dyf_max_t', 'q', 'c2d', 'c3d', 'cxx', 'semana_1', 'out', 'ciclo', 'data_admissao', 'data_demissao', 'fk_tipo_posto', 'h_tm_in', 'h_tm_out', 'h_tt_in', 'h_tt_out', 'h_seg_in', 'h_seg_out', 'h_ter_in', 'h_ter_out', 'h_qua_in', 'h_qua_out', 'h_qui_in', 'h_qui_out', 'h_sex_in', 'h_sex_out', 'h_sab_in', 'h_sab_out', 'h_dom_in', 'h_dom_out', 'h_fer_in', 'h_fer_out', 'limite_superior_manha', 'limite_inferior_tarde', 'emp', 'lq', 'min', 'max', 'tipo_contrato', 'ld', 'l_dom', 'lq_og', 'total_dom_fes', 'total_fes', 'total_holidays', 'descansos_atrb', 'COLABORADOR', 'LD_at', 'LQ_at', 'LRES_at', 'CXX_at', 'C2D_at', 'C3D_at']
2025-06-26 12:48:49,699 |     INFO | matrizB_m:            data  media_turno  max_turno  min_turno  sd_turno turno  \
0    2025-01-01      

In [5]:

# =============================================================================
# 5. ORGANIZE DATAFRAMES FOR EASY ACCESS
# =============================================================================

print("\n📊 Organizing DataFrames for interactive access...")

# One dictionary per data_model store; each ends up holding only the entries
# of that store whose value is a pandas DataFrame.
auxiliary_dataframes = {}
raw_dataframes = {}
medium_dataframes = {}
rare_dataframes = {}
formatted_dataframes = {}

stage_targets = (
    ('auxiliary_data', auxiliary_dataframes),   # auxiliary data
    ('raw_data', raw_dataframes),               # raw data
    ('medium_data', medium_dataframes),         # medium data (transformed)
    ('rare_data', rare_dataframes),             # rare data (algorithm results)
    ('formatted_data', formatted_dataframes),   # formatted data (final output)
)
for stage_attr, target in stage_targets:
    stage_store = getattr(data_model, stage_attr)
    target.update(
        (entry_name, entry)
        for entry_name, entry in stage_store.items()
        if isinstance(entry, pd.DataFrame)
    )

# =============================================================================
# 6. DISPLAY AVAILABLE DATAFRAMES
# =============================================================================

print("\n📋 AVAILABLE DATAFRAMES")
print("=" * 70)

# Display label -> dictionary of DataFrames, in display order.
all_dataframes = dict([
    ("🗂️ AUXILIARY", auxiliary_dataframes),
    ("📁 RAW", raw_dataframes),
    ("⚙️ MEDIUM (Transformed)", medium_dataframes),
    ("💎 RARE (Algorithm Results)", rare_dataframes),
    ("📊 FORMATTED (Final)", formatted_dataframes),
])

for category, dataframes in all_dataframes.items():
    # Empty categories get a single placeholder line
    if not dataframes:
        print(f"\n{category}: (no DataFrames yet)")
        continue

    print(f"\n{category}:")
    for frame_name, frame in dataframes.items():
        row_count, column_count = frame.shape
        print(f"   📋 {frame_name:<25} → {row_count:>6} rows × {column_count:>3} columns")

# =============================================================================
# 7. QUICK ACCESS VARIABLES AND UTILITY FUNCTIONS
# =============================================================================

print(f"\n🔗 QUICK ACCESS VARIABLES")
print("=" * 70)

# Make key DataFrames easily accessible with simple variable names
# Each name is bound only when its key exists in the corresponding dictionary;
# otherwise the name is left untouched (it keeps whatever value an earlier
# cell gave it, or stays undefined).
# NOTE(review): the membership tests and `.shape` reads are not expected to
# raise for plain dicts of DataFrames, so the except branch looks like a
# safety net only - confirm before relying on it.
try:
    # Auxiliary store
    if 'valid_emp' in auxiliary_dataframes:
        valid_emp = auxiliary_dataframes['valid_emp']
        print(f"✅ valid_emp           → {valid_emp.shape}")
    
    # Raw store
    if 'df_colaborador' in raw_dataframes:
        df_colaborador = raw_dataframes['df_colaborador']
        print(f"✅ df_colaborador      → {df_colaborador.shape}")
    
    if 'df_estimativas' in raw_dataframes:
        df_estimativas = raw_dataframes['df_estimativas']
        print(f"✅ df_estimativas      → {df_estimativas.shape}")
    
    if 'df_calendario' in raw_dataframes:
        df_calendario = raw_dataframes['df_calendario']
        print(f"✅ df_calendario       → {df_calendario.shape}")
    
    # Medium store
    if 'matrizA_bk' in medium_dataframes:
        matrizA_bk = medium_dataframes['matrizA_bk']
        print(f"✅ matrizA_bk          → {matrizA_bk.shape}")
    
    if 'matriz2_bk' in medium_dataframes:
        matriz2_bk = medium_dataframes['matriz2_bk']
        print(f"✅ matriz2_bk          → {matriz2_bk.shape}")
    
    if 'matrizB_bk' in medium_dataframes:
        matrizB_bk = medium_dataframes['matrizB_bk']
        print(f"✅ matrizB_bk          → {matrizB_bk.shape}")
        
except Exception as e:
    print(f"⚠️ Some DataFrames may not be available yet: {e}")

# =============================================================================
# 8. UTILITY FUNCTIONS FOR DATA EXPLORATION
# =============================================================================

def explore_df(df, name="DataFrame"):
    """Print a detailed overview of a DataFrame and return it unchanged.

    The report contains the shape, the deep memory usage in KB, one line per
    column (position, label, dtype, null count), the first three rows and,
    when numeric columns exist, their ``describe()`` summary.

    Column statistics are gathered positionally, so frames with duplicated
    column labels or non-string labels (integers, tuples, None) are reported
    instead of raising.

    Args:
        df: The pandas DataFrame to inspect.
        name: Label shown in the report header.

    Returns:
        The same ``df`` object that was passed in.
    """
    print(f"\n🔍 EXPLORING: {name}")
    print("=" * 60)
    print(f"📏 Shape: {df.shape[0]} rows × {df.shape[1]} columns")
    print(f"💾 Memory usage: {df.memory_usage(deep=True).sum() / 1024:.1f} KB")
    
    print(f"\n📋 Columns ({len(df.columns)}):")
    # df.dtypes and df.isnull().sum() are aligned with df.columns by position,
    # which avoids df[col] (a DataFrame, not a Series, for duplicated labels).
    null_counts = df.isnull().sum()
    for i, (col, dtype, null_count) in enumerate(zip(df.columns, df.dtypes, null_counts)):
        # str(col): alignment specs such as '<20' are not supported by every label type
        print(f"   {i+1:2d}. {str(col):<20} ({dtype}) - {null_count} nulls")
    
    print(f"\n📊 First 3 rows:")
    print(df.head(3).to_string())
    
    # Numeric summary - select by dtype directly rather than re-indexing by
    # label, which would pull in non-numeric columns sharing a duplicated label
    numeric_df = df.select_dtypes(include=[np.number])
    if numeric_df.shape[1] > 0:
        print(f"\n📈 Numeric columns summary:")
        print(numeric_df.describe())
    
    return df

def compare_dfs(*dataframes, names=None):
    """Print the shape of each DataFrame and the columns they all share.

    Args:
        *dataframes: The pandas DataFrames to compare.
        names: Optional display names, matched to ``dataframes`` by position.
            Frames without a name get the default ``DataFrame_<position>``
            label, so every frame is listed even if too few names are given.

    Returns:
        None. The comparison is written to stdout. The common-column section
        is printed only when at least two frames are passed.
    """
    supplied = [] if names is None else list(names)
    # Pad instead of letting zip() silently drop the unnamed frames
    labels = supplied + [f"DataFrame_{i+1}" for i in range(len(supplied), len(dataframes))]
    
    print(f"\n🔄 COMPARING DATAFRAMES")
    print("=" * 60)
    
    for name, df in zip(labels, dataframes):
        print(f"📋 {str(name):<20} → {df.shape[0]:>6} rows × {df.shape[1]:>3} columns")
    
    # Check for common columns
    if len(dataframes) > 1:
        all_columns = [set(df.columns) for df in dataframes]
        common_cols = set.intersection(*all_columns)
        
        try:
            ordered_cols = sorted(common_cols)
        except TypeError:
            # Mixed label types (e.g. int and str) cannot be ordered against
            # each other; fall back to ordering by their text form.
            ordered_cols = sorted(common_cols, key=str)
        
        print(f"\n🔗 Common columns ({len(common_cols)}):")
        for col in ordered_cols:
            print(f"   • {col}")

def show_sample_data(df_dict, category_name, n_rows=3):
    """Print the leading rows of every DataFrame in ``df_dict``.

    Args:
        df_dict: Mapping of display name to pandas DataFrame.
        category_name: Label shown in the section header.
        n_rows: How many leading rows to print per frame.

    Returns:
        None. Frames with zero rows get a placeholder line instead of a table.
    """
    print(f"\n📖 SAMPLE DATA: {category_name}")
    print("=" * 60)

    for name, df in df_dict.items():
        row_total = len(df)
        print(f"\n🔹 {name} (showing {min(n_rows, row_total)} rows):")

        # Nothing to tabulate for a frame without rows
        if row_total == 0:
            print("   (empty DataFrame)")
            continue

        print(df.head(n_rows).to_string())

def search_columns(pattern, df_dict=None):
    """Print every column whose label contains ``pattern`` (case-insensitive).

    Column labels are compared through their text form, so frames with
    non-string labels (for example integer column positions) can be searched
    without raising.

    Args:
        pattern: Substring to look for in the column labels.
        df_dict: Mapping of display name to pandas DataFrame. When omitted,
            the module-level ``auxiliary_dataframes``, ``raw_dataframes`` and
            ``medium_dataframes`` are searched, and each frame is listed as
            ``<category>.<name>``.

    Returns:
        None. Matches, or a "no columns found" line, are written to stdout.
    """
    if df_dict is None:
        # Keep the categories apart: the raw and medium stores can both hold a
        # key such as 'df_estimativas', and merging them into one dict would
        # silently hide the raw frame.
        categories = (
            ("auxiliary", auxiliary_dataframes),
            ("raw", raw_dataframes),
            ("medium", medium_dataframes),
        )
        named_frames = [
            (f"{category}.{df_name}", df)
            for category, frames in categories
            for df_name, df in frames.items()
        ]
    else:
        named_frames = list(df_dict.items())
    
    print(f"\n🔍 SEARCHING COLUMNS: '{pattern}'")
    print("=" * 60)
    
    needle = str(pattern).lower()
    found = False
    for df_name, df in named_frames:
        # str(col): integer or tuple labels have no .lower()
        matching_cols = [col for col in df.columns if needle in str(col).lower()]
        if matching_cols:
            found = True
            print(f"\n📋 {df_name}:")
            for col in matching_cols:
                print(f"   • {col}")
    
    if not found:
        print(f"❌ No columns found matching '{pattern}'")

def df_info():
    """Print shape and deep memory usage (MB) for every organised DataFrame.

    Reads the module-level ``auxiliary_dataframes``, ``raw_dataframes``,
    ``medium_dataframes``, ``rare_dataframes`` and ``formatted_dataframes``
    dictionaries. Categories without any DataFrame are skipped.

    Returns:
        None. The listing is written to stdout.
    """
    print(f"\n📊 ALL DATAFRAMES INFO")
    print("=" * 70)

    bytes_per_mb = 1024 * 1024
    labelled_groups = {
        "🗂️ AUXILIARY": auxiliary_dataframes,
        "📁 RAW": raw_dataframes,
        "⚙️ MEDIUM": medium_dataframes,
        "💎 RARE": rare_dataframes,
        "📊 FORMATTED": formatted_dataframes,
    }

    for category_name, df_dict in labelled_groups.items():
        if not df_dict:
            continue

        print(f"\n{category_name}:")
        for name, df in df_dict.items():
            row_count, column_count = df.shape
            memory_mb = df.memory_usage(deep=True).sum() / bytes_per_mb
            print(f"   📋 {name:<25} → {row_count:>6} rows × {column_count:>3} cols ({memory_mb:.1f} MB)")

# =============================================================================
# 9. INSTRUCTIONS AND EXAMPLES
# =============================================================================

# Static help text, grouped as (title, lines). Every section is printed as its
# title, a 70-character rule, then its lines in order.
help_sections = (
    (
        "\n🛠️ UTILITY FUNCTIONS AVAILABLE:",
        (
            "🔍 explore_df(dataframe, 'name')              → Detailed DataFrame exploration",
            "🔄 compare_dfs(df1, df2, names=['A', 'B'])    → Compare multiple DataFrames",
            "📖 show_sample_data(df_dict, 'category', 5)   → Show sample data from category",
            "🔍 search_columns('pattern')                  → Find columns matching pattern",
            "📊 df_info()                                  → Show all DataFrames info",
        ),
    ),
    (
        "\n💡 EXAMPLE USAGE:",
        (
            "# Explore specific DataFrames",
            "explore_df(valid_emp, 'Valid Employees')",
            "explore_df(df_colaborador, 'Employee Details')",
            "",
            "# Compare DataFrames",
            "compare_dfs(df_colaborador, matrizA_bk, names=['Raw', 'Processed'])",
            "",
            "# Show sample data",
            "show_sample_data(raw_dataframes, 'Raw Data', 3)",
            "",
            "# Search for specific columns",
            "search_columns('matricula')",
            "search_columns('data')",
            "",
            "# Access DataFrames directly",
            "valid_emp.head()",
            "df_colaborador.describe()",
            "matrizA_bk.columns",
        ),
    ),
    (
        "\n🎯 DIRECT ACCESS TO PROJECT DATA:",
        (
            "📊 data_model.auxiliary_data    → Dictionary with auxiliary data",
            "📁 data_model.raw_data          → Dictionary with raw DataFrames",
            "⚙️ data_model.medium_data       → Dictionary with transformed DataFrames",
            "💎 data_model.rare_data         → Dictionary with algorithm results",
            "📋 data_model.formatted_data    → Dictionary with final formatted data",
            "",
            "📊 auxiliary_dataframes         → Easy access to auxiliary DataFrames",
            "📁 raw_dataframes              → Easy access to raw DataFrames",
            "⚙️ medium_dataframes           → Easy access to medium DataFrames",
        ),
    ),
)

for section_title, section_lines in help_sections:
    print(section_title)
    print("=" * 70)
    for help_line in section_lines:
        print(help_line)

# Closing banner
closing_lines = (
    "\n✨ READY FOR INTERACTIVE DEVELOPMENT!",
    "🔧 All project DataFrames are loaded and available in memory",
    "📝 Use the utility functions above to explore and analyze the data",
    "🚀 Start developing your data transformations!",
)
for closing_line in closing_lines:
    print(closing_line)


📊 Organizing DataFrames for interactive access...

📋 AVAILABLE DATAFRAMES

🗂️ AUXILIARY:
   📋 messages_df               →      0 rows ×   0 columns
   📋 params_lq                 →      5 rows ×   2 columns
   📋 valid_emp                 →     14 rows ×   8 columns
   📋 colabs_id_list            →     14 rows ×   0 columns
   📋 df_festivos               →     14 rows ×   2 columns
   📋 df_turnos                 →      2 rows ×   6 columns
   📋 df_calendario_passado     →      0 rows ×   0 columns
   📋 df_count                  →      0 rows ×   0 columns
   📋 df_estrutura_wfm          →   2777 rows ×   6 columns
   📋 df_feriados               →   1743 rows ×   8 columns
   📋 df_faixa_horario          →   1862 rows ×  19 columns
   📋 df_orcamento              →  23725 rows ×  11 columns
   📋 df_granularidade          →  23725 rows ×  11 columns
   📋 df_calendario_past        →      0 rows ×   0 columns
   📋 df_ausencias_ferias       →    803 rows ×   7 columns
   📋 df_ciclos_90        

In [6]:
    # =============================================================================
    # 4.3 PERFORM DATA TRANSFORMATIONS (Stage 3)
    # =============================================================================
with data_manager:
    print("\n🔄 Stage 3: Performing data transformations...")

    try:
        # Run the transformations in order and announce each one that reports success
        transformation_steps = (
            ('load_estimativas_transformations', "   ✅ Estimativas transformations completed"),
            ('load_colaborador_transformations', "   ✅ Colaborador transformations completed"),
            ('load_calendario_transformations', "   ✅ Calendario transformations completed"),
        )
        for method_name, done_message in transformation_steps:
            success = getattr(data_model, method_name)()
            if success:
                print(done_message)

        # Keep a copy of the calendar matrix as matriz2_bk before func_inicializa runs
        data_model.medium_data['matriz2_bk'] = data_model.raw_data['df_calendario'].copy()

        matriz2_bk = data_model.medium_data['matriz2_bk']
        print(f"\n🔍 Debug matriz2_bk before func_inicializa:")
        print(f"   Shape: {matriz2_bk.shape}")
        print(f"   First few rows:\n{matriz2_bk.head()}")
        print(f"   Columns: {matriz2_bk.columns.tolist()}")

        print("\n🔍 Debug raw_data['df_estimativas'] before func_inicializa:")
        df_est = data_model.raw_data['df_estimativas']
        print(f"   Shape: {df_est.shape}")
        print(f"   Columns: {df_est.columns.tolist()}")
        print(f"   First few rows:\n{df_est.head()}")

        # Arguments for func_inicializa: the run's date range plus the
        # 'df_festivos' and 'df_closed_days' auxiliary entries (None if absent).
        inicializa_kwargs = dict(
            start_date=external_call_data['start_date'],
            end_date=external_call_data['end_date'],
            fer=data_model.auxiliary_data.get('df_festivos'),
            closed_days=data_model.auxiliary_data.get('df_closed_days'),
        )
        success = data_model.func_inicializa(**inicializa_kwargs)

        if success:
            print("   ✅ func_inicializa completed")

            # Show what func_inicializa left in medium_data['df_estimativas']
            print("\n🔍 Debug medium_data['df_estimativas'] after func_inicializa:")
            df_est = data_model.medium_data['df_estimativas']
            print(f"   Shape: {df_est.shape}")
            print(f"   Columns: {df_est.columns.tolist()}")
            print(f"   First few rows:\n{df_est.head()}")

    except Exception as exc:
        # Report and log; the cell keeps going so the notebook stays usable
        print(f"   ❌ Error in transformations: {exc}")
        logger.error(f"Transformation error: {exc}", exc_info=True)

print("\n🎉 Data loading completed!")

# =============================================================================
# 5. ORGANIZE DATAFRAMES FOR EASY ACCESS
# =============================================================================

print("\n📊 Organizing DataFrames for interactive access...")

# One dictionary per data_model store; each ends up holding only the entries
# of that store whose value is a pandas DataFrame.
auxiliary_dataframes = {}
raw_dataframes = {}
medium_dataframes = {}
rare_dataframes = {}
formatted_dataframes = {}

stage_targets = (
    ('auxiliary_data', auxiliary_dataframes),   # auxiliary data
    ('raw_data', raw_dataframes),               # raw data
    ('medium_data', medium_dataframes),         # medium data (transformed)
    ('rare_data', rare_dataframes),             # rare data (algorithm results)
    ('formatted_data', formatted_dataframes),   # formatted data (final output)
)
for stage_attr, target in stage_targets:
    stage_store = getattr(data_model, stage_attr)
    target.update(
        (entry_name, entry)
        for entry_name, entry in stage_store.items()
        if isinstance(entry, pd.DataFrame)
    )

# =============================================================================
# 6. DISPLAY AVAILABLE DATAFRAMES
# =============================================================================

print("\n📋 AVAILABLE DATAFRAMES")
print("=" * 70)

# Display label -> dictionary of DataFrames, in display order.
all_dataframes = dict([
    ("🗂️ AUXILIARY", auxiliary_dataframes),
    ("📁 RAW", raw_dataframes),
    ("⚙️ MEDIUM (Transformed)", medium_dataframes),
    ("💎 RARE (Algorithm Results)", rare_dataframes),
    ("📊 FORMATTED (Final)", formatted_dataframes),
])

for category, dataframes in all_dataframes.items():
    # Empty categories get a single placeholder line
    if not dataframes:
        print(f"\n{category}: (no DataFrames yet)")
        continue

    print(f"\n{category}:")
    for frame_name, frame in dataframes.items():
        row_count, column_count = frame.shape
        print(f"   📋 {frame_name:<25} → {row_count:>6} rows × {column_count:>3} columns")

# =============================================================================
# 7. QUICK ACCESS VARIABLES AND UTILITY FUNCTIONS
# =============================================================================

print(f"\n🔗 QUICK ACCESS VARIABLES")
print("=" * 70)

# Make key DataFrames easily accessible with simple variable names
# Each name is bound only when its key exists in the corresponding dictionary;
# otherwise the name is left untouched (it keeps whatever value an earlier
# cell gave it, or stays undefined).
# NOTE(review): the membership tests and `.shape` reads are not expected to
# raise for plain dicts of DataFrames, so the except branch looks like a
# safety net only - confirm before relying on it.
try:
    # Auxiliary store
    if 'valid_emp' in auxiliary_dataframes:
        valid_emp = auxiliary_dataframes['valid_emp']
        print(f"✅ valid_emp           → {valid_emp.shape}")
    
    # Raw store
    if 'df_colaborador' in raw_dataframes:
        df_colaborador = raw_dataframes['df_colaborador']
        print(f"✅ df_colaborador      → {df_colaborador.shape}")
    
    if 'df_estimativas' in raw_dataframes:
        df_estimativas = raw_dataframes['df_estimativas']
        print(f"✅ df_estimativas      → {df_estimativas.shape}")
    
    if 'df_calendario' in raw_dataframes:
        df_calendario = raw_dataframes['df_calendario']
        print(f"✅ df_calendario       → {df_calendario.shape}")
    
    # Medium store
    if 'matrizA_bk' in medium_dataframes:
        matrizA_bk = medium_dataframes['matrizA_bk']
        print(f"✅ matrizA_bk          → {matrizA_bk.shape}")
    
    if 'matriz2_bk' in medium_dataframes:
        matriz2_bk = medium_dataframes['matriz2_bk']
        print(f"✅ matriz2_bk          → {matriz2_bk.shape}")
    
    if 'matrizB_bk' in medium_dataframes:
        matrizB_bk = medium_dataframes['matrizB_bk']
        print(f"✅ matrizB_bk          → {matrizB_bk.shape}")
        
except Exception as e:
    print(f"⚠️ Some DataFrames may not be available yet: {e}")

# =============================================================================
# 8. UTILITY FUNCTIONS FOR DATA EXPLORATION
# =============================================================================

def explore_df(df, name="DataFrame"):
    """Print a detailed overview of a DataFrame and return it unchanged.

    Prints the shape, deep memory usage, a per-column dtype/null listing, the
    first three rows and, when numeric columns exist, their ``describe()``
    summary.

    Columns are inspected by position rather than by label, so frames with
    duplicated column names (where ``df[col]`` yields a DataFrame instead of a
    Series) and frames with non-string labels are handled without raising.

    Args:
        df: The pandas DataFrame to inspect.
        name: Label printed in the header.

    Returns:
        The same ``df`` object that was passed in.
    """
    print(f"\n🔍 EXPLORING: {name}")
    print("=" * 60)
    print(f"📏 Shape: {df.shape[0]} rows × {df.shape[1]} columns")
    print(f"💾 Memory usage: {df.memory_usage(deep=True).sum() / 1024:.1f} KB")

    print(f"\n📋 Columns ({len(df.columns)}):")
    # dtypes and null counts are both ordered like df.columns, so zipping them
    # avoids label-based lookups that break on duplicated column names.
    column_info = zip(df.columns, df.dtypes, df.isnull().sum())
    for position, (col, dtype, null_count) in enumerate(column_info, start=1):
        # str(col): tuple labels do not accept the '<20' format spec.
        print(f"   {position:2d}. {str(col):<20} ({dtype}) - {null_count} nulls")

    print(f"\n📊 First 3 rows:")
    print(df.head(3).to_string())

    # Numeric summary. Use the select_dtypes result directly; re-indexing df by
    # label would select a duplicated numeric label more than once.
    numeric_part = df.select_dtypes(include=[np.number])
    if numeric_part.shape[1] > 0:
        print(f"\n📈 Numeric columns summary:")
        print(numeric_part.describe())

    return df

def compare_dfs(*dataframes, names=None):
    """Print the shape of each DataFrame and the columns they all share.

    Args:
        *dataframes: The pandas DataFrames to compare.
        names: Optional labels, one per DataFrame. Missing labels (``None`` or
            a sequence shorter than ``dataframes``) are filled in with
            ``DataFrame_<position>`` so every frame is always listed. Extra
            labels are ignored.

    Returns:
        None. All results are printed.
    """
    labels = [] if names is None else list(names)
    # Pad with positional defaults; a plain zip would silently drop the
    # frames that have no label.
    labels.extend(f"DataFrame_{i + 1}" for i in range(len(labels), len(dataframes)))

    print(f"\n🔄 COMPARING DATAFRAMES")
    print("=" * 60)

    for name, df in zip(labels, dataframes):
        print(f"📋 {name:<20} → {df.shape[0]:>6} rows × {df.shape[1]:>3} columns")

    # Check for common columns
    if len(dataframes) > 1:
        common_cols = set.intersection(*(set(df.columns) for df in dataframes))
        try:
            ordered_cols = sorted(common_cols)
        except TypeError:
            # Mixed label types (e.g. int and str) cannot be ordered against
            # each other; fall back to ordering by their text form.
            ordered_cols = sorted(common_cols, key=str)

        print(f"\n🔗 Common columns ({len(common_cols)}):")
        for col in ordered_cols:
            print(f"   • {col}")

def show_sample_data(df_dict, category_name, n_rows=3):
    """Print the leading rows of every DataFrame in a category mapping.

    Args:
        df_dict: Mapping of DataFrame name to DataFrame.
        category_name: Label printed in the section header.
        n_rows: How many leading rows to print per DataFrame.

    Returns:
        None. Output goes to stdout; empty frames get a placeholder line.
    """
    print(f"\n📖 SAMPLE DATA: {category_name}")
    print("=" * 60)

    for label, frame in df_dict.items():
        row_total = len(frame)
        print(f"\n🔹 {label} (showing {min(n_rows, row_total)} rows):")
        if row_total == 0:
            print("   (empty DataFrame)")
            continue
        print(frame.head(n_rows).to_string())

def search_columns(pattern, df_dict=None):
    """Print every column whose name contains ``pattern`` (case-insensitive).

    Args:
        pattern: Substring to look for. It is converted with ``str()`` first.
        df_dict: Mapping of DataFrame name to DataFrame. When omitted, the
            module-level ``auxiliary_dataframes``, ``raw_dataframes`` and
            ``medium_dataframes`` are searched and each name is shown as
            ``"<name> [<category>]"``.

    Returns:
        None. Matches (or a "no columns found" message) are printed.
    """
    if df_dict is None:
        # Qualify names with their category: the same key can exist in more
        # than one category dict, and a plain dict merge would keep only the
        # last one and silently leave the others out of the search.
        df_dict = {
            f"{frame_name} [{category}]": frame
            for category, frames in (
                ("auxiliary", auxiliary_dataframes),
                ("raw", raw_dataframes),
                ("medium", medium_dataframes),
            )
            for frame_name, frame in frames.items()
        }

    print(f"\n🔍 SEARCHING COLUMNS: '{pattern}'")
    print("=" * 60)

    needle = str(pattern).lower()
    found = False
    for df_name, df in df_dict.items():
        # str(col): column labels are not always strings (ints, tuples, ...).
        matching_cols = [col for col in df.columns if needle in str(col).lower()]
        if matching_cols:
            found = True
            print(f"\n📋 {df_name}:")
            for col in matching_cols:
                print(f"   • {col}")

    if not found:
        print(f"❌ No columns found matching '{pattern}'")

def df_info():
    """Print row/column counts and deep memory usage for every stored DataFrame.

    Reads the module-level ``auxiliary_dataframes``, ``raw_dataframes``,
    ``medium_dataframes``, ``rare_dataframes`` and ``formatted_dataframes``
    dicts. Categories that are empty are left out of the listing.

    Returns:
        None. The report is printed.
    """
    print(f"\n📊 ALL DATAFRAMES INFO")
    print("=" * 70)

    bytes_per_mb = 1024 * 1024
    sections = (
        ("🗂️ AUXILIARY", auxiliary_dataframes),
        ("📁 RAW", raw_dataframes),
        ("⚙️ MEDIUM", medium_dataframes),
        ("💎 RARE", rare_dataframes),
        ("📊 FORMATTED", formatted_dataframes),
    )

    for title, frames in sections:
        if not frames:
            continue
        print(f"\n{title}:")
        for label, frame in frames.items():
            n_rows, n_cols = frame.shape[0], frame.shape[1]
            memory_mb = frame.memory_usage(deep=True).sum() / bytes_per_mb
            print(f"   📋 {label:<25} → {n_rows:>6} rows × {n_cols:>3} cols ({memory_mb:.1f} MB)")

# =============================================================================
# 9. INSTRUCTIONS AND EXAMPLES
# =============================================================================

# Cheat sheet of the helper functions defined in this cell.
print(
    "\n🛠️ UTILITY FUNCTIONS AVAILABLE:",
    "=" * 70,
    "🔍 explore_df(dataframe, 'name')              → Detailed DataFrame exploration",
    "🔄 compare_dfs(df1, df2, names=['A', 'B'])    → Compare multiple DataFrames",
    "📖 show_sample_data(df_dict, 'category', 5)   → Show sample data from category",
    "🔍 search_columns('pattern')                  → Find columns matching pattern",
    "📊 df_info()                                  → Show all DataFrames info",
    sep="\n",
)

# Copy-paste examples; they are printed as text, not executed.
print(
    "\n💡 EXAMPLE USAGE:",
    "=" * 70,
    "# Explore specific DataFrames",
    "explore_df(valid_emp, 'Valid Employees')",
    "explore_df(df_colaborador, 'Employee Details')",
    "",
    "# Compare DataFrames",
    "compare_dfs(df_colaborador, matrizA_bk, names=['Raw', 'Processed'])",
    "",
    "# Show sample data",
    "show_sample_data(raw_dataframes, 'Raw Data', 3)",
    "",
    "# Search for specific columns",
    "search_columns('matricula')",
    "search_columns('data')",
    "",
    "# Access DataFrames directly",
    "valid_emp.head()",
    "df_colaborador.describe()",
    "matrizA_bk.columns",
    sep="\n",
)

# Where the underlying data lives on data_model and in the category dicts.
print(
    "\n🎯 DIRECT ACCESS TO PROJECT DATA:",
    "=" * 70,
    "📊 data_model.auxiliary_data    → Dictionary with auxiliary data",
    "📁 data_model.raw_data          → Dictionary with raw DataFrames",
    "⚙️ data_model.medium_data       → Dictionary with transformed DataFrames",
    "💎 data_model.rare_data         → Dictionary with algorithm results",
    "📋 data_model.formatted_data    → Dictionary with final formatted data",
    "",
    "📊 auxiliary_dataframes         → Easy access to auxiliary DataFrames",
    "📁 raw_dataframes              → Easy access to raw DataFrames",
    "⚙️ medium_dataframes           → Easy access to medium DataFrames",
    sep="\n",
)

# Closing banner.
print(
    "\n✨ READY FOR INTERACTIVE DEVELOPMENT!",
    "🔧 All project DataFrames are loaded and available in memory",
    "📝 Use the utility functions above to explore and analyze the data",
    "🚀 Start developing your data transformations!",
    sep="\n",
)

2025-06-26 12:48:49,938 |     INFO | Connected to database: oracle+cx_oracle://JOAO_SOARES:***@10.175.28.20:1523/?service_name=WFM_ALCAMPO_TST01


Error in load_matrices_transformations: "None of [Index(['h_tm_in', 'h_seg_in', 'h_ter_in', 'h_qua_in', 'h_qui_in', 'h_sex_in',\n       'h_sab_in', 'h_dom_in', 'h_fer_in'],\n      dtype='object')] are in the [columns]"
Traceback (most recent call last):
  File "c:\Users\joao.soares\Documents\GitHub\algortimo-gd\src\models.py", line 526, in load_estimativas_transformations
    df_turnos['min_in1'] = df_turnos[columns_in].min(axis=1, skipna=True)
                           ~~~~~~~~~^^^^^^^^^^^^
  File "c:\Users\joao.soares\Documents\GitHub\algortimo-gd\.venv\Lib\site-packages\pandas\core\frame.py", line 4108, in __getitem__
    indexer = self.columns._get_indexer_strict(key, "columns")[1]
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\joao.soares\Documents\GitHub\algortimo-gd\.venv\Lib\site-packages\pandas\core\indexes\base.py", line 6200, in _get_indexer_strict
    self._raise_if_missing(keyarr, indexer, axis_name)
  File "c:\Users\joao.soares\Documents\


🔄 Stage 3: Performing data transformations...
2025-06-26 12:48:49,950 |     INFO | Starting load_ma_bd processing
2025-06-26 12:48:49,971 |    ERROR | Error in load_ma_bd: 'DataFrame' object has no attribute 'str'
Traceback (most recent call last):
  File "c:\Users\joao.soares\Documents\GitHub\algortimo-gd\src\models.py", line 1074, in load_colaborador_transformations
    matriz_ma['emp'] = matriz_ma['matricula'].astype(str).str.zfill(10)  # Adjust padding as needed
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\joao.soares\Documents\GitHub\algortimo-gd\.venv\Lib\site-packages\pandas\core\generic.py", line 6299, in __getattr__
    return object.__getattribute__(self, name)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: 'DataFrame' object has no attribute 'str'
2025-06-26 12:48:49,978 |     INFO | Starting load_m2_bd processing
2025-06-26 12:48:49,978 |     INFO | DEBUG: start_date=2025-01-01, end_date=2025-12-31
2025-06-26 12:48:49,980 |

  matrizB_ini.loc[matrizB_ini['data'].isin(special_dates), 'min_turno'] = matrizB_ini['max_turno']
  mask_friday = (matrizB_ini['data'].isin(friday_dates)) & (matrizB_ini['turno'] == 'M')


2025-06-26 12:48:50,560 |     INFO | Columns in matrizA_og after processing: ['fk_colaborador', 'unidade', 'secao', 'posto', 'convenio', 'nome', 'matricula', 'min_dia_trab', 'max_dia_trab', 'tipo_turno', 'seq_turno', 't_total', 'l_total', 'dyf_max_t', 'q', 'c2d', 'c3d', 'cxx', 'semana_1', 'out', 'ciclo', 'data_admissao', 'data_demissao', 'fk_tipo_posto', 'h_tm_in', 'h_tm_out', 'h_tt_in', 'h_tt_out', 'h_seg_in', 'h_seg_out', 'h_ter_in', 'h_ter_out', 'h_qua_in', 'h_qua_out', 'h_qui_in', 'h_qui_out', 'h_sex_in', 'h_sex_out', 'h_sab_in', 'h_sab_out', 'h_dom_in', 'h_dom_out', 'h_fer_in', 'h_fer_out', 'limite_superior_manha', 'limite_inferior_tarde', 'emp', 'lq', 'min', 'max', 'tipo_contrato', 'ld', 'l_dom', 'lq_og', 'total_dom_fes', 'total_fes', 'total_holidays', 'descansos_atrb', 'COLABORADOR', 'LD_at', 'LQ_at', 'LRES_at', 'CXX_at', 'C2D_at', 'C3D_at']
2025-06-26 12:48:59,344 |     INFO | matrizB_m:            data  media_turno  max_turno  min_turno  sd_turno turno  \
0    2025-01-01      

In [7]:
    # =============================================================================
    # 4.3 PERFORM DATA TRANSFORMATIONS (Stage 3)
    # =============================================================================
# Stage 3 driver: runs the data_model transformation steps in a fixed order
# inside the data_manager context and prints a confirmation for every step
# whose return value is truthy.
# NOTE(review): a step that returns a falsy value prints nothing here and the
# following steps still run; only a raised exception reaches the except branch.
with data_manager:    
    print("\n🔄 Stage 3: Performing data transformations...")
    
    try:
        # Load estimativas transformations
        success = data_model.load_estimativas_transformations()
        if success:
            print("   ✅ Estimativas transformations completed")
        
        # Load colaborador transformations  
        success = data_model.load_colaborador_transformations()
        if success:
            print("   ✅ Colaborador transformations completed")
        
        # Load calendario transformations
        success = data_model.load_calendario_transformations()
        if success:
            print("   ✅ Calendario transformations completed")
        
        # Store matriz2_bk before func_inicializa
        # (a copy of raw_data['df_calendario']; raises KeyError, handled below,
        # when that key is missing)
        data_model.medium_data['matriz2_bk'] = data_model.raw_data['df_calendario'].copy()
        
        # Debug: Print matriz2_bk info
        matriz2_bk = data_model.medium_data['matriz2_bk']
        print(f"\n🔍 Debug matriz2_bk before func_inicializa:")
        print(f"   Shape: {matriz2_bk.shape}")
        print(f"   First few rows:\n{matriz2_bk.head()}")
        print(f"   Columns: {matriz2_bk.columns.tolist()}")

        # Debug: Print raw_data['df_estimativas'] before func_inicializa
        print("\n🔍 Debug raw_data['df_estimativas'] before func_inicializa:")
        df_est = data_model.raw_data['df_estimativas']
        print(f"   Shape: {df_est.shape}")
        print(f"   Columns: {df_est.columns.tolist()}")
        print(f"   First few rows:\n{df_est.head()}")

        # Perform func_inicializa
        # Dates come from external_call_data; the two auxiliary frames are
        # fetched with .get(), so None is passed when a key is absent.
        success = data_model.func_inicializa(
            start_date=external_call_data['start_date'],
            end_date=external_call_data['end_date'],
            fer=data_model.auxiliary_data.get('df_festivos'),
            closed_days=data_model.auxiliary_data.get('df_closed_days')
        )
        if success:
            print("   ✅ func_inicializa completed")
            
            # Debug: Print medium_data['df_estimativas'] after func_inicializa
            # (note: the medium_data entry, not the raw_data one printed above)
            print("\n🔍 Debug medium_data['df_estimativas'] after func_inicializa:")
            df_est = data_model.medium_data['df_estimativas']
            print(f"   Shape: {df_est.shape}")
            print(f"   Columns: {df_est.columns.tolist()}")
            print(f"   First few rows:\n{df_est.head()}")
            
    except Exception as e:
        # Report and log with traceback, then let the cell continue.
        print(f"   ❌ Error in transformations: {e}")
        logger.error(f"Transformation error: {e}", exc_info=True)

# Printed unconditionally, including after a handled error above.
print("\n🎉 Data loading completed!")

# =============================================================================
# 5. ORGANIZE DATAFRAMES FOR EASY ACCESS
# =============================================================================

print("\n📊 Organizing DataFrames for interactive access...")

# One dict per data_model stage. Each ends up holding only the entries of that
# stage whose value is a pandas DataFrame.
auxiliary_dataframes = {}
raw_dataframes = {}
medium_dataframes = {}
rare_dataframes = {}
formatted_dataframes = {}

# (data_model attribute, destination dict), processed in this order:
# auxiliary → raw → medium (transformed) → rare (algorithm results) → formatted.
for stage_attr, target in (
    ("auxiliary_data", auxiliary_dataframes),
    ("raw_data", raw_dataframes),
    ("medium_data", medium_dataframes),
    ("rare_data", rare_dataframes),
    ("formatted_data", formatted_dataframes),
):
    # The attribute is looked up only when its turn comes, so the dicts filled
    # earlier are kept if a later stage is unavailable.
    for key, value in getattr(data_model, stage_attr).items():
        if isinstance(value, pd.DataFrame):
            target[key] = value

# =============================================================================
# 6. DISPLAY AVAILABLE DATAFRAMES
# =============================================================================

print("\n📋 AVAILABLE DATAFRAMES")
print("=" * 70)

# Display label → DataFrame dict, in the order the categories are listed.
all_dataframes = {
    "🗂️ AUXILIARY": auxiliary_dataframes,
    "📁 RAW": raw_dataframes,
    "⚙️ MEDIUM (Transformed)": medium_dataframes,
    "💎 RARE (Algorithm Results)": rare_dataframes,
    "📊 FORMATTED (Final)": formatted_dataframes,
}

for category, dataframes in all_dataframes.items():
    # Empty categories get a single placeholder line.
    if not dataframes:
        print(f"\n{category}: (no DataFrames yet)")
        continue

    print(f"\n{category}:")
    for name, df in dataframes.items():
        row_count, col_count = df.shape[0], df.shape[1]
        print(f"   📋 {name:<25} → {row_count:>6} rows × {col_count:>3} columns")

# =============================================================================
# 7. QUICK ACCESS VARIABLES AND UTILITY FUNCTIONS
# =============================================================================

# Section banner for the quick-access bindings below.
print(f"\n🔗 QUICK ACCESS VARIABLES")
print("=" * 70)

# Make key DataFrames easily accessible with simple variable names.
# Each short name is bound only when its key is present in the matching
# category dict. Nothing is unbound when a key is absent, so a name left over
# from an earlier run of this cell keeps its previous value.
try:
    # Auxiliary data
    if 'valid_emp' in auxiliary_dataframes:
        valid_emp = auxiliary_dataframes['valid_emp']
        print(f"✅ valid_emp           → {valid_emp.shape}")
    
    # Raw data
    if 'df_colaborador' in raw_dataframes:
        df_colaborador = raw_dataframes['df_colaborador']
        print(f"✅ df_colaborador      → {df_colaborador.shape}")
    
    if 'df_estimativas' in raw_dataframes:
        df_estimativas = raw_dataframes['df_estimativas']
        print(f"✅ df_estimativas      → {df_estimativas.shape}")
    
    if 'df_calendario' in raw_dataframes:
        df_calendario = raw_dataframes['df_calendario']
        print(f"✅ df_calendario       → {df_calendario.shape}")
    
    # Medium (transformed) data
    if 'matrizA_bk' in medium_dataframes:
        matrizA_bk = medium_dataframes['matrizA_bk']
        print(f"✅ matrizA_bk          → {matrizA_bk.shape}")
    
    # Rebinds the matriz2_bk name that the Stage 3 code above also assigns.
    if 'matriz2_bk' in medium_dataframes:
        matriz2_bk = medium_dataframes['matriz2_bk']
        print(f"✅ matriz2_bk          → {matriz2_bk.shape}")
    
    if 'matrizB_bk' in medium_dataframes:
        matrizB_bk = medium_dataframes['matrizB_bk']
        print(f"✅ matrizB_bk          → {matrizB_bk.shape}")
        
except Exception as e:
    # Best-effort: report the problem and let the rest of the cell continue.
    # Bindings made before the failing line are kept.
    print(f"⚠️ Some DataFrames may not be available yet: {e}")

# =============================================================================
# 8. UTILITY FUNCTIONS FOR DATA EXPLORATION
# =============================================================================

def explore_df(df, name="DataFrame"):
    """Print a detailed overview of a DataFrame and return it unchanged.

    Prints the shape, deep memory usage, a per-column dtype/null listing, the
    first three rows and, when numeric columns exist, their ``describe()``
    summary.

    Columns are inspected by position rather than by label, so frames with
    duplicated column names (where ``df[col]`` yields a DataFrame instead of a
    Series) and frames with non-string labels are handled without raising.

    Args:
        df: The pandas DataFrame to inspect.
        name: Label printed in the header.

    Returns:
        The same ``df`` object that was passed in.
    """
    print(f"\n🔍 EXPLORING: {name}")
    print("=" * 60)
    print(f"📏 Shape: {df.shape[0]} rows × {df.shape[1]} columns")
    print(f"💾 Memory usage: {df.memory_usage(deep=True).sum() / 1024:.1f} KB")

    print(f"\n📋 Columns ({len(df.columns)}):")
    # dtypes and null counts are both ordered like df.columns, so zipping them
    # avoids label-based lookups that break on duplicated column names.
    column_info = zip(df.columns, df.dtypes, df.isnull().sum())
    for position, (col, dtype, null_count) in enumerate(column_info, start=1):
        # str(col): tuple labels do not accept the '<20' format spec.
        print(f"   {position:2d}. {str(col):<20} ({dtype}) - {null_count} nulls")

    print(f"\n📊 First 3 rows:")
    print(df.head(3).to_string())

    # Numeric summary. Use the select_dtypes result directly; re-indexing df by
    # label would select a duplicated numeric label more than once.
    numeric_part = df.select_dtypes(include=[np.number])
    if numeric_part.shape[1] > 0:
        print(f"\n📈 Numeric columns summary:")
        print(numeric_part.describe())

    return df

def compare_dfs(*dataframes, names=None):
    """Print the shape of each DataFrame and the columns they all share.

    Args:
        *dataframes: The pandas DataFrames to compare.
        names: Optional labels, one per DataFrame. Missing labels (``None`` or
            a sequence shorter than ``dataframes``) are filled in with
            ``DataFrame_<position>`` so every frame is always listed. Extra
            labels are ignored.

    Returns:
        None. All results are printed.
    """
    labels = [] if names is None else list(names)
    # Pad with positional defaults; a plain zip would silently drop the
    # frames that have no label.
    labels.extend(f"DataFrame_{i + 1}" for i in range(len(labels), len(dataframes)))

    print(f"\n🔄 COMPARING DATAFRAMES")
    print("=" * 60)

    for name, df in zip(labels, dataframes):
        print(f"📋 {name:<20} → {df.shape[0]:>6} rows × {df.shape[1]:>3} columns")

    # Check for common columns
    if len(dataframes) > 1:
        common_cols = set.intersection(*(set(df.columns) for df in dataframes))
        try:
            ordered_cols = sorted(common_cols)
        except TypeError:
            # Mixed label types (e.g. int and str) cannot be ordered against
            # each other; fall back to ordering by their text form.
            ordered_cols = sorted(common_cols, key=str)

        print(f"\n🔗 Common columns ({len(common_cols)}):")
        for col in ordered_cols:
            print(f"   • {col}")

def show_sample_data(df_dict, category_name, n_rows=3):
    """Print the leading rows of every DataFrame in a category mapping.

    Args:
        df_dict: Mapping of DataFrame name to DataFrame.
        category_name: Label printed in the section header.
        n_rows: How many leading rows to print per DataFrame.

    Returns:
        None. Output goes to stdout; empty frames get a placeholder line.
    """
    print(f"\n📖 SAMPLE DATA: {category_name}")
    print("=" * 60)

    for label, frame in df_dict.items():
        row_total = len(frame)
        print(f"\n🔹 {label} (showing {min(n_rows, row_total)} rows):")
        if row_total == 0:
            print("   (empty DataFrame)")
            continue
        print(frame.head(n_rows).to_string())

def search_columns(pattern, df_dict=None):
    """Print every column whose name contains ``pattern`` (case-insensitive).

    Args:
        pattern: Substring to look for. It is converted with ``str()`` first.
        df_dict: Mapping of DataFrame name to DataFrame. When omitted, the
            module-level ``auxiliary_dataframes``, ``raw_dataframes`` and
            ``medium_dataframes`` are searched and each name is shown as
            ``"<name> [<category>]"``.

    Returns:
        None. Matches (or a "no columns found" message) are printed.
    """
    if df_dict is None:
        # Qualify names with their category: the same key can exist in more
        # than one category dict, and a plain dict merge would keep only the
        # last one and silently leave the others out of the search.
        df_dict = {
            f"{frame_name} [{category}]": frame
            for category, frames in (
                ("auxiliary", auxiliary_dataframes),
                ("raw", raw_dataframes),
                ("medium", medium_dataframes),
            )
            for frame_name, frame in frames.items()
        }

    print(f"\n🔍 SEARCHING COLUMNS: '{pattern}'")
    print("=" * 60)

    needle = str(pattern).lower()
    found = False
    for df_name, df in df_dict.items():
        # str(col): column labels are not always strings (ints, tuples, ...).
        matching_cols = [col for col in df.columns if needle in str(col).lower()]
        if matching_cols:
            found = True
            print(f"\n📋 {df_name}:")
            for col in matching_cols:
                print(f"   • {col}")

    if not found:
        print(f"❌ No columns found matching '{pattern}'")

def df_info():
    """Print row/column counts and deep memory usage for every stored DataFrame.

    Reads the module-level ``auxiliary_dataframes``, ``raw_dataframes``,
    ``medium_dataframes``, ``rare_dataframes`` and ``formatted_dataframes``
    dicts. Categories that are empty are left out of the listing.

    Returns:
        None. The report is printed.
    """
    print(f"\n📊 ALL DATAFRAMES INFO")
    print("=" * 70)

    bytes_per_mb = 1024 * 1024
    sections = (
        ("🗂️ AUXILIARY", auxiliary_dataframes),
        ("📁 RAW", raw_dataframes),
        ("⚙️ MEDIUM", medium_dataframes),
        ("💎 RARE", rare_dataframes),
        ("📊 FORMATTED", formatted_dataframes),
    )

    for title, frames in sections:
        if not frames:
            continue
        print(f"\n{title}:")
        for label, frame in frames.items():
            n_rows, n_cols = frame.shape[0], frame.shape[1]
            memory_mb = frame.memory_usage(deep=True).sum() / bytes_per_mb
            print(f"   📋 {label:<25} → {n_rows:>6} rows × {n_cols:>3} cols ({memory_mb:.1f} MB)")

# =============================================================================
# 9. INSTRUCTIONS AND EXAMPLES
# =============================================================================

# Cheat sheet of the helper functions defined in this cell.
print(
    "\n🛠️ UTILITY FUNCTIONS AVAILABLE:",
    "=" * 70,
    "🔍 explore_df(dataframe, 'name')              → Detailed DataFrame exploration",
    "🔄 compare_dfs(df1, df2, names=['A', 'B'])    → Compare multiple DataFrames",
    "📖 show_sample_data(df_dict, 'category', 5)   → Show sample data from category",
    "🔍 search_columns('pattern')                  → Find columns matching pattern",
    "📊 df_info()                                  → Show all DataFrames info",
    sep="\n",
)

# Copy-paste examples; they are printed as text, not executed.
print(
    "\n💡 EXAMPLE USAGE:",
    "=" * 70,
    "# Explore specific DataFrames",
    "explore_df(valid_emp, 'Valid Employees')",
    "explore_df(df_colaborador, 'Employee Details')",
    "",
    "# Compare DataFrames",
    "compare_dfs(df_colaborador, matrizA_bk, names=['Raw', 'Processed'])",
    "",
    "# Show sample data",
    "show_sample_data(raw_dataframes, 'Raw Data', 3)",
    "",
    "# Search for specific columns",
    "search_columns('matricula')",
    "search_columns('data')",
    "",
    "# Access DataFrames directly",
    "valid_emp.head()",
    "df_colaborador.describe()",
    "matrizA_bk.columns",
    sep="\n",
)

# Where the underlying data lives on data_model and in the category dicts.
print(
    "\n🎯 DIRECT ACCESS TO PROJECT DATA:",
    "=" * 70,
    "📊 data_model.auxiliary_data    → Dictionary with auxiliary data",
    "📁 data_model.raw_data          → Dictionary with raw DataFrames",
    "⚙️ data_model.medium_data       → Dictionary with transformed DataFrames",
    "💎 data_model.rare_data         → Dictionary with algorithm results",
    "📋 data_model.formatted_data    → Dictionary with final formatted data",
    "",
    "📊 auxiliary_dataframes         → Easy access to auxiliary DataFrames",
    "📁 raw_dataframes              → Easy access to raw DataFrames",
    "⚙️ medium_dataframes           → Easy access to medium DataFrames",
    sep="\n",
)

# Closing banner.
print(
    "\n✨ READY FOR INTERACTIVE DEVELOPMENT!",
    "🔧 All project DataFrames are loaded and available in memory",
    "📝 Use the utility functions above to explore and analyze the data",
    "🚀 Start developing your data transformations!",
    sep="\n",
)

2025-06-26 12:48:59,428 |     INFO | Connected to database: oracle+cx_oracle://JOAO_SOARES:***@10.175.28.20:1523/?service_name=WFM_ALCAMPO_TST01


Error in load_matrices_transformations: "None of [Index(['h_tm_in', 'h_seg_in', 'h_ter_in', 'h_qua_in', 'h_qui_in', 'h_sex_in',\n       'h_sab_in', 'h_dom_in', 'h_fer_in'],\n      dtype='object')] are in the [columns]"
Traceback (most recent call last):
  File "c:\Users\joao.soares\Documents\GitHub\algortimo-gd\src\models.py", line 526, in load_estimativas_transformations
    df_turnos['min_in1'] = df_turnos[columns_in].min(axis=1, skipna=True)
                           ~~~~~~~~~^^^^^^^^^^^^
  File "c:\Users\joao.soares\Documents\GitHub\algortimo-gd\.venv\Lib\site-packages\pandas\core\frame.py", line 4108, in __getitem__
    indexer = self.columns._get_indexer_strict(key, "columns")[1]
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\joao.soares\Documents\GitHub\algortimo-gd\.venv\Lib\site-packages\pandas\core\indexes\base.py", line 6200, in _get_indexer_strict
    self._raise_if_missing(keyarr, indexer, axis_name)
  File "c:\Users\joao.soares\Documents\


🔄 Stage 3: Performing data transformations...
2025-06-26 12:48:59,444 |     INFO | Starting load_ma_bd processing
2025-06-26 12:48:59,458 |    ERROR | Error in load_ma_bd: 'DataFrame' object has no attribute 'str'
Traceback (most recent call last):
  File "c:\Users\joao.soares\Documents\GitHub\algortimo-gd\src\models.py", line 1074, in load_colaborador_transformations
    matriz_ma['emp'] = matriz_ma['matricula'].astype(str).str.zfill(10)  # Adjust padding as needed
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\joao.soares\Documents\GitHub\algortimo-gd\.venv\Lib\site-packages\pandas\core\generic.py", line 6299, in __getattr__
    return object.__getattribute__(self, name)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: 'DataFrame' object has no attribute 'str'
2025-06-26 12:48:59,460 |     INFO | Starting load_m2_bd processing
2025-06-26 12:48:59,461 |     INFO | DEBUG: start_date=2025-01-01, end_date=2025-12-31
2025-06-26 12:48:59,461 |

  matrizB_ini.loc[matrizB_ini['data'].isin(special_dates), 'min_turno'] = matrizB_ini['max_turno']
  mask_friday = (matrizB_ini['data'].isin(friday_dates)) & (matrizB_ini['turno'] == 'M')


2025-06-26 12:49:00,098 |     INFO | Columns in matrizA_og after processing: ['fk_colaborador', 'unidade', 'secao', 'posto', 'convenio', 'nome', 'matricula', 'min_dia_trab', 'max_dia_trab', 'tipo_turno', 'seq_turno', 't_total', 'l_total', 'dyf_max_t', 'q', 'c2d', 'c3d', 'cxx', 'semana_1', 'out', 'ciclo', 'data_admissao', 'data_demissao', 'fk_tipo_posto', 'h_tm_in', 'h_tm_out', 'h_tt_in', 'h_tt_out', 'h_seg_in', 'h_seg_out', 'h_ter_in', 'h_ter_out', 'h_qua_in', 'h_qua_out', 'h_qui_in', 'h_qui_out', 'h_sex_in', 'h_sex_out', 'h_sab_in', 'h_sab_out', 'h_dom_in', 'h_dom_out', 'h_fer_in', 'h_fer_out', 'limite_superior_manha', 'limite_inferior_tarde', 'emp', 'lq', 'min', 'max', 'tipo_contrato', 'ld', 'l_dom', 'lq_og', 'total_dom_fes', 'total_fes', 'total_holidays', 'descansos_atrb', 'COLABORADOR', 'LD_at', 'LQ_at', 'LRES_at', 'CXX_at', 'C2D_at', 'C3D_at']
2025-06-26 12:49:09,134 |     INFO | matrizB_m:            data  media_turno  max_turno  min_turno  sd_turno turno  \
0    2025-01-01      

In [8]:
# Debug func_inicializa MatrizB Processing
# This focuses on the specific part where df_estimativas gets processed

def debug_func_inicializa_matrizb(data_model):
    """
    Debug the MatrizB processing in func_inicializa where df_estimativas is handled.

    Works on a copy of ``data_model.raw_data['df_estimativas']`` and reads
    ``data_model.medium_data['matriz2_bk']``; ``data_model`` itself is not
    modified. Each step prints diagnostics and the function stops at the first
    missing prerequisite.

    Steps:
        1. Set ``min_turno`` to ``max_turno`` on Dec 23/24/30/31 (all shifts)
           and on Dec 22/29 for shift ``'M'``, for the year of the earliest
           ``data`` value. Dates are compared as calendar days, so string,
           date and datetime64 ``data`` columns behave the same.
        2. Count morning workers in ``matriz2_bk`` for up to three sample dates.
        3. Test-merge the morning rows with a one-row sample ``+H`` frame.

    Args:
        data_model: Object exposing ``raw_data`` and ``medium_data`` dicts.

    Returns:
        The adjusted copy of df_estimativas, or ``None`` when the function
        stops early (empty input, missing columns, no valid dates, empty
        ``matriz2_bk``).
    """
    print("🔍 DEBUGGING func_inicializa MatrizB Processing")
    print("=" * 60)

    # Get the starting data (copied, so the model's raw frame stays untouched)
    matrizB_og = data_model.raw_data.get('df_estimativas', pd.DataFrame()).copy()
    matriz2_bk = data_model.medium_data.get('matriz2_bk', pd.DataFrame())

    print(f"📊 Starting data:")
    print(f"   matrizB_og (df_estimativas): {matrizB_og.shape}")
    print(f"   matriz2_bk: {matriz2_bk.shape}")

    if len(matrizB_og) == 0:
        print("❌ matrizB_og is empty - this is the source of the problem!")
        return

    print(f"\n📋 matrizB_og columns: {list(matrizB_og.columns)}")
    print(f"📊 matrizB_og sample:")
    print(matrizB_og.head(3))

    if 'data' not in matrizB_og.columns:
        print("❌ 'data' column not found in matrizB_og")
        return

    # Get year from matrizB_og
    first_day = pd.to_datetime(matrizB_og['data'].min())
    if pd.isna(first_day):
        # Without a valid date there is no year to build the special dates from.
        print("❌ 'data' column has no valid dates")
        return
    ano = first_day.year
    print(f"\n📅 Year from data: {ano}")

    # Adjust minTurno for specific dates (this is from the R code)
    special_dates = [f'{ano}-12-23', f'{ano}-12-24', f'{ano}-12-30', f'{ano}-12-31']
    friday_dates = [f'{ano}-12-22', f'{ano}-12-29']

    matrizB_ini = matrizB_og.copy()

    # Check if the required columns exist ('turno' is needed by the Friday mask)
    required_cols = ['min_turno', 'max_turno', 'turno']
    missing_cols = [col for col in required_cols if col not in matrizB_ini.columns]

    if missing_cols:
        print(f"❌ Missing required columns: {missing_cols}")
        print(f"   Available columns: {list(matrizB_ini.columns)}")
        return

    # Apply the special date logic. Compare calendar days instead of raw
    # strings: string isin() misses values such as '2025-12-23 00:00:00' or
    # date objects, and matching strings against datetime64 is deprecated.
    # NOTE(review): assumes timezone-naive dates - confirm against the loader.
    data_days = pd.to_datetime(matrizB_ini['data'], errors='coerce').dt.normalize()
    mask_special = data_days.isin(pd.to_datetime(special_dates))
    mask_friday = data_days.isin(pd.to_datetime(friday_dates)) & (matrizB_ini['turno'] == 'M')
    matrizB_ini.loc[mask_special, 'min_turno'] = matrizB_ini.loc[mask_special, 'max_turno']
    matrizB_ini.loc[mask_friday, 'min_turno'] = matrizB_ini.loc[mask_friday, 'max_turno']

    print(f"✅ Applied special date adjustments")
    print(f"   matrizB_ini shape after adjustments: {matrizB_ini.shape}")

    # Now the critical part - creating the +H column
    print(f"\n🔄 Creating +H column from matriz2_bk...")

    if len(matriz2_bk) == 0:
        print("❌ matriz2_bk is empty - cannot create +H column!")
        return

    print(f"📋 matriz2_bk columns: {list(matriz2_bk.columns)}")

    # Check the logic for calculating +H for morning shifts
    print(f"\n🌅 Processing morning shifts...")

    unique_dates = matriz2_bk['DATA'].unique() if 'DATA' in matriz2_bk.columns else []

    print(f"   Found {len(unique_dates)} unique dates in matriz2_bk")

    if len(unique_dates) == 0:
        print("❌ No dates found in matriz2_bk DATA column")
        return

    # Sample a few dates to check the logic (slicing copes with fewer than 3)
    sample_dates = unique_dates[:3]

    has_colaborador = 'COLABORADOR' in matriz2_bk.columns
    if not has_colaborador:
        print("   ⚠️ COLABORADOR column missing - TIPO_DIA rows cannot be excluded")

    for date in sample_dates:
        if date == 'TIPO_DIA':
            continue

        day_mask = matriz2_bk['DATA'] == date
        if has_colaborador:
            day_mask = day_mask & (matriz2_bk['COLABORADOR'] != 'TIPO_DIA')
        day_data = matriz2_bk[day_mask].copy()

        print(f"   📅 Date {date}: {len(day_data)} employee records")

        if len(day_data) == 0:
            print(f"      ⚠️ No employee data for date {date}")
            continue

        # Check the TIPO_TURNO and HORARIO columns
        if 'TIPO_TURNO' in day_data.columns and 'HORARIO' in day_data.columns:
            morning_workers = day_data[
                (day_data['TIPO_TURNO'] == 'M') &
                (day_data['HORARIO'].str.contains('H|NL', case=False, na=False))
            ]
            print(f"      🌅 Morning workers: {len(morning_workers)}")
        else:
            print(f"      ❌ Missing TIPO_TURNO or HORARIO columns")

    # The issue might be in the merge logic
    print(f"\n🔗 Checking merge logic...")

    # Check if matrizB_ini has the expected columns for merging
    merge_cols = ['data', 'turno']
    available_merge_cols = [col for col in merge_cols if col in matrizB_ini.columns]

    print(f"   Required merge columns: {merge_cols}")
    print(f"   Available in matrizB_ini: {available_merge_cols}")

    if len(available_merge_cols) != len(merge_cols):
        print(f"❌ Cannot merge - missing columns in matrizB_ini")
        return

    # Test the merge for morning data with a single sample row; no real +H
    # values are computed in this debug helper.
    trab_manha = pd.DataFrame([{
        'DATA': sample_dates[0],
        'TURNO': 'M',
        '+H': 5.0
    }])
    print(f"   trab_manha sample: {trab_manha.shape}")
    print(f"   trab_manha columns: {list(trab_manha.columns)}")

    # Test merge
    matrizB_m = matrizB_ini[matrizB_ini['turno'] == 'M'].copy()
    print(f"   matrizB morning records: {len(matrizB_m)}")

    if len(matrizB_m) > 0:
        try:
            merged = matrizB_m.merge(trab_manha, left_on=['data', 'turno'],
                                   right_on=['DATA', 'TURNO'], how='left')
            print(f"   ✅ Merge successful: {merged.shape}")
            print(f"   +H column created: {'+H' in merged.columns}")
        except Exception as e:
            # A failure here (e.g. incompatible key dtypes) is the diagnostic
            # this helper is after, so report it instead of raising.
            print(f"   ❌ Merge failed: {e}")

    # Summary
    print(f"\n📋 SUMMARY:")
    print(f"   🔸 matrizB_og (input): {matrizB_og.shape}")
    print(f"   🔸 Expected output should have +H column")
    print(f"   🔸 Issue likely in +H calculation or merge logic")

    return matrizB_ini

# Function to test the exact transformation
def test_matrizb_transformation(data_model):
    """
    Test the exact MatrizB transformation to see where it fails
    """
    print("\n🧪 TESTING MatrizB Transformation")
    print("=" * 50)
    
    # Get the raw data
    matrizB_og = data_model.raw_data.get('df_estimativas', pd.DataFrame()).copy()
    
    if len(matrizB_og) == 0:
        print("❌ Cannot test - matrizB_og is empty")
        return
    
    print(f"📊 Starting with: {matrizB_og.shape}")
    
    # Apply the basic transformation steps
    try:
        # Step 1: Convert data types
        numeric_cols = ['max_turno', 'min_turno', 'media_turno', 'sd_turno']
        for col in numeric_cols:
            if col in matrizB_og.columns:
                matrizB_og[col] = pd.to_numeric(matrizB_og[col], errors='coerce')
                print(f"   ✅ Converted {col} to numeric")
            else:
                print(f"   ⚠️ Column {col} not found")
        
        # Step 2: Add +H column (placeholder)
        matrizB_og['+H'] = 0
        print(f"   ✅ Added +H column")
        
        # Step 3: Apply the calculation logic
        param_pess_obj = 0.5
        matrizB_og['aux'] = np.where(
            matrizB_og['media_turno'] != 0,
            matrizB_og['sd_turno'] / matrizB_og['media_turno'],
            0
        )
        
        matrizB_og['pess_obj'] = np.where(
            matrizB_og['aux'] >= param_pess_obj,
            np.ceil(matrizB_og['media_turno']),
            np.round(matrizB_og['media_turno'])
        )
        
        matrizB_og['diff'] = matrizB_og['+H'] - matrizB_og['pess_obj']
        
        print(f"   ✅ Applied calculations")
        print(f"   📊 Final shape: {matrizB_og.shape}")
        
        # Store in medium_data to test
        data_model.medium_data['test_df_estimativas'] = matrizB_og.copy()
        
        print(f"   ✅ Test successful - stored in medium_data['test_df_estimativas']")
        
        return matrizB_og
        
    except Exception as e:
        print(f"   ❌ Test failed: {e}")
        import traceback
        traceback.print_exc()
        return None

# Run the debugging
# Both diagnostics need the `data_model` created by the main notebook cells
if 'data_model' not in locals():
    print("❌ data_model not found. Run the main notebook first.")
else:
    result = debug_func_inicializa_matrizb(data_model)
    test_result = test_matrizb_transformation(data_model)

    # A non-None result means the simplified transformation ran end to end
    if test_result is not None:
        print("\n✅ The transformation CAN work!")
        print("   The issue is likely in the +H calculation logic in func_inicializa")
        print("   Check the matriz2_bk processing section")

🔍 DEBUGGING func_inicializa MatrizB Processing
📊 Starting data:
   matrizB_og (df_estimativas): (730, 8)
   matriz2_bk: (16, 731)

📋 matrizB_og columns: ['data', 'media_turno', 'max_turno', 'min_turno', 'sd_turno', 'turno', 'fk_tipo_posto', 'data_turno']
📊 matrizB_og sample:
        data  media_turno  max_turno  min_turno  sd_turno turno fk_tipo_posto  \
0 2025-01-01          0.0        0.0        0.0       0.0     M          None   
1 2025-01-02          0.0        0.0        0.0       0.0     M          None   
2 2025-01-03          0.0        0.0        0.0       0.0     M          None   

     data_turno  
0  2025-01-01_M  
1  2025-01-02_M  
2  2025-01-03_M  

📅 Year from data: 2025
✅ Applied special date adjustments
   matrizB_ini shape after adjustments: (730, 8)

🔄 Creating +H column from matriz2_bk...
📋 matriz2_bk columns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 4

  matrizB_ini.loc[matrizB_ini['data'].isin(special_dates), 'min_turno'] = matrizB_ini['max_turno']
  mask_friday = (matrizB_ini['data'].isin(friday_dates)) & (matrizB_ini['turno'] == 'M')


In [9]:
# Take a copy of the "df_estimativas" entry so later edits to df_estima
# do not touch the frame stored in medium_dataframes
df_estima = medium_dataframes["df_estimativas"].copy()

# Display the first rows of the copy
df_estima.head()

Unnamed: 0,data,media_turno,max_turno,min_turno,sd_turno,turno,fk_tipo_posto,data_turno,+H,aux,pess_obj,diff,WDAY
0,2025-01-01,0.0,0.0,1.0,0.0,M,,2025-01-01_M,0.0,0.0,0.0,0.0,3
1,2025-01-02,0.0,0.0,1.0,0.0,M,,2025-01-02_M,0.0,0.0,0.0,0.0,4
2,2025-01-03,0.0,0.0,1.0,0.0,M,,2025-01-03_M,0.0,0.0,0.0,0.0,5
3,2025-01-04,0.0,0.0,1.0,0.0,M,,2025-01-04_M,0.0,0.0,0.0,0.0,6
4,2025-01-05,0.0,0.0,1.0,0.0,M,,2025-01-05_M,0.0,0.0,0.0,0.0,7


In [10]:
# Copy the calendar frame, then keep only the rows of one collaborator
# (COLABORADOR is compared against the string "0005016794")
df_cal = medium_dataframes["df_calendario"].copy()

is_target_collaborator = df_cal["COLABORADOR"].eq("0005016794")
df_cal = df_cal.loc[is_target_collaborator]
del is_target_collaborator  # keep the notebook namespace as it was

df_cal.head()

Unnamed: 0,COLABORADOR,DATA,TIPO_TURNO,HORARIO,WDAY,ID,WW,WD,DIA_TIPO,emp,data_admissao,data_demissao,matricula,tipo_contrato
1,5016794,2025-01-01,-,-,3,1,1,Wed,Sat,5016794,2021-10-14,NaT,,
15,5016794,2025-01-01,-,-,3,15,1,Wed,Mon,5016794,2021-10-14,NaT,,
29,5016794,2025-01-02,-,-,4,29,1,Thu,Wed,5016794,2021-10-14,NaT,,
43,5016794,2025-01-02,-,-,4,43,1,Thu,Fri,5016794,2021-10-14,NaT,,
57,5016794,2025-01-03,-,-,5,57,1,Fri,domYf,5016794,2021-10-14,NaT,,


In [11]:
# Take a copy of the "df_colaborador" entry so later edits to df_colab
# do not touch the frame stored in medium_dataframes
df_colab = medium_dataframes["df_colaborador"].copy()

# Display the first rows of the copy
df_colab.head()

Unnamed: 0,unidade,secao,posto,fk_colaborador,matricula,out,tipo_contrato,ciclo,l_total,l_dom,...,l_qs,c2d,c3d,cxx,descansos_atrb,lq_og,l_res,vz,l_res2,min_fest_h
0,1015,10150184,VENTA PF,1386,5016794,0,4,0,156.0,40.0,...,0,2.0,0.0,4.0,0,0.0,4.0,47,37.0,0
1,1015,10150184,VENTA PF,1380,155613,0,5,0,104.0,40.0,...,0,2.0,0.0,4.0,0,0.0,4.0,0,32.0,0
2,1015,10150184,VENTA PF,24678,5037932,0,6,0,78.0,40.0,...,0,9.0,2.0,0.0,0,12.0,4.0,0,0.0,0
3,1015,10150184,VENTA PF,25607,5038542,0,6,0,78.0,40.0,...,0,9.0,2.0,0.0,0,12.0,4.0,0,0.0,0
4,1015,10150184,VENTA PF,1382,5003281,0,6,0,78.0,40.0,...,0,9.0,2.0,0.0,0,12.0,4.0,0,0.0,0


In [12]:
from src.algorithms.alcampoAlgorithm import AlcampoAlgorithm

# Test the AlcampoAlgorithm with loaded data
#
# Runs the full AlcampoAlgorithm pipeline on `medium_dataframes`, prints a
# summary of the returned results and exposes them as `alcampo_results`,
# `alcampo_schedule` and `alcampo_metadata`. Any exception is reported with its
# traceback instead of being re-raised, so the notebook keeps running.

print("🚀 Testing AlcampoAlgorithm with loaded project data")
print("=" * 60)

try:
    # =================================================================
    # 1. INITIALIZE THE ALCAMPO ALGORITHM
    # =================================================================
    print("📝 Initializing AlcampoAlgorithm...")

    # Create algorithm instance with default parameters
    alcampo_algorithm = AlcampoAlgorithm()

    print("✅ AlcampoAlgorithm initialized successfully")
    print(f"📋 Algorithm name: {alcampo_algorithm.algo_name}")
    print(f"⚙️ Parameters: {alcampo_algorithm.parameters}")

    # =================================================================
    # 2. PREPARE INPUT DATA
    # =================================================================
    print("\n📊 Preparing input data...")

    # Check available medium dataframes
    print(f"📁 Available medium dataframes: {list(medium_dataframes.keys())}")

    # Check data shapes
    for name, df in medium_dataframes.items():
        print(f"   📋 {name}: {df.shape}")

    print("✅ Input data prepared")

    # =================================================================
    # 3. RUN THE FULL ALGORITHM PIPELINE
    # =================================================================
    print("\n🔄 Running full algorithm pipeline...")
    print("   This may take several minutes depending on data size...")

    # Run the complete algorithm
    results = alcampo_algorithm.run_full_algorithm(medium_dataframes)

    print("🎉 Algorithm execution completed successfully!")

    # =================================================================
    # 4. ANALYZE RESULTS
    # =================================================================
    print("\n📊 ALGORITHM RESULTS ANALYSIS")
    print("=" * 60)

    # Extract main components ('stage1_schedule' is optional in the results)
    final_schedule = results['schedule']
    metadata = results['metadata']
    stage1_schedule = results.get('stage1_schedule')
    summary = results['summary']

    # Display summary information
    print(f"📋 Status: {summary['status']}")
    print(f"💬 Message: {summary['message']}")
    print(f"🕐 Execution time: {metadata['execution_timestamp']}")

    # Schedule statistics
    total_assignments = metadata['total_assignments']
    print(f"\n📊 SCHEDULE STATISTICS:")
    print(f"   👥 Total workers: {metadata['total_workers']}")
    print(f"   📅 Total days: {metadata['total_days']}")
    print(f"   📝 Total assignments: {total_assignments}")

    # Shift distribution
    print(f"\n📈 SHIFT DISTRIBUTION:")
    shift_dist = metadata['shift_distribution']
    for shift, count in shift_dist.items():
        # With zero assignments the share is reported as 0% rather than raising
        # ZeroDivisionError, which the handler below would misreport as an
        # algorithm failure
        percentage = (count / total_assignments) * 100 if total_assignments else 0.0
        print(f"   {shift}: {count:>4} assignments ({percentage:5.1f}%)")

    # =================================================================
    # 5. EXAMINE THE FINAL SCHEDULE
    # =================================================================
    print(f"\n📋 FINAL SCHEDULE SAMPLE:")
    print("=" * 60)

    if final_schedule is not None and not final_schedule.empty:
        print(f"📏 Schedule shape: {final_schedule.shape}")
        print(f"📋 Columns: {list(final_schedule.columns)}")
        print(f"\n🔍 First 10 rows:")
        print(final_schedule.head(10).to_string())

        # Check for any specific patterns
        if 'Worker' in final_schedule.columns:
            unique_workers = final_schedule['Worker'].unique()
            print(f"\n👥 Workers in schedule: {len(unique_workers)}")
            print(f"   Workers: {sorted(unique_workers)}")

        if 'Day' in final_schedule.columns:
            unique_days = final_schedule['Day'].unique()
            print(f"\n📅 Days in schedule: {len(unique_days)}")
            print(f"   Date range: {min(unique_days)} to {max(unique_days)}")

    # =================================================================
    # 6. COMPARE STAGE 1 AND FINAL RESULTS (if available)
    # =================================================================
    # Both schedules are needed: the comparison reads .shape/.columns on each,
    # so a missing final schedule must not reach this branch
    if stage1_schedule is not None and final_schedule is not None:
        print(f"\n🔄 STAGE COMPARISON:")
        print("=" * 60)
        print(f"📊 Stage 1 schedule: {stage1_schedule.shape}")
        print(f"📊 Final schedule: {final_schedule.shape}")

        # Compare shift distributions if possible
        if 'Shift' in stage1_schedule.columns and 'Shift' in final_schedule.columns:
            stage1_shifts = stage1_schedule['Shift'].value_counts()
            final_shifts = final_schedule['Shift'].value_counts()

            print(f"\n📈 Shift changes from Stage 1 to Final:")
            all_shifts = set(stage1_shifts.index) | set(final_shifts.index)
            for shift in sorted(all_shifts):
                stage1_count = stage1_shifts.get(shift, 0)
                final_count = final_shifts.get(shift, 0)
                change = final_count - stage1_count
                change_str = f"({change:+d})" if change != 0 else ""
                print(f"   {shift}: {stage1_count} → {final_count} {change_str}")
    elif stage1_schedule is not None:
        print("\n⚠️ Stage comparison skipped: final schedule is missing")

    # =================================================================
    # 7. SAVE RESULTS FOR FURTHER ANALYSIS
    # =================================================================
    print(f"\n💾 STORING RESULTS IN VARIABLES:")
    print("=" * 60)

    # Make results accessible in the notebook
    alcampo_results = results
    alcampo_schedule = final_schedule
    alcampo_metadata = metadata

    print("✅ Results stored in variables:")
    print("   📊 alcampo_results   → Complete results dictionary")
    print("   📋 alcampo_schedule  → Final schedule DataFrame")
    print("   ⚙️ alcampo_metadata  → Algorithm metadata")

    print(f"\n🎯 QUICK ACCESS EXAMPLES:")
    print("   alcampo_schedule.head()                    → View schedule")
    print("   alcampo_schedule['Worker'].unique()        → List workers")
    print("   alcampo_schedule['Shift'].value_counts()   → Count shifts")
    print("   explore_df(alcampo_schedule, 'Final Schedule')  → Detailed analysis")

except Exception as e:
    # Deliberate best-effort: report the failure and keep the notebook running
    print(f"❌ Error running algorithm: {e}")
    print(f"📜 Error details: {type(e).__name__}: {str(e)}")

    # Print more detailed error information if available
    import traceback
    print(f"\n📋 Full traceback:")
    traceback.print_exc()

    # Check if partial results are available
    # NOTE(review): `alcampo_algorithm` may be left over from an earlier run of
    # this cell if the constructor itself failed this time — confirm on a fresh kernel
    if 'alcampo_algorithm' in locals():
        print(f"\n🔍 Checking algorithm state:")
        print(f"   Data processed: {hasattr(alcampo_algorithm, 'data_processed') and alcampo_algorithm.data_processed is not None}")
        print(f"   Stage 1 model: {hasattr(alcampo_algorithm, 'model_stage1') and alcampo_algorithm.model_stage1 is not None}")
        print(f"   Stage 1 schedule: {hasattr(alcampo_algorithm, 'schedule_stage1') and alcampo_algorithm.schedule_stage1 is not None}")

print(f"\n✨ Algorithm testing completed!")

🚀 Testing AlcampoAlgorithm with loaded project data
📝 Initializing AlcampoAlgorithm...
2025-06-26 12:49:09,421 |     INFO | Logger initialized for base_data_project
2025-06-26 12:49:09,423 |     INFO | Initialized algorithm: alcampo_algorithm
2025-06-26 12:49:09,426 |     INFO | Initialized alcampo_algorithm with parameters: {'shifts': ['M', 'T', 'L', 'LQ', 'F', 'V', 'LD', 'A', 'TC'], 'check_shifts': ['M', 'T', 'L', 'LQ', 'LD', 'TC'], 'check_shift_special': ['M', 'T', 'L', 'TC'], 'working_shifts': ['M', 'T', 'TC'], 'max_continuous_working_days': 10, 'settings': {'F_special_day': False, 'free_sundays_plus_c2d': False, 'missing_days_afect_free_days': False}}
✅ AlcampoAlgorithm initialized successfully
📋 Algorithm name: alcampo_algorithm
⚙️ Parameters: {'shifts': ['M', 'T', 'L', 'LQ', 'F', 'V', 'LD', 'A', 'TC'], 'check_shifts': ['M', 'T', 'L', 'LQ', 'LD', 'TC'], 'check_shift_special': ['M', 'T', 'L', 'TC'], 'working_shifts': ['M', 'T', 'TC'], 'max_continuous_working_days': 10, 'settings':

--- Logging error ---
Traceback (most recent call last):
  File "C:\Users\joao.soares\AppData\Local\Programs\Python\Python311\Lib\logging\__init__.py", line 1113, in emit
    stream.write(msg + self.terminator)
  File "C:\Users\joao.soares\AppData\Local\Programs\Python\Python311\Lib\encodings\cp1252.py", line 19, in encode
    return codecs.charmap_encode(input,self.errors,encoding_table)[0]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
UnicodeEncodeError: 'charmap' codec can't encode character '\u2705' in position 59: character maps to <undefined>
Call stack:
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "c:\Users\joao.soares\Documents\GitHub\algortimo-gd\.venv\Lib\site-packages\ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "c:\Users\joao.soares\Documents\GitHub\algortimo-gd\.venv\Lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance
  

2025-06-26 12:49:12,020 |     INFO | Processing estimativas data with 730 records
2025-06-26 12:49:12,020 |     INFO |   - pess_obj: 1092 entries
2025-06-26 12:49:12,020 |     INFO |   - min_workers: 728 entries
2025-06-26 12:49:12,020 |     INFO |   - max_workers: 728 entries
2025-06-26 12:49:12,020 |     INFO | Setting up additional worker assignments
2025-06-26 12:49:26,791 |     INFO | ✅ Data processing completed successfully
2025-06-26 12:49:26,801 |     INFO | Unpacking processed data
2025-06-26 12:49:26,802 |     INFO | Worker 155540, working days: set(), special days: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 19, 26, 33, 40, 47, 54, 61, 68, 75, 82, 89, 96, 103, 110, 117, 124, 131, 138, 145, 152, 159, 166, 173, 180, 187, 194, 201, 208, 215, 222, 229, 236, 243, 250, 257, 264, 271, 278, 285, 292, 299, 306, 313, 320, 327, 334, 341, 348, 355, 362]
2025-06-26 12:49:26,803 |     INFO | Worker 155550, working days: set(), special days: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 19, 26, 33, 

--- Logging error ---
Traceback (most recent call last):
  File "C:\Users\joao.soares\AppData\Local\Programs\Python\Python311\Lib\logging\__init__.py", line 1113, in emit
    stream.write(msg + self.terminator)
  File "C:\Users\joao.soares\AppData\Local\Programs\Python\Python311\Lib\encodings\cp1252.py", line 19, in encode
    return codecs.charmap_encode(input,self.errors,encoding_table)[0]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
UnicodeEncodeError: 'charmap' codec can't encode character '\u2705' in position 60: character maps to <undefined>
Call stack:
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "c:\Users\joao.soares\Documents\GitHub\algortimo-gd\.venv\Lib\site-packages\ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "c:\Users\joao.soares\Documents\GitHub\algortimo-gd\.venv\Lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance
  

2025-06-26 12:49:27,231 |     INFO | Decision variables created for Stage 1
days_of_year: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 20

--- Logging error ---
Traceback (most recent call last):
  File "C:\Users\joao.soares\AppData\Local\Programs\Python\Python311\Lib\logging\__init__.py", line 1113, in emit
    stream.write(msg + self.terminator)
  File "C:\Users\joao.soares\AppData\Local\Programs\Python\Python311\Lib\encodings\cp1252.py", line 19, in encode
    return codecs.charmap_encode(input,self.errors,encoding_table)[0]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
UnicodeEncodeError: 'charmap' codec can't encode character '\u2705' in position 52: character maps to <undefined>
Call stack:
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "c:\Users\joao.soares\Documents\GitHub\algortimo-gd\.venv\Lib\site-packages\ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "c:\Users\joao.soares\Documents\GitHub\algortimo-gd\.venv\Lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance
  

2025-06-26 12:49:28,600 |     INFO | Solver progress:   6.78e-03s  0.00e+00d  [DetectDominanceRelations] 
2025-06-26 12:49:28,600 |     INFO | Solver progress:   6.10e-02s  0.00e+00d  [operations_research::sat::CpModelPresolver::PresolveToFixPoint] #num_loops=1 #num_dual_strengthening=1 
2025-06-26 12:49:28,600 |     INFO | Solver progress:   2.01e-04s  0.00e+00d  [operations_research::sat::CpModelPresolver::ExtractEncodingFromLinear] 
2025-06-26 12:49:28,600 |     INFO | Solver progress:   2.15e-04s  0.00e+00d  [operations_research::sat::CpModelPresolver::DetectDuplicateColumns] 
2025-06-26 12:49:28,600 |     INFO | Solver progress:   1.51e-04s  0.00e+00d  [operations_research::sat::CpModelPresolver::DetectDuplicateConstraints] 
2025-06-26 12:49:28,621 |     INFO | Solver progress: [Symmetry] Graph for symmetry has 45'500 nodes and 0 arcs.
2025-06-26 12:49:28,621 |     INFO | Solver progress: [Symmetry] Symmetry computation done. time: 0.0037455 dtime: 0.00273
2025-06-26 12:49:28,646 

--- Logging error ---
Traceback (most recent call last):
  File "C:\Users\joao.soares\AppData\Local\Programs\Python\Python311\Lib\logging\__init__.py", line 1113, in emit
    stream.write(msg + self.terminator)
  File "C:\Users\joao.soares\AppData\Local\Programs\Python\Python311\Lib\encodings\cp1252.py", line 19, in encode
    return codecs.charmap_encode(input,self.errors,encoding_table)[0]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
UnicodeEncodeError: 'charmap' codec can't encode character '\u2705' in position 53: character maps to <undefined>
Call stack:
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "c:\Users\joao.soares\Documents\GitHub\algortimo-gd\.venv\Lib\site-packages\ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "c:\Users\joao.soares\Documents\GitHub\algortimo-gd\.venv\Lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance
  

2025-06-26 12:49:31,617 |    ERROR | Error in algorithm execution: cannot access local variable 'd' where it is not associated with a value
Traceback (most recent call last):
  File "c:\Users\joao.soares\Documents\GitHub\algortimo-gd\src\algorithms\alcampoAlgorithm.py", line 341, in execute_algorithm
    self._apply_stage2_constraints(
  File "c:\Users\joao.soares\Documents\GitHub\algortimo-gd\src\algorithms\alcampoAlgorithm.py", line 469, in _apply_stage2_constraints
    day3_quality_weekend(new_model, new_shift, workers, working_days, start_weekday,
  File "c:\Users\joao.soares\Documents\GitHub\algortimo-gd\src\algorithms\shift_scheduler\model\constraints\alcampo_constraints.py", line 767, in day3_quality_weekend
    for d in working_days[w] or d in closed_holidays:
                                ^
UnboundLocalError: cannot access local variable 'd' where it is not associated with a value
❌ Error running algorithm: cannot access local variable 'd' where it is not associated with a v

Traceback (most recent call last):
  File "C:\Users\joao.soares\AppData\Local\Temp\ipykernel_18536\547664014.py", line 43, in <module>
    results = alcampo_algorithm.run_full_algorithm(medium_dataframes)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\joao.soares\Documents\GitHub\algortimo-gd\src\algorithms\alcampoAlgorithm.py", line 542, in run_full_algorithm
    results = self.execute_algorithm(adapted_data)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\joao.soares\Documents\GitHub\algortimo-gd\src\algorithms\alcampoAlgorithm.py", line 341, in execute_algorithm
    self._apply_stage2_constraints(
  File "c:\Users\joao.soares\Documents\GitHub\algortimo-gd\src\algorithms\alcampoAlgorithm.py", line 469, in _apply_stage2_constraints
    day3_quality_weekend(new_model, new_shift, workers, working_days, start_weekday,
  File "c:\Users\joao.soares\Documents\GitHub\algortimo-gd\src\algorithms\shift_scheduler\model\constraints\alca

In [13]:
# Take a fresh copy of the "df_colaborador" entry from medium_dataframes
df_colab = medium_dataframes["df_colaborador"].copy()

# Display up to the first 20 rows of the copy
df_colab.head(20)

Unnamed: 0,unidade,secao,posto,fk_colaborador,matricula,out,tipo_contrato,ciclo,l_total,l_dom,...,l_qs,c2d,c3d,cxx,descansos_atrb,lq_og,l_res,vz,l_res2,min_fest_h
0,1015,10150184,VENTA PF,1386,5016794,0,4,0,156.0,40.0,...,0,2.0,0.0,4.0,0,0.0,4.0,47,37.0,0
1,1015,10150184,VENTA PF,1380,155613,0,5,0,104.0,40.0,...,0,2.0,0.0,4.0,0,0.0,4.0,0,32.0,0
2,1015,10150184,VENTA PF,24678,5037932,0,6,0,78.0,40.0,...,0,9.0,2.0,0.0,0,12.0,4.0,0,0.0,0
3,1015,10150184,VENTA PF,25607,5038542,0,6,0,78.0,40.0,...,0,9.0,2.0,0.0,0,12.0,4.0,0,0.0,0
4,1015,10150184,VENTA PF,1382,5003281,0,6,0,78.0,40.0,...,0,9.0,2.0,0.0,0,12.0,4.0,0,0.0,0
5,1015,10150184,VENTA PF,1379,155612,0,6,0,78.0,29.0,...,0,9.0,2.0,0.0,0,12.0,4.0,0,0.0,0
6,1015,10150184,VENTA PF,1377,155550,0,6,0,78.0,40.0,...,0,9.0,2.0,0.0,0,12.0,4.0,0,0.0,0
7,1015,10150184,VENTA PF,23958,5036116,0,6,0,78.0,40.0,...,0,9.0,2.0,2.0,0,12.0,2.0,0,0.0,0
8,1015,10150184,VENTA PF,1376,155540,0,6,0,78.0,40.0,...,0,9.0,2.0,0.0,0,12.0,4.0,0,0.0,0
9,1015,10150184,VENTA PF,34896,5039237,0,6,0,73.0,45.0,...,0,6.0,1.0,0.0,0,7.0,4.0,0,0.0,0


In [14]:
# Take a fresh copy of the "df_estimativas" entry from medium_dataframes
df_estima = medium_dataframes["df_estimativas"].copy()

# Display the first rows of the copy
df_estima.head()

Unnamed: 0,data,media_turno,max_turno,min_turno,sd_turno,turno,fk_tipo_posto,data_turno,+H,aux,pess_obj,diff,WDAY
0,2025-01-01,0.0,0.0,1.0,0.0,M,,2025-01-01_M,0.0,0.0,0.0,0.0,3
1,2025-01-02,0.0,0.0,1.0,0.0,M,,2025-01-02_M,0.0,0.0,0.0,0.0,4
2,2025-01-03,0.0,0.0,1.0,0.0,M,,2025-01-03_M,0.0,0.0,0.0,0.0,5
3,2025-01-04,0.0,0.0,1.0,0.0,M,,2025-01-04_M,0.0,0.0,0.0,0.0,6
4,2025-01-05,0.0,0.0,1.0,0.0,M,,2025-01-05_M,0.0,0.0,0.0,0.0,7
