In [1]:
# Interactive Development Notebook - Algoritmo GD Project
# Load real project data and keep DataFrames in memory for development

import sys
import os
from pathlib import Path
import pandas as pd
import numpy as np
import logging
from datetime import datetime, timedelta
from typing import Dict, List, Any, Optional, Tuple
import warnings

# Add project root to path so we can import from src/
project_root = Path.cwd()
# Compare against the exact entry we insert. The previous check looked for the
# literal 'src', which is never in sys.path, so every re-run of this cell
# prepended another copy of the project root.
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

print("üöÄ Interactive Development Environment - Algoritmo GD Project")
print("=" * 70)

# =============================================================================
# 1. IMPORT PROJECT MODULES AND CONFIGURATION
# =============================================================================

try:
    # Import project configuration and modules
    from src.config import CONFIG, PROJECT_NAME
    from src.models import DescansosDataModel
    from base_data_project.utils import create_components
    from base_data_project.log_config import setup_logger
    from base_data_project.storage.containers import CSVDataContainer, DBDataContainer
    
    print("‚úÖ Project modules imported successfully")
    print(f"üìÅ Project: {PROJECT_NAME}")
    print(f"üóÇÔ∏è  Root directory: {project_root}")
    
except ImportError as e:
    print(f"‚ùå Error importing project modules: {e}")
    print("Make sure you're running this notebook from the project root directory")
    raise

# Configure logging
logger = setup_logger(PROJECT_NAME, log_level=logging.INFO)

# =============================================================================
# 2. CONFIGURATION AND EXTERNAL DATA SETUP
# =============================================================================

print("\nüìã Setting up configuration and external data...")

# Data-source switch: True reads from the database, False from CSV files
use_db = True

# Run parameters come from CONFIG['external_call_data'] when that key exists;
# the literal below is only the fallback.
_fallback_external_call_data = {
    'current_process_id': 2637,
    'api_proc_id': 999,
    'wfm_proc_id': 2637,
    'wfm_user': 'WFM',
    'start_date': '2025-01-01',
    'end_date': '2025-12-31',
    'wfm_proc_colab': None,
}
external_call_data = CONFIG.get('external_call_data', _fallback_external_call_data)

# Echo the effective settings so the cell output documents the run
print(f"üìä Data source: {'Database' if use_db else 'CSV files'}")
print(f"üìÖ Date range: {external_call_data['start_date']} to {external_call_data['end_date']}")
print(f"üî¢ Process ID: {external_call_data['current_process_id']}")

# =============================================================================
# 3. INITIALIZE DATA MANAGER AND COMPONENTS
# =============================================================================

print("\nüîß Initializing data manager and components...")

try:
    # Create data manager using the project's utility function.
    # It returns a (data_manager, process_manager) pair; only data_manager is
    # used by the cells visible in this notebook.
    data_manager, process_manager = create_components(
        use_db=use_db, 
        no_tracking=True,  # Disable tracking for development
        config=CONFIG,
        project_name=PROJECT_NAME  # Pass project name explicitly
    )
    print("‚úÖ Data manager created successfully")
    
except Exception as e:
    # Report and re-raise: nothing below can run without a data manager.
    print(f"‚ùå Error creating data manager: {e}")
    raise

# =============================================================================
# 4. LOAD PROJECT DATA INTO MEMORY
# =============================================================================

print("\nüìä Loading project data into memory...")

# Pick the container class that matches the configured data source and
# instantiate it; both classes take the same constructor arguments.
data_container = (DBDataContainer if use_db else CSVDataContainer)(
    project_name=PROJECT_NAME,
    config=CONFIG,
)

# Build the project data model around the run parameters and the container
data_model = DescansosDataModel(
    data_container=data_container,
    external_data=external_call_data,
    project_name=PROJECT_NAME,
)

print("‚úÖ Data model initialized")

# Context manager for data manager connection
with data_manager:

    # =============================================================================
    # 4.1 LOAD PROCESS DATA (Stage 1)
    # =============================================================================

    print("\nüîÑ Stage 1: Loading process data...")

    try:
        # Entities to load are declared in the project configuration
        entities_dict = CONFIG.get('available_entities_processing', {})

        success = data_model.load_process_data(data_manager, entities_dict)

        if success:
            print("‚úÖ Process data loaded successfully")
            print(f"   üìã Valid employees: {len(data_model.auxiliary_data.get('valid_emp', []))} records")
            print(f"   üè¢ Unit ID: {data_model.auxiliary_data.get('unit_id')}")
            print(f"   üè≠ Section ID: {data_model.auxiliary_data.get('secao_id')}")
            print(f"   üë§ Position IDs: {data_model.auxiliary_data.get('posto_id_list')}")
        else:
            print("‚ùå Failed to load process data")

    except Exception as e:
        # Best-effort on purpose: report, log the traceback and keep the
        # interactive session alive so the partial state can be inspected.
        print(f"‚ùå Error in Stage 1: {e}")
        logger.error(f"Stage 1 error: {e}", exc_info=True)

    # =============================================================================
    # 4.2 LOAD DETAILED DATA FOR EACH POSITION (Stage 2)
    # =============================================================================

    print("\nüîÑ Stage 2: Loading detailed data for positions...")

    posto_id_list = data_model.auxiliary_data.get('posto_id_list', [])

    if posto_id_list:
        # Process first position as example (you can modify this)
        posto_id = posto_id_list[0]
        print(f"üìç Processing position ID: {posto_id}")

        try:
            # Load colaborador info
            success = data_model.load_colaborador_info(data_manager, posto_id)
            if success:
                print(f"   ‚úÖ Colaborador info loaded")
                df_colaborador = data_model.raw_data.get('df_colaborador')
                if df_colaborador is not None:
                    print(f"      üìä {len(df_colaborador)} employee records")
            else:
                # Report a falsy result explicitly, as Stage 1 does
                print("   ‚ùå Failed to load colaborador info")

            # Load estimativas info
            success = data_model.load_estimativas_info(
                data_manager,
                posto_id,
                external_call_data['start_date'],
                external_call_data['end_date']
            )
            if success:
                print(f"   ‚úÖ Estimativas info loaded")
                df_estimativas = data_model.raw_data.get('df_estimativas')
                if df_estimativas is not None:
                    print(f"      üìà {len(df_estimativas)} estimate records")
            else:
                print("   ‚ùå Failed to load estimativas info")

            # Load calendario info
            success = data_model.load_calendario_info(
                data_manager,
                external_call_data['current_process_id'],
                posto_id,
                external_call_data['start_date'],
                external_call_data['end_date']
            )
            if success:
                print(f"   ‚úÖ Calendario info loaded")
                df_calendario = data_model.raw_data.get('df_calendario')
                if df_calendario is not None:
                    print(f"      üìÖ Calendar matrix: {df_calendario.shape}")
            else:
                print("   ‚ùå Failed to load calendario info")

        except Exception as e:
            # Same best-effort policy as Stage 1
            print(f"   ‚ùå Error loading data for position {posto_id}: {e}")
            logger.error(f"Position {posto_id} error: {e}", exc_info=True)
    else:
        # Previously this case printed nothing, which hid a failed Stage 1
        print("‚ö†Ô∏è No position IDs available; Stage 2 skipped")
    


üöÄ Interactive Development Environment - Algoritmo GD Project
2025-07-31 17:02:25,181 |     INFO | Logger initialized for algoritmo_GD
‚úÖ Project modules imported successfully
üìÅ Project: algoritmo_GD
üóÇÔ∏è  Root directory: c:\ALCAMPO\python-algorithms\algortimo-gd

üìã Setting up configuration and external data...
üìä Data source: Database
üìÖ Date range: 2025-01-01 to 2025-12-31
üî¢ Process ID: 2637

üîß Initializing data manager and components...
Creating components for project: algoritmo_GD
2025-07-31 17:02:26,302 |     INFO | Data manager for 'db' not registered, trying built-in managers
2025-07-31 17:02:26,303 |     INFO | Initialized BaseDataManager
‚úÖ Data manager created successfully

üìä Loading project data into memory...
2025-07-31 17:02:26,308 |     INFO | Initialized DBDataContainer
2025-07-31 17:02:26,310 |     INFO | Initializing database data container with URL: oracle+cx_oracle://EDUARDO_DIREITO:***REDACTED***@10.175.23.29:1526/?service_name=stardard_pdb1.

In [2]:
with data_manager:
    # =============================================================================
    # 4.3 PERFORM DATA TRANSFORMATIONS (Stage 3)
    # =============================================================================

    print("\nüîÑ Stage 3: Performing data transformations...")

    try:
        # Disabled debug dump of the inputs before the transformations; kept
        # for ad-hoc troubleshooting.
        # print("--------------------------------")
        # print("Pre-tratamento")
        # print(f"df_granularidade: {data_model.auxiliary_data['df_granularidade']}")
        # print(f"df_faixa_horario: {data_model.auxiliary_data['df_faixa_horario']}")
        # print(f"df_feriados: {data_model.auxiliary_data['df_feriados']}")
        # print(f"df_estrutura_wfm: {data_model.auxiliary_data['df_estrutura_wfm']}")
        # print(f"df_estimativas_raw: {data_model.raw_data['df_estimativas']}")
        # print("--------------------------------")

        # Each transformation returns a truthy value on success; a falsy
        # result is now reported instead of passing silently.

        # Load estimativas transformations
        success = data_model.load_estimativas_transformations()
        if success:
            print("   ‚úÖ Estimativas transformations completed")
        else:
            print("   ‚ùå Estimativas transformations failed")

        # Load colaborador transformations
        success = data_model.load_colaborador_transformations()
        if success:
            print("   ‚úÖ Colaborador transformations completed")
        else:
            print("   ‚ùå Colaborador transformations failed")

        # Load calendario transformations
        success = data_model.load_calendario_transformations()
        if success:
            print("   ‚úÖ Calendario transformations completed")
        else:
            print("   ‚ùå Calendario transformations failed")

        # Store matriz2_bk before func_inicializa: a copy of the calendar
        # matrix as it stands after the transformations above.
        data_model.medium_data['matriz2_bk'] = data_model.raw_data['df_calendario'].copy()

        # Debug: Print matriz2_bk info
        matriz2_bk = data_model.medium_data['matriz2_bk']
        print(f"\nüîç Debug matriz2_bk before func_inicializa:")
        print(f"   Shape: {matriz2_bk.shape}")
        print(f"   First few rows:\n{matriz2_bk.head()}")
        print(f"   Columns: {matriz2_bk.columns.tolist()}")

        # Disabled debug dump of the same inputs after the transformations.
        # print("--------------------------------")
        # print("Pos-tratamento")
        # print(f"df_granularidade: {data_model.auxiliary_data['df_granularidade']}")
        # print(f"df_estimativas_raw: {data_model.raw_data['df_estimativas']}")
        # print("--------------------------------")

        # Debug: Print raw_data['df_estimativas'] before func_inicializa
        print("\nüîç Debug raw_data['df_estimativas'] before func_inicializa:")
        df_est = data_model.raw_data['df_estimativas']
        print(f"   Shape: {df_est.shape}")
        print(f"   Columns: {df_est.columns.tolist()}")
        print(f"   First few rows:\n{df_est.head()}")
    except Exception as e:
        # Best-effort: report, log the traceback and keep the session alive
        print(f"   ‚ùå Error in transformations: {e}")
        logger.error(f"Transformation error: {e}", exc_info=True)

print("\nüéâ Data loading completed!")

2025-07-31 17:03:11,943 |     INFO | Connected to database: oracle+cx_oracle://EDUARDO_DIREITO:***REDACTED***@10.175.23.29:1526/?service_name=stardard_pdb1.database.backbone.oraclevcn.com

üîÑ Stage 3: Performing data transformations...
2025-07-31 17:03:11,946 |     INFO | Starting load_estimativas_transformations
2025-07-31 17:03:11,946 |     INFO | Extracting parameters from auxiliary_data and external_data
2025-07-31 17:03:11,947 |     INFO | Parameters extracted - start_date: 2025-01-01, end_date: 2025-12-31, fk_unidade: 2112, fk_secao: 49, fk_tipo_posto: None
2025-07-31 17:03:11,948 |     INFO | Loading DataFrames from existing data
2025-07-31 17:03:11,951 |     INFO | DataFrames loaded - df_turnos: (14, 25), df_estrutura_wfm: (360, 6), df_faixa_horario: (123, 19), df_feriados: (78, 8), df_orcamento: (20805, 11)
2025-07-31 17:03:11,952 |     INFO | Processing df_turnos data
2025-07-31 17:03:11,954 |     INFO | Filtered df_turnos by fk_tipo_posto None: (0, 25)
2025-07-31 17:03:11,95

  output_final = output_final.fillna(0)
  matriz_ma[non_date_columns] = matriz_ma[non_date_columns].fillna(0)


2025-07-31 17:03:42,646 |     INFO | DEBUG: reshaped_final_3 after insert_feriados:          0           1           2           3           4           5    \
0        Dia  2025-01-01  2025-01-01  2025-01-02  2025-01-02  2025-01-03   
1   TIPO_DIA           F           F           -           -           -   
2      TURNO           M           T           M           T           M   
3   80000951           F           F           L           L           L   
4   80001012           F           F           L           L           L   
5   80001134           F           F           L           L           L   
6    0000003           F           F         MoT         MoT         MoT   
7   80001578           F           F         MoT         MoT         MoT   
8   80001237           F           F         MoT         MoT         MoT   
9   80001489           F           F         MoT         MoT         MoT   
10  80001586           F           F         MoT         MoT         MoT   
11  

In [3]:
with data_manager:
    try:
        # Perform func_inicializa over the configured date range; `fer` is
        # whatever auxiliary_data holds under 'df_festivos' (None if absent).
        success = data_model.func_inicializa(
            start_date=external_call_data['start_date'],
            end_date=external_call_data['end_date'],
            fer=data_model.auxiliary_data.get('df_festivos'),
        )
        if success:
            print("   ‚úÖ func_inicializa completed")

            # Debug: Print medium_data['df_estimativas'] after func_inicializa
            print("\nüîç Debug medium_data['df_estimativas'] after func_inicializa:")
            df_est = data_model.medium_data['df_estimativas']
            print(f"   Shape: {df_est.shape}")
            print(f"   Columns: {df_est.columns.tolist()}")
            print(f"   First few rows:\n{df_est.head()}")
        else:
            # A falsy result used to pass silently and was followed by the
            # "completed" banner below; report it explicitly.
            print("   ‚ùå func_inicializa failed")

    except Exception as e:
        # Best-effort: report, log the traceback and keep the session alive
        print(f"   ‚ùå Error in transformations: {e}")
        logger.error(f"Transformation error: {e}", exc_info=True)

print("\nüéâ Data loading completed!")


2025-07-31 17:04:00,790 |     INFO | Connected to database: oracle+cx_oracle://EDUARDO_DIREITO:***REDACTED***@10.175.23.29:1526/?service_name=stardard_pdb1.database.backbone.oraclevcn.com
2025-07-31 17:04:00,793 |     INFO | Starting func_inicializa processing
2025-07-31 17:04:00,795 |     INFO | Importing required libraries
2025-07-31 17:04:00,796 |     INFO | Libraries imported successfully
2025-07-31 17:04:00,800 |     INFO | Validating input parameters
2025-07-31 17:04:00,802 |     INFO | Input parameters validated - start_date: 2025-01-01, end_date: 2025-12-31, fer: <class 'pandas.core.frame.DataFrame'>
2025-07-31 17:04:00,803 |     INFO | Loading matrices from existing data
2025-07-31 17:04:00,814 |     INFO | Matrices loaded - matriz2_og (columns: 17, rows: 731), matrizB_og ( columns: 730, rows: 8), matrizA_og ( columns: 14, rows: 52)
2025-07-31 17:04:00,815 |     INFO | Debugging matrizB_og (df_estimativas)
2025-07-31 17:04:00,816 |     INFO | === Debug matrizB_og (df_estimativas

  matrizB_ini.loc[matrizB_ini['data'].isin(special_dates), 'min_turno'] = matrizB_ini['max_turno']
  mask_friday = (matrizB_ini['data'].isin(friday_dates)) & (matrizB_ini['turno'] == 'M')


2025-07-31 17:04:02,250 |     INFO | DEBUG: matriz2 after filter by dismissal date:
       COLABORADOR       DATA TIPO_TURNO HORARIO  WDAY     ID  WW   WD  \
10920    80001630 2025-12-16          M       H     2  10920  51  Tue   
10921    80001630 2025-12-16          T       H     2  10921  51  Tue   
10922    80001630 2025-12-17          M       H     3  10922  51  Wed   
10923    80001630 2025-12-17          T       H     3  10923  51  Wed   
10924    80001630 2025-12-18          M       H     4  10924  51  Thu   
10925    80001630 2025-12-18          T       H     4  10925  51  Thu   
10926    80001630 2025-12-19          M       H     5  10926  51  Fri   
10927    80001630 2025-12-19          T       H     5  10927  51  Fri   
10928    80001630 2025-12-20          M       H     6  10928  51  Sat   
10929    80001630 2025-12-20          T       H     6  10929  51  Sat   
10930    80001630 2025-12-21          M       H     7  10930  51  Sun   
10931    80001630 2025-12-21          T

In [4]:

# Dump the whole medium_data dict to see which entries hold data and which are still None
print(data_model.medium_data)

{'df_calendario': None, 'df_colaborador': None, 'df_estimativas': None, 'matriz2_bk':          0           1           2           3           4           5    \
0        Dia  2025-01-01  2025-01-01  2025-01-02  2025-01-02  2025-01-03   
1   TIPO_DIA           F           F           -           -           -   
2      TURNO           M           T           M           T           M   
3   80000951           F           F           L           L           L   
4   80001012           F           F           L           L           L   
5   80001134           F           F           L           L           L   
6    0000003           F           F         MoT         MoT         MoT   
7   80001578           F           F         MoT         MoT         MoT   
8   80001237           F           F         MoT         MoT         MoT   
9   80001489           F           F         MoT         MoT         MoT   
10  80001586           F           F         MoT         MoT         MoT   
11

In [None]:

# =============================================================================
# 5. ORGANIZE DATAFRAMES FOR EASY ACCESS
# =============================================================================

print("\nüìä Organizing DataFrames for interactive access...")


def _dataframes_only(store):
    """Return the entries of ``store`` whose values are pandas DataFrames.

    Args:
        store: Mapping of name -> arbitrary value (one data_model layer).

    Returns:
        A new dict with only the DataFrame-valued entries, in the original
        order. Anything else (IDs, lists, ``None`` placeholders) is dropped.
    """
    return {name: obj for name, obj in store.items() if isinstance(obj, pd.DataFrame)}


# One dict per data_model layer. A single helper replaces five copy-pasted
# filter loops and keeps loop variables out of the notebook namespace.
auxiliary_dataframes = _dataframes_only(data_model.auxiliary_data)  # auxiliary data
raw_dataframes = _dataframes_only(data_model.raw_data)              # raw data
medium_dataframes = _dataframes_only(data_model.medium_data)        # medium data (transformed)
rare_dataframes = _dataframes_only(data_model.rare_data)            # rare data (algorithm results)
formatted_dataframes = _dataframes_only(data_model.formatted_data)  # formatted data (final output)

# =============================================================================
# 6. DISPLAY AVAILABLE DATAFRAMES
# =============================================================================

print("\nüìã AVAILABLE DATAFRAMES")
print("=" * 70)

# Display label -> DataFrames of that layer, in pipeline order
all_dataframes = {
    "üóÇÔ∏è AUXILIARY": auxiliary_dataframes,
    "üìÅ RAW": raw_dataframes, 
    "‚öôÔ∏è MEDIUM (Transformed)": medium_dataframes,
    "üíé RARE (Algorithm Results)": rare_dataframes,
    "üìä FORMATTED (Final)": formatted_dataframes
}

for category, dataframes in all_dataframes.items():
    # Empty layers get a one-line placeholder instead of an empty listing
    if not dataframes:
        print(f"\n{category}: (no DataFrames yet)")
        continue

    print(f"\n{category}:")
    for name, df in dataframes.items():
        print(f"   üìã {name:<25} ‚Üí {df.shape[0]:>6} rows √ó {df.shape[1]:>3} columns")

# =============================================================================
# 7. QUICK ACCESS VARIABLES AND UTILITY FUNCTIONS
# =============================================================================

print(f"\nüîó QUICK ACCESS VARIABLES")
print("=" * 70)

# Make key DataFrames easily accessible with simple variable names.
# Each alias is bound only when its key exists in the corresponding dict, so a
# name may stay undefined here (or keep the value an earlier cell gave it).
try:
    # Alias taken from the auxiliary layer
    if 'valid_emp' in auxiliary_dataframes:
        valid_emp = auxiliary_dataframes['valid_emp']
        print(f"‚úÖ valid_emp           ‚Üí {valid_emp.shape}")
    
    # Aliases taken from the raw layer
    if 'df_colaborador' in raw_dataframes:
        df_colaborador = raw_dataframes['df_colaborador']
        print(f"‚úÖ df_colaborador      ‚Üí {df_colaborador.shape}")
    
    if 'df_estimativas' in raw_dataframes:
        df_estimativas = raw_dataframes['df_estimativas']
        print(f"‚úÖ df_estimativas      ‚Üí {df_estimativas.shape}")
    
    if 'df_calendario' in raw_dataframes:
        df_calendario = raw_dataframes['df_calendario']
        print(f"‚úÖ df_calendario       ‚Üí {df_calendario.shape}")
    
    # Aliases taken from the medium (transformed) layer
    if 'matrizA_bk' in medium_dataframes:
        matrizA_bk = medium_dataframes['matrizA_bk']
        print(f"‚úÖ matrizA_bk          ‚Üí {matrizA_bk.shape}")
    
    if 'matriz2_bk' in medium_dataframes:
        matriz2_bk = medium_dataframes['matriz2_bk']
        print(f"‚úÖ matriz2_bk          ‚Üí {matriz2_bk.shape}")
    
    if 'matrizB_bk' in medium_dataframes:
        matrizB_bk = medium_dataframes['matrizB_bk']
        print(f"‚úÖ matrizB_bk          ‚Üí {matrizB_bk.shape}")
        
except Exception as e:
    # Safety net only: missing keys are already skipped by the `in` checks above
    print(f"‚ö†Ô∏è Some DataFrames may not be available yet: {e}")

# =============================================================================
# 8. UTILITY FUNCTIONS FOR DATA EXPLORATION
# =============================================================================

def explore_df(df, name="DataFrame"):
    """Print a structural overview of a DataFrame and return it unchanged.

    The report contains the shape, the deep memory usage in KB, one line per
    column (position, label, dtype, null count), the first three rows and,
    when numeric columns exist, their ``describe()`` summary.

    Columns are walked by position rather than looked up by label, so the
    report also works for duplicated labels and for non-string labels
    (integers, tuples from a MultiIndex).

    Args:
        df: The DataFrame to inspect.
        name: Label shown in the report header.

    Returns:
        The same ``df`` object, so the call can be chained.
    """
    n_rows, n_cols = df.shape
    print(f"\nüîç EXPLORING: {name}")
    print("=" * 60)
    print(f"üìè Shape: {n_rows} rows √ó {n_cols} columns")
    print(f"üíæ Memory usage: {df.memory_usage(deep=True).sum() / 1024:.1f} KB")

    print(f"\nüìã Columns ({len(df.columns)}):")
    # dtypes and null counts are aligned with df.columns by position
    null_counts = df.isnull().sum()
    columns_info = zip(df.columns, df.dtypes, null_counts)
    for position, (col, dtype, null_count) in enumerate(columns_info, start=1):
        # str(col): format specs such as '<20' are not valid for every label type
        print(f"   {position:2d}. {str(col):<20} ({dtype}) - {null_count} nulls")

    print(f"\nüìä First 3 rows:")
    print(df.head(3).to_string())

    # Numeric summary; select_dtypes avoids re-selecting columns by label,
    # which would repeat columns when labels are duplicated
    numeric_part = df.select_dtypes(include=[np.number])
    if numeric_part.shape[1] > 0:
        print(f"\nüìà Numeric columns summary:")
        print(numeric_part.describe())

    return df

def compare_dfs(*dataframes, names=None):
    """Print the shape of each DataFrame and the columns they all share.

    Args:
        *dataframes: The DataFrames to compare.
        names: Optional display names, matched to ``dataframes`` by position.
            Missing names are filled with ``DataFrame_<position>`` so that a
            short list no longer drops the remaining DataFrames from the
            report; surplus names are ignored.

    Returns:
        None. The comparison is printed.
    """
    labels = list(names) if names is not None else []
    labels.extend(f"DataFrame_{i+1}" for i in range(len(labels), len(dataframes)))

    print(f"\nüîÑ COMPARING DATAFRAMES")
    print("=" * 60)

    for label, df in zip(labels, dataframes):
        print(f"üìã {label:<20} ‚Üí {df.shape[0]:>6} rows √ó {df.shape[1]:>3} columns")

    # Check for common columns (only meaningful with at least two inputs)
    if len(dataframes) > 1:
        common_cols = set.intersection(*(set(df.columns) for df in dataframes))
        try:
            ordered_cols = sorted(common_cols)
        except TypeError:
            # Mixed label types (e.g. int and str) are not mutually orderable;
            # fall back to ordering by their text form
            ordered_cols = sorted(common_cols, key=str)

        print(f"\nüîó Common columns ({len(common_cols)}):")
        for col in ordered_cols:
            print(f"   ‚Ä¢ {col}")

def show_sample_data(df_dict, category_name, n_rows=3):
    """Print the leading rows of every DataFrame in ``df_dict``.

    Args:
        df_dict: Mapping of display name -> DataFrame.
        category_name: Label shown in the section header.
        n_rows: Number of leading rows to print per DataFrame.

    Returns:
        None. DataFrames without rows are reported as empty.
    """
    print(f"\nüìñ SAMPLE DATA: {category_name}")
    print("=" * 60)

    for label, frame in df_dict.items():
        total_rows = len(frame)
        shown_rows = min(n_rows, total_rows)
        print(f"\nüîπ {label} (showing {shown_rows} rows):")

        # Nothing to preview: say so and move on to the next DataFrame
        if not total_rows > 0:
            print("   (empty DataFrame)")
            continue

        preview = frame.head(n_rows)
        print(preview.to_string())

def search_columns(pattern, df_dict=None):
    """Print every column whose label contains ``pattern`` (case-insensitive).

    Labels are compared through their text form, so DataFrames with integer
    or other non-string column labels are searched instead of raising.

    Args:
        pattern: Substring to look for.
        df_dict: Mapping of display name -> DataFrame to search. When omitted,
            all five notebook layers (auxiliary, raw, medium, rare, formatted)
            are searched and each DataFrame is listed as ``<layer>.<name>``.

    Returns:
        None. Matches are printed per DataFrame.
    """
    if df_dict is None:
        # Qualify names by layer: the same key (e.g. 'df_estimativas') can
        # exist in several layers, and a plain dict merge would keep only one.
        layers = (
            ("auxiliary", auxiliary_dataframes),
            ("raw", raw_dataframes),
            ("medium", medium_dataframes),
            ("rare", rare_dataframes),
            ("formatted", formatted_dataframes),
        )
        df_dict = {}
        for layer_name, frames in layers:
            for frame_name, frame in frames.items():
                df_dict[f"{layer_name}.{frame_name}"] = frame

    print(f"\nüîç SEARCHING COLUMNS: '{pattern}'")
    print("=" * 60)

    needle = str(pattern).lower()
    found = False
    for df_name, df in df_dict.items():
        matching_cols = [col for col in df.columns if needle in str(col).lower()]
        if matching_cols:
            found = True
            print(f"\nüìã {df_name}:")
            for col in matching_cols:
                print(f"   ‚Ä¢ {col}")

    if not found:
        print(f"‚ùå No columns found matching '{pattern}'")

def df_info():
    """Print name, shape and deep memory usage (MB) of every collected DataFrame.

    Reads the five module-level layer dicts (auxiliary, raw, medium, rare,
    formatted). Layers that hold no DataFrames are left out of the listing.
    """
    print(f"\nüìä ALL DATAFRAMES INFO")
    print("=" * 70)

    bytes_per_mb = 1024 * 1024

    # Heading -> DataFrames of that layer, in pipeline order
    layers = {
        "üóÇÔ∏è AUXILIARY": auxiliary_dataframes,
        "üìÅ RAW": raw_dataframes,
        "‚öôÔ∏è MEDIUM": medium_dataframes,
        "üíé RARE": rare_dataframes,
        "üìä FORMATTED": formatted_dataframes,
    }

    for heading, frames in layers.items():
        if not frames:
            continue

        print(f"\n{heading}:")
        for label, frame in frames.items():
            n_rows, n_cols = frame.shape
            size_mb = frame.memory_usage(deep=True).sum() / bytes_per_mb
            print(f"   üìã {label:<25} ‚Üí {n_rows:>6} rows √ó {n_cols:>3} cols ({size_mb:.1f} MB)")

# =============================================================================
# 9. INSTRUCTIONS AND EXAMPLES
# =============================================================================
# Everything below is a printed cheat sheet: the example calls are shown as
# text only and are not executed.

# Helper functions defined in section 8
print(f"\nüõ†Ô∏è UTILITY FUNCTIONS AVAILABLE:")
print("=" * 70)
print("üîç explore_df(dataframe, 'name')              ‚Üí Detailed DataFrame exploration")
print("üîÑ compare_dfs(df1, df2, names=['A', 'B'])    ‚Üí Compare multiple DataFrames")  
print("üìñ show_sample_data(df_dict, 'category', 5)   ‚Üí Show sample data from category")
print("üîç search_columns('pattern')                  ‚Üí Find columns matching pattern")
print("üìä df_info()                                  ‚Üí Show all DataFrames info")

# Copy-paste examples; the aliases they mention exist only if the
# quick-access section found the corresponding DataFrame.
print(f"\nüí° EXAMPLE USAGE:")
print("=" * 70)
print("# Explore specific DataFrames")
print("explore_df(valid_emp, 'Valid Employees')")
print("explore_df(df_colaborador, 'Employee Details')")
print("")
print("# Compare DataFrames")
print("compare_dfs(df_colaborador, matrizA_bk, names=['Raw', 'Processed'])")
print("")
print("# Show sample data")
print("show_sample_data(raw_dataframes, 'Raw Data', 3)")
print("")
print("# Search for specific columns")
print("search_columns('matricula')")
print("search_columns('data')")
print("")
print("# Access DataFrames directly")
print("valid_emp.head()")
print("df_colaborador.describe()")
print("matrizA_bk.columns")

# Where the underlying data lives: the data_model layers and the filtered
# DataFrame-only dicts built in section 5
print(f"\nüéØ DIRECT ACCESS TO PROJECT DATA:")
print("=" * 70)
print("üìä data_model.auxiliary_data    ‚Üí Dictionary with auxiliary data")
print("üìÅ data_model.raw_data          ‚Üí Dictionary with raw DataFrames")  
print("‚öôÔ∏è data_model.medium_data       ‚Üí Dictionary with transformed DataFrames")
print("üíé data_model.rare_data         ‚Üí Dictionary with algorithm results")
print("üìã data_model.formatted_data    ‚Üí Dictionary with final formatted data")
print("")
print("üìä auxiliary_dataframes         ‚Üí Easy access to auxiliary DataFrames")
print("üìÅ raw_dataframes              ‚Üí Easy access to raw DataFrames")
print("‚öôÔ∏è medium_dataframes           ‚Üí Easy access to medium DataFrames")

# Closing banner
print(f"\n‚ú® READY FOR INTERACTIVE DEVELOPMENT!")
print("üîß All project DataFrames are loaded and available in memory")
print("üìù Use the utility functions above to explore and analyze the data")
print("üöÄ Start developing your data transformations!")