## Importar

In [1]:
import numpy as np
import pandas as pd
import os 
import sys
import matplotlib.pyplot as plt

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score

from sklearn.model_selection import GridSearchCV
import optuna
import optuna.visualization as vis
from catboost import CatBoostClassifier

  from .autonotebook import tqdm as notebook_tqdm


## Leer datos

In [2]:
# Asegurar que el directorio del proyecto esté en sys.path
project_root = os.getcwd()
if project_root not in sys.path:
    sys.path.append(project_root)

In [3]:
# Importar el diccionario desde Dict/dict_df.py
from Dict.dict_df import dict_df

In [4]:
# Definir la ruta del archivo CSV
csv_path_df = os.path.join('Data', '.ipynb_checkpoints', 'df.csv')

# Leer el CSV usando el diccionario de tipos
df = pd.read_csv(csv_path_df, dtype=dict_df, parse_dates=['Begin_Date'])

In [5]:
df

Unnamed: 0,Customer_ID,Begin_Date,End_Date,Type,Paperless_Billing,Payment_Method,Monthly_Charges,Total_Charges,Gender,Senior_Citizen,Partner,Dependents,Multiple_Lines,Internet_Service,Online_Security,Online_Backup,Device_Protection,Tech_Support,Streaming_TV,Streaming_Movies
0,0002-ORFBO,2019-05-01,0,one year,yes,mailed check,65.60,593.30,female,0,yes,yes,no,dsl,no,yes,no,yes,yes,no
1,0003-MKNFE,2019-05-01,0,month-to-month,no,mailed check,59.90,542.40,male,0,no,no,yes,dsl,no,no,no,no,no,yes
2,0004-TLHLJ,2019-09-01,1,month-to-month,yes,electronic check,73.90,280.85,male,0,no,no,no,fiber optic,no,no,yes,no,no,no
3,0011-IGKFF,2018-12-01,1,month-to-month,yes,electronic check,98.00,1237.85,male,1,yes,no,no,fiber optic,no,yes,yes,no,yes,yes
4,0013-MHZWF,2019-05-01,0,month-to-month,yes,credit card,69.40,571.45,female,0,no,yes,no,dsl,no,no,no,yes,yes,yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4844,9978-HYCIN,2016-03-01,0,one year,yes,bank transfer,84.95,4018.05,male,1,yes,yes,no,fiber optic,no,yes,no,no,yes,no
4845,9979-RGMZT,2019-07-01,0,one year,yes,mailed check,94.05,633.45,female,0,no,no,no,fiber optic,no,yes,no,no,yes,yes
4846,9987-LUTYD,2019-01-01,0,one year,no,mailed check,55.15,742.90,female,0,no,no,no,dsl,yes,no,no,yes,no,no
4847,9992-UJOEL,2019-12-01,0,month-to-month,yes,mailed check,50.30,92.75,male,0,no,no,no,dsl,no,yes,no,no,no,no


In [6]:
def read_data(data):
    '''
    Función para leer los DataFrames guardados en la carpeta Data utilizando los scripts de Dict.
    Se asume que:
    - Los archivos .csv están en Data/.ipynb_checkpoints/
    - Los diccionarios asociados están en Dict/ con el formato dict_<nombre>.py
    '''
    # Asegurar que el directorio del proyecto esté en sys.path
    project_root = os.getcwd()
    if project_root not in sys.path:
        sys.path.append(project_root)

    try:
        # Importar dinámicamente el diccionario asociado al DataFrame
        dict_module_name = f'Dict.dict_{data}'
        dict_module = __import__(dict_module_name, fromlist=['dict_df'])
        dtype_dict = dict_module.dict_df

        # Construir la ruta al archivo CSV
        csv_path = os.path.join('Data', '.ipynb_checkpoints', f'{data}.csv')

        # Leer el DataFrame utilizando el diccionario de tipos
        dataframe = pd.read_csv(csv_path, dtype=dtype_dict)
        return dataframe
    except ModuleNotFoundError:
        raise ImportError(
            f"No se encontró el módulo asociado al DataFrame: {dict_module_name}")
    except FileNotFoundError:
        raise FileNotFoundError(f"No se encontró el archivo CSV: {csv_path}")
    except Exception as e:
        raise RuntimeError(f"Error al procesar el archivo {data}: {e}")

In [7]:
df = read_data(df)

ImportError: No se encontró el módulo asociado al DataFrame: Dict.dict_     Customer_ID Begin_Date End_Date            Type Paperless_Billing  \
0     0002-ORFBO 2019-05-01        0        one year               yes   
1     0003-MKNFE 2019-05-01        0  month-to-month                no   
2     0004-TLHLJ 2019-09-01        1  month-to-month               yes   
3     0011-IGKFF 2018-12-01        1  month-to-month               yes   
4     0013-MHZWF 2019-05-01        0  month-to-month               yes   
...          ...        ...      ...             ...               ...   
4844  9978-HYCIN 2016-03-01        0        one year               yes   
4845  9979-RGMZT 2019-07-01        0        one year               yes   
4846  9987-LUTYD 2019-01-01        0        one year                no   
4847  9992-UJOEL 2019-12-01        0  month-to-month               yes   
4848  9993-LHIEB 2014-07-01        0        two year                no   

        Payment_Method  Monthly_Charges  Total_Charges  Gender  \
0         mailed check            65.60         593.30  female   
1         mailed check            59.90         542.40    male   
2     electronic check            73.90         280.85    male   
3     electronic check            98.00        1237.85    male   
4          credit card            69.40         571.45  female   
...                ...              ...            ...     ...   
4844     bank transfer            84.95        4018.05    male   
4845      mailed check            94.05         633.45  female   
4846      mailed check            55.15         742.90  female   
4847      mailed check            50.30          92.75    male   
4848      mailed check            67.85        4627.65    male   

      Senior_Citizen Partner Dependents Multiple_Lines Internet_Service  \
0                  0     yes        yes             no              dsl   
1                  0      no         no            yes              dsl   
2                  0      no         no             no      fiber optic   
3                  1     yes         no             no      fiber optic   
4                  0      no        yes             no              dsl   
...              ...     ...        ...            ...              ...   
4844               1     yes        yes             no      fiber optic   
4845               0      no         no             no      fiber optic   
4846               0      no         no             no              dsl   
4847               0      no         no             no              dsl   
4848               0     yes        yes             no              dsl   

     Online_Security Online_Backup Device_Protection Tech_Support  \
0                 no           yes                no          yes   
1                 no            no                no           no   
2                 no            no               yes           no   
3                 no           yes               yes           no   
4                 no            no                no          yes   
...              ...           ...               ...          ...   
4844              no           yes                no           no   
4845              no           yes                no           no   
4846             yes            no                no          yes   
4847              no           yes                no           no   
4848             yes            no               yes          yes   

     Streaming_TV Streaming_Movies  
0             yes               no  
1              no              yes  
2              no               no  
3             yes              yes  
4             yes              yes  
...           ...              ...  
4844          yes               no  
4845          yes              yes  
4846           no               no  
4847           no               no  
4848           no              yes  

[4849 rows x 20 columns]

In [None]:
def read_data(data):
    '''
    Función para leer los DataFrames guardados en la carpeta Data utilizando los scripts de Dict
    '''
    # Asegurar que el directorio del proyecto esté en sys.path
    project_root = os.getcwd()
    if project_root not in sys.path:
        sys.path.append(project_root) #ok

    # Importar dinámica del diccionario desde Dict/dict_df.py
    dict_module_name = f'Dict.dict_{data}'
    dict_module = __import__(dict_module_name, fromlist=[data])
    dtype_dict = dict_module.data 
    
    from Dict.dict_{data} import dict_df  
    
    csv_path = os.path.join('Data', '.ipynb_checkpoints', f'{data}.csv') #ok
    data = pd.read_csv(csv_path, dtype=f'dict_{data}')
    
    return data

    # try:
    #     # Importar dinámicamente el diccionario asociado al DataFrame
    #     dict_module_name = f'Dict.dict_{data}'
    #     dict_module = __import__(dict_module_name, fromlist=['dict_df'])
    #     dtype_dict = dict_module.dict_df

In [None]:
features_train_ord = pd.read_csv(csv_path_fto, dtype=dict_fto)
target_train_ord = pd.read_csv(csv_path_tto, dtype=dict_tto)

In [None]:
# Agregar el directorio raíz del proyecto a sys.path
from Dict.dict_df import dict_df
project_root = os.getcwd()
if project_root not in sys.path:
    sys.path.append(project_root)

# Importar el diccionario desde Dict/dict_df.py
from Dict.dict_df import dict_df

In [None]:
def create_dict(data):
    keys=[]
    values=[]
    keys.append(data.colmuns)
    values.append(data.dtypes)
    f'dict_{data}' = {keys:values}
    return 'dict_{data}'