In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.model_selection import ShuffleSplit, StratifiedShuffleSplit
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer

import lightgbm as lgb

import optuna
from optuna.visualization import plot_optimization_history, plot_param_importances, plot_slice, plot_contour

from time import time
from kaggle.api.kaggle_api_extended import KaggleApi
import pickle

Dask dataframe query planning is disabled because dask-expr is not installed.

You can install it with `pip install dask[dataframe]` or `conda install dask`.
This will raise in a future version.

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
base_path = 'C:/Users/Federico/Desktop/Maestria Data mining/DM EyF/'
dataset_path = base_path + 'datasets/'
modelos_path = base_path + 'modelos/'
db_path = base_path + 'db/'
dataset_file = 'competencia_01_DQ.csv'


ganancia_acierto = 273000
costo_estimulo = 7000

mes_train = 202104
mes_test = 202106

# agregue sus semillas
semillas = [540079, 250829, 314299, 302111, 801007]

data = pd.read_csv(dataset_path + dataset_file)

  data = pd.read_csv(dataset_path + dataset_file)


In [3]:
df=data.copy()
df.shape

(981946, 98)

In [4]:
df['clase_peso'] = 1.0
df['clase_binaria'] = 0
df['clase_binaria'] = np.where(df['clase_ternaria'] == 'CONTINUA', 0, 1)

df.loc[df['clase_binaria'] == 1, 'clase_peso'] = 1.0001

In [5]:
df[df["foto_mes"]==202106]

Unnamed: 0,mpayroll,cpayroll_trx,mcuentas_saldo,mrentabilidad_annual,mactivos_margen,mtarjeta_visa_consumo,ctrx_quarter,mpasivos_margen,mrentabilidad,Visa_mpagominimo,...,cprestamos_prendarios,ctarjeta_visa,Master_msaldodolares,mttarjeta_master_debitos_automaticos,Master_mconsumototal,tmobile_app,foto_mes,clase_ternaria,clase_peso,clase_binaria
817070,117300.00,1,20016.94,20332.88,-2491.60,20708.29,199,1029.14,399.88,1313.76,...,0,1,0.00,9328.57,53339.61,0,202106,,1.0001,1
817071,0.00,0,130909.79,23227.25,-2810.24,113842.54,191,2103.70,4708.37,0.00,...,0,1,450.42,0.00,36861.75,0,202106,,1.0001,1
817072,235650.91,2,111111.85,38207.62,1180.59,14491.15,172,1376.77,3534.50,18885.30,...,0,1,,0.00,,0,202106,,1.0001,1
817073,0.00,0,5.05,1445.30,-678.84,21741.23,20,204.32,-138.82,1818.15,...,0,1,0.00,0.00,,0,202106,,1.0001,1
817074,53958.00,2,58408.06,36468.62,-659.77,50919.03,29,721.51,556.99,4058.58,...,0,1,0.00,0.00,,0,202106,,1.0001,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
981941,23460.00,1,0.00,0.00,0.00,0.00,2,0.00,0.00,0.00,...,0,1,0.00,0.00,,0,202106,,1.0001,1
981942,0.00,1,91576.57,95.20,0.00,0.00,5,81.16,95.20,0.00,...,0,1,0.00,0.00,,0,202106,,1.0001,1
981943,0.00,1,71850.94,72.08,0.00,0.00,2,61.45,72.08,0.00,...,0,1,0.00,0.00,,0,202106,,1.0001,1
981944,116570.62,2,0.00,0.00,0.00,0.00,2,0.00,0.00,0.00,...,0,1,0.00,0.00,,0,202106,,1.0001,1


In [7]:
## funciones generales
def lgb_gan_eval(y_pred, data):
    weight = data.get_weight()
    ganancia = np.where(weight == 1.0001, ganancia_acierto, 0) - np.where(weight < 1.0001, costo_estimulo, 0)
    ganancia = ganancia[np.argsort(y_pred)[::-1]]
    ganancia = np.cumsum(ganancia)

    return 'gan_eval', np.max(ganancia) , True

In [8]:
train_data = df[df['foto_mes'] == mes_train]
test_data = df[df['foto_mes'] == mes_test]

X_train = train_data.drop(['clase_ternaria', 'clase_peso', 'clase_binaria'], axis=1)
y_train_binaria = train_data['clase_binaria']
w_train = train_data['clase_peso']

X_test = test_data.drop(['clase_ternaria', 'clase_peso', 'clase_binaria'], axis=1)


In [9]:
X_train.shape

(164090, 97)

In [None]:
#optimizacion

In [10]:
def objective(trial):

    num_leaves = trial.suggest_int('num_leaves', 20, 500)
    learning_rate = trial.suggest_float('learning_rate', 0.05, 0.3) # mas bajo, más iteraciones necesita
    min_data_in_leaf = trial.suggest_int('min_data_in_leaf', 1000, 6000)
    feature_fraction = trial.suggest_float('feature_fraction', 0.3, 1.0)
    bagging_fraction = trial.suggest_float('bagging_fraction', 0.3, 1.0)

    params = {
        'objective': 'binary',
        'metric': 'custom',
        'boosting_type': 'gbdt',
        'first_metric_only': True,
        'boost_from_average': True,
        'feature_pre_filter': False,
        'max_bin': 31,
        'num_leaves': num_leaves,
        'learning_rate': learning_rate,
        'min_data_in_leaf': min_data_in_leaf,
        'feature_fraction': feature_fraction,
        'bagging_fraction': bagging_fraction,
        'seed': 1,
        'early_stopping_rounds': int(50 + 5 / learning_rate),
        'verbose': -1
    }

    train_data = lgb.Dataset(X_train,
                              label=y_train_binaria, # todos los baja como 1
                              weight=w_train)
    
    cv_results = lgb.cv(
        params,
        train_data,
        num_boost_round=10000000, # modificar, subit y subir
        feval=lgb_gan_eval,
        stratified=True,
        nfold=5,
        seed=semillas[0]
    )
    
    max_gan = max(cv_results['valid gan_eval-mean'])
    best_iter = cv_results['valid gan_eval-mean'].index(max_gan) + 1

    # Guardamos cual es la mejor iteración del modelo
    trial.set_user_attr("best_iter", best_iter)

    return max_gan * 5


storage_name = "sqlite:///" + db_path + "optimizacion_LGBM_competencia_01.db"
study_name = "modelo_22"

study_1= optuna.create_study(
    direction="maximize",
    study_name=study_name,
    storage=storage_name,
    load_if_exists=True,
)

[I 2024-10-09 08:44:50,244] Using an existing study with name 'modelo_22' instead of creating a new one.


In [None]:
study_1.optimize(objective, n_trials=1000) # subir subir

[I 2024-10-09 00:11:43,165] Trial 0 finished with value: 354060000.0 and parameters: {'num_leaves': 271, 'learning_rate': 0.18028060572150412, 'min_data_in_leaf': 2400, 'feature_fraction': 0.32381863985618703, 'bagging_fraction': 0.32834533711765923}. Best is trial 0 with value: 354060000.0.
[I 2024-10-09 00:12:06,191] Trial 1 finished with value: 354984000.0 and parameters: {'num_leaves': 448, 'learning_rate': 0.11256586733885775, 'min_data_in_leaf': 3066, 'feature_fraction': 0.6333011182891084, 'bagging_fraction': 0.960544083713275}. Best is trial 1 with value: 354984000.0.
[I 2024-10-09 00:12:18,797] Trial 2 finished with value: 348663000.0 and parameters: {'num_leaves': 236, 'learning_rate': 0.2561665772845419, 'min_data_in_leaf': 4657, 'feature_fraction': 0.7910231773158027, 'bagging_fraction': 0.7208364139828585}. Best is trial 1 with value: 354984000.0.
[I 2024-10-09 00:12:52,897] Trial 3 finished with value: 357882000.0 and parameters: {'num_leaves': 191, 'learning_rate': 0.073

In [11]:
optuna.visualization.plot_optimization_history(study_1)

In [12]:
plot_param_importances(study_1)

In [13]:
plot_slice(study_1)

In [19]:
storage_name = "sqlite:///" + db_path + "optimizacion_LGBM_competencia_01.db"
study_name = "modelo_11"

study_1= optuna.create_study(
    direction="maximize",
    study_name=study_name,
    storage=storage_name,
    load_if_exists=True,
)

[I 2024-10-09 08:49:25,075] Using an existing study with name 'modelo_11' instead of creating a new one.


In [20]:
def entrenar_modelos_con_varias_semillas(best_params, X_train, y_train_binaria, w_train, best_iter, semillas):
    """
    Entrena modelos de LightGBM utilizando los mejores parámetros y una lista de semillas.
    
    Parámetros:
    - best_params: dict, contiene los mejores parámetros del estudio de optimización.
    - X_train: dataframe, las características de entrenamiento.
    - y_train_binaria: array-like, la variable objetivo binaria.
    - w_train: array-like, los pesos para las observaciones de entrenamiento.
    - best_iter: int, la mejor cantidad de árboles según el estudio de optimización.
    - semillas: lista de int, las semillas para entrenar cada modelo.
    
    Retorna:
    - modelos_entrenados: diccionario de modelos de LightGBM entrenados, con nombres basados en la semilla.
    """
    modelos_entrenados = {}

    # Crear el dataset de entrenamiento
    train_data = lgb.Dataset(X_train, label=y_train_binaria, weight=w_train)

    # Entrenar un modelo por cada semilla
    for seed in semillas:
        # Actualizar el parámetro de la semilla
        params = best_params.copy()
        params['seed'] = seed
        
        # Entrenar el modelo
        model = lgb.train(params, train_data, num_boost_round=best_iter)
        
        # Asignar el nombre del modelo como "modelo_semilla"
        nombre_modelo = f"modelo_{seed}"
        modelos_entrenados[nombre_modelo] = model
        
        print(f"Modelo entrenado con semilla {seed} guardado como {nombre_modelo}")
    
    return modelos_entrenados

In [18]:
## muchas semillas
def es_primo(n):
    if n < 2:
        return False
    for i in range(2, int(n**0.5) + 1):
        if n % i == 0:
            return False
    return True

# Lista para almacenar los números primos
semillas = [540079, 250829, 314299, 302111, 801007]
num = 1000  # Comenzar desde el primer número primo

# Continuar buscando hasta obtener 200 números primos
while len(semillas) < 200:
    if es_primo(num):
        semillas.append(num)
    num += 1

semillas

[540079,
 250829,
 314299,
 302111,
 801007,
 1009,
 1013,
 1019,
 1021,
 1031,
 1033,
 1039,
 1049,
 1051,
 1061,
 1063,
 1069,
 1087,
 1091,
 1093,
 1097,
 1103,
 1109,
 1117,
 1123,
 1129,
 1151,
 1153,
 1163,
 1171,
 1181,
 1187,
 1193,
 1201,
 1213,
 1217,
 1223,
 1229,
 1231,
 1237,
 1249,
 1259,
 1277,
 1279,
 1283,
 1289,
 1291,
 1297,
 1301,
 1303,
 1307,
 1319,
 1321,
 1327,
 1361,
 1367,
 1373,
 1381,
 1399,
 1409,
 1423,
 1427,
 1429,
 1433,
 1439,
 1447,
 1451,
 1453,
 1459,
 1471,
 1481,
 1483,
 1487,
 1489,
 1493,
 1499,
 1511,
 1523,
 1531,
 1543,
 1549,
 1553,
 1559,
 1567,
 1571,
 1579,
 1583,
 1597,
 1601,
 1607,
 1609,
 1613,
 1619,
 1621,
 1627,
 1637,
 1657,
 1663,
 1667,
 1669,
 1693,
 1697,
 1699,
 1709,
 1721,
 1723,
 1733,
 1741,
 1747,
 1753,
 1759,
 1777,
 1783,
 1787,
 1789,
 1801,
 1811,
 1823,
 1831,
 1847,
 1861,
 1867,
 1871,
 1873,
 1877,
 1879,
 1889,
 1901,
 1907,
 1913,
 1931,
 1933,
 1949,
 1951,
 1973,
 1979,
 1987,
 1993,
 1997,
 1999,
 2003,
 20

In [23]:
study=study_1
best_iter=study_1.best_trial.user_attrs["best_iter"]

# Definir los mejores parámetros del estudio
best_params = {
    'objective': 'binary',
    'boosting_type': 'gbdt',
    'first_metric_only': True,
    'boost_from_average': True,
    'feature_pre_filter': False,
    'max_bin': 31,
    'num_leaves': study.best_trial.params['num_leaves'],
    'learning_rate': study.best_trial.params['learning_rate'],
    'min_data_in_leaf': study.best_trial.params['min_data_in_leaf'],
    'feature_fraction': study.best_trial.params['feature_fraction'],
    'bagging_fraction': study.best_trial.params['bagging_fraction'],
    'verbose': 0
}


# Entrenar los modelos
modelos_entrenado = entrenar_modelos_con_varias_semillas(best_params, X_train, y_train_binaria, w_train, best_iter, semillas)

Modelo entrenado con semilla 540079 guardado como modelo_540079
Modelo entrenado con semilla 250829 guardado como modelo_250829
Modelo entrenado con semilla 314299 guardado como modelo_314299
Modelo entrenado con semilla 302111 guardado como modelo_302111
Modelo entrenado con semilla 801007 guardado como modelo_801007
Modelo entrenado con semilla 1009 guardado como modelo_1009
Modelo entrenado con semilla 1013 guardado como modelo_1013
Modelo entrenado con semilla 1019 guardado como modelo_1019
Modelo entrenado con semilla 1021 guardado como modelo_1021
Modelo entrenado con semilla 1031 guardado como modelo_1031
Modelo entrenado con semilla 1033 guardado como modelo_1033
Modelo entrenado con semilla 1039 guardado como modelo_1039
Modelo entrenado con semilla 1049 guardado como modelo_1049
Modelo entrenado con semilla 1051 guardado como modelo_1051
Modelo entrenado con semilla 1061 guardado como modelo_1061
Modelo entrenado con semilla 1063 guardado como modelo_1063
Modelo entrenado con

In [24]:
modelos_entrenado

{'modelo_540079': <lightgbm.basic.Booster at 0x259d80ecd70>,
 'modelo_250829': <lightgbm.basic.Booster at 0x259d7c96030>,
 'modelo_314299': <lightgbm.basic.Booster at 0x259d85834a0>,
 'modelo_302111': <lightgbm.basic.Booster at 0x259d7b85c40>,
 'modelo_801007': <lightgbm.basic.Booster at 0x25a3481ade0>,
 'modelo_1009': <lightgbm.basic.Booster at 0x259d7d68170>,
 'modelo_1013': <lightgbm.basic.Booster at 0x259d8581d00>,
 'modelo_1019': <lightgbm.basic.Booster at 0x259d85820f0>,
 'modelo_1021': <lightgbm.basic.Booster at 0x25a34a6b440>,
 'modelo_1031': <lightgbm.basic.Booster at 0x25a34a6b920>,
 'modelo_1033': <lightgbm.basic.Booster at 0x25a34a6aea0>,
 'modelo_1039': <lightgbm.basic.Booster at 0x25a34ab2b40>,
 'modelo_1049': <lightgbm.basic.Booster at 0x259d8582960>,
 'modelo_1051': <lightgbm.basic.Booster at 0x259d7d681a0>,
 'modelo_1061': <lightgbm.basic.Booster at 0x259d85827e0>,
 'modelo_1063': <lightgbm.basic.Booster at 0x25a34a6a780>,
 'modelo_1069': <lightgbm.basic.Booster at 0x2

In [22]:
# def predecir_y_ordenar_modelos(modelos_entrenados, X_test):
#     """
#     Realiza predicciones con modelos entrenados de LightGBM, agrega la probabilidad de ser "BAJA" 
#     y ordena los clientes de forma descendente por probabilidad.
    
#     Parámetros:
#     - modelos_entrenados: dict, contiene los modelos entrenados con las semillas como nombres.
#     - X_test: dataframe, conjunto de test.
    
#     Retorna:
#     - resultados_predicciones: diccionario con los resultados predichos y ordenados para cada modelo entrenado.
#     """
#     resultados_predicciones = {}

#     # Asegurarse de que X_test tenga las mismas columnas que los datos de entrenamiento
#     columnas_modelo = modelos_entrenados[list(modelos_entrenados.keys())[0]].feature_name()
#     X_test = X_test[columnas_modelo]

#     # Asegurarse de que 'numero_de_cliente' esté en X_test
#     if 'numero_de_cliente' not in X_test.columns:
#         raise ValueError("La columna 'numero_de_cliente' debe estar presente en X_test")

#     # Para cada modelo entrenado, hacemos predicciones y ordenamos los resultados
#     for nombre_modelo, model in modelos_entrenados.items():
#         try:
#             # Hacer predicciones sobre el conjunto de test
#             predicciones = model.predict(X_test)
            
#             # Crear un nuevo DataFrame solo con 'numero_de_cliente' y 'Probabilidad'
#             resultados = pd.DataFrame({
#                 'numero_de_cliente': X_test['numero_de_cliente'],
#                 'Probabilidad': predicciones
#             })
            
#             # Ordenar a los clientes por la probabilidad de ser "BAJA"
#             resultados_ordenados = resultados.sort_values(by='Probabilidad', ascending=False)
            
#             # Guardar los resultados ordenados
#             resultados_predicciones[nombre_modelo] = resultados_ordenados
            
#             print(f"Predicciones realizadas y ordenadas para {nombre_modelo}")
        
#         except Exception as e:
#             print(f"Error al procesar {nombre_modelo}: {str(e)}")
#             continue
    
#     return resultados_predicciones

In [25]:
def predecir_y_ordenar_modelos2(modelos_entrenados, X_test):
    """
    Realiza predicciones con modelos entrenados de LightGBM y crea un DataFrame final
    con la probabilidad más alta obtenida por cada cliente en cualquiera de los modelos.
    
    Parámetros:
    - modelos_entrenados: dict, contiene los modelos entrenados con las semillas como nombres.
    - X_test: dataframe, conjunto de test.
    
    Retorna:
    - df_final: DataFrame con 'numero_de_cliente' y la 'Probabilidad' más alta para cada cliente.
    """
    # Asegurarse de que X_test tenga las mismas columnas que los datos de entrenamiento
    columnas_modelo = modelos_entrenados[list(modelos_entrenados.keys())[0]].feature_name()
    X_test = X_test[columnas_modelo]

    # Asegurarse de que 'numero_de_cliente' esté en X_test
    if 'numero_de_cliente' not in X_test.columns:
        raise ValueError("La columna 'numero_de_cliente' debe estar presente en X_test")

    # Crear un DataFrame para almacenar todas las predicciones
    todas_predicciones = pd.DataFrame({'numero_de_cliente': X_test['numero_de_cliente']})

    # Para cada modelo entrenado, hacemos predicciones
    for nombre_modelo, model in modelos_entrenados.items():
        try:
            # Hacer predicciones sobre el conjunto de test
            predicciones = model.predict(X_test)
            
            # Agregar las predicciones al DataFrame
            todas_predicciones[nombre_modelo] = predicciones
            
            print(f"Predicciones realizadas para {nombre_modelo}")
        
        except Exception as e:
            print(f"Error al procesar {nombre_modelo}: {str(e)}")
            continue
    
    # Calcular la probabilidad máxima para cada cliente
    todas_predicciones['Probabilidad'] = todas_predicciones.iloc[:, 1:].max(axis=1)
    
    # Crear el DataFrame final con 'numero_de_cliente' y la 'Probabilidad' más alta
    df_final = todas_predicciones[['numero_de_cliente', 'Probabilidad']]
    
    # Ordenar el DataFrame final por 'Probabilidad' de forma descendente
    df_final = df_final.sort_values(by='Probabilidad', ascending=False)

    return df_final

In [37]:
# predicciones_1=predecir_y_ordenar_modelos(modelos_entrenado, X_test)

Predicciones realizadas y ordenadas para modelo_540079
Predicciones realizadas y ordenadas para modelo_250829
Predicciones realizadas y ordenadas para modelo_314299
Predicciones realizadas y ordenadas para modelo_302111
Predicciones realizadas y ordenadas para modelo_801007


In [26]:
df_final=predecir_y_ordenar_modelos2(modelos_entrenado, X_test)

Predicciones realizadas para modelo_540079
Predicciones realizadas para modelo_250829
Predicciones realizadas para modelo_314299
Predicciones realizadas para modelo_302111
Predicciones realizadas para modelo_801007
Predicciones realizadas para modelo_1009
Predicciones realizadas para modelo_1013
Predicciones realizadas para modelo_1019
Predicciones realizadas para modelo_1021
Predicciones realizadas para modelo_1031
Predicciones realizadas para modelo_1033
Predicciones realizadas para modelo_1039
Predicciones realizadas para modelo_1049
Predicciones realizadas para modelo_1051
Predicciones realizadas para modelo_1061
Predicciones realizadas para modelo_1063
Predicciones realizadas para modelo_1069
Predicciones realizadas para modelo_1087
Predicciones realizadas para modelo_1091
Predicciones realizadas para modelo_1093
Predicciones realizadas para modelo_1097
Predicciones realizadas para modelo_1103
Predicciones realizadas para modelo_1109
Predicciones realizadas para modelo_1117
Predic


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_1669



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_1693



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_1697



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_1699



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_1709



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_1721



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_1723



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_1733



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_1741



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_1747



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_1753



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_1759



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_1777



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_1783



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_1787



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_1789



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_1801



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_1811



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_1823



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_1831



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_1847



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_1861



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_1867



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_1871



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_1873



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_1877



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_1879



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_1889



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_1901



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_1907



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_1913



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_1931



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_1933



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_1949



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_1951



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_1973



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_1979



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_1987



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_1993



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_1997



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_1999



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2003



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2011



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2017



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2027



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2029



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2039



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2053



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2063



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2069



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2081



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2083



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2087



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2089



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2099



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2111



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2113



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2129



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2131



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2137



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2141



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2143



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2153



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2161



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2179



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2203



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2207



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2213



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2221



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2237



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2239



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2243



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2251



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2267



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2269



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2273



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2281



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2287



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2293



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2297



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2309



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2311



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2333



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2339



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2341



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2347



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2351



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2357



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2371



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2377



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2381



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2383



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2389



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2393



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2399



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2411



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2417



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2423



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2437



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2441



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



Predicciones realizadas para modelo_2447



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`



In [39]:
df_modelo_540079 = pd.DataFrame(predicciones_1['modelo_540079'])
df_modelo_250829 = pd.DataFrame(predicciones_1['modelo_250829'])
df_modelo_314299 = pd.DataFrame(predicciones_1['modelo_314299'])
df_modelo_302111 = pd.DataFrame(predicciones_1['modelo_302111'])
df_modelo_801007 = pd.DataFrame(predicciones_1['modelo_801007'])

In [51]:
df_modelo_302111

Unnamed: 0,numero_de_cliente,Probabilidad
910079,812656770,0.890022
978032,1462097870,0.878702
856107,511863915,0.860759
977205,1455714205,0.847359
828791,303620250,0.845073
...,...,...
861297,554981460,0.000040
917847,857932152,0.000040
845849,464825086,0.000039
901635,752639620,0.000031


In [27]:
df_final

Unnamed: 0,numero_de_cliente,Probabilidad
910079,812656770,0.903539
856107,511863915,0.899408
978032,1462097870,0.890252
873626,606371992,0.887768
942950,1021275210,0.867305
...,...,...
836384,339394033,0.000105
861297,554981460,0.000104
888399,674618747,0.000104
851594,490871275,0.000090


In [28]:
import os 
# Definir las rutas
base_path = 'C:/Users/Federico/Desktop/Maestria Data mining/DM EyF/'
dataset_path = os.path.join(base_path, 'datasets/')


# Definir el nombre del archivo
file_name = 'ensamble_shpa_200_semillas.csv'
file_path = os.path.join(dataset_path, file_name)

# Guardar el dataframe (df es tu dataframe)
df_final.to_csv(file_path, index=False)

In [52]:
api = KaggleApi()
api.authenticate()

In [53]:
# #L. Predecimos Junio.
# #i. Predecimos propiamente dicho.
# predicciones = model_lgb.predict(X_test_m1)
# #ii. Le pegamos la probabilidad de ser "BAJA" a cada cliente.
# X_test_m1['Probabilidad'] = predicciones
# #iii. Ordenamos a los clientes por probabilidad de ser "BAJA" de forma descendente.
# tb_entrega = X_test_m1.sort_values(by='Probabilidad', ascending=False)
#iv. Genero una lista de distintos cortes candidatos, para enviar a Kaggle.
tb_entrega=df_final
cortes = range(11000,12000,100)
#v. Generamos las distintas predicciones de clases a partir de los distintos cortes posibles.
num_subida_kaggle = 1
for envios in cortes:
    #1. Le ponemos clase 1 ("BAJA") a los primeros "envios" con mayor probabilidad.
    tb_entrega['Predicted'] = 0
    tb_entrega.iloc[:envios, tb_entrega.columns.get_loc('Predicted')] = 1
    resultados = tb_entrega[["numero_de_cliente", 'Predicted']].reset_index(drop=True)
    
    print("Cantidad de clientes {}".format(envios))
    #2. Guardamos el archivo para Kaggle.
    nombre_archivo = "Primer_ensable_menos_rango{}.csv".format(num_subida_kaggle)
    ruta_archivo= "../../../exp/{}".format(nombre_archivo)
    resultados.to_csv(ruta_archivo, index=False)
    
    num_subida_kaggle += 1
    
    #3. Envío a Kaggle.
    #a. Defino los parámetros claves.
    mensaje = f'Archivo {nombre_archivo}.Punto_corte: {envios}.'
    competencia = 'dm-ey-f-2024-primera'
    #c. Subo la Submission.
    while True:
        try:
            api.competition_submit(file_name=ruta_archivo, message=mensaje, competition=competencia)
            print("Submission successful!")
            break
        except ApiException as e:
            print(f"Error: {e}")  # Imprime la excepción completa para ver qué atributos tiene
            if e.status == 429:  # Reemplaza esto si `status` no es correcto
                print("Rate limit exceeded. Retrying after 30 seconds...")
                time.sleep(30)
            else:
                raise e  # Re-raise other exceptions

Cantidad de clientes 11000


100%|██████████| 2.08M/2.08M [00:01<00:00, 1.32MB/s]


Submission successful!
Cantidad de clientes 11100


100%|██████████| 2.08M/2.08M [00:01<00:00, 1.34MB/s]


Submission successful!
Cantidad de clientes 11200


100%|██████████| 2.08M/2.08M [00:01<00:00, 1.30MB/s]


Submission successful!
Cantidad de clientes 11300


100%|██████████| 2.08M/2.08M [00:01<00:00, 1.32MB/s]


Submission successful!
Cantidad de clientes 11400


100%|██████████| 2.08M/2.08M [00:01<00:00, 1.33MB/s]


Submission successful!
Cantidad de clientes 11500


100%|██████████| 2.08M/2.08M [00:01<00:00, 1.29MB/s]


Submission successful!
Cantidad de clientes 11600


100%|██████████| 2.08M/2.08M [00:01<00:00, 1.33MB/s]


Submission successful!
Cantidad de clientes 11700


100%|██████████| 2.08M/2.08M [00:01<00:00, 1.28MB/s]


Submission successful!
Cantidad de clientes 11800


100%|██████████| 2.08M/2.08M [00:01<00:00, 1.38MB/s]


Submission successful!
Cantidad de clientes 11900


100%|██████████| 2.08M/2.08M [00:01<00:00, 1.31MB/s]


Submission successful!
