# Modelo Árbol de Decisión

In [None]:
# === Importación de librerías ===
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder

# === Data loading ===
df_train = pd.read_csv("./data/train.csv")
df_test = pd.read_csv("./data/test.csv")

# Column roles, named once so that training and prediction cannot drift apart.
TARGET_COL = 'RENDIMIENTO_GLOBAL'
ID_COL = 'ID'

# === Missing-value imputation ===
# Numeric columns are filled with their mean, everything else with the most
# frequent value. The filled column is assigned back to the frame instead of
# calling `df[col].fillna(..., inplace=True)`: that chained in-place form acts
# on a temporary object, triggers a FutureWarning, and stops modifying the
# frame in pandas 3.0.
for df in [df_train, df_test]:
    for col in df.columns:
        if not df[col].isnull().any():
            continue
        if pd.api.types.is_numeric_dtype(df[col]):
            fill_value = df[col].mean()
        else:
            fill_value = df[col].mode()[0]
        df[col] = df[col].fillna(fill_value)

# === Encoding of categorical columns shared by train and test ===
# The target is left as-is (the classifier accepts label targets) and the ID
# column is skipped: it is an identifier, not a feature, and encoding it would
# turn unseen test IDs into -1 and corrupt the submission file.
cat_cols = df_train.select_dtypes(include=['object', 'category']).columns
cat_cols = [
    col for col in cat_cols
    if col in df_test.columns and col not in (TARGET_COL, ID_COL)
]
for col in cat_cols:
    le = LabelEncoder()
    df_train[col] = le.fit_transform(df_train[col])
    mapping = dict(zip(le.classes_, le.transform(le.classes_)))
    # Categories that never appeared in train have no code; mark them as -1.
    df_test[col] = df_test[col].map(mapping).fillna(-1).astype(int)

# === Features and target ===
# Fit and predict must see exactly the same columns in the same order, so a
# single feature list is built from the columns present in both frames,
# excluding the identifier and the target.
feature_cols = [
    col for col in df_train.columns
    if col not in (TARGET_COL, ID_COL) and col in df_test.columns
]
X = df_train[feature_cols]
y = df_train[TARGET_COL]
X_test = df_test[feature_cols]

# === Training ===
modelo = DecisionTreeClassifier(random_state=42)
modelo.fit(X, y)

# === Prediction ===
# Predict on the aligned feature matrix, not on the raw test frame.
X_test_model = X_test
y_pred = modelo.predict(X_test_model)

# === Build the submission file ===
submission = pd.DataFrame({'ID': df_test['ID'], 'RENDIMIENTO_GLOBAL': y_pred})
submission.to_csv('submission.csv', index=False)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(df[col].mode()[0], inplace=True)


ValueError: The feature names should match those that were passed during fit.
Feature names seen at fit time, yet now missing:
- ID
