In [34]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
import pandas as pd
import tensorflow as tf
import numpy as np
import pickle

In [35]:
# Cargar el archivo Excel
df = pd.read_csv(r'C:\Users\anton\OneDrive - Universidad de los andes\Antonia Streubel\ANDES\7. Semestre\Analítica Computacional\Proyecto 2\df_limpio.csv')
df_dummies = pd.get_dummies(df)
columnas_modelo = df_dummies.columns.tolist()

X_dummies = df[[
    'FAMI_TIENELAVADORA_Si','FAMI_TIENEAUTOMOVIL_Si','FAMI_TIENECOMPUTADOR_Si',
]]
X_categoricas = df[[
    'ESTU_COD_RESIDE_MCPIO','FAMI_PERSONASHOGAR','ESTU_COD_MCPIO_PRESENTACION','ESTU_TIPODOCUMENTO','FAMI_CUARTOSHOGAR','FAMI_EDUCACIONMADRE','FAMI_ESTRATOVIVIENDA','COLE_AREA_UBICACION_URBANO'
]]
X_categoricas_dummified = pd.get_dummies(X_categoricas, drop_first=True)

X = pd.concat([X_dummies, X_categoricas_dummified], axis=1)

In [36]:
# Prepara tus datos (reemplaza con tu df real)
y = df['ALTO_RENDIMIENTO_MATE']  

In [37]:
# Codificación de variable objetivo
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
y_cat = to_categorical(y_encoded)

In [38]:
# Escalamiento y split
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_cat, test_size=0.2, random_state=42)

In [39]:
#Capa de normalizacion
norm = tf.keras.layers.Normalization()
# Adaptar la capa de normalización a los datos de entrenamiento
norm.adapt(np.array(X_train))

In [40]:
# Crear modelo secuencial
model = tf.keras.Sequential([
    norm,
    tf.keras.layers.Dense(32, activation='relu', input_shape=(X_train.shape[1],)),  # Capa oculta con 64 neuronas
    tf.keras.layers.Dense(y_cat.shape[1], activation='softmax') # Capa de salida
])

# Compilar el modelo
model.compile(optimizer=Adam(learning_rate=0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Guardar las columnas exactas usadas para entrenar el modelo
columnas_modelo = X.columns.tolist()
with open('columnas_modelo.pkl', 'wb') as f:
    pickle.dump(columnas_modelo, f)
    
# Entrenar el modelo
history = model.fit(X_train, y_train,
                    epochs=50,
                    batch_size=32,
                    validation_split=0.2,
                    verbose=1)

# Mostrar última accuracy de validación
val_accuracy = history.history['val_accuracy'][-1]
print(f"Val Accuracy final: {round(val_accuracy, 4)}")

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2246/2246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.7103 - loss: 0.6266 - val_accuracy: 0.7034 - val_loss: 0.6082
Epoch 2/50
[1m2246/2246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.7103 - loss: 0.6019 - val_accuracy: 0.7034 - val_loss: 0.6082
Epoch 3/50
[1m2246/2246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.7095 - loss: 0.6027 - val_accuracy: 0.7034 - val_loss: 0.6080
Epoch 4/50
[1m2246/2246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.7132 - loss: 0.5993 - val_accuracy: 0.7034 - val_loss: 0.6083
Epoch 5/50
[1m2246/2246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.7079 - loss: 0.6041 - val_accuracy: 0.7034 - val_loss: 0.6081
Epoch 6/50
[1m2246/2246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.7102 - loss: 0.6020 - val_accuracy: 0.7034 - val_loss: 0.6081
Epoch 7/50
[1m2246/2246[0

In [41]:
# Guarda el modelo
model.save('modelo2.keras')