In [19]:
# Importar librerías
import numpy as np
import pandas as pd

from IPython.display import display
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from tensorflow import keras
from pathlib import Path

In [20]:
# Cargar y unir datos
df_train = pd.read_csv('./src/titanic/train.csv')
df_test = pd.read_csv('./src/titanic/test.csv')
df_survived = pd.read_csv('./src/titanic/gender_submission.csv')

# Unir test con survived y concatenar con train
df_test_complete = pd.merge(df_test, df_survived, on='PassengerId', how='left')
df_full = pd.concat([df_train, df_test_complete], axis=0, ignore_index=True)

print(f"Dataset cargado: {df_full.shape[0]} filas")

Dataset cargado: 1309 filas


In [21]:
# Seleccionar columnas relevantes y limpiar datos
# Usando pandas: crear una copia para evitar SettingWithCopyWarning
cols = ['Survived', 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare']
df_clean = df_full[cols].copy()

# Rellenar nulos con mediana (evitar usar inplace sobre vistas)
df_clean['Age'] = df_clean['Age'].fillna(df_clean['Age'].median())
df_clean['Fare'] = df_clean['Fare'].fillna(df_clean['Fare'].median())

# Eliminar filas sin etiqueta de Survived (por ejemplo en datos de test sin etiqueta)
df_clean = df_clean.dropna(subset=['Survived'])

# Convertir Sex a numérico (male -> 0, female -> 1)
df_clean['Sex'] = df_clean['Sex'].map({'male': 0, 'female': 1}).astype(int)

print(f"Dataset limpio: {df_clean.shape[0]} filas")
display(df_clean.head())

Dataset limpio: 1309 filas


Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare
0,0,3,0,22.0,1,0,7.25
1,1,1,1,38.0,1,0,71.2833
2,1,3,1,26.0,0,0,7.925
3,1,1,1,35.0,1,0,53.1
4,0,3,0,35.0,0,0,8.05


In [22]:
# Preparar datos para el modelo
feature_columns = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare']
X = df_clean[feature_columns].to_numpy()
y = df_clean['Survived'].to_numpy().ravel()

# Dividir datos (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
print(f"Train: {X_train.shape[0]}")
print(f"Test: {X_test.shape[0]}")

Train: 1047
Test: 262


In [None]:
# Entrenar modelo Keras
model = keras.Sequential([
    keras.layers.Dense(16, activation='relu', input_shape=(6,)), # 6 features de entrada
    keras.layers.Dropout(0.2), # evitar overfitting 
    keras.layers.Dense(8, activation='relu'), # otra capa oculta 
    keras.layers.Dropout(0.2), # evitar overfitting
    keras.layers.Dense(1, activation='sigmoid') # capa de salida 
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Normalizar datos usando min-max normalization integrada
X_train_min = X_train.min(axis=0)
X_train_max = X_train.max(axis=0)
X_train_normalized = (X_train - X_train_min) / (X_train_max - X_train_min)
X_test_normalized = (X_test - X_train_min) / (X_train_max - X_train_min)

# Entrenar
history = model.fit(
    X_train_normalized,
    y_train,
    epochs=100,
    batch_size=32,
    validation_split=0.2
)

# Evaluar
y_pred_prob = model.predict(X_test_normalized, verbose=0)
y_pred = (y_pred_prob > 0.5).astype(int).ravel()
accuracy = accuracy_score(y_test, y_pred)

print(f"\nPRECISIÓN DEL MODELO: {accuracy*100:.2f}%")

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [24]:
# Función de predicción
def predecir(pclass: int, sex: str, age: int, sibsp: int, parch: int, fare: float):
    """Predice supervivencia. sex: 'male' o 'female'"""
    sex_num = 0 if sex == 'male' else 1
    input_data = np.array([[pclass, sex_num, age, sibsp, parch, fare]])
    
    # Normalizar con los mismos parámetros del entrenamiento
    input_normalized = (input_data - X_train_min) / (X_train_max - X_train_min)
    
    return model.predict(input_normalized, verbose=0)[0][0]
# Ejemplos
print(f"Hombre, 30 años, clase 3 --> Probabilidad de sobrevivir: {predecir(3, 'male', 30, 0,0,8.0)*100:.2f}%")
print(f"Mujer, 25 años, clase 1 --> Probabilidad de sobrevivir: {predecir(1, 'female', 25, 1, 0, 80.0)*100:.2f}%")
print(f"Niño, 5 años, clase 2 --> Probabilidad de sobrevivir: {predecir(2, 'male', 5, 1, 2, 25.0)*100:.2f}%")

Hombre, 30 años, clase 3 --> Probabilidad de sobrevivir: 8.51%
Mujer, 25 años, clase 1 --> Probabilidad de sobrevivir: 97.69%
Niño, 5 años, clase 2 --> Probabilidad de sobrevivir: 23.65%


## Exportar modelo

In [25]:
# Preparar directorio para exportar el modelo
model_dir = Path('../client/public/model')
model_dir.mkdir(parents=True, exist_ok=True)


# Exportar el modelo en formato h5
model_path = model_dir / 'titanic_model.h5'
model.save(model_path)
print(f"Modelo exportado en: {model_path.resolve()}")

Modelo exportado en: C:\Users\willi\Documentos\Projects\ML_Titanic\client\public\model\titanic_model.h5


  saving_api.save_model(
