In [1]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.regularizers import l2
from xgboost import XGBClassifier

def _extract_titles(names):
    """Pull the honorific out of each passenger name and merge rare variants.

    The title is the first run of letters that follows a space and is
    terminated by a period (e.g. ``'Mr'`` in ``'Braund, Mr. Owen'``). Names
    without such a token yield NaN.
    """
    # Raw string: '\.' in a plain literal is an invalid escape sequence.
    titles = names.str.extract(r' ([A-Za-z]+)\.', expand=False)
    titles = titles.replace(
        ['Lady', 'Countess', 'Capt', 'Col', 'Don', 'Dr', 'Major', 'Rev',
         'Sir', 'Jonkheer', 'Dona'],
        'Rare',
    )
    titles = titles.replace(['Mlle', 'Ms'], 'Miss')
    return titles.replace('Mme', 'Mrs')


def preprocess_data_tf(df, train_df=None):
    """Turn a raw passenger frame into the numeric feature matrix for the model.

    All data-dependent statistics (imputation means, 0.99-quantile caps,
    min/max scaling bounds and the set of title categories) are taken from a
    single *reference* frame: ``train_df`` when it is given, otherwise ``df``
    itself. Passing the training frame as ``train_df`` therefore gives the
    test set exactly the same transformation and the same columns as the
    training set.

    Args:
        df: Frame with the columns ``Pclass``, ``Sex``, ``Age``, ``SibSp``,
            ``Parch``, ``Fare``, ``Name`` and ``Cabin``. It is not modified.
        train_df: Optional raw (unprocessed) reference frame with at least
            ``Age``, ``Fare`` and ``Name``. It is not modified.

    Returns:
        A new DataFrame with the columns ``Pclass``, ``Sex`` (0 = male,
        1 = female), ``Age`` and ``Fare`` (min-max scaled against the
        reference frame), ``SibSp``, ``Parch``, ``FamilySize``, ``IsAlone``,
        ``HasCabin`` and one ``Title_*`` indicator per reference title except
        the alphabetically first one.
    """
    reference = df if train_df is None else train_df
    # Work on a copy so engineered columns never leak into the caller's frame
    # (the caller typically reuses the raw frame afterwards).
    df = df.copy()

    for col in ('Age', 'Fare'):
        fill_value = reference[col].mean()
        ref_col = reference[col].fillna(fill_value)
        cap = ref_col.quantile(0.99)
        ref_col = ref_col.clip(upper=cap)
        low = ref_col.min()
        span = ref_col.max() - low
        if span == 0:
            # Constant column: avoid dividing by zero, every value maps to 0.
            span = 1.0
        # Same formula as min-max scaling to [0, 1], but with bounds that are
        # fixed by the reference frame instead of re-fitted per frame.
        df[col] = (df[col].fillna(fill_value).clip(upper=cap) - low) / span

    df['Sex'] = df['Sex'].map({'male': 0, 'female': 1})
    df['FamilySize'] = df['SibSp'] + df['Parch']
    df['IsAlone'] = (df['FamilySize'] == 0).astype(int)
    df['HasCabin'] = df['Cabin'].notnull().astype(int)

    titles = _extract_titles(df['Name'])
    ref_titles = titles if train_df is None else _extract_titles(train_df['Name'])
    # Fixing the categories up front makes get_dummies emit the same Title_*
    # columns for every frame, even when a title is absent from `df`.
    # Titles not present in the reference frame become all-zero indicators.
    title_levels = sorted(ref_titles.dropna().unique())
    df['Title'] = titles.astype(pd.CategoricalDtype(categories=title_levels))
    df = pd.get_dummies(df, columns=['Title'], drop_first=True)

    features = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'FamilySize', 'IsAlone', 'HasCabin']
    features += [col for col in df.columns if col.startswith('Title_')]
    return df[features]

def build_model(input_shape):
    """Build the (uncompiled) feed-forward binary classifier.

    Architecture: Dense(128, relu) -> Dropout(0.3) -> Dense(64, relu) ->
    Dropout(0.2) -> Dense(1, sigmoid). Both hidden layers carry an L2 kernel
    penalty of 0.001.

    Args:
        input_shape: Number of input features per sample; it is wrapped into
            a 1-tuple to form the Keras input shape.

    Returns:
        A ``Sequential`` model that has not been compiled; the caller is
        expected to call ``compile`` before training.
    """
    return Sequential([
        # Declare the input with an explicit Input object rather than passing
        # `input_shape=` to the first Dense layer, which recent Keras versions
        # warn about when used inside a Sequential model.
        Input(shape=(input_shape,)),
        Dense(128, activation='relu', kernel_regularizer=l2(0.001)),
        Dropout(0.3),
        Dense(64, activation='relu', kernel_regularizer=l2(0.001)),
        Dropout(0.2),
        Dense(1, activation='sigmoid'),
    ])

# --- Load the raw CSVs and derive the model inputs --------------------------
train = pd.read_csv('/kaggle/input/titanic/train.csv')
test = pd.read_csv('/kaggle/input/titanic/test.csv')

# NOTE: the training frame is preprocessed first and is then handed to the
# test-set call as its reference frame; keep this order.
X = preprocess_data_tf(train)
y = train['Survived'].values
X_test = preprocess_data_tf(test, train_df=train)

# --- Stratified 5-fold training ---------------------------------------------
# A fresh network is trained on every fold; each one scores the full test
# set and the predicted probabilities are summed into `final_preds`.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
final_preds = np.zeros(len(X_test))

for train_idx, val_idx in skf.split(X, y):
    X_train = X.iloc[train_idx]
    y_train = y[train_idx]
    X_val = X.iloc[val_idx]
    y_val = y[val_idx]

    model = build_model(X_train.shape[1])
    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    # The held-out fold is only used for the per-epoch validation metrics.
    model.fit(
        X_train,
        y_train,
        validation_data=(X_val, y_val),
        epochs=50,
        batch_size=32,
        verbose=1,
    )

    # Accumulate this fold's test-set probabilities
    final_preds += model.predict(X_test).flatten()

# --- Average the folds and threshold at 0.5 ---------------------------------
final_preds /= skf.get_n_splits()
final_preds = (final_preds > 0.5).astype(int)

# --- Write the submission file ----------------------------------------------
submission = pd.DataFrame({
    'PassengerId': test['PassengerId'],
    'Survived': final_preds,
})
submission.to_csv('submission.csv', index=False)
print('\nSubmission file created!')


Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - accuracy: 0.5808 - loss: 0.7750 - val_accuracy: 0.6872 - val_loss: 0.6609
Epoch 2/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7369 - loss: 0.6317 - val_accuracy: 0.7821 - val_loss: 0.5824
Epoch 3/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7916 - loss: 0.5916 - val_accuracy: 0.7709 - val_loss: 0.5388
Epoch 4/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7990 - loss: 0.5731 - val_accuracy: 0.7821 - val_loss: 0.5255
Epoch 5/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8068 - loss: 0.5447 - val_accuracy: 0.7989 - val_loss: 0.5199
Epoch 6/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8102 - loss: 0.5115 - val_accuracy: 0.8045 - val_loss: 0.5185
Epoch 7/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step - accuracy: 0.6109 - loss: 0.7626 - val_accuracy: 0.7191 - val_loss: 0.6307
Epoch 2/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7561 - loss: 0.6208 - val_accuracy: 0.7809 - val_loss: 0.5509
Epoch 3/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7724 - loss: 0.5581 - val_accuracy: 0.8034 - val_loss: 0.5326
Epoch 4/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8319 - loss: 0.5194 - val_accuracy: 0.7921 - val_loss: 0.5227
Epoch 5/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8051 - loss: 0.5367 - val_accuracy: 0.7978 - val_loss: 0.5134
Epoch 6/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7949 - loss: 0.5387 - val_accuracy: 0.7978 - val_loss: 0.5094
Epoch 7/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step - accuracy: 0.6675 - loss: 0.7429 - val_accuracy: 0.7303 - val_loss: 0.6502
Epoch 2/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7783 - loss: 0.6200 - val_accuracy: 0.8034 - val_loss: 0.5818
Epoch 3/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7647 - loss: 0.5712 - val_accuracy: 0.8034 - val_loss: 0.5692
Epoch 4/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7658 - loss: 0.5787 - val_accuracy: 0.8034 - val_loss: 0.5459
Epoch 5/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8079 - loss: 0.5284 - val_accuracy: 0.8090 - val_loss: 0.5442
Epoch 6/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8073 - loss: 0.5039 - val_accuracy: 0.8034 - val_loss: 0.5335
Epoch 7/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - accuracy: 0.5504 - loss: 0.7790 - val_accuracy: 0.7303 - val_loss: 0.6442
Epoch 2/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7422 - loss: 0.6212 - val_accuracy: 0.7921 - val_loss: 0.5697
Epoch 3/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7553 - loss: 0.5912 - val_accuracy: 0.8034 - val_loss: 0.5385
Epoch 4/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7914 - loss: 0.5612 - val_accuracy: 0.7978 - val_loss: 0.5350
Epoch 5/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7896 - loss: 0.5353 - val_accuracy: 0.8034 - val_loss: 0.5253
Epoch 6/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7903 - loss: 0.5294 - val_accuracy: 0.8034 - val_loss: 0.5190
Epoch 7/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step - accuracy: 0.6201 - loss: 0.7534 - val_accuracy: 0.7978 - val_loss: 0.5978
Epoch 2/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7700 - loss: 0.6272 - val_accuracy: 0.8315 - val_loss: 0.5239
Epoch 3/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7837 - loss: 0.5719 - val_accuracy: 0.8202 - val_loss: 0.5055
Epoch 4/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8206 - loss: 0.5118 - val_accuracy: 0.8258 - val_loss: 0.4926
Epoch 5/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7892 - loss: 0.5385 - val_accuracy: 0.8258 - val_loss: 0.4871
Epoch 6/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7963 - loss: 0.5506 - val_accuracy: 0.8202 - val_loss: 0.4907
Epoch 7/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━