In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report

# Load Titanic dataset
training_DF = pd.read_csv('titanic_dataset_GBC.csv')

# Impute missing ages with the column mean. The result is assigned back rather
# than calling fillna(inplace=True) on the column selection: that form is
# chained assignment, which raises a FutureWarning on recent pandas 2.x and
# leaves the frame unchanged under copy-on-write (the default from pandas 3.0),
# in which case the dropna() below would discard every row with a missing age.
# NOTE(review): the mean is taken over all rows, before the train/test split,
# so held-out rows influence the imputed value — confirm this is acceptable.
training_DF['Age'] = training_DF['Age'].fillna(training_DF['Age'].mean())

# Drop the Cabin column, then discard any rows that still contain a missing value
training_DF = training_DF.drop(columns='Cabin').dropna()

# One-hot encode Sex and Embarked, replace the original text columns (plus the
# free-text Name and Ticket columns) with the indicator columns, and drop one
# indicator per category ('female', 'C') so each group keeps a reference level
sex = pd.get_dummies(training_DF['Sex'])
embark = pd.get_dummies(training_DF['Embarked'])
training_DF = (
    pd.concat(
        [training_DF.drop(columns=['Sex', 'Embarked', 'Name', 'Ticket']), sex, embark],
        axis=1,
    )
    .drop(columns=['female', 'C'])
)

# Hold out 30% of the rows for testing; the fixed random_state keeps the
# partition identical between runs
features = training_DF.drop(columns='Survived')
target = training_DF['Survived']
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.30, random_state=101
)

# Min-max scale the features. The scaler is fitted on the training rows only
# and kept around so the same mapping can later be applied to X_test.
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)

# Define SGDClassifier models with different parameters: every combination of
# loss function and regularisation strength (alpha), ordered by loss first.
# The logistic loss is spelled 'log_loss': the former name 'log' was deprecated
# in scikit-learn 1.1 and removed in 1.3, where it raises an invalid-parameter
# error.
params = [
    {'loss': loss, 'alpha': alpha}
    for loss in ('hinge', 'log_loss', 'modified_huber')
    for alpha in (0.0001, 0.001)
]

# Scale the held-out features once, up front. The transform depends only on the
# already-fitted scaler, not on the model, so recomputing it on every loop
# iteration was redundant work.
X_test_scaled = scaler.transform(X_test)

# Train and evaluate SGDClassifier models, one per parameter combination
for param in params:
    print(f"\nTraining SGDClassifier with loss='{param['loss']}' and alpha={param['alpha']}")
    # Fixed random_state so each configuration gives the same result between runs
    sgd_clf = SGDClassifier(loss=param['loss'], alpha=param['alpha'], random_state=42)
    sgd_clf.fit(X_train, y_train)
    y_pred = sgd_clf.predict(X_test_scaled)

    # Evaluate model: per-class precision / recall / F1 on the held-out rows
    print(classification_report(y_test, y_pred))




Training SGDClassifier with loss='hinge' and alpha=0.0001
              precision    recall  f1-score   support

           0       0.86      0.82      0.84       163
           1       0.73      0.79      0.76       104

    accuracy                           0.81       267
   macro avg       0.80      0.80      0.80       267
weighted avg       0.81      0.81      0.81       267


Training SGDClassifier with loss='hinge' and alpha=0.001
              precision    recall  f1-score   support

           0       0.80      0.90      0.85       163
           1       0.80      0.65      0.72       104

    accuracy                           0.80       267
   macro avg       0.80      0.77      0.78       267
weighted avg       0.80      0.80      0.80       267


Training SGDClassifier with loss='log' and alpha=0.0001
              precision    recall  f1-score   support

           0       0.75      0.99      0.85       163
           1       0.96      0.47      0.63       104

    accu

