In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [2]:

from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import classification_report

In [3]:
# Read the CSV at ./data/cleaned_data.csv; its first column becomes the row index.
data_path = "./data/cleaned_data.csv"
df = pd.read_csv(data_path, index_col=0)

In [4]:
# Name of the target column and the seed shared by the split and the seeded models.
dep_var, random_state = "alive", 42

In [37]:
# Target vector: the `dep_var` column.
y = df[dep_var]
# Feature table: every column except the target and "position".
# NOTE(review): "Id" is still part of X (it is listed in numerical_cols further
# down) -- confirm that a row identifier is really meant to be a model feature.
X = df.drop(columns=[dep_var, "position"])

In [39]:
# Sort feature columns into the two groups the preprocessor handles:
#   * categorical: object-dtype columns with fewer than 10 distinct values
#   * numerical:   int64 / float64 columns
# A column matching neither rule ends up in neither list.
categorical_cols = []
numerical_cols = []
for cname in X.columns:
    column = X[cname]
    if column.dtype == "object" and column.nunique() < 10:
        categorical_cols.append(cname)
    if column.dtype in ['int64', 'float64']:
        numerical_cols.append(cname)

In [40]:
# Display the columns selected as numerical (these are the ones passed to the scaler).
numerical_cols

['Id',
 'class',
 'num_sibling/spouse',
 'num_parent/child',
 'price',
 'age',
 'Family_Size']

In [41]:
# One-hot encode categorical columns; handle_unknown="ignore" makes the encoder
# tolerate categories at transform time that it did not see while fitting.
categorical_transformer = OneHotEncoder(handle_unknown="ignore")
# Standardize numerical columns with the default StandardScaler settings.
numerical_transformer = StandardScaler()

In [42]:
# (name, transformer, columns) triples: the scaler gets the numerical columns,
# the one-hot encoder gets the categorical ones. Any column in neither list is
# handled by ColumnTransformer's default `remainder` setting.
transformer_specs = [
    ('num', numerical_transformer, numerical_cols),
    ('cat', categorical_transformer, categorical_cols),
]
preprocessor = ColumnTransformer(transformers=transformer_specs)

In [51]:
xs=preprocessor.fit_transform(X)

In [44]:
X_train, X_test, y_train, y_test = train_test_split(xs, y,random_state=random_state)

In [45]:
# Baseline models keyed by the display name used in the evaluation printout.
# Every estimator that accepts a seed gets the shared `random_state`.
seed_kwargs = {"random_state": random_state}
classifiers = {
    "LogisticRegression": LogisticRegression(max_iter=1000, **seed_kwargs),
    "KNeighbors": KNeighborsClassifier(n_neighbors=3),
    "SVC": SVC(**seed_kwargs),
    "DecisionTree": DecisionTreeClassifier(**seed_kwargs),
    "RandomForest": RandomForestClassifier(**seed_kwargs),
    "XGBoost": XGBClassifier(**seed_kwargs),
}

In [50]:
# Train each baseline on the training split and print its per-class
# precision / recall / F1 on the held-out split.
for key, classifier in classifiers.items():
    # fit() returns the fitted estimator, so prediction can be chained onto it.
    prediction = classifier.fit(X_train, y_train).predict(X_test)
    print("Model "+key)
    # zero_division=0 reports 0 instead of warning when a class is never predicted.
    report = classification_report(y_test, prediction, zero_division=0)
    print(report)

Model LogisticRegression
              precision    recall  f1-score   support

           0       0.84      0.90      0.87       126
           1       0.85      0.77      0.81        91

    accuracy                           0.85       217
   macro avg       0.85      0.84      0.84       217
weighted avg       0.85      0.85      0.85       217

Model KNeighbors
              precision    recall  f1-score   support

           0       0.81      0.87      0.84       126
           1       0.80      0.71      0.76        91

    accuracy                           0.81       217
   macro avg       0.81      0.79      0.80       217
weighted avg       0.81      0.81      0.80       217

Model SVC
              precision    recall  f1-score   support

           0       0.82      0.89      0.85       126
           1       0.83      0.74      0.78        91

    accuracy                           0.82       217
   macro avg       0.83      0.81      0.82       217
weighted avg       0.8

In [52]:
import keras
import tensorflow as tf
from keras.utils.np_utils import to_categorical
from keras import optimizers
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten

In [59]:
# Seed the Python, NumPy and TensorFlow random generators with the same seed the
# scikit-learn models use, so weight initialisation, dropout masks and batch
# shuffling repeat on a fresh "Restart & Run All" (GPU kernels may still add
# small nondeterminism).
tf.keras.utils.set_random_seed(random_state)

# Feed-forward classifier: 512 sigmoid units -> 256 units with dropout then
# ReLU -> 2-way softmax (one output per class of the one-hot target).
model=keras.Sequential([
    keras.layers.Dense(512),
    keras.layers.Activation("sigmoid"),
    keras.layers.Dense(256),
    # Dropout sits before the ReLU here; 20% of the units are zeroed in training.
    keras.layers.Dropout(0.2),
    keras.layers.Activation(activation="relu"),
    keras.layers.Dense(2),
    keras.layers.Activation(activation="softmax")])

In [60]:
# One-hot encode the targets for the 2-unit softmax head.
# Both splits are aligned to the SAME ordered list of class labels (taken from
# the full target `y`), so the train and validation matrices always have
# identical columns even if one split happens to miss a class. The dtype is set
# explicitly because get_dummies' default differs between pandas versions.
class_labels = sorted(y.dropna().unique())
one_hot_y = pd.get_dummies(y_train, dtype="float32").reindex(columns=class_labels, fill_value=0.0)
one_hot_valid_y = pd.get_dummies(y_test, dtype="float32").reindex(columns=class_labels, fill_value=0.0)

In [63]:

# Training setup: plain SGD at a fixed learning rate, 20 passes over the data.
opt = tf.optimizers.SGD(learning_rate=0.01)
epochs = 20

# Binary focal cross-entropy on the one-hot targets; accuracy and recall are
# tracked alongside the loss.
model.compile(
    loss=tf.keras.losses.BinaryFocalCrossentropy(),
    optimizer=opt,
    metrics=['accuracy', tf.keras.metrics.Recall()],
)

# Mini-batches of 4 rows; the held-out split doubles as the validation set.
history = model.fit(
    X_train,
    one_hot_y,
    batch_size=4,
    epochs=epochs,
    validation_data=(X_test, one_hot_valid_y),
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [64]:
# Class probabilities for the held-out rows, one column per class.
y_prob = model.predict(X_test)
# Predicted class = index of the largest probability in each row.
prediction = np.argmax(y_prob, axis=-1)
nn_report = classification_report(y_test, prediction, zero_division=0)
print("Model NN")
print(nn_report)

Model NN
              precision    recall  f1-score   support

           0       0.87      0.87      0.87       126
           1       0.81      0.81      0.81        91

    accuracy                           0.84       217
   macro avg       0.84      0.84      0.84       217
weighted avg       0.84      0.84      0.84       217

