# Test data

In [44]:
import pandas as pd 
import matplotlib.pyplot as plt
import numpy as np
import missingno as mno
import seaborn as sns
import xgboost as xgb
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report

# neural network
import tensorflow as tf
from tensorflow import keras
from keras.callbacks import EarlyStopping
from keras.models import Sequential
from keras.layers import Dense
from sklearn.preprocessing import StandardScaler

In [45]:
# Read the raw test split and immediately discard the PassengerId and Name
# columns, so neither is part of the frame that is later passed to the models.
df = pd.read_csv('test.csv').drop(columns=['PassengerId', 'Name'])


# Cabin
# Rows whose Cabin is missing are removed before the cabin string is split.
rows_without_cabin = df.loc[df['Cabin'].isnull()].index
df = df.drop(index=rows_without_cabin)

# A cabin string has the form "deck/num/side"; split it into three columns
# and discard the combined column.
cabin_parts = df['Cabin'].str.split("/", expand = True)
df[['Cabin_deck', 'Cabin_num', 'Cabin_side' ]] = cabin_parts
df = df.drop(columns = 'Cabin')

df = df.assign(
    # Deck letters A, B, C, D, E, F, G, T are replaced by the codes 1..8 and
    # the column is stored as a categorical.
    Cabin_deck = df['Cabin_deck']
        .replace(list('ABCDEFGT'), list(range(1, 9)))
        .astype('category'),
    Cabin_num = df['Cabin_num'].astype('int'),
    # Side: 'P' -> 1 -> True, 'S' -> 0 -> False.
    Cabin_side = df['Cabin_side'].replace(['P', 'S'], [1, 0]).astype('bool'),
)

# HomePlanet
# Missing home planets get the explicit label 'Unknown'; the column is then
# stored as a categorical.
df['HomePlanet'] = df['HomePlanet'].fillna('Unknown').astype('category')

# Luxuries
# The five amenity bills are collapsed into a single 'Spendings' total.
amount_columns = ['RoomService', 'FoodCourt', 'ShoppingMall', 'Spa', 'VRDeck']

# A missing bill is treated as 0.0. The result is assigned back explicitly:
# `df[col].fillna(..., inplace=True)` operates on a temporary Series and does
# not update df when pandas Copy-on-Write is active.
df[amount_columns] = df[amount_columns].fillna(0.0)

df['Spendings'] = df[amount_columns].sum(axis = 1)
# Only the total is kept; the individual bill columns are removed.
df = df.drop(columns = amount_columns)

# CryoSleep
def is_cryo(total):
    """Return True when `total` equals zero, otherwise False.

    Used below to fill in a missing CryoSleep flag from a passenger's
    'Spendings' total.
    """
    return bool(total == 0)

# Rows with no CryoSleep value are imputed from spending: is_cryo returns True
# exactly when the row's 'Spendings' total is zero.
cryo_missing = df['CryoSleep'].isnull()

# .copy() gives an independent frame, so the assignment below does not write
# to a slice of df (which triggers SettingWithCopyWarning).
df_nan_cryo = df[cryo_missing].copy()
df_nan_cryo['CryoSleep'] = [is_cryo(total) for total in df_nan_cryo['Spendings']]

# DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
# sort_index restores the original row order after the two parts are rejoined.
df = pd.concat([df[~cryo_missing], df_nan_cryo]).sort_index()

df['CryoSleep'] = df['CryoSleep'].astype('bool')


# Destination
# Missing destinations get the explicit label 'Unknown'; the column is then
# stored as a categorical.
df['Destination'] = df['Destination'].fillna('Unknown').astype('category')

# Age
# Missing ages are replaced by the column mean, then the column is cast to int
# (which truncates the fractional part of a non-integer mean).
# The filled Series is assigned back explicitly: `df['Age'].fillna(...,
# inplace=True)` operates on a temporary Series and leaves df unchanged when
# pandas Copy-on-Write is active, after which the int cast fails on the NaNs.
df['Age'] = df['Age'].fillna(df['Age'].mean()).astype('int')

# VIP
def is_vip(home):
    """Return False when `home` is 'Earth'; otherwise return the marker 'IDK'.

    Used below to fill in a missing VIP flag from the row's HomePlanet; rows
    that end up marked 'IDK' are removed afterwards.
    """
    return False if home == 'Earth' else 'IDK'
    
# Rows with no VIP value are imputed from HomePlanet via is_vip: 'Earth' gives
# False, anything else gives the marker 'IDK'.
vip_missing = df['VIP'].isnull()

# .copy() gives an independent frame, so the assignment below does not write
# to a slice of df (which triggers SettingWithCopyWarning).
df_nan_vip = df[vip_missing].copy()
df_nan_vip['VIP'] = [is_vip(home) for home in df_nan_vip['HomePlanet']]

# DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
# sort_index restores the original row order after the two parts are rejoined.
df = pd.concat([df[~vip_missing], df_nan_vip]).sort_index()

# Rows whose VIP status could not be decided ('IDK') are discarded before the
# column is cast to bool; a leftover 'IDK' string would otherwise become True.
df = df.loc[df['VIP'] != 'IDK'].copy()
df['VIP'] = df['VIP'].astype('bool')

# Dummies
# One-hot encode the three categorical columns. The dtype is pinned to uint8:
# that was the get_dummies default before pandas 2.0 (which switched to bool),
# so the indicator columns stay 0/1 integers regardless of the pandas version.
df = pd.get_dummies(
    df,
    columns = ['Cabin_deck', 'HomePlanet', 'Destination'],
    dtype = 'uint8',
)
# Rows were dropped above, so renumber them 0..n-1 and discard the old labels.
df = df.reset_index(drop = True)

  df = df.append(df_nan_cryo)
  df = df.append(df_nan_vip)


# XGBoost

In [46]:
# Create an empty classifier, populate it from the model file saved at
# models/xgb_model.json, and predict on the preprocessed frame.
tree = xgb.XGBClassifier()
tree.load_model("models/xgb_model.json")
# NOTE(review): presumably df's columns must match the features the saved
# model was fit on — confirm against the training notebook.
pred_xgb = tree.predict(df)

In [47]:
# Build the result frame: a copy of the feature frame with the XGBoost
# predictions attached as the boolean column 'preds_xgb'.
df_result = df.assign(preds_xgb = np.asarray(pred_xgb).astype('bool'))

# MLP

In [48]:
# Work on a copy so the frame used for the XGBoost predictions stays untouched.
df2 = df.copy()

# Scaling the numerical variables
numerical = ['Spendings', 'Cabin_num']
df_num = df2[numerical].copy()
scaler = StandardScaler()
# NOTE(review): the scaler is fit on the test rows themselves. If the saved MLP
# was trained on features scaled with training-set statistics, that fitted
# scaler should be reused here instead — confirm against the training notebook.
df_num1 = pd.DataFrame(
    scaler.fit_transform(df_num),
    columns = df_num.columns,
    # Keep df2's row labels. Without this the scaled frame gets a fresh 0..n-1
    # index and the assignment below would align on labels, silently producing
    # NaN for any row of df2 whose label is not in that range.
    index = df_num.index,
)

# Merging the categorical and numerical variables
for name in numerical:
    df2[name] = df_num1[name]

# The boolean flags are converted to 0/1 integers.
bool_cols = ['CryoSleep', 'VIP', 'Cabin_side']
df2 = df2.astype({flag: 'int' for flag in bool_cols})

In [49]:
# Load the saved Keras model from models/mlp_model.keras and predict on the
# scaled frame.
mlp = tf.keras.models.load_model('models/mlp_model.keras')
pred_mlp = mlp.predict(df2)

# Round each output to the nearest integer, then store it as a boolean
# (any non-zero value becomes True) in the 'preds_mlp' column.
df_result['preds_mlp'] = np.round(pred_mlp, 0).astype('bool')



In [50]:
# Display the feature frame together with the 'preds_xgb' and 'preds_mlp'
# columns (the notebook shows a truncated view of the rows and columns).
df_result

Unnamed: 0,CryoSleep,Age,VIP,Cabin_num,Cabin_side,Spendings,Cabin_deck_1,Cabin_deck_2,Cabin_deck_3,Cabin_deck_4,...,HomePlanet_Earth,HomePlanet_Europa,HomePlanet_Mars,HomePlanet_Unknown,Destination_55 Cancri e,Destination_PSO J318.5-22,Destination_TRAPPIST-1e,Destination_Unknown,preds_xgb,preds_mlp
0,True,27,False,3,False,0.0,0,0,0,0,...,1,0,0,0,0,0,1,0,True,True
1,False,19,False,4,False,2832.0,0,0,0,0,...,1,0,0,0,0,0,1,0,False,False
2,True,31,False,0,False,0.0,0,0,1,0,...,0,1,0,0,1,0,0,0,True,True
3,False,38,False,1,False,7418.0,0,0,1,0,...,0,1,0,0,0,0,1,0,False,False
4,False,20,False,5,False,645.0,0,0,0,0,...,1,0,0,0,0,0,1,0,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4125,False,40,False,1796,False,868.0,0,0,0,0,...,1,0,0,0,0,0,1,0,False,False
4126,True,34,False,1496,False,0.0,0,0,0,0,...,1,0,0,0,0,0,1,0,True,True
4127,True,28,False,296,True,0.0,0,0,0,1,...,0,0,1,0,1,0,0,0,True,True
4128,False,28,False,297,True,3203.0,0,0,0,1,...,0,1,0,0,0,0,0,1,False,False
