# <span style='color:#A80808'>Objective</span>

Predict which passengers were transported by the anomaly using records recovered from the spaceship’s damaged computer system.

In [None]:
import numpy as np
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt

from sklearn.model_selection import StratifiedKFold
from lightgbm import LGBMClassifier

import warnings
warnings.filterwarnings('ignore')

# <span style='color:#A80808'>Data</span>

In [None]:
# Read the training table, using the passenger identifier as the row index.
train = pd.read_csv(
    "../input/spaceship-titanic/train.csv",
    index_col='PassengerId',
)
print('Shape of train: {}'.format(train.shape))
# Last expression of the cell: rich preview of the first rows.
train.head()

In [None]:
# Bar chart of the number of missing entries in each column of `train`.
missing_fig, missing_ax = plt.subplots(figsize=(15, 7))
missing_ax.bar(train.columns, train.isna().sum(), color='green')
# Column names are long, so rotate the tick labels to keep them readable.
missing_ax.tick_params(axis='x', labelrotation=90, labelsize=16)
missing_ax.set_title('Missing values', fontsize=16)
plt.show()

# <span style='color:#A80808'>Target</span>

In [None]:
# Count of each value of the target column `Transported`.
fig = px.histogram(
    train['Transported'],
    marginal=None,
    nbins=200,
    template="plotly_white",
    color_discrete_sequence=['red'],
)
fig.show()

The two categories are quite balanced.

# <span style='color:#A80808'>Home planet</span>

![](https://upload.wikimedia.org/wikipedia/commons/thumb/c/c4/PIA19048_realistic_color_Europa_mosaic_%28original%29.jpg/220px-PIA19048_realistic_color_Europa_mosaic_%28original%29.jpg) (*Europa planet*)

In [None]:
# Replace missing home planets with the placeholder string 'Nan' so that
# they show up as their own category in the plots.
home_planet = train['HomePlanet']
train['HomePlanet'] = home_planet.mask(home_planet.isna(), 'Nan')

In [None]:
# Histogram of the HomePlanet column with a violin marginal.
fig = px.histogram(
    train['HomePlanet'],
    marginal='violin',
    nbins=200,
    template="plotly_white",
    color_discrete_sequence=['blue'],
)
fig.show()

In [None]:
# One figure per home planet: distribution of the target among the
# passengers from that planet.
# The grouping is built once instead of on every iteration, and the unused
# enumerate() index has been dropped.
home_planet_groups = train.groupby('HomePlanet')
for home in train['HomePlanet'].unique():
    fig = px.histogram(
        home_planet_groups.get_group(home)['Transported'],
        marginal=None,
        nbins=200,
        template="plotly_white",
        color_discrete_sequence=['orange'],
        title=home,
    )
    fig.show()

# <span style='color:#A80808'>CryoSleep</span>

![](https://www.nag.co.za/wp-content/uploads/2019/11/EventHorizonStasis_feat.jpg)

In [None]:
# Replace missing CryoSleep entries with the placeholder string 'Nan' so
# that they show up as their own category in the plots.
cryo_sleep = train['CryoSleep']
train['CryoSleep'] = cryo_sleep.mask(cryo_sleep.isna(), 'Nan')

In [None]:
# Histogram of the CryoSleep column with a violin marginal.
fig = px.histogram(
    train['CryoSleep'],
    marginal='violin',
    nbins=200,
    template="plotly_white",
    color_discrete_sequence=['blue'],
)
fig.show()

In [None]:
# One figure per distinct CryoSleep value: distribution of the target among
# the passengers with that value.
# Each group is selected with the loop's own value. Selecting with a
# variable left over from another cell would draw the same group under
# every title.
cryo_sleep_groups = train.groupby('CryoSleep')
for cryo_value in train['CryoSleep'].unique():
    fig = px.histogram(
        cryo_sleep_groups.get_group(cryo_value)['Transported'],
        marginal=None,
        nbins=200,
        template="plotly_white",
        color_discrete_sequence=['green'],
        # The values are not all strings, so convert explicitly for the title.
        title=str(cryo_value),
    )
    fig.show()

The target appears quite balanced between passengers in cryosleep and those who are not.

# <span style='color:#A80808'>Cabin</span>

In [None]:
# Replace missing cabins with the placeholder string 'Nan', then report how
# many distinct cabin values there are (placeholder included).
cabin = train['Cabin']
train['Cabin'] = cabin.mask(cabin.isna(), 'Nan')
train['Cabin'].nunique()

In [None]:
# Histogram of the Cabin column with a box-plot marginal.
fig = px.histogram(
    train['Cabin'],
    marginal='box',
    nbins=200,
    template="plotly_white",
    color_discrete_sequence=['blue'],
)
fig.show()

# <span style='color:#A80808'>Destination</span>

In [None]:
# Replace missing destinations with the placeholder string 'Nan' so that
# they show up as their own category in the plots.
destination = train['Destination']
train['Destination'] = destination.mask(destination.isna(), 'Nan')

In [None]:
# Histogram of the Destination column with a violin marginal.
fig = px.histogram(
    train['Destination'],
    marginal='violin',
    nbins=200,
    template="plotly_white",
    color_discrete_sequence=['violet'],
)
fig.show()

In [None]:
# One figure per destination: distribution of the target among the
# passengers travelling to that destination.
# Each group is selected with the loop's own value. Selecting with a
# variable left over from another cell would draw the same group under
# every title.
destination_groups = train.groupby('Destination')
for destination_value in train['Destination'].unique():
    fig = px.histogram(
        destination_groups.get_group(destination_value)['Transported'],
        marginal=None,
        nbins=200,
        template="plotly_white",
        color_discrete_sequence=['purple'],
        title=destination_value,
    )
    fig.show()

The target appears quite balanced across the destinations, including the group with a missing destination.

# <span style='color:#A80808'>Age</span>

In [None]:
# Mark missing ages with the sentinel value -1.
age = train['Age']
train['Age'] = age.mask(age.isna(), -1)

In [None]:
# Histogram of the Age column with a violin marginal.
fig = px.histogram(
    train['Age'],
    marginal='violin',
    nbins=200,
    template="plotly_white",
    color_discrete_sequence=['maroon'],
)
fig.show()

In [None]:
# Store ages as compact integers. This relies on the missing ages having
# already been replaced by the -1 sentinel, because NaN cannot be cast to int.
train['Age'] = train['Age'].astype('int16')

# Target distribution per age band. The -1 sentinel for missing ages gets
# its own figure; without the `>= 0` bound it would be counted as a minor
# and distort the 'Age < 18' plot.
age = train['Age']
age_bands = (
    ((age >= 0) & (age < 18), 'fuchsia', 'Age < 18'),
    ((age >= 18) & (age <= 60), 'lime', 'Age from 18 to 60'),
    (age > 60, 'olive', 'Age > 60'),
    (age == -1, 'gray', 'Age is missing'),
)
for band_mask, band_color, band_title in age_bands:
    fig = px.histogram(
        train.loc[band_mask, 'Transported'],
        marginal=None,
        nbins=200,
        template="plotly_white",
        color_discrete_sequence=[band_color],
        title=band_title,
    )
    fig.show()

# <span style='color:#A80808'>VIP</span>

In [None]:
# Replace missing VIP entries with the placeholder string 'Nan' so that
# they show up as their own category in the plots.
vip = train['VIP']
train['VIP'] = vip.mask(vip.isna(), 'Nan')

In [None]:
# Histogram of the VIP column with a violin marginal.
fig = px.histogram(
    train['VIP'],
    marginal='violin',
    nbins=200,
    template="plotly_white",
    color_discrete_sequence=['navy'],
)
fig.show()

In [None]:
# One figure per distinct VIP value: distribution of the target among the
# passengers with that value.
# Each group is selected with the loop's own value. Selecting with a
# variable left over from another cell would draw the same group under
# every title.
vip_groups = train.groupby('VIP')
for vip_value in train['VIP'].unique():
    fig = px.histogram(
        vip_groups.get_group(vip_value)['Transported'],
        marginal=None,
        nbins=200,
        template="plotly_white",
        color_discrete_sequence=['teal'],
        # The values are not all strings, so convert explicitly for the title.
        title=str(vip_value),
    )
    fig.show()

# <span style='color:#A80808'>RoomService</span>

In [None]:
# Mark missing RoomService amounts with the sentinel value -1.
room_service = train['RoomService']
train['RoomService'] = room_service.mask(room_service.isna(), -1)

In [None]:
# Histogram of the RoomService column with a violin marginal.
fig = px.histogram(
    train['RoomService'],
    marginal='violin',
    nbins=200,
    template="plotly_white",
    color_discrete_sequence=['aqua'],
)
fig.show()

In [None]:
# Target distribution for three RoomService cases: value 0, value above 0,
# and the -1 sentinel that marks a missing value.
room_service = train['RoomService']
room_service_cases = (
    (room_service == 0, 'antiquewhite', 'RoomService=0'),
    (room_service > 0, 'aqua', 'RoomService>0'),
    (room_service == -1, 'aquamarine', 'RoomService is missing'),
)
for case_mask, case_color, case_title in room_service_cases:
    fig = px.histogram(
        train.loc[case_mask, 'Transported'],
        marginal=None,
        nbins=200,
        template="plotly_white",
        color_discrete_sequence=[case_color],
        title=case_title,
    )
    fig.show()

# <span style='color:#A80808'>FoodCourt</span>

In [None]:
# Mark missing FoodCourt amounts with the sentinel value -1.
food_court = train['FoodCourt']
train['FoodCourt'] = food_court.mask(food_court.isna(), -1)

In [None]:
# Histogram of the FoodCourt column with a violin marginal.
fig = px.histogram(
    train['FoodCourt'],
    marginal='violin',
    nbins=200,
    template="plotly_white",
    color_discrete_sequence=['aqua'],
)
fig.show()

In [None]:
# Target distribution for three FoodCourt cases: value 0, value above 0,
# and the -1 sentinel that marks a missing value.
food_court = train['FoodCourt']
food_court_cases = (
    (food_court == 0, 'antiquewhite', 'FoodCourt=0'),
    (food_court > 0, 'aqua', 'FoodCourt>0'),
    (food_court == -1, 'aquamarine', 'FoodCourt is missing'),
)
for case_mask, case_color, case_title in food_court_cases:
    fig = px.histogram(
        train.loc[case_mask, 'Transported'],
        marginal=None,
        nbins=200,
        template="plotly_white",
        color_discrete_sequence=[case_color],
        title=case_title,
    )
    fig.show()

# <span style='color:#A80808'>ShoppingMall</span>

In [None]:
# Mark missing ShoppingMall amounts with the sentinel value -1.
shopping_mall = train['ShoppingMall']
train['ShoppingMall'] = shopping_mall.mask(shopping_mall.isna(), -1)

In [None]:
# Histogram of the ShoppingMall column with a violin marginal.
fig = px.histogram(
    train['ShoppingMall'],
    marginal='violin',
    nbins=200,
    template="plotly_white",
    color_discrete_sequence=['aqua'],
)
fig.show()

In [None]:
# Target distribution for three ShoppingMall cases: value 0, value above 0,
# and the -1 sentinel that marks a missing value.
shopping_mall = train['ShoppingMall']
shopping_mall_cases = (
    (shopping_mall == 0, 'antiquewhite', 'ShoppingMall=0'),
    (shopping_mall > 0, 'aqua', 'ShoppingMall>0'),
    (shopping_mall == -1, 'aquamarine', 'ShoppingMall is missing'),
)
for case_mask, case_color, case_title in shopping_mall_cases:
    fig = px.histogram(
        train.loc[case_mask, 'Transported'],
        marginal=None,
        nbins=200,
        template="plotly_white",
        color_discrete_sequence=[case_color],
        title=case_title,
    )
    fig.show()

# <span style='color:#A80808'>Spa</span>

In [None]:
# Mark missing Spa amounts with the sentinel value -1.
spa = train['Spa']
train['Spa'] = spa.mask(spa.isna(), -1)

In [None]:
# Histogram of the Spa column with a violin marginal.
fig = px.histogram(
    train['Spa'],
    marginal='violin',
    nbins=200,
    template="plotly_white",
    color_discrete_sequence=['aqua'],
)
fig.show()

In [None]:
# Target distribution for three Spa cases: value 0, value above 0, and the
# -1 sentinel that marks a missing value.
spa = train['Spa']
spa_cases = (
    (spa == 0, 'antiquewhite', 'Spa=0'),
    (spa > 0, 'aqua', 'Spa>0'),
    (spa == -1, 'aquamarine', 'Spa is missing'),
)
for case_mask, case_color, case_title in spa_cases:
    fig = px.histogram(
        train.loc[case_mask, 'Transported'],
        marginal=None,
        nbins=200,
        template="plotly_white",
        color_discrete_sequence=[case_color],
        title=case_title,
    )
    fig.show()

# <span style='color:#A80808'>VRDeck</span>

In [None]:
# Mark missing VRDeck amounts with the sentinel value -1.
vr_deck = train['VRDeck']
train['VRDeck'] = vr_deck.mask(vr_deck.isna(), -1)

In [None]:
# Histogram of the VRDeck column with a violin marginal.
fig = px.histogram(
    train['VRDeck'],
    marginal='violin',
    nbins=200,
    template="plotly_white",
    color_discrete_sequence=['aqua'],
)
fig.show()

In [None]:
# Target distribution for three VRDeck cases: value 0, value above 0, and
# the -1 sentinel that marks a missing value.
vr_deck = train['VRDeck']
vr_deck_cases = (
    (vr_deck == 0, 'antiquewhite', 'VRDeck=0'),
    (vr_deck > 0, 'aqua', 'VRDeck>0'),
    (vr_deck == -1, 'aquamarine', 'VRDeck is missing'),
)
for case_mask, case_color, case_title in vr_deck_cases:
    fig = px.histogram(
        train.loc[case_mask, 'Transported'],
        marginal=None,
        nbins=200,
        template="plotly_white",
        color_discrete_sequence=[case_color],
        title=case_title,
    )
    fig.show()