In [1]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns 
%matplotlib inline 
from sklearn.model_selection import train_test_split 
from sklearn.tree import DecisionTreeClassifier 
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# create a decision tree model that predicts survival from seaborn 

# Load in Data 

In [2]:
Titanic = sns.load_dataset('titanic')
Titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


# Recode columns to integers

In [3]:
def item (series): 
    if series == "male":
        return 0
    if series == "female":
        return 1
Titanic['sexR'] = Titanic['sex'].apply(item)

In [4]:
Titanic.embarked.value_counts()

S    644
C    168
Q     77
Name: embarked, dtype: int64

In [5]:
def item (series): 
    if series == "S":
        return 0
    if series == "C":
        return 1
    if series == "Q":
        return 2
Titanic['embarkedR'] = Titanic['embarked'].apply(item)

In [6]:
Titanic.who.value_counts()

man      537
woman    271
child     83
Name: who, dtype: int64

In [7]:
def item (series):
    if series == "man":
        return 0 
    if series == "woman": 
        return 1
    if series == "child": 
        return 2
Titanic['whoR'] = Titanic['who'].apply(item)

In [8]:
Titanic.adult_male.value_counts()

True     537
False    354
Name: adult_male, dtype: int64

In [9]:
Titanic['adult_male'] = Titanic['adult_male'].astype(int)

In [10]:
Titanic.deck.value_counts()

C    59
B    47
D    33
E    32
A    15
F    13
G     4
Name: deck, dtype: int64

In [11]:
def item (series): 
    if series == "A":
        return 0 
    if series == "B":
        return 1
    if series == "C":
        return 2 
    if series == "D":
        return 3
    if series == "E":
        return 4
    if series == "F":
        return 5
Titanic['deckR'] = Titanic['deck'].apply(item)

In [12]:
Titanic.alive.value_counts()

no     549
yes    342
Name: alive, dtype: int64

In [13]:
def item (series):
    if series == "no":
        return 0 
    if series == "yes": 
        return 1
Titanic['aliveR'] = Titanic['alive'].apply(item)

In [14]:
Titanic.alone.value_counts()

True     537
False    354
Name: alone, dtype: int64

In [15]:
Titanic['alone'] = Titanic['alone'].astype(int)

In [16]:
Titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone,sexR,embarkedR,whoR,deckR,aliveR
0,0,3,male,22.0,1,0,7.25,S,Third,man,1,,Southampton,no,0,0,0.0,0,,0
1,1,1,female,38.0,1,0,71.2833,C,First,woman,0,C,Cherbourg,yes,0,1,1.0,1,2.0,1
2,1,3,female,26.0,0,0,7.925,S,Third,woman,0,,Southampton,yes,1,1,0.0,1,,1
3,1,1,female,35.0,1,0,53.1,S,First,woman,0,C,Southampton,yes,0,1,0.0,1,2.0,1
4,0,3,male,35.0,0,0,8.05,S,Third,man,1,,Southampton,no,1,0,0.0,0,,0


# Drop old columns 

In [17]:
Titanic2 = Titanic.drop("class", axis=1)
Titanic3 = Titanic2.drop("embarked", axis=1)
Titanic4 = Titanic3.drop("who", axis=1)
Titanic5 = Titanic4.drop("deck", axis=1)
Titanic6 = Titanic5.drop("embark_town", axis=1)
Titanic7 = Titanic6.drop("alive", axis=1)
Titanic8 = Titanic7.drop("sex", axis=1)

In [18]:
Titanic8.head()

Unnamed: 0,survived,pclass,age,sibsp,parch,fare,adult_male,alone,sexR,embarkedR,whoR,deckR,aliveR
0,0,3,22.0,1,0,7.25,1,0,0,0.0,0,,0
1,1,1,38.0,1,0,71.2833,0,0,1,1.0,1,2.0,1
2,1,3,26.0,0,0,7.925,0,1,1,0.0,1,,1
3,1,1,35.0,1,0,53.1,0,0,1,0.0,1,2.0,1
4,0,3,35.0,0,0,8.05,1,1,0,0.0,0,,0


In [25]:
Titanic8.dtypes

survived        int64
pclass          int64
age           float64
sibsp           int64
parch           int64
fare          float64
adult_male      int64
alone           int64
sexR            int64
embarkedR     float64
whoR            int64
deckR         float64
aliveR          int64
dtype: object

In [20]:
Titanic9 = Titanic8.fillna("0")

# Define x & y 

In [22]:
x = Titanic9.drop('survived', axis=1)
y = Titanic9['survived']

# Test Train Split

In [23]:
x_train, x_test, y_train, y_test = train_test_split(x,y, test_size= 0.3)

# Create Inital Decision Tree

In [24]:
decisionTree = DecisionTreeClassifier()
decisionTree.fit(x_train, y_train)

DecisionTreeClassifier()

# Assess the Model 
## create a set of predictions & interpret the results 

In [33]:
titanicPredictions = decisionTree.predict(x_test)

# Read the Confusion Matrix

In [34]:
print(confusion_matrix(y_test, titanicPredictions))

[[165   0]
 [  0 103]]


In [35]:
print(classification_report(y_test, titanicPredictions))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00       165
           1       1.00      1.00      1.00       103

    accuracy                           1.00       268
   macro avg       1.00      1.00      1.00       268
weighted avg       1.00      1.00      1.00       268



### non-survived(0) was predicted w/ 100% percision 
### survived(1) was predicted w/ 100% percision 

# Random Forest Test

In [36]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

### Data wrangling & train test split already done
### inital random forest model 

In [39]:
titanic = RandomForestClassifier(n_estimators=500)
titanic.fit(x_train, y_train)

RandomForestClassifier(n_estimators=500)

## Evaluate Model Fit

In [41]:
titanic2Predictions = titanic.predict(x_test)
print(confusion_matrix(y_test, titanic2Predictions))
print(classification_report(y_test, titanic2Predictions))

[[165   0]
 [  0 103]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       165
           1       1.00      1.00      1.00       103

    accuracy                           1.00       268
   macro avg       1.00      1.00      1.00       268
weighted avg       1.00      1.00      1.00       268



### the results of the random forest & the decision tree are the same.
### non-survivers(0) was predicted w/ 100% percision 
### survivers(1) was predicted w/ 100% percision 