In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [None]:
# Load seaborn's bundled Titanic passenger dataset as a DataFrame.
df = sns.load_dataset("titanic")

In [None]:
# Preview the raw frame; the notebook display truncates it to the first and last rows.
df

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.0500,S,Third,man,True,,Southampton,no,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,Second,man,True,,Southampton,no,True
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True
888,0,3,female,,1,2,23.4500,S,Third,woman,False,,Southampton,no,False
889,1,1,male,26.0,0,0,30.0000,C,First,man,True,C,Cherbourg,yes,True


# 데이터 처리

In [None]:
# Drop columns that are not used as model inputs, leaving survived, pclass,
# sex, age, sibsp, parch and fare.
df = df.drop(
    columns=[
        'embark_town',
        'who',
        'adult_male',
        'alive',
        'class',
        'embarked',
        'alone',
        'deck',
    ]
)

In [None]:
# Confirm that only the seven retained columns are left.
df

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare
0,0,3,male,22.0,1,0,7.2500
1,1,1,female,38.0,1,0,71.2833
2,1,3,female,26.0,0,0,7.9250
3,1,1,female,35.0,1,0,53.1000
4,0,3,male,35.0,0,0,8.0500
...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000
887,1,1,female,19.0,0,0,30.0000
888,0,3,female,,1,2,23.4500
889,1,1,male,26.0,0,0,30.0000


In [None]:
# Inspect dtypes and non-null counts to find columns that still need imputation.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   survived  891 non-null    int64  
 1   pclass    891 non-null    int64  
 2   sex       891 non-null    object 
 3   age       714 non-null    float64
 4   sibsp     891 non-null    int64  
 5   parch     891 non-null    int64  
 6   fare      891 non-null    float64
dtypes: float64(2), int64(4), object(1)
memory usage: 48.9+ KB


In [None]:
# Impute missing ages with the median age.
# The fill is restricted to the 'age' column: a bare df.fillna(<scalar>) would
# write the age median into NaNs of *every* column, which is only harmless while
# 'age' happens to be the sole column with missing values.
# Re-assigning (instead of inplace=True) keeps the cell safe to re-run.
df = df.fillna({'age': df['age'].median()})
df

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare
0,0,3,male,22.0,1,0,7.2500
1,1,1,female,38.0,1,0,71.2833
2,1,3,female,26.0,0,0,7.9250
3,1,1,female,35.0,1,0,53.1000
4,0,3,male,35.0,0,0,8.0500
...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000
887,1,1,female,19.0,0,0,30.0000
888,0,3,female,28.0,1,2,23.4500
889,1,1,male,26.0,0,0,30.0000


In [None]:
# One-hot encode the remaining non-numeric column(s) as 0/1 integer indicators.
df = pd.get_dummies(data=df, dtype=int)

In [None]:
# Check the encoded frame: 'sex' is now the indicator pair sex_female / sex_male.
df

Unnamed: 0,survived,pclass,age,sibsp,parch,fare,sex_female,sex_male
0,0,3,22.0,1,0,7.2500,0,1
1,1,1,38.0,1,0,71.2833,1,0
2,1,3,26.0,0,0,7.9250,1,0
3,1,1,35.0,1,0,53.1000,1,0
4,0,3,35.0,0,0,8.0500,0,1
...,...,...,...,...,...,...,...,...
886,0,2,27.0,0,0,13.0000,0,1
887,1,1,19.0,0,0,30.0000,1,0
888,0,3,28.0,1,2,23.4500,1,0
889,1,1,26.0,0,0,30.0000,0,1


In [None]:
# Hold out 40% of the rows for evaluation.
# random_state pins the shuffle so that a fresh "Restart & Run All" reproduces
# the same split, and therefore comparable metrics, on every run.
X_train, X_test, y_train, y_test = train_test_split(
    df.drop('survived', axis=1),  # features: everything except the target
    df['survived'],               # target: 0 / 1 survival flag
    test_size=0.4,
    random_state=42,
)

In [None]:
# Training features; the row index is shuffled because the split samples rows at random.
X_train

Unnamed: 0,pclass,age,sibsp,parch,fare,sex_female,sex_male
336,1,29.0,1,0,66.6000,0,1
207,3,26.0,0,0,18.7875,0,1
431,3,28.0,1,0,16.1000,1,0
464,3,28.0,0,0,8.0500,0,1
406,3,51.0,0,0,7.7500,0,1
...,...,...,...,...,...,...,...
367,3,28.0,0,0,7.2292,1,0
837,3,28.0,0,0,8.0500,0,1
606,3,30.0,0,0,7.8958,0,1
389,2,17.0,0,0,12.0000,1,0


In [None]:
# Held-out features used only for evaluation.
X_test

Unnamed: 0,pclass,age,sibsp,parch,fare,sex_female,sex_male
28,3,28.0,0,0,7.8792,1,0
764,3,16.0,0,0,7.7750,0,1
578,3,28.0,1,0,14.4583,1,0
246,3,25.0,0,0,7.7750,1,0
746,3,16.0,1,1,20.2500,0,1
...,...,...,...,...,...,...,...
494,3,21.0,0,0,8.0500,0,1
349,3,42.0,0,0,8.6625,0,1
879,1,56.0,0,1,83.1583,1,0
437,2,24.0,2,3,18.7500,1,0


In [None]:
# Training labels, indexed the same way as X_train.
y_train

Unnamed: 0,survived
336,0
207,1
431,1
464,0
406,0
...,...
367,1
837,0
606,0
389,1


In [None]:
# Held-out labels, indexed the same way as X_test.
y_test

Unnamed: 0,survived
28,1
764,0
578,0
246,0
746,0
...,...
494,0
349,0
879,1
437,1


# 모델 학습

In [None]:
# Multi-layer perceptron with scikit-learn's default architecture, trained with Adam
# for at most 300 iterations; verbose=1 prints the loss after each iteration.
# random_state fixes weight initialisation and batch shuffling so that training
# (and the metrics computed below) is repeatable across runs.
# NOTE(review): features are fed in unscaled ('fare' and 'age' have much larger
# ranges than the 0/1 indicators) - consider standardising them; confirm impact.
mlp = MLPClassifier(max_iter=300, solver='adam', verbose=1, random_state=42)

In [None]:
# Train the network on the training split.
mlp.fit(X=X_train, y=y_train)

Iteration 1, loss = 4.00774806
Iteration 2, loss = 3.06096867
Iteration 3, loss = 2.16874446
Iteration 4, loss = 1.32469920
Iteration 5, loss = 0.82202033
Iteration 6, loss = 0.74268561
Iteration 7, loss = 0.89288153
Iteration 8, loss = 0.96319047
Iteration 9, loss = 0.87413266
Iteration 10, loss = 0.74964176
Iteration 11, loss = 0.66923390
Iteration 12, loss = 0.66274463
Iteration 13, loss = 0.68161673
Iteration 14, loss = 0.67624781
Iteration 15, loss = 0.64298406
Iteration 16, loss = 0.60871967
Iteration 17, loss = 0.58893676
Iteration 18, loss = 0.59536892
Iteration 19, loss = 0.58937887
Iteration 20, loss = 0.57292896
Iteration 21, loss = 0.56619324
Iteration 22, loss = 0.56415236
Iteration 23, loss = 0.55781088
Iteration 24, loss = 0.55156379
Iteration 25, loss = 0.54698678
Iteration 26, loss = 0.54402041
Iteration 27, loss = 0.53942589
Iteration 28, loss = 0.53642710
Iteration 29, loss = 0.53437337
Iteration 30, loss = 0.53310309
Iteration 31, loss = 0.52893499
Iteration 32, los

In [None]:
# Predicted class labels for the held-out rows, in the same row order as X_test.
y_pred = mlp.predict(X=X_test)

## 정확률

In [None]:
# Count the test rows whose predicted label differs from the true label.
# The two sequences are paired positionally (y_test keeps its shuffled index,
# so label-based alignment must not be used here).
incorrect = sum(1 for predicted, actual in zip(y_pred, y_test) if predicted != actual)


In [None]:
# Share of test rows predicted correctly, expressed as a percentage.
accuracy = (len(y_test) - incorrect) / len(y_test) * 100


## Class 1 (survived): precision / recall / F1

In [None]:
# Hand-computed precision / recall / F1 for class 1, for comparison with
# sklearn's classification_report.
#
# The comparison is done on whole arrays (positionally) instead of a Python loop
# with per-row .iloc look-ups.
y_true = y_test.to_numpy()
predicted_1 = y_pred == 1
mismatch = y_pred != y_true

tp1 = int((predicted_1 & ~mismatch).sum())   # predicted 1 and correct
fp1 = int((predicted_1 & mismatch).sum())    # predicted 1 but wrong
fn1 = int((~predicted_1 & mismatch).sum())   # predicted another label but wrong

# Guard every ratio: if the model never predicts class 1, or the test split
# contains no class-1 rows, the denominator is 0. Report 0.0 in that case
# instead of raising ZeroDivisionError.
precision = tp1 / (tp1 + fp1) if (tp1 + fp1) else 0.0
recall = tp1 / (tp1 + fn1) if (tp1 + fn1) else 0.0
f1_score = (2 * precision * recall) / (precision + recall) if (precision + recall) else 0.0

print(f"Accuracy: {accuracy}%")
print(f"Precision: {precision * 100}%")
print(f"Recall: {recall * 100}%")
print(f"F1 Score: {f1_score * 100}%")

Accuracy: 79.83193277310924%
Precision: 67.88321167883211%
Recall: 76.85950413223141%
F1 Score: 72.09302325581396%


## Class 0 (did not survive): precision / recall / F1


In [None]:
# Hand-computed precision / recall / F1 for class 0, for comparison with
# sklearn's classification_report. Here class 0 is treated as the "positive"
# class, so a true positive is a correct prediction of 0.
#
# The comparison is done on whole arrays (positionally) instead of a Python loop
# with per-row .iloc look-ups.
y_true = y_test.to_numpy()
predicted_0 = y_pred == 0
mismatch = y_pred != y_true

tp0 = int((predicted_0 & ~mismatch).sum())   # predicted 0 and correct
fp0 = int((predicted_0 & mismatch).sum())    # predicted 0 but wrong
fn0 = int((~predicted_0 & mismatch).sum())   # predicted another label but wrong

# Guard every ratio: if the model never predicts class 0, or the test split
# contains no class-0 rows, the denominator is 0. Report 0.0 in that case
# instead of raising ZeroDivisionError.
precision = tp0 / (tp0 + fp0) if (tp0 + fp0) else 0.0
recall = tp0 / (tp0 + fn0) if (tp0 + fn0) else 0.0
f1_score = (2 * precision * recall) / (precision + recall) if (precision + recall) else 0.0

print(f"Accuracy: {accuracy}%")
print(f"Precision: {precision * 100}%")
print(f"Recall: {recall * 100}%")
print(f"F1 Score: {f1_score * 100}%")

Accuracy: 79.83193277310924%
Precision: 87.27272727272727%
Recall: 81.35593220338984%
F1 Score: 84.21052631578948%


In [None]:
# Cross-check the hand-computed metrics against scikit-learn's per-class report.
# print() is needed so the newlines in the returned text are rendered.
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.87      0.81      0.84       236
           1       0.68      0.77      0.72       121

    accuracy                           0.80       357
   macro avg       0.78      0.79      0.78       357
weighted avg       0.81      0.80      0.80       357

