In [3]:
# titanic_tf_probabilities.py
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

# 1️⃣ 데이터 불러오기
df = pd.read_csv("titanic1309.csv")

# 2️⃣ 사용할 열 선택
X = df[['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']]
y = df['Survived']

# 3️⃣ 결측치 처리 (수치형: 평균으로, 범주형: 가장 많은 값으로)
X['Age'].fillna(X['Age'].mean(), inplace=True)
X['Fare'].fillna(X['Fare'].mean(), inplace=True)
X['Embarked'].fillna(X['Embarked'].mode()[0], inplace=True)

# 4️⃣ 범주형을 숫자로 변환 (원-핫 인코딩)
X = pd.get_dummies(X, columns=['Sex', 'Embarked'], drop_first=True)

# 5️⃣ 수치형 변수 스케일링 (평균=0, 표준편차=1)
scaler = StandardScaler()
X[['Age', 'Fare', 'SibSp', 'Parch', 'Pclass']] = scaler.fit_transform(X[['Age', 'Fare', 'SibSp', 'Parch', 'Pclass']])

# 6️⃣ 학습용, 테스트용 데이터셋으로 나누기
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2, random_state=42)

# 7️⃣ TensorFlow 모델 구성
model = Sequential([
    Dense(16, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.2),
    Dense(8, activation='relu'),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# 8️⃣ 모델 학습
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2, verbose=1)

# 9️⃣ 테스트셋 정확도 평가
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
print(f"[TensorFlow] Test Accuracy: {test_acc:.4f}")

# 🔟 전체 데이터에 대해 생존 확률 예측
predicted_probs = model.predict(X)  # X 전체 데이터로 예측

# 🔟 예측 결과를 원본 데이터프레임에 추가
df['Survival_Probability'] = predicted_probs

# 🔟 최종 결과 출력
print(df[['PassengerId', 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked', 'Survival_Probability']])

# 필요하면 CSV로 저장 가능
# df.to_csv("titanic_with_probabilities.csv", index=False)


Epoch 1/50


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X['Age'].fillna(X['Age'].mean(), inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['Age'].fillna(X['Age'].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) 

[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step - accuracy: 0.5887 - loss: 0.8115 - val_accuracy: 0.6667 - val_loss: 0.7468
Epoch 2/50
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.6161 - loss: 0.8014 - val_accuracy: 0.6762 - val_loss: 0.6868
Epoch 3/50
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.6205 - loss: 0.7256 - val_accuracy: 0.6810 - val_loss: 0.6460
Epoch 4/50
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.6402 - loss: 0.6807 - val_accuracy: 0.6857 - val_loss: 0.6109
Epoch 5/50
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.6712 - loss: 0.6133 - val_accuracy: 0.7000 - val_loss: 0.5846
Epoch 6/50
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.6835 - loss: 0.5981 - val_accuracy: 0.7238 - val_loss: 0.5584
Epoch 7/50
[1m27/27[0m [32m━━━━━━━━━━━━━━━