In [1]:
# titanic_tf_simple.py
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer

# 1️⃣ 데이터 불러오기
df = pd.read_csv("titanic1309.csv")

# 2️⃣ 사용할 열 선택
X = df[['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']]
y = df['Survived']

# 3️⃣ 결측치 처리 (수치형: 평균으로, 범주형: 가장 많은 값으로)
X['Age'].fillna(X['Age'].mean(), inplace=True)
X['Fare'].fillna(X['Fare'].mean(), inplace=True)
X['Embarked'].fillna(X['Embarked'].mode()[0], inplace=True)

# 4️⃣ 범주형을 숫자로 변환 (원-핫 인코딩)
X = pd.get_dummies(X, columns=['Sex', 'Embarked'], drop_first=True)

# 5️⃣ 수치형 변수 스케일링 (평균=0, 표준편차=1)
scaler = StandardScaler()
X[['Age', 'Fare', 'SibSp', 'Parch', 'Pclass']] = scaler.fit_transform(X[['Age', 'Fare', 'SibSp', 'Parch', 'Pclass']])

# 6️⃣ 학습용, 테스트용 데이터셋으로 나누기
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2, random_state=42)

# 7️⃣ TensorFlow 모델
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

model = Sequential([
    Dense(16, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.2),
    Dense(8, activation='relu'),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2, verbose=1)

# 8️⃣ 테스트셋 정확도 평가
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
print(f"[TensorFlow] Test Accuracy: {test_acc:.4f}")


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X['Age'].fillna(X['Age'].mean(), inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['Age'].fillna(X['Age'].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) 

Epoch 1/50


2025-05-30 19:59:58.174591: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 51ms/step - accuracy: 0.4622 - loss: 0.8758 - val_accuracy: 0.6238 - val_loss: 0.7384
Epoch 2/50
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.6283 - loss: 0.7551 - val_accuracy: 0.7000 - val_loss: 0.6402
Epoch 3/50
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.7030 - loss: 0.6310 - val_accuracy: 0.7286 - val_loss: 0.5837
Epoch 4/50
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.7131 - loss: 0.5993 - val_accuracy: 0.7429 - val_loss: 0.5433
Epoch 5/50
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.7281 - loss: 0.5531 - val_accuracy: 0.7619 - val_loss: 0.5140
Epoch 6/50
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.7375 - loss: 0.5388 - val_accuracy: 0.8000 - val_loss: 0.4881
Epoch 7/50
[1m27/27[0m [32m━━━━━━━━━━━━━━━

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer

# 1️⃣ 데이터 불러오기
df = pd.read_csv("titanic1309.csv")

# 2️⃣ 사용할 열 선택
X = df[['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']]
y = df['Survived']

# 3️⃣ 결측치 처리 (수치형: 평균으로, 범주형: 가장 많은 값으로)
X['Age'].fillna(X['Age'].mean(), inplace=True)
X['Fare'].fillna(X['Fare'].mean(), inplace=True)
X['Embarked'].fillna(X['Embarked'].mode()[0], inplace=True)

# 4️⃣ 범주형을 숫자로 변환 (원-핫 인코딩)
X = pd.get_dummies(X, columns=['Sex', 'Embarked'], drop_first=True)

# 5️⃣ 수치형 변수 스케일링 (평균=0, 표준편차=1)
scaler = StandardScaler()
X[['Age', 'Fare', 'SibSp', 'Parch', 'Pclass']] = scaler.fit_transform(X[['Age', 'Fare', 'SibSp', 'Parch', 'Pclass']])

# 6️⃣ 학습용, 테스트용 데이터셋으로 나누기
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2, random_state=42)

# 7️⃣ TensorFlow 모델
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

model = Sequential([
    Dense(16, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.2),
    Dense(8, activation='relu'),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2, verbose=1)

# 8️⃣ 테스트셋 정확도 평가
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
print(f"[TensorFlow] Test Accuracy: {test_acc:.4f}")

# 9️⃣ 테스트셋 각 샘플별 생존 확률 출력
y_pred_probs = model.predict(X_test)

# 0~1 사이의 생존 확률 출력 (앞부분만 일부 확인)
print("\n🎯 각 샘플별 생존 확률 (앞 10개 샘플):")
for idx, prob in enumerate(y_pred_probs[:10]):
    print(f"샘플 {idx+1} → 생존 확률: {prob[0]:.4f}")


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X['Age'].fillna(X['Age'].mean(), inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['Age'].fillna(X['Age'].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) 

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2025-06-01 11:03:14.146957: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2 Max
2025-06-01 11:03:14.146997: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 96.00 GB
2025-06-01 11:03:14.147002: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 36.00 GB
2025-06-01 11:03:14.147155: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-06-01 11:03:14.147167: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
2025-06-01 11:03:14.521727: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plu

[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - accuracy: 0.6999 - loss: 0.6038 - val_accuracy: 0.7810 - val_loss: 0.5012
Epoch 2/50
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.7187 - loss: 0.5590 - val_accuracy: 0.8286 - val_loss: 0.4718
Epoch 3/50
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.7403 - loss: 0.5412 - val_accuracy: 0.8381 - val_loss: 0.4560
Epoch 4/50
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.7449 - loss: 0.5484 - val_accuracy: 0.8571 - val_loss: 0.4453
Epoch 5/50
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.7725 - loss: 0.5490 - val_accuracy: 0.8619 - val_loss: 0.4306
Epoch 6/50
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.7792 - loss: 0.4889 - val_accuracy: 0.8762 - val_loss: 0.4187
Epoch 7/50
[1m27/27[0m [32m━━━━━━━━━━━━━━━

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer

# 1️⃣ 데이터 불러오기
df = pd.read_csv("titanic1309.csv")

# 2️⃣ 사용할 열 선택
X = df[['PassengerId', 'Name', 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']]
y = df['Survived']

# 3️⃣ 결측치 처리 (수치형: 평균으로, 범주형: 가장 많은 값으로)
X['Age'].fillna(X['Age'].mean(), inplace=True)
X['Fare'].fillna(X['Fare'].mean(), inplace=True)
X['Embarked'].fillna(X['Embarked'].mode()[0], inplace=True)

# 4️⃣ 범주형을 숫자로 변환 (원-핫 인코딩)
X_model = X.drop(['PassengerId', 'Name'], axis=1)
X_model = pd.get_dummies(X_model, columns=['Sex', 'Embarked'], drop_first=True)

# 5️⃣ 수치형 변수 스케일링
scaler = StandardScaler()
X_model[['Age', 'Fare', 'SibSp', 'Parch', 'Pclass']] = scaler.fit_transform(
    X_model[['Age', 'Fare', 'SibSp', 'Parch', 'Pclass']]
)

# 6️⃣ 학습용, 테스트용 데이터셋으로 나누기 (PassengerId, Name 포함해서 같이 나누기)
X_train_model, X_test_model, X_train_meta, X_test_meta, y_train, y_test = train_test_split(
    X_model, X[['PassengerId', 'Name']], y, stratify=y, test_size=0.2, random_state=42
)

# 7️⃣ TensorFlow 모델
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

model = Sequential([
    Dense(16, activation='relu', input_shape=(X_train_model.shape[1],)),
    Dropout(0.2),
    Dense(8, activation='relu'),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

model.fit(X_train_model, y_train, epochs=50, batch_size=32, validation_split=0.2, verbose=1)

# 8️⃣ 테스트셋 정확도 평가
test_loss, test_acc = model.evaluate(X_test_model, y_test, verbose=0)
print(f"[TensorFlow] Test Accuracy: {test_acc:.4f}")

# 9️⃣ 테스트셋 각 샘플별 생존 확률 출력 (PassengerId, Name 포함)
y_pred_probs = model.predict(X_test_model)

# 0~1 사이의 생존 확률과 PassengerId, Name 함께 출력
print("\n🎯 테스트셋 각 샘플별 생존 확률:")
for idx in range(len(y_pred_probs)):
    passenger_id = X_test_meta.iloc[idx]['PassengerId']
    passenger_name = X_test_meta.iloc[idx]['Name']
    prob = y_pred_probs[idx][0]
    print(f"PassengerId: {passenger_id}, Name: {passenger_name}, 생존 확률: {prob:.4f}")


Epoch 1/50


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X['Age'].fillna(X['Age'].mean(), inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['Age'].fillna(X['Age'].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) 

[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step - accuracy: 0.6455 - loss: 0.6715 - val_accuracy: 0.7571 - val_loss: 0.5885
Epoch 2/50
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.6708 - loss: 0.6123 - val_accuracy: 0.7524 - val_loss: 0.5411
Epoch 3/50
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.6944 - loss: 0.5752 - val_accuracy: 0.7571 - val_loss: 0.5165
Epoch 4/50
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.7261 - loss: 0.5293 - val_accuracy: 0.7667 - val_loss: 0.5013
Epoch 5/50
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.7239 - loss: 0.5379 - val_accuracy: 0.7714 - val_loss: 0.4883
Epoch 6/50
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.7488 - loss: 0.5099 - val_accuracy: 0.7810 - val_loss: 0.4768
Epoch 7/50
[1m27/27[0m [32m━━━━━━━━━━━━━━━

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

# 1️⃣ 데이터 불러오기
df = pd.read_csv("titanic1309.csv")

# 2️⃣ 사용할 열 선택
X = df[['PassengerId', 'Name', 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']]
y = df['Survived']

# 3️⃣ 결측치 처리
X['Age'].fillna(X['Age'].mean(), inplace=True)
X['Fare'].fillna(X['Fare'].mean(), inplace=True)
X['Embarked'].fillna(X['Embarked'].mode()[0], inplace=True)

# 4️⃣ 원-핫 인코딩 (PassengerId, Name 제외)
X_model = X.drop(['PassengerId', 'Name'], axis=1)
X_model = pd.get_dummies(X_model, columns=['Sex', 'Embarked'], drop_first=True)

# 5️⃣ 수치형 변수 스케일링
scaler = StandardScaler()
X_model[['Age', 'Fare', 'SibSp', 'Parch', 'Pclass']] = scaler.fit_transform(
    X_model[['Age', 'Fare', 'SibSp', 'Parch', 'Pclass']]
)

# 6️⃣ 학습용, 테스트용 데이터셋으로 나누기
X_train_model, X_test_model, X_train_meta, X_test_meta, y_train, y_test = train_test_split(
    X_model, X[['PassengerId', 'Name']], y, stratify=y, test_size=0.2, random_state=42
)

# 7️⃣ 모델 정의 및 학습
model = Sequential([
    Dense(16, activation='relu', input_shape=(X_train_model.shape[1],)),
    Dropout(0.2),
    Dense(8, activation='relu'),
    Dense(1, activation='sigmoid')
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(X_train_model, y_train, epochs=50, batch_size=32, validation_split=0.2, verbose=1)

# 8️⃣ 전체 입력 데이터셋의 생존 확률 구하기
y_pred_probs_all = model.predict(X_model)

# 9️⃣ 원본 데이터와 생존 확률 합치기
df['Survival_Probability'] = y_pred_probs_all

# 🔟 결과 출력 (앞 10개만 예시)
print("\n🎯 전체 입력 데이터셋 생존 확률 (앞 10개):")
for idx, row in df.head(10).iterrows():
    print(f"PassengerId: {row['PassengerId']}, Name: {row['Name']}, 생존 확률: {row['Survival_Probability']:.4f}")

# 1️⃣1️⃣ (선택) CSV 파일로 저장
# df[['PassengerId', 'Name', 'Survival_Probability']].to_csv('titanic_survival_probabilities.csv', index=False)


Epoch 1/50


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X['Age'].fillna(X['Age'].mean(), inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['Age'].fillna(X['Age'].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) 

[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step - accuracy: 0.4847 - loss: 0.8372 - val_accuracy: 0.6333 - val_loss: 0.6874
Epoch 2/50
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.6661 - loss: 0.6697 - val_accuracy: 0.7000 - val_loss: 0.5971
Epoch 3/50
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.6860 - loss: 0.6256 - val_accuracy: 0.7238 - val_loss: 0.5522
Epoch 4/50
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.6842 - loss: 0.5914 - val_accuracy: 0.7571 - val_loss: 0.5193
Epoch 5/50
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.7059 - loss: 0.5799 - val_accuracy: 0.7810 - val_loss: 0.4964
Epoch 6/50
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.7379 - loss: 0.5249 - val_accuracy: 0.7905 - val_loss: 0.4770
Epoch 7/50
[1m27/27[0m [32m━━━━━━━━━━━━━━━

In [10]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer   # 결측치 처리용

# 1️⃣ 데이터 불러오기 ─ 파일 경로를 자신의 CSV 경로로 바꿔 주세요
df = pd.read_csv("titanic1309.csv")

# 2️⃣ 특성 리스트
features = ["Pclass", "Sex", "Age", "SibSp", "Parch", "Fare", "Embarked"]

# 3️⃣ 범주형으로 다룰 열 지정
categorical_feats = ["Pclass", "Sex", "Embarked"]   # Pclass도 범주처럼 취급

def compute_survival_prob(feature: str, data: pd.DataFrame) -> pd.Series:
    """
    단일 feature만 사용해 생존 확률을 계산한 뒤,
    (행 수,) 모양의 시리즈를 반환한다.
    """
    X = data[[feature]].copy()
    y = data["Survived"]

    # ── 전처리 파이프라인 구성 ─────────────────────────────────── #
    if feature in categorical_feats:                          # ▸ 범주형
        preprocessor = ColumnTransformer(
            [("cat",
              Pipeline([
                  ("impute", SimpleImputer(strategy="most_frequent")),
                  ("ohe", OneHotEncoder(drop="first"))
              ]),
              [feature])],
            remainder="passthrough"
        )
    else:                                                     # ▸ 수치형
        preprocessor = ColumnTransformer(
            [("num",
              Pipeline([
                  ("impute", SimpleImputer(strategy="mean")),
                  ("scale", StandardScaler())
              ]),
              [feature])],
            remainder="passthrough"
        )
    # ───────────────────────────────────────────────────────── #

    model = Pipeline(
        [("prep", preprocessor),
         ("clf", LogisticRegression(max_iter=1000))]
    )

    model.fit(X, y)
    prob = model.predict_proba(X)[:, 1]   # 생존 클래스(1)의 확률
    return pd.Series(prob, name=f"{feature}_Survival_Prob")

# 4️⃣ 각 특성별 생존 확률 계산 & 원본 데이터프레임에 추가
for feat in features:
    df[f"{feat}_Survival_Prob"] = compute_survival_prob(feat, df)

# 5️⃣ 확인
print(df[[*features, *[f"{f}_Survival_Prob" for f in features]]].head())

# 6️⃣ 필요하다면 저장
# df.to_csv("titanic_with_featurewise_probs.csv", index=False)


   Pclass     Sex   Age  SibSp  Parch     Fare Embarked  Pclass_Survival_Prob  \
0       3    male  22.0      1      0   7.2500        S              0.271143   
1       1  female  38.0      1      0  71.2833        C              0.570291   
2       3  female  26.0      0      0   7.9250        S              0.271143   
3       1  female  35.0      1      0  53.1000        S              0.570291   
4       3    male  35.0      0      0   8.0500        S              0.271143   

   Sex_Survival_Prob  Age_Survival_Prob  SibSp_Survival_Prob  \
0           0.133316           0.391650             0.377935   
1           0.819024           0.362310             0.377935   
2           0.819024           0.384232             0.376837   
3           0.819024           0.367741             0.377935   
4           0.133316           0.367741             0.376837   

   Parch_Survival_Prob  Fare_Survival_Prob  Embarked_Survival_Prob  
0             0.353735            0.311966                0

In [12]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.metrics import accuracy_score

# 1️⃣ 데이터 로드
df = pd.read_csv('titanic1309.csv')

# 2️⃣ 필요한 열 선택
features = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']
target = 'Survived'

# 3️⃣ 전처리: 결측치 채우기
df['Age'].fillna(df['Age'].mean(), inplace=True)
df['Embarked'].fillna(df['Embarked'].mode()[0], inplace=True)

# 4️⃣ 범주형 변수 원-핫 인코딩
encoder = OneHotEncoder(drop='first', sparse=False)
categorical = ['Pclass', 'Sex', 'Embarked']
encoded = encoder.fit_transform(df[categorical])

# 인코딩된 열 이름 다시 생성
encoded_df = pd.DataFrame(encoded, columns=encoder.get_feature_names_out(categorical))
df = pd.concat([df.drop(columns=categorical), encoded_df], axis=1)

# 5️⃣ 표준화 (스케일링)
scaler = StandardScaler()
numerical = ['Age', 'SibSp', 'Parch', 'Fare']
df[numerical] = scaler.fit_transform(df[numerical])

# 6️⃣ 각 Feature별로 개별 모델 생성 & 예측
X = df.drop(columns=target)
y = df[target]

# 피처별 생존 확률 예측 저장
feature_probabilities = {}

for feature in X.columns:
    # 개별 Feature만 사용
    X_feature = X[[feature]]
    
    # 학습/테스트 분리
    X_train, X_test, y_train, y_test = train_test_split(X_feature, y, test_size=0.2, random_state=42)
    
    # 로지스틱 회귀 모델 학습
    model = LogisticRegression()
    model.fit(X_train, y_train)
    
    # 확률 예측
    y_prob = model.predict_proba(X_test)[:, 1]  # 생존 확률 (1의 확률)
    
    # 평균 생존 확률 계산
    mean_prob = y_prob.mean()
    feature_probabilities[feature] = mean_prob

# 결과 출력
print("\n🔎 각 Feature별 평균 생존 확률:")
for feature, prob in feature_probabilities.items():
    print(f"{feature}: {prob:.4f}")


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Age'].fillna(df['Age'].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Embarked'].fillna(df['Embarked'].mode()[0], inplace=True)


TypeError: OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'