In [1]:
import pandas as pd
import numpy as np

np.random.seed(1234)

hours_studied = np.random.uniform(0, 10, 100)
hours_slept = np.random.uniform(3, 9, 100)

linear_combination = 0.8 * hours_studied - 0.5 * np.abs(hours_slept - 6) + np.random.normal(0, 0.5, 100)

prob_passed = 1 / (1 + np.exp(-linear_combination))
passed = (prob_passed > 0.5).astype(int)

df = pd.DataFrame({
    'hours_studied': hours_studied,
    'hours_slept': hours_slept,
    'passed': passed
})

df.head()

Unnamed: 0,hours_studied,hours_slept,passed
0,1.915195,7.6027,1
1,6.221088,7.248692,1
2,4.377277,7.781203,1
3,7.853586,6.346565,1
4,7.799758,8.795019,1


In [2]:
from sklearn.model_selection import train_test_split

X = df[['hours_studied', 'hours_slept']]
y = df['passed']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1234)

In [4]:
# 학습
from sklearn.naive_bayes import BernoulliNB

model = BernoulliNB()
model.fit(X_train, y_train)

BernoulliNB(alpha=1.0, binarize=0.0, class_prior=None, fit_prior=True)

In [5]:
# 예측
y_pred = model.predict(X_test)
y_pred

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [6]:
# 평가
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score

print("Accuracy :", accuracy_score(y_test, y_pred))
print("Recall :", recall_score(y_test, y_pred))
print("Precision :", precision_score(y_test, y_pred))

# pos_label=1 => 1이 양성 클래스라고 간주 (default)
print("F1 Score:", f1_score(y_test, y_pred, pos_label=1))

Accuracy : 0.7
Recall : 1.0
Precision : 0.7
F1 Score: 0.8235294117647058
