In [14]:
import pandas as pd
import numpy as np

# Seed NumPy's legacy global generator so the synthetic dataset is reproducible.
np.random.seed(1234)

n_samples = 100

# All three features are drawn from the same seeded global stream, so the
# order of these calls determines the values. `high` is exclusive.
study_hours = np.random.randint(low=1, high=11, size=n_samples)   # 1 to 10 hours
sleep_hours = np.random.randint(low=4, high=10, size=n_samples)   # 4 to 9 hours
exercise = np.random.randint(low=0, high=2, size=n_samples)       # 0 or 1

# Label rule: a sample passes when at least two of the three conditions hold.
conditions_met = np.sum(
    [
        study_hours >= 6,
        sleep_hours >= 6,
        exercise == 1,
    ],
    axis=0,
)
passed = (conditions_met >= 2).astype(int)   # stored as 0/1 rather than bool

# Assemble features and label into one frame (column order is significant).
df = pd.DataFrame(dict(
    study_hours=study_hours,
    sleep_hours=sleep_hours,
    exercise=exercise,
    passed=passed,
))

# Preview the first five rows.
df.head(5)

Unnamed: 0,study_hours,sleep_hours,exercise,passed
0,4,8,1,1
1,7,9,1,1
2,6,5,0,0
3,5,9,0,0
4,9,7,1,1


In [15]:
# Separate the three predictor columns (X) from the pass/fail label (y).
feature_columns = ['study_hours', 'sleep_hours', 'exercise']
X = df[feature_columns]
y = df['passed']

In [16]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for evaluation; the fixed random_state makes the
# shuffle (and therefore the split) repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

In [17]:
from sklearn.ensemble import RandomForestClassifier

# Train the model.
# n_estimators = number of trees that make up the random forest.
# fit() returns the estimator itself, so construction and training are chained.
model = RandomForestClassifier(
    n_estimators=100,
    random_state=1234,
).fit(X_train, y_train)

# Echo the fitted estimator so the cell shows its configuration.
model

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=1234,
                       verbose=0, warm_start=False)

In [18]:
# Prediction: class labels (0 = fail, 1 = pass) for the held-out test rows.
y_pred = model.predict(X=X_test)
y_pred

array([1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1,
       0, 1, 1])

In [19]:
# Evaluation
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score

# (printed label, metric function, extra keyword arguments), in display order.
# pos_label=1 => class 1 is treated as the positive class (this is the default).
metric_table = [
    ("Accuracy :", accuracy_score, {}),
    ("Recall :", recall_score, {}),
    ("Precision :", precision_score, {}),
    ("F1 Score:", f1_score, {"pos_label": 1}),
]

for label, metric_fn, extra_kwargs in metric_table:
    print(label, metric_fn(y_test, y_pred, **extra_kwargs))

Accuracy : 0.92
Recall : 0.9375
Precision : 0.9375
F1 Score: 0.9375


In [20]:
# Feature importance: one score per column of the training matrix, in the
# same order as the columns the model was fit on.
feature_importances = model.feature_importances_
feature_importances

array([0.38290049, 0.30359123, 0.31350828])

In [23]:
# Estimate, for each test sample, the probability of belonging to each class.
# One row per sample, one column per class (ordered as in model.classes_).
probs = model.predict_proba(X=X_test)
probs

array([[0.  , 1.  ],
       [1.  , 0.  ],
       [0.69, 0.31],
       [0.02, 0.98],
       [0.98, 0.02],
       [0.01, 0.99],
       [0.  , 1.  ],
       [0.49, 0.51],
       [0.85, 0.15],
       [0.01, 0.99],
       [0.7 , 0.3 ],
       [1.  , 0.  ],
       [0.02, 0.98],
       [0.04, 0.96],
       [1.  , 0.  ],
       [0.  , 1.  ],
       [0.  , 1.  ],
       [0.09, 0.91],
       [0.  , 1.  ],
       [0.05, 0.95],
       [0.98, 0.02],
       [0.  , 1.  ],
       [0.86, 0.14],
       [0.02, 0.98],
       [0.02, 0.98]])