In [None]:
# 대학입시 합격 예측 딥러닝 실습
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.utils import to_categorical
import tensorflow as tf

In [None]:
# 1. Load the data
# `url` is a relative path, so the CSV is resolved against the notebook's
# current working directory.
url = 'university_admission.csv'

df = pd.read_csv(url)
# Leave the preview as the cell's last expression so Jupyter renders it as a
# formatted table instead of the wrapped plain text that print() produces.
df.head()

In [None]:
# 2. Select the input features (X) and the target (y)
feature_columns = ['Exam_Score', 'School_Grade', 'Absent_Days', 'University_Level']
target_column = 'Pass'

# Both are extracted from the DataFrame as plain arrays via `.values`.
X = df[feature_columns].values
y = df[target_column].values

In [None]:
# 3. Split the data and standardize the features
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Hold out 20% of the rows for testing.
# - random_state pins the shuffle so the split (and every metric computed
#   from it further down) is the same after Restart Kernel -> Run All.
# - stratify=y keeps the proportion of each target class the same in the
#   train and test partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Fit the scaler on the training rows only, then reuse those statistics for
# the test rows so no information from the test set leaks into preprocessing.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [None]:
# 4. Build and train the deep-learning model (example)
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input

# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialization and batch shuffling are repeatable between runs.
# NOTE(review): requires a TensorFlow version that ships
# tf.keras.utils.set_random_seed — confirm against the environment.
tf.keras.utils.set_random_seed(42)

# Small fully connected binary classifier: two ReLU hidden layers followed by
# a single sigmoid unit. The input width is taken from the number of feature
# columns, declared through an explicit Input layer rather than the
# `input_shape=` argument on the first Dense layer.
model = Sequential([
    Input(shape=(X_train.shape[1],)),
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid'),
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Keep the returned History object so the per-epoch loss/accuracy can be
# inspected or plotted later; validation_split reserves part of the training
# data for validation during fitting.
history = model.fit(
    X_train_scaled,
    y_train,
    epochs=20,
    batch_size=16,
    validation_split=0.2,
)

In [None]:
# 5. Prediction and evaluation
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    roc_curve,
    auc,
    confusion_matrix,
)

# Flatten the model output into a 1-D vector of predicted scores, then turn
# it into hard 0/1 labels using a 0.5 cut-off.
y_pred_prob = model.predict(X_test_scaled).flatten()
y_pred = np.greater_equal(y_pred_prob, 0.5).astype(int)

In [None]:
# Compute the evaluation metrics
# The threshold-based scores all compare the true labels with the hard 0/1
# predictions, so they are evaluated in one pass over the scoring functions.
acc, prec, rec, f1 = (
    score_fn(y_test, y_pred)
    for score_fn in (accuracy_score, precision_score, recall_score, f1_score)
)

# The ROC curve is built from the continuous predicted scores, and the AUC
# is the area under that curve.
fpr, tpr, thresholds = roc_curve(y_test, y_pred_prob)
roc_auc = auc(fpr, tpr)

In [None]:
# Visualize the metrics
fig = plt.figure(figsize=(12, 8))

# 1. Confusion matrix (top-left panel)
ax_cm = fig.add_subplot(2, 2, 1)
cm = confusion_matrix(y_test, y_pred)
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax_cm)
ax_cm.set_title('Confusion Matrix')
ax_cm.set_xlabel('Predicted')
ax_cm.set_ylabel('Actual')

# 2. ROC curve (top-right panel), with the diagonal as a visual reference
ax_roc = fig.add_subplot(2, 2, 2)
ax_roc.plot(fpr, tpr, label=f'AUC = {roc_auc:.2f}')
ax_roc.plot([0, 1], [0, 1], 'k--')
ax_roc.set_xlabel('False Positive Rate')
ax_roc.set_ylabel('True Positive Rate')
ax_roc.set_title('ROC Curve')
ax_roc.legend()

# 3. Metrics summary text (bottom row, axes hidden so only the text shows)
ax_text = fig.add_subplot(2, 1, 2)
ax_text.axis('off')
summary = f'Accuracy  : {acc:.2f}\nPrecision : {prec:.2f}\nRecall    : {rec:.2f}\nF1-Score  : {f1:.2f}\nROC-AUC   : {roc_auc:.2f}'
ax_text.text(0.1, 0.5, summary, fontsize=14, va='center')

fig.tight_layout()
plt.show()

# Notes on the metrics
# - Accuracy  : share of all predictions that are correct
# - Precision : of the cases predicted as 'pass', the share that actually passed
# - Recall    : of the actual passes, the share the model correctly identified
# - F1-Score  : balance between Precision and Recall
# - ROC-AUC   : overall classification performance across thresholds (closer to 1 is better)