In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Notebook-wide display and plotting configuration.
pd.set_option("display.max_columns", None)  # no cap on the number of displayed columns
plt.rcParams.update(
    {
        "font.family": "Malgun Gothic",  # font family used for all figure text
        "axes.unicode_minus": False,  # draw minus signs with the ASCII hyphen
    }
)

# Raw dataset; every later cell derives its data from this frame.
df = pd.read_csv("./data/dataset.csv")

In [None]:
from sklearn.model_selection import train_test_split

# Fixed seed so the split, and every metric computed from it, is identical
# after Restart Kernel -> Run All.
SEED = 42

# Drop the first column and every row that contains a missing value.
data = df.iloc[:, 1:].dropna()

# Hold out 20% of the rows. Stratifying on the label keeps the class ratio the
# same in both parts, which matters for a macro-averaged F1 evaluation.
train, test = train_test_split(
    data,
    test_size=0.2,
    random_state=SEED,
    stratify=data["passorfail"],
)

In [None]:
from autogluon.tabular import TabularDataset, TabularPredictor

# Configure the AutoGluon predictor: target column, metric to optimise, and
# the directory passed as the model output path.
predictor = TabularPredictor(
    label="passorfail",
    eval_metric="f1_macro",
    path="./Models",
)

# fit() returns the predictor itself; re-binding keeps the cell free of output.
predictor = predictor.fit(train, presets="best")

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

LABEL = "passorfail"  # target column the predictor was trained on

# Select features and target by name instead of by position, so the evaluation
# stays correct regardless of where the label column sits in the frame.
X_test = test.drop(columns=LABEL)
y_test = test[LABEL]

y_pred = predictor.predict(X_test)

# Sorted union of observed and predicted classes. Passing it explicitly keeps
# the matrix rows/columns and the tick labels in the same order.
class_labels = np.union1d(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred, labels=class_labels)

# Visualize the confusion matrix
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_labels)
disp.plot(cmap="Blues")
disp.ax_.set_title("AutoGluon confusion matrix (held-out test set)")
plt.show()

In [None]:
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier

SEED = 42  # fixed seed so the baseline models are reproducible across runs
LABEL = "passorfail"  # target column

# Select features and target by name once, instead of re-slicing by position
# for every model.
X_train = train.drop(columns=LABEL)
y_train = train[LABEL]

# Baseline classifiers with library defaults apart from the seed.
rfc = RandomForestClassifier(random_state=SEED)
etc = ExtraTreesClassifier(random_state=SEED)
xgb = XGBClassifier(random_state=SEED)
# verbose=0 silences CatBoost's per-iteration training log.
cat = CatBoostClassifier(random_seed=SEED, verbose=0)
lgbm = LGBMClassifier(random_state=SEED)

for model in (rfc, etc, xgb, cat, lgbm):
    model.fit(X_train, y_train)

In [None]:
from sklearn.metrics import roc_curve

LABEL = "passorfail"  # target column

# Compare the models on the held-out split: scores computed on the rows a model
# was fitted on are optimistically biased and say little about generalisation.
X_test = test.drop(columns=LABEL)
y_test = test[LABEL]

# Score of the class in the second predict_proba column, per model.
# AutoGluon returns a DataFrame (hence .iloc); the other models return arrays.
scores = {
    "AutoGluon": predictor.predict_proba(X_test).iloc[:, 1],
    "RandomForest": rfc.predict_proba(X_test)[:, 1],
    "ExtraTrees": etc.predict_proba(X_test)[:, 1],
    "XGBoost": xgb.predict_proba(X_test)[:, 1],
    "Catboost": cat.predict_proba(X_test)[:, 1],
    "LightGBM": lgbm.predict_proba(X_test)[:, 1],
}

## Draw the ROC curves
fig = plt.figure(figsize=(8, 8))
fig.set_facecolor("white")
ax = fig.add_subplot()
for name, score in scores.items():
    # roc_curve returns false positive rate, true positive rate and thresholds;
    # the thresholds are not needed for the plot.
    fpr, tpr, _ = roc_curve(y_test, score)
    ax.plot(fpr, tpr, label=name)
# Diagonal reference line for a model that guesses at random.
ax.plot([0, 1], [0, 1], color="red", label="Random Model")
ax.set_xlabel("False Positive Rate")
ax.set_ylabel("True Positive Rate")
ax.set_title("ROC curves on the held-out test set")
ax.legend()
plt.show()

In [None]:
# Importance is computed on the held-out split (label column included).
# AutoGluon's documentation warns against passing the training data here,
# because the resulting scores are biased by overfitting.
predictor.feature_importance(test)