In [None]:
!pip install ace_tools_open


In [None]:
!pip list



Package                               Version
------------------------------------- -------------------
absl-py                               1.4.0
absolufy-imports                      0.3.1
accelerate                            1.10.1
ace_tools_open                        0.1.0
aiofiles                              24.1.0
aiohappyeyeballs                      2.6.1
aiohttp                               3.13.0
aiosignal                             1.4.0
alabaster                             1.0.0
albucore                              0.0.24
albumentations                        2.0.8
ale-py                                0.11.2
alembic                               1.16.5
altair                                5.5.0
annotated-types                       0.7.0
antlr4-python3-runtime                4.9.3
anyio                                 4.11.0
anywidget                             0.9.18
argon2-cffi                           25.1.0
argon2-cffi-bindings                  25.1.0
array_

In [6]:
!pip install pytorch_tabnet==4.1.0



In [None]:
!pip install tabpfn==2.2.1

In [None]:
!pip install torch==2.5.1

In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, balanced_accuracy_score, f1_score
from itertools import product
from pytorch_tabnet.tab_model import TabNetClassifier
from pytorch_tabnet.pretraining import TabNetPretrainer
import torch
from sklearn.isotonic import IsotonicRegression


#위에 있는 ace_tools_open, pytorch_tabnet, torch 설치 후 런타임 유형에서 python3, 하드웨어 가속기 cpu, 런타임 버전 2025.10으로 선택


# 데이터 불러오기
data = pd.read_csv("/content/drive/MyDrive/train_val_t.csv")
test = pd.read_csv("/content/drive/MyDrive/test_t.csv")
weight = pd.read_csv("/content/drive/MyDrive/optimized_weights.csv") # 이 파일은 Epl 폴더 안에 있음




features =  ['away_prob_5', 'HTAG_5', 'B365H', 'PSH', 'HSRA', 'ASRA']

target = 'result'

X = data[features].values # train data
y = data[target].replace({'home':0,'away':1,'draw':2}).values


X_t = test[features].values #test data
y_t = test[target].replace({'home':0,'away':1,'draw':2}).values

kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
# TabNet 하이퍼파라미터 후보
n_d_list = [2]
n_a_list = [4]
n_steps_list = [4]
gamma_list = [0.9]
lr_list = [0.04]
batch_size_list = [32]
w0, w1, w2 = weight.iloc[0,1], weight.iloc[0,2], weight.iloc[0,3]
print(w0,w1,w2)

results = []

# 가능한 모든 조합
param_combinations = list(product(n_d_list, n_a_list, n_steps_list, gamma_list, lr_list, batch_size_list))

for n_d, n_a, n_steps, gamma, lr, batch_size in param_combinations:
    try:
        #pretrain
        pretrainer = TabNetPretrainer(
            n_d=n_d,
            n_a=n_a,
            n_steps=n_steps,
            gamma=gamma,
            mask_type='sparsemax',
            optimizer_params={'lr': lr},
            verbose=0,
            seed=42
        )

        pretrainer.fit(
            X_train=X,
            eval_set=[X],
            max_epochs=100,
            batch_size=batch_size,
            virtual_batch_size=16,
            patience=10,
            num_workers=0,
            drop_last=False
        )

        acc_scores = []
        bal_acc_scores = []
        f1_scores = []
        fold_num=0
        for train_idx, test_idx in kf.split(X, y):
            X_train, X_test = X[train_idx], X[test_idx]
            y_train, y_test = y[train_idx], y[test_idx]

            model = TabNetClassifier(
              n_d=n_d,
              n_a=n_a,
              n_steps=n_steps,
              gamma=gamma,
              #optimizer_fn=None,
              optimizer_params={"lr": lr},
              mask_type='sparsemax',
              # scheduler_params={"step_size":20, "gamma":0.9},
              # scheduler_fn=torch.optim.lr_scheduler.StepLR,
              verbose=0,
              seed=42
            )


            # 학습
            model.fit(
                X_train, y_train,
                eval_set=[(X_test, y_test)],
                eval_name=['valid'],
                eval_metric=['accuracy'],
                max_epochs=100,
                patience=10,
                batch_size=batch_size,
                virtual_batch_size=16,
                num_workers=0,
                drop_last=False,
                from_unsupervised=pretrainer #pretrain load
            )

            # 예측
            model.network.eval()
            y_pred = model.predict(X_test)


            proba=model.predict_proba(X_test)
            adjusted_proba = np.array([
                proba[:, 0] * w0,
                proba[:, 1] * w1,
                proba[:, 2] * w2
            ]).T # (N, 3) 형태로 변환

            # 가장 높은 조정된 확률을 가진 클래스를 예측값으로 선택
            y_pred_adjusted = np.argmax(adjusted_proba, axis=1)

            acc_scores.append(accuracy_score(y_test, y_pred_adjusted))
            bal_acc_scores.append(balanced_accuracy_score(y_test, y_pred_adjusted))
            f1_scores.append(f1_score(y_test, y_pred_adjusted, average='weighted'))



        # 교차검증 평균
        avg_acc = np.mean(acc_scores)
        avg_bal = np.mean(bal_acc_scores)
        avg_f1 = np.mean(f1_scores)

        model_t = TabNetClassifier(
              n_d=n_d,
              n_a=n_a,
              n_steps=n_steps,
              gamma=gamma,
              #optimizer_fn=None,
              optimizer_params={"lr": lr},
              mask_type='sparsemax',
              # scheduler_params={"step_size":20, "gamma":0.9},
              # scheduler_fn=torch.optim.lr_scheduler.StepLR,
              verbose=0,
              seed=42
        )


        # 전체 데이터 학습
        model_t.fit(
            X, y,
            max_epochs=100,
            patience=10,
            batch_size=batch_size,
            virtual_batch_size=16,
            num_workers=0,
            drop_last=False,
            from_unsupervised=pretrainer  #pretrain load
        )

        # 테스트 예측
        model_t.network.eval()
        y_pred_t = model_t.predict(X_t)
        # print(y_pred_t)

        proba=model_t.predict_proba(X_t)
        # print(proba)


        adjusted_proba = np.array([
                proba[:, 0] * w0,
                proba[:, 1] * w1,
                proba[:, 2] * w2
            ]).T # (N, 3) 형태로 변환

        # 가장 높은 조정된 확률을 가진 클래스를 예측값으로 선택
        y_pred_t_adjusted = np.argmax(adjusted_proba, axis=1)


        acc_t = accuracy_score(y_t, y_pred_t_adjusted)
        bal_t = balanced_accuracy_score(y_t, y_pred_t_adjusted)
        f1_t = f1_score(y_t, y_pred_t_adjusted, average='weighted')


        results.append((n_d, n_a, n_steps, gamma, lr, batch_size, avg_acc, avg_bal, avg_f1, acc_t, bal_t, f1_t))


    except Exception as e:
        print(f"Error with params {n_d, n_a, n_steps, gamma, lr, batch_size} due to {e}")

# 결과 데이터프레임
results_df = pd.DataFrame(
    results,
    columns=['n_d','n_a','n_steps','gamma','lr','batch_size',
             'avg_acc','avg_bal','avg_f1','acc_t','bal_t','f1_t']
)


# 정렬
results_df = results_df.sort_values(by='avg_acc', ascending=False)

# 출력
import ace_tools_open as tools
tools.display_dataframe_to_user(name='TabNet Hyperparameter Tuning Results', dataframe=results_df)


# model_t.save_model("/content/drive/MyDrive/tabnet_5815")

# weights_df = pd.DataFrame({
#     'w0_home': [w0],
#     'w1_away': [w1],
#     'w2_draw': [w2]
# })

# weights_df.to_csv("/content/drive/MyDrive/optimized_weights.csv")



  y = data[target].replace({'home':0,'away':1,'draw':2}).values
  y_t = test[target].replace({'home':0,'away':1,'draw':2}).values


1.7857142857142856 1.816326530612245 1.816326530612245

Early stopping occurred at epoch 16 with best_epoch = 6 and best_val_0_unsup_loss_numpy = 1.171280026435852





Early stopping occurred at epoch 18 with best_epoch = 8 and best_valid_accuracy = 0.54441





Early stopping occurred at epoch 23 with best_epoch = 13 and best_valid_accuracy = 0.58059





Early stopping occurred at epoch 35 with best_epoch = 25 and best_valid_accuracy = 0.55592





Early stopping occurred at epoch 24 with best_epoch = 14 and best_valid_accuracy = 0.55757





Early stopping occurred at epoch 30 with best_epoch = 20 and best_valid_accuracy = 0.56743




TabNet Hyperparameter Tuning Results


0
Loading ITables v2.5.2 from the internet...  (need help?)
