In [10]:
from pytorch_tabular.models import GANDALFConfig
import pandas as pd
from pytorch_tabular import TabularModel
from pytorch_tabular.config import DataConfig, OptimizerConfig, TrainerConfig
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split


In [11]:
# Columns removed before modelling: 'win', 'win.1', ..., 'win.9'
# (presumably one match-outcome label per participant slot -- confirm against the CSV schema).
win_columns = ['win'] + [f'win.{slot}' for slot in range(1, 10)]
riot_data = pd.read_csv("reshape_total_riot.csv").drop(columns=win_columns)

# Partition the remaining columns by dtype: object columns are treated as
# categorical, everything else as numerical.
categorical_columns = []
numerical_columns = []
for col in riot_data.columns:
    if riot_data[col].dtype == "object":
        categorical_columns.append(col)
    else:
        numerical_columns.append(col)

# Map each categorical value to an ordinal code.
encoder = OrdinalEncoder()
riot_data[categorical_columns] = encoder.fit_transform(riot_data[categorical_columns])

# Scale the numerical columns (only those that were non-object before encoding).
scaler = StandardScaler()
riot_data[numerical_columns] = scaler.fit_transform(riot_data[numerical_columns])

In [12]:
# Load the previously saved model from "model.pth"; the cells below use it as the
# teacher whose predictions become the student's soft targets.
loaded_model = TabularModel.load_model("model.pth")

Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.rich_model_summary.RichModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [13]:
# Generate the teacher model's predicted probabilities (soft targets) for every row.
soft_targets = loaded_model.predict(riot_data)
print(soft_targets)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X_encoded[col].fillna(self._imputed, inplace=True)


        0_probability  1_probability  prediction
0            0.000449       0.999551           1
1            0.000534       0.999466           1
2            0.999368       0.000632           0
3            0.000668       0.999332           1
4            0.000869       0.999131           1
...               ...            ...         ...
609148       0.999529       0.000471           0
609149       0.999880       0.000120           0
609150       0.000375       0.999625           1
609151       0.999374       0.000626           0
609152       0.996579       0.003421           0

[609153 rows x 3 columns]


In [19]:
# Substrings identifying the columns the student model is allowed to see.
patterns = ['teamId', 'championId', 'perk', 'primaryStyle', 'subStyle', 'tier']

# Keep every column whose name contains at least one of the substrings,
# preserving the original column order.
selected_columns = []
for column_name in riot_data.columns:
    for pattern in patterns:
        if pattern in column_name:
            selected_columns.append(column_name)
            break

student_data = riot_data[selected_columns]

print(student_data.columns)

Index(['championId', 'tier', 'teamId', 'perk1', 'perk2', 'perk3', 'perk4',
       'primaryStyle', 'perk5', 'perk6',
       ...
       'tier.9', 'teamId.9', 'perk1.9', 'perk2.9', 'perk3.9', 'perk4.9',
       'primaryStyle.9', 'perk5.9', 'perk6.9', 'subStyle.9'],
      dtype='object', length=110)


In [21]:
# Keep a handle on student_data's column index as it stands when this cell runs.
# NOTE(review): not referenced by any other visible cell -- confirm it is still needed.
test_column = student_data.columns

In [15]:

# Convert the teacher's soft targets to a DataFrame (a no-op copy if it already is one).
soft_targets = pd.DataFrame(soft_targets)

# Distillation target: the teacher's predicted probability for class 1.
target_column = '1_probability'

# Attach the target to the student features. Any copy of the target left over
# from a previous run of this cell is dropped first, so re-running the cell
# does not append a duplicate '1_probability' column.
student_data = pd.concat(
    [
        student_data.drop(columns=[target_column], errors='ignore'),
        soft_targets[[target_column]],
    ],
    axis=1,
)

# Build the feature lists WITHOUT the target column. These lists are passed to
# DataConfig as continuous/categorical inputs; leaving the target in them would
# feed the label to the student model as a feature (target leakage).
feature_columns = [col for col in student_data.columns if col != target_column]
categorical_columns = [col for col in feature_columns if student_data[col].dtype == "object"]
numerical_columns = [col for col in feature_columns if student_data[col].dtype != "object"]
print(categorical_columns)
print(numerical_columns)

[]
['championId', 'tier', 'teamId', 'perk1', 'perk2', 'perk3', 'perk4', 'primaryStyle', 'perk5', 'perk6', 'subStyle', 'championId.1', 'tier.1', 'teamId.1', 'perk1.1', 'perk2.1', 'perk3.1', 'perk4.1', 'primaryStyle.1', 'perk5.1', 'perk6.1', 'subStyle.1', 'championId.2', 'tier.2', 'teamId.2', 'perk1.2', 'perk2.2', 'perk3.2', 'perk4.2', 'primaryStyle.2', 'perk5.2', 'perk6.2', 'subStyle.2', 'championId.3', 'tier.3', 'teamId.3', 'perk1.3', 'perk2.3', 'perk3.3', 'perk4.3', 'primaryStyle.3', 'perk5.3', 'perk6.3', 'subStyle.3', 'championId.4', 'tier.4', 'teamId.4', 'perk1.4', 'perk2.4', 'perk3.4', 'perk4.4', 'primaryStyle.4', 'perk5.4', 'perk6.4', 'subStyle.4', 'championId.5', 'tier.5', 'teamId.5', 'perk1.5', 'perk2.5', 'perk3.5', 'perk4.5', 'primaryStyle.5', 'perk5.5', 'perk6.5', 'subStyle.5', 'championId.6', 'tier.6', 'teamId.6', 'perk1.6', 'perk2.6', 'perk3.6', 'perk4.6', 'primaryStyle.6', 'perk5.6', 'perk6.6', 'subStyle.6', 'championId.7', 'tier.7', 'teamId.7', 'perk1.7', 'perk2.7', 'perk3

In [None]:
# Display the list of columns currently treated as numerical features.
numerical_columns

In [16]:
# Hold out 20% of the rows for validation; the fixed random_state makes the split repeatable.
train_df, valid_df = train_test_split(student_data, test_size=0.2, random_state=42)

# Preview the first training rows.
print(train_df.head())

        championId  tier  teamId     perk1     perk2     perk3     perk4  \
594831   -0.698368   4.0     0.0  4.623618 -0.771698 -1.240843 -0.622242   
446276   -0.482053   4.0     0.0 -0.498950 -1.075438  1.009154 -1.286218   
539922    3.106999   2.0     0.0  0.621443 -0.186887 -0.528848  1.902329   
73342    -0.504126   0.0     0.0  0.061246 -0.583561 -1.073315  0.340889   
68163    -0.746928   3.0     0.0 -0.533963  1.422477  1.006827  0.793269   

        primaryStyle     perk5     perk6  ...  teamId.9   perk1.9   perk2.9  \
594831     -0.120937  0.144042 -0.089164  ...       0.0 -0.544399 -1.069639   
446276     -0.842969  0.681787  0.604744  ...       0.0  0.053320 -0.575751   
539922      2.045160  0.133701 -0.044556  ...       0.0  4.573568 -0.764658   
73342       0.601095  0.133701  0.123965  ...       0.0  0.613681 -0.036345   
68163      -0.842969  0.795541 -0.396466  ...       0.0 -0.544399 -1.069639   

         perk3.9   perk4.9  primaryStyle.9   perk5.9   perk6.9  subS

In [17]:
# Data configuration.
# The target must never appear among the input features: numerical_columns /
# categorical_columns are derived from a frame that may already contain the
# target column, so it is filtered out here to prevent target leakage.
target_cols = ['1_probability']
data_config = DataConfig(
    target=target_cols,                                                               # target variable
    continuous_cols=[col for col in numerical_columns if col not in target_cols],     # numerical features
    categorical_cols=[col for col in categorical_columns if col not in target_cols],  # categorical features
)

# Model configuration: GANDALF regressor fitted to the teacher's probability.
model_config = GANDALFConfig(
    task="regression",
    gflu_stages=6,
    gflu_feature_init_sparsity=0.3,
    gflu_dropout=0.0,
    learning_rate=1e-3,
)

# Training configuration.
trainer_config = TrainerConfig(
    auto_lr_find=True,  # automatically search for an initial learning rate
    batch_size=128,
    max_epochs=100
)

# Assemble the model from the configs; training happens in a later cell via fit().
tabular_model = TabularModel(
    data_config=data_config,
    model_config=model_config,
    optimizer_config=OptimizerConfig(),
    trainer_config=trainer_config,
    verbose=True
)


In [18]:
# Train the student model on the training split, validating on the held-out split.
tabular_model.fit(train=train_df, validation=valid_df)

tabular_model.save_model("student_model.pth")  # save the trained model

Seed set to 42


 -0.99978149]' has dtype incompatible with float32, please explicitly cast to a compatible dtype first.
  data.loc[:, self.config.continuous_cols] = self.scaler.fit_transform(
 -1.04306855]' has dtype incompatible with float32, please explicitly cast to a compatible dtype first.
  data.loc[:, self.config.continuous_cols] = self.scaler.transform(data.loc[:, self.config.continuous_cols])


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


c:\Users\a\AppData\Local\pypoetry\Cache\virtualenvs\tabulr_transformers-5PYuSfOC-py3.11\Lib\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:639: Checkpoint directory D:\silla\tab_tf\saved_models exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
c:\Users\a\AppData\Local\pypoetry\Cache\virtualenvs\tabulr_transformers-5PYuSfOC-py3.11\Lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\a\AppData\Local\pypoetry\Cache\virtualenvs\tabulr_transformers-5PYuSfOC-py3.11\Lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve per

Finding best initial lr:   0%|          | 0/100 [00:00<?, ?it/s]

Trainer was signaled to stop but the required `min_epochs=5` or `min_steps=None` has not been met. Training will continue...
`Trainer.fit` stopped: `max_steps=100` reached.
Learning rate set to 0.01445439770745928
Restoring states from the checkpoint path at d:\silla\tab_tf\.lr_find_4b5771fe-5079-42b7-98cd-4bf31ebab48e.ckpt
Restored all states from the checkpoint at d:\silla\tab_tf\.lr_find_4b5771fe-5079-42b7-98cd-4bf31ebab48e.ckpt


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Output()