In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from pytorch_tabular import TabularModel
from pytorch_tabular.models import GANDALFConfig
from pytorch_tabular.config import (
    DataConfig,
    OptimizerConfig,
    TrainerConfig,
)
from sklearn.preprocessing import OrdinalEncoder
from sklearn.model_selection import train_test_split


In [2]:
data = pd.read_csv("reshape_total_riot.csv")
df = pd.DataFrame(data)
df.head()

Unnamed: 0,championId,tier,position,teamId,win,kills,deaths,assists,totalDamageDealtToChampions,visionScore,...,perk3.9,perk4.9,primaryStyle.9,perk5.9,perk6.9,subStyle.9,gameMode.9,gameVersion.9,gameDuration.9,gameId.9
0,35,SILVER,UTILITY,100,True,8,4,16,21868,57,...,8210,8237,8200,8347,8321,8300,CLASSIC,13.6.499.7758,1813,6432686803
1,21,SILVER,UTILITY,100,True,3,8,20,21252,94,...,8321,8347,8300,8138,8135,8100,CLASSIC,13.6.499.7758,1988,6432688742
2,64,SILVER,JUNGLE,100,False,9,5,1,11452,23,...,8210,8237,8200,8473,8453,8400,CLASSIC,13.6.499.7758,1468,6432689755
3,67,GOLD,TOP,100,True,5,2,3,16028,22,...,8138,8106,8100,8446,8473,8400,CLASSIC,13.6.499.7758,1415,6432690956
4,106,SILVER,JUNGLE,100,True,9,4,19,18319,34,...,8210,8237,8200,8345,8347,8300,CLASSIC,13.6.499.7758,2157,6432690999


In [3]:
# 범주형 및 수치형 데이터 처리
df['target'] = df['win'].apply(lambda x: 1 if x == True else 0)
df = df.drop(columns=['win', 'win.1', 'win.2', 'win.3', 'win.4', 'win.5', 'win.6', 'win.7', 'win.8', 'win.9'])

categorical_columns = [ col for col in df.columns if df[col].dtype == "object"]
numerical_columns = [ col for col in df.columns if df[col].dtype != "object"]

numerical_columns.remove('target')

encoder = OrdinalEncoder()
df[categorical_columns] = encoder.fit_transform(df[categorical_columns])

# 수치형 데이터 스케일링
scaler = StandardScaler()
df[numerical_columns] = scaler.fit_transform(df[numerical_columns])



train_df, valid_df = train_test_split(df, test_size=0.2, random_state=42)
print(train_df.head())
# print(valid_df.head())


        championId  tier  position  teamId     kills    deaths   assists  \
594831   -0.698368   4.0       2.0     0.0 -0.182268 -0.539462 -0.251538   
446276   -0.482053   4.0       3.0     0.0  2.586362 -0.238031  0.112831   
539922    3.106999   2.0       4.0     0.0  0.030703 -0.539462 -0.798090   
73342    -0.504126   0.0       3.0     0.0 -0.395240 -0.238031  0.659383   
68163    -0.746928   3.0       4.0     0.0  0.456646 -0.539462 -0.251538   

        totalDamageDealtToChampions  visionScore  inhibitorKills  ...  \
594831                    -0.371710     0.427332         -0.3658  ...   
446276                     1.619904    -0.728404         -0.3658  ...   
539922                    -0.548631    -0.913322         -0.3658  ...   
73342                      0.475364    -0.127421         -0.3658  ...   
68163                      0.399051    -0.312339         -0.3658  ...   

         perk4.9  primaryStyle.9   perk5.9   perk6.9  subStyle.9  gameMode.9  \
594831 -1.285516       -

In [4]:
train_df['target']

594831    0
446276    1
539922    1
73342     0
68163     1
         ..
110268    0
259178    0
365838    1
131932    0
121958    0
Name: target, Length: 487322, dtype: int64

In [5]:
# 데이터 설정
data_config = DataConfig(
    target=['target'],             # 타겟 변수
    continuous_cols=numerical_columns,    # 수치형 변수
    categorical_cols=categorical_columns  # 범주형 변수
)

# 모델 설정
model_config = GANDALFConfig(
    task="classification",
    gflu_stages=6,
    gflu_feature_init_sparsity=0.3,
    gflu_dropout=0.0,
    learning_rate=1e-3,
)

# 학습 설정
trainer_config = TrainerConfig(
    auto_lr_find=True,  # Learning rate 자동 탐색
    batch_size=128,
    max_epochs=100,
    min_epochs=5,
)

# 모델 초기화 및 학습
tabular_model = TabularModel(
    data_config=data_config,
    model_config=model_config,
    optimizer_config=OptimizerConfig(),
    trainer_config=trainer_config,
    verbose=True
)


In [6]:
missing_count = df.isnull().sum()
missing_count

championId        0
tier              0
position          0
teamId            0
kills             0
                 ..
gameMode.9        0
gameVersion.9     0
gameDuration.9    0
gameId.9          0
target            0
Length: 591, dtype: int64

In [7]:
tabular_model.fit(train=train_df, validation=valid_df)

tabular_model.save_model("model.pth")  # 학습된 모델 저장

Seed set to 42


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X_encoded[col].fillna(self._imputed, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X_encoded[col].fillna(self._imputed, inplace=True)


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


c:\Users\a\AppData\Local\pypoetry\Cache\virtualenvs\tabulr_transformers-5PYuSfOC-py3.11\Lib\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:639: Checkpoint directory D:\silla\tab_tf\saved_models exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
c:\Users\a\AppData\Local\pypoetry\Cache\virtualenvs\tabulr_transformers-5PYuSfOC-py3.11\Lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\a\AppData\Local\pypoetry\Cache\virtualenvs\tabulr_transformers-5PYuSfOC-py3.11\Lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve per

Finding best initial lr:   0%|          | 0/100 [00:00<?, ?it/s]

Trainer was signaled to stop but the required `min_epochs=5` or `min_steps=None` has not been met. Training will continue...
`Trainer.fit` stopped: `max_steps=100` reached.
Learning rate set to 0.0003019951720402019
Restoring states from the checkpoint path at d:\silla\tab_tf\.lr_find_128aa016-9b5c-489b-9ec1-0f9d134a76fe.ckpt
Restored all states from the checkpoint at d:\silla\tab_tf\.lr_find_128aa016-9b5c-489b-9ec1-0f9d134a76fe.ckpt


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Output()

: 