In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from pytorch_tabular import TabularModel
from pytorch_tabular.models import GANDALFConfig
from pytorch_tabular.config import (
    DataConfig,
    OptimizerConfig,
    TrainerConfig,
)
from sklearn.preprocessing import OrdinalEncoder
from sklearn.model_selection import train_test_split


In [2]:
data = pd.read_csv("reshape_total_riot.csv")
df = pd.DataFrame(data)
df.head()

Unnamed: 0,championName,tier,position,teamId,win,kills,deaths,assists,totalDamageDealtToChampions,visionScore,...,pentaKills.9,neutralMinionsKilled.9,magicDamageDealt.9,magicDamageDealtToChampions.9,magicDamageTaken.9,wardsPlaced.9,gameMode.9,gameVersion.9,gameDuration.9,gameId.9
0,Malzahar,GOLD,MIDDLE,100,True,5,3,3,20429,19,...,0,136,993,892,8459,1,CLASSIC,13.6.499.7758,1415,6432690956
1,Morgana,GOLD,JUNGLE,100,True,5,7,20,29274,88,...,0,8,171895,45746,34326,6,CLASSIC,13.6.499.7758,2288,6432696459
2,Yasuo,BRONZE,BOTTOM,100,True,11,16,14,41681,12,...,0,11,185530,32666,13844,11,CLASSIC,13.6.499.7758,2267,6432704556
3,Kaisa,GOLD,BOTTOM,100,True,11,5,6,36301,34,...,0,24,153672,24773,8447,13,CLASSIC,13.6.499.7758,2320,6432704874
4,Warwick,SILVER,TOP,100,False,7,8,2,12299,9,...,0,0,114107,20033,10507,5,CLASSIC,13.6.499.7758,1489,6432709941


In [3]:
# 범주형 및 수치형 데이터 처리
df['target'] = df['win'].apply(lambda x: 1 if x == True else 0)
df = df.drop(columns=['win', 'win.1', 'win.2', 'win.3', 'win.4', 'win.5', 'win.6', 'win.7', 'win.8', 'win.9'])
df = df.drop(columns=['kda', 'kda.1', 'kda.2', 'kda.3', 'kda.4', 'kda.5', 'kda.6', 'kda.7', 'kda.8', 'kda.9'])

categorical_columns = [ col for col in df.columns if df[col].dtype == "object"]
numerical_columns = [ col for col in df.columns if df[col].dtype != "object"]

numerical_columns.remove('target')

encoder = OrdinalEncoder()
df[categorical_columns] = encoder.fit_transform(df[categorical_columns])

# 수치형 데이터 스케일링
scaler = StandardScaler()
df[numerical_columns] = scaler.fit_transform(df[numerical_columns])



train_df, valid_df = train_test_split(df, test_size=0.2, random_state=42)
print(train_df.head())
# print(valid_df.head())


        championName  tier  position  teamId     kills    deaths   assists  \
187981          33.0   1.0       3.0     0.0  1.734062  0.371929 -0.232525   
67977          148.0   0.0       2.0     0.0 -0.592623 -0.228340 -0.961786   
64272          127.0   0.0       2.0     0.0  0.464961  0.672063  0.679051   
262999          68.0   1.0       2.0     0.0 -0.381106 -0.828610 -0.779471   
120108          72.0   3.0       0.0     0.0  0.041927  1.872602 -0.597155   

        totalDamageDealtToChampions  visionScore  inhibitorKills  ...  \
187981                     1.446699     0.224572       -0.359311  ...   
67977                     -1.131530    -0.708928       -0.359311  ...   
64272                      0.542272     0.224572       -0.359311  ...   
262999                    -0.847232    -0.475553       -0.359311  ...   
120108                     0.279939    -0.522228       -0.359311  ...   

        neutralMinionsKilled.9  magicDamageDealt.9  \
187981               -0.401625        

In [4]:
train_df['target']

187981    1
67977     0
64272     0
262999    1
120108    1
         ..
119879    0
259178    1
131932    0
146867    0
121958    1
Name: target, Length: 239448, dtype: int64

In [5]:
# 데이터 설정
data_config = DataConfig(
    target=['target'],             # 타겟 변수
    continuous_cols=numerical_columns,    # 수치형 변수
    categorical_cols=categorical_columns  # 범주형 변수
)

# 모델 설정
model_config = GANDALFConfig(
    task="classification",
    gflu_stages=6,
    gflu_feature_init_sparsity=0.3,
    gflu_dropout=0.0,
    learning_rate=1e-3,
)

# 학습 설정
trainer_config = TrainerConfig(
    auto_lr_find=True,  # Learning rate 자동 탐색
    batch_size=128,
    max_epochs=100,
    min_epochs=5,
)

# 모델 초기화 및 학습
tabular_model = TabularModel(
    data_config=data_config,
    model_config=model_config,
    optimizer_config=OptimizerConfig(),
    trainer_config=trainer_config,
    verbose=True
)


TypeError: GANDALFConfig.__init__() got an unexpected keyword argument 'layers'

: 

In [None]:
missing_count = df.isnull().sum()
missing_count

championName                     0
tier                             0
position                         0
teamId                           0
kills                            0
deaths                           0
assists                          0
totalDamageDealtToChampions      0
visionScore                      0
inhibitorKills                   0
baronKills                       0
dragonKills                      0
goldEarned                       0
totalMinionsKilled               0
consumablesPurchased             0
champLevel                       0
goldSpent                        0
damageDealtToBuildings           0
damageDealtToObjectives          0
damageDealtToTurrets             0
damageSelfMitigated              0
itemsPurchased                   0
killingSprees                    0
largestCriticalStrike            0
largestKillingSpree              0
largestMultiKill                 0
tripleKills                      0
wardsKilled                      0
unrealKills         

In [None]:
tabular_model.fit(train=train_df, validation=valid_df)

tabular_model.save_model("model.pth")  # 학습된 모델 저장

Seed set to 42


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X_encoded[col].fillna(self._imputed, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X_encoded[col].fillna(self._imputed, inplace=True)


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


c:\Users\a\AppData\Local\pypoetry\Cache\virtualenvs\tabulr_transformers-5PYuSfOC-py3.11\Lib\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:639: Checkpoint directory D:\silla\tab_tf\saved_models exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
c:\Users\a\AppData\Local\pypoetry\Cache\virtualenvs\tabulr_transformers-5PYuSfOC-py3.11\Lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\a\AppData\Local\pypoetry\Cache\virtualenvs\tabulr_transformers-5PYuSfOC-py3.11\Lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve per

Finding best initial lr:   0%|          | 0/100 [00:00<?, ?it/s]

Trainer was signaled to stop but the required `min_epochs=10` or `min_steps=None` has not been met. Training will continue...
`Trainer.fit` stopped: `max_steps=100` reached.
Learning rate set to 0.0002089296130854041
Restoring states from the checkpoint path at d:\silla\tab_tf\.lr_find_130e0847-4173-4923-a98f-db94a7abef05.ckpt
Restored all states from the checkpoint at d:\silla\tab_tf\.lr_find_130e0847-4173-4923-a98f-db94a7abef05.ckpt


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Output()