In [6]:
# Prerequisite: pd.read_excel needs openpyxl to read .xlsx files. Install once with: %pip install openpyxl

In [28]:
# 데이터 처리
import pandas as pd
import numpy as np

# 시각화
import matplotlib.pyplot as plt
import seaborn as sns

# 모델 및 평가
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score, confusion_matrix, roc_curve, auc
from sklearn.inspection import permutation_importance

# 부스터 모델
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier

리그 오브 레전드(LoL) 유저들의 게임 데이터를 기반으로 승패를 예측하는 모델 개발

1. 데이터 불러오기
2. 데이터 전처리
   - NULL 값 처리
   - 더미변수
3. 머신러닝: 로지스틱 회귀, 랜덤 포레스트, 의사결정나무(Decision Tree), XGBoost, LightGBM
4. 성능 평가: ROC 곡선, AUC

# 데이터 불러오기

In [60]:
# Load the raw match export from the Excel workbook into a DataFrame.
df = pd.read_excel(io='league_data.xlsx')

In [61]:
# Peek at the first five rows to sanity-check the load.
df.head(n=5)

Unnamed: 0,game_id,game_start_utc,game_duration,game_mode,game_type,game_version,map_id,platform_id,queue_id,participant_id,...,final_magicPen,final_magicPenPercent,final_magicResist,final_movementSpeed,final_omnivamp,final_physicalVamp,final_power,final_powerMax,final_powerRegen,final_spellVamp
0,3727443167,2025-01-15 14:56:00,1714,CLASSIC,MATCHED_GAME,15.1.649.4112,11,EUN1,420,5,...,0,0,48,385,0,0,799,1134,147,0
1,3726377460,2025-01-13 10:50:00,1300,CLASSIC,MATCHED_GAME,15.1.648.3927,11,EUN1,420,5,...,0,0,38,390,0,0,970,970,105,0
2,3729643655,2025-01-19 18:15:00,2019,CLASSIC,MATCHED_GAME,15.1.649.4112,11,EUN1,420,2,...,0,0,121,431,0,0,10000,10000,0,0
3,3729915593,2025-01-20 01:27:00,1625,CLASSIC,MATCHED_GAME,15.1.649.4112,11,EUN1,420,8,...,12,0,47,380,0,0,1122,1596,37,0
4,3729901665,2025-01-20 00:40:00,1542,CLASSIC,MATCHED_GAME,15.1.649.4112,11,EUN1,420,10,...,0,0,40,534,0,0,1025,1025,109,0


In [62]:
# Summarize column names, non-null counts and dtypes of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40410 entries, 0 to 40409
Data columns (total 94 columns):
 #   Column                                 Non-Null Count  Dtype         
---  ------                                 --------------  -----         
 0   game_id                                40410 non-null  int64         
 1   game_start_utc                         40410 non-null  datetime64[ns]
 2   game_duration                          40410 non-null  int64         
 3   game_mode                              40410 non-null  object        
 4   game_type                              40410 non-null  object        
 5   game_version                           40410 non-null  object        
 6   map_id                                 40410 non-null  int64         
 7   platform_id                            40410 non-null  object        
 8   queue_id                               40410 non-null  int64         
 9   participant_id                         40410 non-null  int64 

In [63]:
# How many rows belong to each game mode (most frequent first).
df.loc[:, 'game_mode'].value_counts()

game_mode
CLASSIC      29400
ARAM          9730
SWIFTPLAY     1280
Name: count, dtype: int64

In [64]:
# Keep only rows whose game_mode is 'CLASSIC' (Summoner's Rift games);
# every other mode is excluded from the analysis.
df = df.loc[df['game_mode'].eq('CLASSIC')]

# 불필요한 컬럼 제거

In [65]:
# Number of rows and columns in the frame as a (rows, columns) tuple.
df.shape

(29400, 94)

In [66]:
# List every column label so the unneeded ones can be picked out for removal.
df.columns

Index(['game_id', 'game_start_utc', 'game_duration', 'game_mode', 'game_type',
       'game_version', 'map_id', 'platform_id', 'queue_id', 'participant_id',
       'puuid', 'summoner_name', 'summoner_id', 'summoner_level',
       'champion_id', 'champion_name', 'team_id', 'win', 'individual_position',
       'team_position', 'lane', 'role', 'kills', 'deaths', 'assists',
       'baron_kills', 'dragon_kills', 'gold_earned', 'gold_spent',
       'total_damage_dealt', 'total_damage_dealt_to_champions',
       'physical_damage_dealt_to_champions', 'magic_damage_dealt_to_champions',
       'true_damage_dealt_to_champions', 'damage_dealt_to_objectives',
       'damage_dealt_to_turrets', 'total_damage_taken',
       'physical_damage_taken', 'magic_damage_taken', 'true_damage_taken',
       'time_ccing_others', 'vision_score', 'wards_placed', 'wards_killed',
       'vision_wards_bought_in_game', 'item0', 'item1', 'item2', 'item3',
       'item4', 'item5', 'item6', 'solo_tier', 'solo_rank', 'sol

In [67]:
# Inspect five randomly chosen rows. The seed is pinned so that
# Restart & Run All displays the same rows every time.
df.sample(n=5, random_state=42)

Unnamed: 0,game_id,game_start_utc,game_duration,game_mode,game_type,game_version,map_id,platform_id,queue_id,participant_id,...,final_magicPen,final_magicPenPercent,final_magicResist,final_movementSpeed,final_omnivamp,final_physicalVamp,final_power,final_powerMax,final_powerRegen,final_spellVamp
27377,3722827419,2025-01-06 20:25:00,1819,CLASSIC,MATCHED_GAME,14.24.644.2327,11,EUN1,400,3,...,12,0,65,400,0,0,1008,1246,35,0
9041,3729192364,2025-01-18 21:33:00,1740,CLASSIC,MATCHED_GAME,15.1.649.4112,11,EUN1,420,9,...,0,0,56,384,0,0,1838,1838,270,0
2621,3727963925,2025-01-16 16:57:00,1352,CLASSIC,MATCHED_GAME,15.1.649.4112,11,EUN1,420,4,...,0,0,44,521,0,0,738,738,33,0
27801,3720571237,2025-01-02 21:36:00,1476,CLASSIC,MATCHED_GAME,14.24.644.2327,11,EUN1,420,1,...,0,0,50,409,0,0,535,656,24,0
11885,3728059126,2025-01-16 19:30:00,1603,CLASSIC,MATCHED_GAME,15.1.649.4112,11,EUN1,420,8,...,44,0,64,423,0,0,200,200,100,0


In [69]:
# Remove columns considered unnecessary for the analysis.
# Each label is listed exactly once, grouped by what it describes.
# 'game_mode' is dropped too: it is constant once only CLASSIC rows remain.
df_drop = df.drop(columns=[
    # match metadata
    'game_start_utc', 'game_type', 'game_version', 'map_id', 'platform_id', 'game_mode',
    # player / account information
    'participant_id', 'puuid', 'summoner_name', 'summoner_id', 'summoner_level',
    # champion and position descriptors
    'champion_id', 'champion_name', 'individual_position', 'lane', 'role',
    # item slots
    'item0', 'item1', 'item2', 'item3', 'item4', 'item5', 'item6',
    # champion-mastery bookkeeping
    'champion_mastery_lastPlayTime', 'champion_mastery_lastPlayTime_utc',
    'champion_mastery_pointsSinceLastLevel', 'champion_mastery_pointsUntilNextLevel',
    'champion_mastery_tokensEarned',
    # final stat that is not used
    'final_abilityHaste',
    # flex-queue stats and solo-queue LP
    'flex_tier', 'flex_rank', 'flex_lp', 'flex_wins', 'flex_losses', 'solo_lp',
])

# 결측치 처리

- team_position 결측치 행 삭제
- solo_* 컬럼(solo_tier, solo_rank, solo_wins, solo_losses)에 결측치가 있는 이유: 솔로 랭크 기록이 없는 언랭(Unranked) 유저가 존재하기 때문.
    - solo_tier 결측치 -> Unranked로 대체
    - solo_rank 결측치 -> 1로 대체 (추후, solo_tier와 합칠 예정)
    - solo_wins, solo_losses 결측치 -> 0으로 대체
- champion_mastery_level, champion_mastery_points -> 평균값 대체


In [72]:
# Check missing values: count nulls per column of df_drop and show only the
# columns that have at least one. The filter is derived from df_drop's own
# counts (not from another frame), so the result always reflects the frame
# being inspected, and the null count is computed only once.
df_drop.isnull().sum().loc[lambda counts: counts > 0]

team_position                16
solo_tier                  4621
solo_rank                  4621
solo_wins                  4621
solo_losses                4621
champion_mastery_level        8
champion_mastery_points       8
dtype: int64

In [None]:
# 