In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder
import pandas as pd
import tensorflow as tf
import numpy as np

# Import our input dataset
champion_df = pd.read_csv("https://raw.githubusercontent.com/frlinh/game-analysis/main/Resources/csv/championPointsStats.csv")
champion_df.head()

Unnamed: 0,championId,championname,role1,role2,championLevel,attack,defense,magic,difficulty,hp,...,armor,spellblock,attackrange,hpregen,mpregen,crit,attackdamage,attackspeed,summoners_playing,total_championpoints
0,157,Yasuo,Fighter,Assassin,7,8.0,4.0,4.0,10.0,490.0,...,30.0,32.0,175.0,6.5,0.0,0.0,60.0,0.697,1146,282052647
1,81,Ezreal,Marksman,Mage,7,7.0,2.0,6.0,7.0,530.0,...,22.0,30.0,550.0,4.0,8.5,0.0,60.0,0.625,1262,206773062
2,64,LeeSin,Fighter,Assassin,7,8.0,5.0,3.0,6.0,575.0,...,33.0,32.1,125.0,7.5,50.0,0.0,70.0,0.651,1028,197462209
3,67,Vayne,Marksman,Assassin,7,10.0,1.0,1.0,8.0,515.0,...,23.0,30.0,550.0,3.5,6.972,0.0,60.0,0.658,1087,196539470
4,202,Jhin,Marksman,Mage,7,10.0,2.0,6.0,6.0,585.0,...,24.0,30.0,550.0,3.75,6.0,0.0,59.0,0.625,1075,193552573


In [2]:
# Drop all null values
cleaned_champion_df = champion_df.dropna()
cleaned_champion_df

Unnamed: 0,championId,championname,role1,role2,championLevel,attack,defense,magic,difficulty,hp,...,armor,spellblock,attackrange,hpregen,mpregen,crit,attackdamage,attackspeed,summoners_playing,total_championpoints
0,157,Yasuo,Fighter,Assassin,7,8.0,4.0,4.0,10.0,490.0,...,30.0,32.0,175.0,6.50,0.000,0.0,60.0,0.697,1146,282052647
1,81,Ezreal,Marksman,Mage,7,7.0,2.0,6.0,7.0,530.0,...,22.0,30.0,550.0,4.00,8.500,0.0,60.0,0.625,1262,206773062
2,64,LeeSin,Fighter,Assassin,7,8.0,5.0,3.0,6.0,575.0,...,33.0,32.1,125.0,7.50,50.000,0.0,70.0,0.651,1028,197462209
3,67,Vayne,Marksman,Assassin,7,10.0,1.0,1.0,8.0,515.0,...,23.0,30.0,550.0,3.50,6.972,0.0,60.0,0.658,1087,196539470
4,202,Jhin,Marksman,Mage,7,10.0,2.0,6.0,6.0,585.0,...,24.0,30.0,550.0,3.75,6.000,0.0,59.0,0.625,1075,193552573
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1117,104,Graves,Marksman,Marksman,1,8.0,5.0,3.0,3.0,555.0,...,33.0,32.0,425.0,8.00,8.000,0.0,68.0,0.475,481,403126
1118,145,Kaisa,Marksman,Marksman,1,8.0,5.0,3.0,6.0,600.0,...,28.0,30.0,525.0,3.50,8.200,0.0,59.0,0.644,481,385318
1119,157,Yasuo,Fighter,Assassin,1,8.0,4.0,4.0,10.0,490.0,...,30.0,32.0,175.0,6.50,0.000,0.0,60.0,0.697,486,374759
1120,236,Lucian,Marksman,Marksman,1,8.0,5.0,3.0,6.0,571.0,...,28.0,30.0,500.0,3.75,8.176,0.0,64.0,0.638,445,369465


In [4]:
# Drop columns
cleaned_champion_df = cleaned_champion_df.drop(columns=['championId', 'crit', 'summoners_playing', 'total_championpoints'])
cleaned_champion_df

Unnamed: 0,championname,role1,role2,championLevel,attack,defense,magic,difficulty,hp,mp,mpperlevel,movespeed,armor,spellblock,attackrange,hpregen,mpregen,attackdamage,attackspeed
0,Yasuo,Fighter,Assassin,7,8.0,4.0,4.0,10.0,490.0,100.00,0.0,345.0,30.0,32.0,175.0,6.50,0.000,60.0,0.697
1,Ezreal,Marksman,Mage,7,7.0,2.0,6.0,7.0,530.0,375.00,50.0,325.0,22.0,30.0,550.0,4.00,8.500,60.0,0.625
2,LeeSin,Fighter,Assassin,7,8.0,5.0,3.0,6.0,575.0,200.00,0.0,345.0,33.0,32.1,125.0,7.50,50.000,70.0,0.651
3,Vayne,Marksman,Assassin,7,10.0,1.0,1.0,8.0,515.0,231.80,35.0,330.0,23.0,30.0,550.0,3.50,6.972,60.0,0.658
4,Jhin,Marksman,Mage,7,10.0,2.0,6.0,6.0,585.0,300.00,50.0,330.0,24.0,30.0,550.0,3.75,6.000,59.0,0.625
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1117,Graves,Marksman,Marksman,1,8.0,5.0,3.0,3.0,555.0,325.00,40.0,340.0,33.0,32.0,425.0,8.00,8.000,68.0,0.475
1118,Kaisa,Marksman,Marksman,1,8.0,5.0,3.0,6.0,600.0,344.88,38.0,335.0,28.0,30.0,525.0,3.50,8.200,59.0,0.644
1119,Yasuo,Fighter,Assassin,1,8.0,4.0,4.0,10.0,490.0,100.00,0.0,345.0,30.0,32.0,175.0,6.50,0.000,60.0,0.697
1120,Lucian,Marksman,Marksman,1,8.0,5.0,3.0,6.0,571.0,348.88,38.0,335.0,28.0,30.0,500.0,3.75,8.176,64.0,0.638


In [5]:
cleaned_champion_df["attackspeed"] = cleaned_champion_df["attackspeed"].apply(lambda x: x*10)

In [7]:
# Change datatype to integer
cleaned_champion_df["attack"] = cleaned_champion_df["attack"].astype(int)
cleaned_champion_df["defense"] = cleaned_champion_df["defense"].astype(int)
cleaned_champion_df["magic"] = cleaned_champion_df["magic"].astype(int)
cleaned_champion_df["difficulty"] = cleaned_champion_df["difficulty"].astype(int)
cleaned_champion_df["hp"] = cleaned_champion_df["hp"].astype(int)
cleaned_champion_df["mp"] = cleaned_champion_df["mp"].astype(int)
cleaned_champion_df["mpperlevel"] = cleaned_champion_df["mpperlevel"].astype(int)
cleaned_champion_df["movespeed"] = cleaned_champion_df["movespeed"].astype(int)
cleaned_champion_df["armor"] = cleaned_champion_df["armor"].astype(int)
cleaned_champion_df["spellblock"] = cleaned_champion_df["spellblock"].astype(int)
cleaned_champion_df["attackrange"] = cleaned_champion_df["attackrange"].astype(int)
cleaned_champion_df["hpregen"] = cleaned_champion_df["hpregen"].astype(int)
cleaned_champion_df["mpregen"] = cleaned_champion_df["mpregen"].astype(int)
cleaned_champion_df["attackdamage"] = cleaned_champion_df["attackdamage"].astype(int)
cleaned_champion_df["attackspeed"] = cleaned_champion_df["attackspeed"].astype(int)

In [8]:
# Show data types
cleaned_champion_df.dtypes

championname     object
role1            object
role2            object
championLevel     int64
attack            int64
defense           int64
magic             int64
difficulty        int64
hp                int64
mp                int64
mpperlevel        int64
movespeed         int64
armor             int64
spellblock        int64
attackrange       int64
hpregen           int64
mpregen           int64
attackdamage      int64
attackspeed       int64
dtype: object

In [9]:
# Check for null values
cleaned_champion_df.isnull().any()

championname     False
role1            False
role2            False
championLevel    False
attack           False
defense          False
magic            False
difficulty       False
hp               False
mp               False
mpperlevel       False
movespeed        False
armor            False
spellblock       False
attackrange      False
hpregen          False
mpregen          False
attackdamage     False
attackspeed      False
dtype: bool

In [12]:
# Generate our categorical variable list
champion_cat = cleaned_champion_df.dtypes[cleaned_champion_df.dtypes == "object"].index.tolist()

# Check the number of unique values in each column
cleaned_champion_df[champion_cat].nunique()

championname    127
role1             6
role2             6
dtype: int64

In [13]:
# Check the unique value counts to see if binning is required
cleaned_champion_df.difficulty.value_counts()

5     189
4     133
6     126
8     126
7     105
3      84
2      42
10     35
9      35
1      14
Name: difficulty, dtype: int64

In [14]:
# Create a OneHotEncoder instance
enc = OneHotEncoder(sparse=False)

# Fit and transform the OneHotEncoder using the categorical variable list
encode_df = pd.DataFrame(enc.fit_transform(cleaned_champion_df[champion_cat]))

# Add the encoded variable names to the DataFrame
encode_df.columns = enc.get_feature_names(champion_cat)
encode_df.head()



Unnamed: 0,championname_Aatrox,championname_Ahri,championname_Alistar,championname_Amumu,championname_Anivia,championname_Aphelios,championname_Ashe,championname_Azir,championname_Bard,championname_Blitzcrank,...,role1_Mage,role1_Marksman,role1_Support,role1_Tank,role2_Assassin,role2_Fighter,role2_Mage,role2_Marksman,role2_Support,role2_Tank
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [15]:
# Merge one-hot encoded features and drop the originals
cleaned_champion_df = cleaned_champion_df.merge(encode_df,left_index=True, right_index=True)
cleaned_champion_df = cleaned_champion_df.drop(champion_cat,1)
cleaned_champion_df.head()

  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,championLevel,attack,defense,magic,difficulty,hp,mp,mpperlevel,movespeed,armor,...,role1_Mage,role1_Marksman,role1_Support,role1_Tank,role2_Assassin,role2_Fighter,role2_Mage,role2_Marksman,role2_Support,role2_Tank
0,7,8,4,4,10,490,100,0,345,30,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
1,7,7,2,6,7,530,375,50,325,22,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2,7,8,5,3,6,575,200,0,345,33,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3,7,10,1,1,8,515,231,35,330,23,...,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
4,7,10,2,6,6,585,300,50,330,24,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [16]:
# Remove difficulty status target from features data
y = cleaned_champion_df.difficulty
X = cleaned_champion_df.drop(columns=["difficulty"])

# Split training/test datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify=y)

# Create a StandardScaler instance
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [17]:
# Create a random forest classifier.
rf_model = RandomForestClassifier(n_estimators=128, random_state=78)

# Fitting the model
rf_model = rf_model.fit(X_train_scaled, y_train)

# Evaluate the model
y_pred = rf_model.predict(X_test_scaled)
print(f" Random forest predictive accuracy: {accuracy_score(y_test,y_pred):.3f}")

 Random forest predictive accuracy: 0.983
