In [None]:
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the champion statistics CSV (path is relative to the working directory)
# into `champs`, then preview the first rows to check how the columns parsed.
champs = pd.read_csv('Worlds 2024 Champions Data.csv')
champs.head()

Unnamed: 0,Champion,Picks,Bans,Presence,Wins,Losses,Winrate,KDA,Avg BT,GT,CSM,DPM,GPM,CSD@15,GD@15,XPD@15
0,Aurora,14,60,96%,9.0,5.0,64%,5.6,3.5,33:36:00,7.6,684.0,394.0,-2.6,-7.0,-94.0
1,Yone,28,45,95%,18.0,10.0,64%,4.3,3.4,32:14:00,9.5,645.0,442.0,5.4,303.0,265.0
2,Skarner,32,35,87%,23.0,9.0,72%,5.7,3.9,32:02:00,5.9,346.0,338.0,-0.8,11.0,82.0
3,Jax,35,28,82%,19.0,16.0,54%,3.0,5.2,34:25:00,8.0,440.0,402.0,-5.1,38.0,-164.0
4,Ashe,18,43,79%,13.0,5.0,72%,6.2,6.1,33:30:00,9.5,553.0,444.0,-2.8,-156.0,17.0


In [None]:
# Normalise Presence to a 0-1 fraction.
# The branch is chosen with is_numeric_dtype instead of `dtype != object`:
# newer pandas can load text columns with a dedicated string dtype that is not
# `object`, which would otherwise send '96%'-style text into the numeric branch.
if pd.api.types.is_numeric_dtype(champs['Presence']):
    # Already numeric: a maximum above 1 means the values are still percentages.
    # Once converted, this check also makes re-running the cell a no-op.
    if champs['Presence'].max() > 1:
        champs['Presence'] = champs['Presence'] / 100
else:
    # Text such as '96%': strip the percent sign and convert to a fraction.
    champs['Presence'] = champs['Presence'].str.rstrip('%').astype(float) / 100

# Keep only champions picked more than 10 times (rows with Picks <= 10 are dropped).
# A boolean mask replaces the row-by-row iterrows()/drop(inplace=True) loop, which
# mutated the frame while iterating over it. .copy() yields an independent frame so
# the column assignment below does not write into a slice of the original.
champs = champs.loc[champs['Picks'] > 10].copy()

# Reciprocal of 'Avg BT', so a smaller 'Avg BT' becomes a larger feature value.
# NOTE(review): the original comment described 'Avg BT' as average build time;
# confirm what the column means against the data source.
# NOTE(review): an 'Avg BT' of 0 would produce inf here and break model fitting.
champs['Inverse_Avg_BT'] = 1 / champs['Avg BT']

# Features and target for the first model.
X = champs[['Wins', 'Inverse_Avg_BT']]
y = champs['Presence']

# Train/test split with scikit-learn's default test size; the fixed seed keeps
# the partition identical across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [None]:
# Linear-regression pipeline: standardise every feature, then fit ordinary
# least squares on the scaled values.
pipe = Pipeline(steps=[('scaler', StandardScaler()), ('model', LinearRegression())])

In [None]:
# Fit the scaler and the regression on the training split. Pipeline.fit returns
# the pipeline itself, so `model1` is another name for the now-fitted `pipe`.
model1 = pipe.fit(X_train, y_train)

In [None]:
# Evaluate on the held-out split; for a regressor, score() reports R^2
# (the coefficient of determination).
model1.score(X_test, y_test)

0.4560454159010323

In [None]:
# Mean squared error of the pipeline's predictions on the held-out split.
MSE = mean_squared_error(y_true=y_test, y_pred=model1.predict(X_test))
MSE

0.022992280326633238

In [None]:
# Same linear pipeline, now with KDA added as a third feature.
X = champs.loc[:, ['Wins', 'Inverse_Avg_BT', 'KDA']]
y = champs.loc[:, 'Presence']
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Build a fresh pipeline so nothing fitted on the two-feature data carries over.
pipe = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('model', LinearRegression()),
])

# fit() returns the pipeline, so `model1` and `pipe` name the same fitted object.
pipe.fit(X_train, y_train)
model1 = pipe

In [None]:
# R^2 of the three-feature linear model on the held-out split.
model1.score(X_test, y_test)

0.48604835015112424

In [None]:
# Mean squared error of the three-feature linear model on the held-out split.
MSE = mean_squared_error(y_true=y_test, y_pred=model1.predict(X_test))
MSE

0.021724093799549662

In [None]:
# Baseline value: the mean of Presence over every row currently in `champs`
# (training and test rows together).
# NOTE(review): in the cells shown, this value is never compared against a model;
# for a like-for-like baseline, consider the training-split mean scored on y_test.
baseline = champs['Presence'].mean()
baseline

np.float64(0.5344827586206896)

In [None]:
from sklearn.neighbors import KNeighborsRegressor

In [None]:
# k-nearest-neighbours regression on the two features Wins and Inverse_Avg_BT.
XKNN = champs.loc[:, ['Wins', 'Inverse_Avg_BT']]
yKNN = champs.loc[:, 'Presence']

# Split with the default test size and a fixed seed (42).
XKNN_train, XKNN_test, yKNN_train, yKNN_test = train_test_split(
    XKNN, yKNN, random_state=42
)

# Standardise first, then KNeighborsRegressor with its default settings.
pipe2 = Pipeline(steps=[('scaler2', StandardScaler()), ('model2', KNeighborsRegressor())])

In [None]:
# Fit the KNN pipeline on the training split; fit() returns the pipeline itself,
# so `model2` is another name for the fitted `pipe2`.
model2 = pipe2.fit(XKNN_train, yKNN_train)

In [None]:
# R^2 of the two-feature KNN model on the held-out split.
model2.score(XKNN_test, yKNN_test)

0.4716309330178913

In [None]:
# Mean squared error of the two-feature KNN model on the held-out split.
MSE2 = mean_squared_error(y_true=yKNN_test, y_pred=model2.predict(XKNN_test))
MSE2

0.022333500000000003

In [None]:
# KNN regression again, this time with KDA as a third feature.
XKNN = champs.loc[:, ['Wins', 'Inverse_Avg_BT', 'KDA']]
yKNN = champs.loc[:, 'Presence']

# Split with the default test size and a fixed seed (42).
XKNN_train, XKNN_test, yKNN_train, yKNN_test = train_test_split(
    XKNN, yKNN, random_state=42
)

# Fresh, unfitted pipeline: standardise, then KNeighborsRegressor with defaults.
pipe2 = Pipeline(steps=[('scaler2', StandardScaler()), ('model2', KNeighborsRegressor())])

In [None]:
# Fit the three-feature KNN pipeline; `model2` is the fitted `pipe2` itself,
# because fit() returns the pipeline.
model2 = pipe2.fit(XKNN_train, yKNN_train)

In [None]:
# R^2 of the three-feature KNN model on the held-out split.
model2.score(XKNN_test, yKNN_test)

0.7082951352949874

In [None]:
# Mean squared error of the three-feature KNN model on the held-out split.
MSE2 = mean_squared_error(y_true=yKNN_test, y_pred=model2.predict(XKNN_test))
MSE2

0.012329999999999997

In [None]:
# Random-forest regression on the two features Wins and Inverse_Avg_BT.
from sklearn.ensemble import RandomForestRegressor
XForest = champs[['Wins', 'Inverse_Avg_BT']]
yForest = champs['Presence']

# Split with the default test size and a fixed seed (42).
XForest_train, XForest_test, yForest_train, yForest_test = train_test_split(XForest, yForest, random_state=42)

# The forest is seeded: without random_state its bootstrap samples and feature
# draws differ on every run, so the score and MSE below could not be reproduced.
pipe3 = Pipeline([
    ('scaler3', StandardScaler()),
    ('model3', RandomForestRegressor(random_state=42))
])

In [None]:
# Fit the random-forest pipeline on the training split; fit() returns the
# pipeline itself, so `model3` is another name for the fitted `pipe3`.
model3 = pipe3.fit(XForest_train, yForest_train)

In [None]:
# R^2 of the two-feature random-forest model on the held-out split.
model3.score(XForest_test, yForest_test)

0.1798802898122155

In [None]:
# Mean squared error of the two-feature random-forest model on the held-out split.
MSE3 = mean_squared_error(y_true=yForest_test, y_pred=model3.predict(XForest_test))
MSE3

0.03466543499999991

In [None]:
# Random-forest regression with KDA added as a third feature.
XForest = champs[['Wins', 'Inverse_Avg_BT', 'KDA']]
yForest = champs['Presence']

# Split with the default test size and a fixed seed (42).
XForest_train, XForest_test, yForest_train, yForest_test = train_test_split(XForest, yForest, random_state=42)

# The forest is seeded: without random_state its bootstrap samples and feature
# draws differ on every run, so the score and MSE below could not be reproduced.
pipe3 = Pipeline([
    ('scaler3', StandardScaler()),
    ('model3', RandomForestRegressor(random_state=42))
])

In [None]:
# Fit the three-feature random-forest pipeline; `model3` is the fitted `pipe3`
# itself, because fit() returns the pipeline.
model3 = pipe3.fit(XForest_train, yForest_train)

In [None]:
# R^2 of the three-feature random-forest model on the held-out split.
model3.score(XForest_test, yForest_test)

0.22241452018335062

In [None]:
# Mean squared error of the three-feature random-forest model on the held-out split.
MSE3 = mean_squared_error(y_true=yForest_test, y_pred=model3.predict(XForest_test))
MSE3

0.032867566249999994