In [10]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import joblib

In [2]:
matches_df = pd.read_csv('data/matches_summarized.csv')

In [3]:
columns=['placementValue','scoreValue','killsValue',
 'deathsValue','assistsValue','damageValue','damageReceivedValue',
 'grenadeCastsValue','ability1CastsValue','ability2CastsValue',
 'ultimateCastsValue','econRatingValue','suicidesValue',
 'firstBloodsValue','firstDeathsValue','lastDeathsValue',
 'survivedValue','tradedValue','kASTValue',
 'scorePerRoundValue','damagePerRoundValue','hasWon']

In [4]:
data=matches_df[columns]

In [5]:
data.isna().sum()

placementValue           0
scoreValue               0
killsValue               0
deathsValue              0
assistsValue             0
damageValue              0
damageReceivedValue      0
grenadeCastsValue      156
ability1CastsValue     156
ability2CastsValue     156
ultimateCastsValue     156
econRatingValue          0
suicidesValue            0
firstBloodsValue         0
firstDeathsValue         0
lastDeathsValue          0
survivedValue            0
tradedValue              0
kASTValue                0
scorePerRoundValue       0
damagePerRoundValue      0
hasWon                   0
dtype: int64

In [6]:
data.dropna(inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.dropna(inplace=True)


In [7]:
X=data.drop('hasWon',axis=1)
y=data['hasWon']

In [8]:
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.2, random_state=42)

In [12]:
from sklearn.metrics import accuracy_score, log_loss
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC, LinearSVC, NuSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

classifiers = [
    KNeighborsClassifier(3),
    #SVC(kernel="rbf", C=0.025, probability=True),
    #NuSVC(probability=True),
    DecisionTreeClassifier(),
    RandomForestClassifier(),
    AdaBoostClassifier(),
    GradientBoostingClassifier(),
    GaussianNB(),
    LinearDiscriminantAnalysis(),
    QuadraticDiscriminantAnalysis()]

# Logging for Visual Comparison
log_cols=["Classifier", "Accuracy", "Log Loss"]
log = pd.DataFrame(columns=log_cols)

for clf in classifiers:
    clf.fit(X_train, y_train)
    name = clf.__class__.__name__
    
    print("="*30)
    print(name)
    
    print('****Results****')
    train_predictions = clf.predict(X_test)
    acc = accuracy_score(y_test, train_predictions)
    print("Accuracy: {:.4%}".format(acc))
    
    train_predictions = clf.predict_proba(X_test)
    ll = log_loss(y_test, train_predictions)
    print("Log Loss: {}".format(ll))
    joblib.dump(clf,f"{clf.__class__.__name__}.joblib")
    
print("="*30)

KNeighborsClassifier
****Results****
Accuracy: 68.2076%
Log Loss: 3.5303402480231187
DecisionTreeClassifier
****Results****
Accuracy: 74.4201%
Log Loss: 8.83498339483218
RandomForestClassifier
****Results****
Accuracy: 80.9863%
Log Loss: 0.38292648230863136
AdaBoostClassifier
****Results****
Accuracy: 79.1836%
Log Loss: 0.6741274205179728
GradientBoostingClassifier
****Results****
Accuracy: 80.3749%
Log Loss: 0.3914056281898907
GaussianNB
****Results****
Accuracy: 73.8732%
Log Loss: 0.8206151393774845
LinearDiscriminantAnalysis
****Results****
Accuracy: 81.0867%
Log Loss: 0.4162871903643533
QuadraticDiscriminantAnalysis
****Results****
Accuracy: 78.7326%
Log Loss: 0.5379281441396154
