In [1]:
# Function to calculate accuracy score
from sklearn.metrics import accuracy_score

def calculate_score(X_train,y_train,X_test,y_test, clf):
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    score = accuracy_score(y_test, y_pred)
    return score

In [2]:
# Function to calculate AUROC score
from sklearn.metrics import roc_auc_score

def calculate_auroc(X_train,y_train,X_test,y_test, clf):
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    auroc = roc_auc_score(y_test, y_pred, average=None)
    return auroc

In [19]:
import pandas as pd
import csv

df = pd.read_csv("data_combined.csv")

In [20]:
# Displaying data
df

Unnamed: 0,match_id,match_seq_num,radiant_win,start_time,duration,avg_mmr,num_mmr,lobby_type,game_mode,avg_rank_tier,num_rank_tier,cluster,radiant_team,dire_team
0,6119181915,5121569669,False,1628035579,2535,,,7,22,33,3,251,15741412986,120102111104
1,6119181906,5121569435,True,1628035575,2439,3595.0,2.0,7,22,57,5,202,94112596896,25119838632
2,6119181904,5121568204,False,1628035574,2333,3926.0,4.0,7,22,64,5,274,742945827,86841129338
3,6119181903,5121566055,False,1628035575,1993,2290.0,1.0,7,22,15,2,251,1001410412875,3789611222
4,6119181902,5121573472,False,1628035575,2922,1875.0,2.0,7,22,34,3,273,391884836,932211010076
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5861254,6011384113,5035294458,False,1621984962,2108,,,7,22,12,2,181,10929274642,59841213644
5861255,6011384112,5035292430,False,1621984957,1843,,,7,22,39,2,251,22112612314,467932713
5861256,6011384111,5035296717,False,1621984957,2428,3039.0,3.0,7,22,52,7,186,2698409386,503512799
5861257,6011384108,5035291897,True,1621984947,1759,3819.0,4.0,7,22,46,7,251,17364112119,531134914


In [30]:
# Separate the features (X) from the ‘radiant_win’ (y) column
import numpy as np
from sklearn.preprocessing import LabelEncoder
X = df.copy()
X = X[['radiant_team','dire_team']]

y = np.array(df["radiant_win"])

# X = X.head()
lb = LabelEncoder()
X['radiant_team'] = lb.fit_transform(X['radiant_team'])
X['dire_team'] = lb.fit_transform(X['dire_team'])
X

Unnamed: 0,radiant_team,dire_team
0,1470849,22569
1,5550704,1999318
2,4606688,5222981
3,129501,2825003
4,2855691,5488634
...,...,...
5861254,499923,3888304
5861255,1891384,3280517
5861256,2224505,3592867
5861257,1556408,3682701


In [33]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Perceptron
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LinearRegression
from statistics import mean, stdev

# Creating arrays to store the accuracy scores and AUROC
perceptron_accu = []
knn_accu = []
nb_accu = []
trees_accu = []
perceptron_auroc = []
knn_auroc = []
nb_auroc = []
trees_auroc = []
avg_accu = []
avg_auroc = []

# Evaluate the data with 1000 iterations
for x in range(100):
    # Split the data with 90% train data and 10% test data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.1)
    
#     reg = LinearRegression().fit(X, y)
#     reg.score(X, y)


    # Evaluate data with Perceptron
    clf = Perceptron(tol=1e-3, random_state=0)
    perceptron_accu.append(calculate_score(X_train, y_train, X_test, y_test, clf))
    perceptron_auroc.append(calculate_auroc(X_train, y_train, X_test, y_test, clf))

    # Evaluate data with KNeighborsClassifier
    clf = KNeighborsClassifier(n_neighbors=5)
    knn_accu.append(calculate_score(X_train, y_train, X_test, y_test, clf))
    knn_auroc.append(calculate_auroc(X_train, y_train, X_test, y_test, clf))

    # Evaluate data with GaussianNBClassifier
    clf = GaussianNB()
    nb_accu.append(calculate_score(X_train, y_train, X_test, y_test, clf))
    nb_auroc.append(calculate_auroc(X_train, y_train, X_test, y_test, clf))

    # Evaluate data with DecisionTreeClassifier
    clf = DecisionTreeClassifier(random_state=0)
    trees_accu.append(calculate_score(X_train, y_train, X_test, y_test, clf))
    trees_auroc.append(calculate_auroc(X_train, y_train, X_test, y_test, clf))

avg_accu.append(round(mean(perceptron_accu), 3))
avg_accu.append(round(mean(knn_accu), 3))
avg_accu.append(round(mean(nb_accu), 3))
avg_accu.append(round(mean(trees_accu), 3))
avg_auroc.append(round(mean(perceptron_auroc), 3))
avg_auroc.append(round(mean(knn_auroc), 3))
avg_auroc.append(round(mean(nb_auroc), 3))
avg_auroc.append(round(mean(trees_auroc), 3))
                      
print("Perceptron accuracy length is:", len(perceptron_accu), ", average:", avg_accu[0])
print("Perceptron AUROC length is:", len(perceptron_auroc), ", average:", avg_auroc[0])
print("KNN accuracy length is:", len(knn_accu), ", average:", avg_accu[1])
print("KNN AUROC length is:", len(knn_auroc), ", average:", avg_auroc[1])
print("NB accuracy length is:", len(nb_accu), ", average:", avg_accu[2])
print("NB AUROC length is:", len(nb_auroc), ", average:", avg_auroc[2])
print("Decision trees accuracy length is:", len(trees_accu), ", average:", avg_accu[3])
print("Decision trees AUROC length is:", len(trees_auroc), ", average:", avg_auroc[3])
print("Standard Deviation of the accuracy scores is:", round(stdev(avg_accu), 3))
print("Standard Deviation of the AUROC scores is:", round(stdev(avg_auroc), 3))

Perceptron accuracy length is: 100 , average: 0.501
Perceptron AUROC length is: 100 , average: 0.5
KNN accuracy length is: 100 , average: 0.507
KNN AUROC length is: 100 , average: 0.505
NB accuracy length is: 100 , average: 0.525
NB AUROC length is: 100 , average: 0.5
Decision trees accuracy length is: 100 , average: 0.505
Decision trees AUROC length is: 100 , average: 0.504
Standard Deviation of the accuracy scores is: 0.011
Standard Deviation of the AUROC scores is: 0.003
