In [None]:
#Importing Required Libraries
import numpy as np
import pandas as pd 
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
import os

from sklearn import model_selection
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC 
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

In [None]:
# Read the games CSV into a DataFrame (the path is relative to the notebook's working directory)
game = pd.read_csv('../input/online-chess-games/chess_games.csv')
# Show the first row as a quick preview of the available columns
game.head(1)

In [None]:
# Checking the shape (rows, columns) of the dataset
game.shape

In [None]:
# Drop the columns that are not carried into the rest of the notebook.
unused_columns = [
    'rated', 'victory_status', 'time_increment',
    'white_id', 'black_id', 'moves',
    'opening_fullname', 'opening_variation', 'opening_response',
]
data = game.drop(columns=unused_columns)
# Shape after the drop, to confirm how many columns remain
data.shape

In [None]:
# Encode the 'winner' column as integer class labels; the fitted encoder `le`
# is kept so the integer codes can be mapped back to the original labels.
le = LabelEncoder()
target = le.fit_transform(data['winner'])
# Display the encoded target array
target

In [None]:
# Label-encode the 'opening_shortname' column with its own encoder (`le2`),
# so `le` keeps holding the mapping for 'winner'.
le2 = LabelEncoder()
opening_codes = le2.fit_transform(data['opening_shortname'])
data = data.assign(opening_shortname=opening_codes)
data.head()

In [None]:
# Count the missing values in each column
missing_per_column = data.isna().sum()
print(missing_per_column)

In [None]:
# Inspect the dtype of every remaining column
data.dtypes

In [None]:
# Stacked histogram of 'opening_moves', coloured by 'winner'.
f, ax = plt.subplots(figsize=(10,7))
# Draw on the axes created above rather than relying on the implicit
# "current axes" state, and give the figure a title so it stands alone.
sns.histplot(x="opening_moves", hue="winner", data=data, multiple='stack',
             linewidth=0.5, edgecolor=".3", ax=ax)
# Trailing semicolon keeps the return value's repr out of the cell output.
ax.set_title("Distribution of opening_moves by winner");

In [None]:
# Function to split the dataset
def splitdataset(balance_data, target, feature_columns=(1, 3, 4, 6, 7),
                 test_size=0.15, random_state=40):
    """Select feature columns by position and split into train and test sets.

    Parameters
    ----------
    balance_data : pandas.DataFrame
        Frame that holds the candidate feature columns.
    target : array-like
        Labels, one per row of ``balance_data``.
    feature_columns : sequence of int, optional
        Positional indices of the columns used as features. The default is
        the selection that used to be hard-coded in this function.
        NOTE(review): positional indices silently change meaning if the
        column order of ``balance_data`` changes - confirm against the frame.
    test_size : float, optional
        Fraction of rows held out for testing (default 0.15).
    random_state : int, optional
        Seed passed to ``train_test_split`` (default 40).

    Returns
    -------
    tuple
        ``(X, Y, X_train, X_test, y_train, y_test)``.
    """
    # Slice the wanted columns first, then convert: this avoids turning the
    # whole mixed-dtype frame into one object array just to index into it.
    X = balance_data.iloc[:, list(feature_columns)].to_numpy()
    Y = target

    # Splitting the dataset into train and test
    X_train, X_test, y_train, y_test = train_test_split(
        X, Y, test_size=test_size, random_state=random_state)

    return X, Y, X_train, X_test, y_train, y_test

In [None]:
# Decision Tree trainer || Gini index.
def train_using_gini(X_train, X_test, y_train):
    """Fit a Gini-criterion decision tree on the training split and return it.

    ``X_test`` is accepted but not used by this function.
    """
    gini_tree = DecisionTreeClassifier(
        criterion="gini",
        random_state=100,
        max_depth=8,
        min_samples_leaf=8,
    )
    # fit() returns the fitted estimator itself
    return gini_tree.fit(X_train, y_train)
      
# Decision Tree trainer || entropy.
def train_using_entropy(X_train, X_test, y_train):
    """Fit an entropy-criterion decision tree on the training split and return it.

    ``X_test`` is accepted but not used by this function.
    """
    entropy_tree = DecisionTreeClassifier(
        criterion="entropy",
        random_state=100,
        max_depth=50,
        min_samples_leaf=8,
    )
    # fit() returns the fitted estimator itself
    return entropy_tree.fit(X_train, y_train)

# SVM trainer.
def train_using_SVM(X_train, X_test, y_train):
    """Fit an RBF-kernel support vector classifier and return it.

    ``X_test`` is accepted but not used by this function.
    """
    svm_model = SVC(
        kernel='rbf',
        C=1.4,
        gamma='scale',
    )
    # fit() returns the fitted estimator itself
    return svm_model.fit(X_train, y_train)

# Logistic Regression trainer.
def train_using_logistic(X_train, X_test, y_train):
    """Fit a logistic regression classifier (liblinear solver) and return it.

    ``X_test`` is accepted but not used by this function.
    """
    logistic_model = LogisticRegression(solver='liblinear')
    # fit() returns the fitted estimator itself
    return logistic_model.fit(X_train, y_train)

# Function to perform training with Random Forest
def train_using_randomforest(X_train, X_test, y_train):
    """Fit a 100-tree random forest on the training split and return it.

    ``X_test`` is accepted but not used by this function.
    """
    # Seed the forest so repeated runs of the notebook give the same model;
    # 100 matches the seed used by the decision-tree trainers in this file.
    clf_randomF = RandomForestClassifier(n_estimators = 100, random_state = 100)
    # Performing training
    clf_randomF.fit(X_train, y_train)
    return clf_randomF

In [None]:
# Prediction helper shared by every classifier.
def prediction(X_test, clf_object):
    """Predict labels for ``X_test`` with ``clf_object``, print and return them.

    ``clf_object`` only needs to expose a ``predict`` method.
    """
    predicted = clf_object.predict(X_test)
    print("Predicted values:")
    print(predicted, "\n")
    return predicted

In [None]:
# Accuracy / report helper shared by every classifier.
def cal_accuracy(y_test, y_pred):
    """Print the accuracy (as a percentage) and the classification report.

    Nothing is returned; the results are only printed.
    """
    accuracy_pct = accuracy_score(y_test, y_pred) * 100
    print("\nAccuracy : ", accuracy_pct)
    print("\n")

    # Built after the accuracy is printed, matching the output order above
    report = classification_report(y_test, y_pred)
    print("Report : ", report)

In [None]:
# Building phase: split once, then fit every classifier on the same training split.
X, Y, X_train, X_test, y_train, y_test = splitdataset(balance_data=data, target=target)
clf_gini = train_using_gini(X_train=X_train, X_test=X_test, y_train=y_train)
clf_entropy = train_using_entropy(X_train=X_train, X_test=X_test, y_train=y_train)
clf_SVM = train_using_SVM(X_train=X_train, X_test=X_test, y_train=y_train)
clf_logistic = train_using_logistic(X_train=X_train, X_test=X_test, y_train=y_train)
clf_randomF = train_using_randomforest(X_train=X_train, X_test=X_test, y_train=y_train)

In [None]:
# Evaluate the Gini decision tree on the held-out split
print("Results Using Decision Tree || Gini Index:\n")
y_pred_gini = prediction(X_test=X_test, clf_object=clf_gini)
cal_accuracy(y_test=y_test, y_pred=y_pred_gini)

In [None]:
# Evaluate the entropy decision tree on the held-out split
print("Results Using Decision Tree || Entropy:\n")
y_pred_entropy = prediction(X_test=X_test, clf_object=clf_entropy)
cal_accuracy(y_test=y_test, y_pred=y_pred_entropy)

In [None]:
# Evaluate the SVM classifier on the held-out split
print("Results Using SVM Classifier:\n")
y_pred_SVM = prediction(X_test=X_test, clf_object=clf_SVM)
cal_accuracy(y_test=y_test, y_pred=y_pred_SVM)

In [None]:
# Evaluate the logistic regression classifier on the held-out split
print("Results Using Logistic Regression:\n")
y_pred_logistic = prediction(X_test=X_test, clf_object=clf_logistic)
cal_accuracy(y_test=y_test, y_pred=y_pred_logistic)

In [None]:
# Evaluate the random forest classifier on the held-out split
print("Results Using Random Forest\n")
y_pred_randomF = prediction(X_test=X_test, clf_object=clf_randomF)
cal_accuracy(y_test=y_test, y_pred=y_pred_randomF)