In [1]:
import os
import numpy as np
import pandas as pd

# Batch number; it selects which 'Batch_<n>.csv' file is loaded below.
batch = 10

# The input file is looked up in the current working directory.
FILE = f'Batch_{batch}.csv'
PATH = os.getcwd()
FULL_PATH = os.path.join(PATH, FILE)

# Note: although the file name ends in .csv, it is parsed as tab-separated.
data = pd.read_csv(FULL_PATH, sep='\t', encoding='utf-8')

def catEncode(series, input_type=str):
    """Ordinal-encode one categorical column into a flat array of codes.

    Parameters
    ----------
    series : iterable
        Column values to encode (for example a pandas Series).
    input_type : type, default str
        Selects how the raw values are prepared before encoding:

        * ``list``  - every value is the text of a Python literal sequence
          (e.g. ``"['a', 'b']"``); it is parsed and only its first element
          is kept.
        * ``tuple`` - every value is converted with ``str``.
        * anything else - the values are used unchanged.

    Returns
    -------
    numpy.ndarray
        1-D array with the codes produced by
        ``OrdinalEncoder(categories='auto').fit_transform``.

    Raises
    ------
    ValueError, SyntaxError
        With ``input_type=list``, when a value is not a plain Python literal.
    IndexError
        With ``input_type=list``, when a parsed sequence is empty.
    """
    import ast
    from sklearn.preprocessing import OrdinalEncoder

    if input_type is list:
        # The text comes straight from a data file, so parse it with
        # literal_eval (literals only) instead of eval, which would execute
        # any expression embedded in a cell.
        values = [ast.literal_eval(i)[0] for i in series]
        final_input = np.array(values).reshape(-1, 1)
    elif input_type is tuple:
        final_input = np.array([str(i) for i in series]).reshape(-1, 1)
    else:
        final_input = np.array(series).reshape(-1, 1)

    #Encode categorical variables
    ord_enc = OrdinalEncoder(categories='auto')
    newCats = ord_enc.fit_transform(final_input)
    # The encoder works on a single-column 2-D array; flatten it back to 1-D.
    return newCats.ravel()

def catScale(dataframe):
    """Rescale every column of ``dataframe`` with a newly fitted MinMaxScaler.

    The scaler is created, fitted and applied in one step; the transformed
    data returned by ``fit_transform`` is handed back to the caller.
    """
    from sklearn.preprocessing import MinMaxScaler

    # To standardise instead of min-max scaling, swap in
    # sklearn.preprocessing.StandardScaler here.
    return MinMaxScaler().fit_transform(dataframe)

def catSplit(X_input, y_input, test_size=0.2):
    """Split features and labels into train and test partitions.

    The split is stratified on ``y_input`` and uses a fixed ``random_state``
    of 42, so repeated calls on the same data give the same partition.

    Returns the tuple ``(X_train, X_test, y_train, y_test)``.
    """
    from sklearn.model_selection import train_test_split

    partitions = train_test_split(
        X_input,
        y_input,
        test_size=test_size,
        random_state=42,
        stratify=y_input,
    )
    X_train, X_test, y_train, y_test = partitions
    return X_train, X_test, y_train, y_test

def rndForest(X_train, X_test, y_train, y_test, dataframe):
    """Fit a random forest, print its test accuracy and return feature importances.

    ``dataframe`` is only used for labelling: all of its column names except
    the last one become the column headers of the returned one-row frame
    (row label ``'Feature_Importance'``).
    """
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import accuracy_score

    forest = RandomForestClassifier(
        random_state=0,
        n_jobs=-1,
        min_samples_split=2,
        max_features=None,
    )
    forest.fit(X_train, y_train)

    predictions = forest.predict(X_test)
    accuracy = accuracy_score(y_test, predictions)
    print(f'Accuracy Score:{accuracy}')

    # Every column but the last is treated as a feature name.
    feature_names = dataframe.columns.tolist()[0:-1]
    return pd.DataFrame(
        [forest.feature_importances_],
        columns=feature_names,
        index=['Feature_Importance'],
    )

def prepData(dataframe):
    """Encode, scale and split the match data for model training.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain the columns 'Active_Player', 'Active_Enemy', 'AP_HP',
        'AP_STR', 'AP_WPN', 'Played_Cards', 'Faceoff' and 'Match_Result'.

    Returns
    -------
    X_train, X_test, y_train, y_test
        Partitions produced by ``catSplit`` from the scaled features and the
        encoded 'Result' column.
    df_train : pandas.DataFrame
        The encoded but unscaled frame: the seven feature columns followed by
        'Result', with missing values replaced by 0.
    """
    # The encoded columns below are bare arrays and therefore positional.
    # Resetting the index makes the pass-through columns line up by position
    # as well; otherwise pd.concat would pair rows by index label and silently
    # mis-align (or fail) for any frame without a default 0..n-1 index.
    d2 = dataframe.loc[:, ['Active_Player', 'Active_Enemy', 'AP_HP', 'AP_STR', 'AP_WPN', 'Played_Cards', 'Faceoff','Match_Result']].reset_index(drop=True)

    #Encode data
    df_train = pd.DataFrame(None)
    df_train['AP'] = catEncode(d2.Active_Player)
    df_train['AE'] = catEncode(d2.Active_Enemy)
    df_train = pd.concat([df_train, d2.loc[:, ['AP_HP', 'AP_STR']]], axis=1)
    df_train['AP_WPN'] = catEncode(d2.AP_WPN)
    # Played_Cards holds list text; only the first card of each list is encoded.
    df_train['AP_Cards'] = catEncode(d2.Played_Cards, input_type=list)
    df_train['Faceoff'] = catEncode(d2.Faceoff, input_type=tuple)
    df_train['Result'] = catEncode(d2.Match_Result)
    df_train = df_train.fillna(0) #Replace NaN values with zeros

    #Scale and split data
    # NOTE(review): the scaler is fitted on all rows before the split, so the
    # test rows influence the scaling applied to the training features.
    X = catScale(df_train.loc[:, ['AP', 'AE', 'AP_HP', 'AP_STR', 'AP_WPN', 'AP_Cards', 'Faceoff']])
    y = df_train.loc[:, 'Result']
    X_train, X_test, y_train, y_test = catSplit(X, y)
    return X_train, X_test, y_train, y_test, df_train

# Build the train/test partitions and the encoded frame from the loaded batch.
X_train, X_test, y_train, y_test, df_train = prepData(data)
print('Data prepared!') #DEBUG: progress marker

Data prepared!


In [2]:
# Train the random forest on the full feature set; the call prints the test
# accuracy and the returned importance table is displayed as the cell output.
feat_imp = rndForest(X_train, X_test, y_train, y_test, df_train)
feat_imp

Accuracy Score:0.8883931522242051


Unnamed: 0,AP,AE,AP_HP,AP_STR,AP_WPN,AP_Cards,Faceoff
Feature_Importance,0.030387,0.01989,0.089556,0.741815,0.002709,0.07061,0.045032


In [None]:
# Support-vector classifier on the same scaled split, for comparison with the
# random forest.
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
# NOTE(review): degree presumably only matters for kernel='poly' (see the
# 'poly3' run in the result notes) - confirm before tuning it with 'rbf'.
svm = SVC(C=0.8, kernel='rbf', degree=3, random_state=42)
svm.fit(X_train, y_train)
y_pred = svm.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy Score:{accuracy}')

In [None]:
# Histogram-based gradient boosting with the encoded columns declared as
# categorical features.
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.metrics import accuracy_score

# categorical_features expects every flagged column to hold integer category
# codes, but X_train / X_test were min-max scaled into [0, 1] by prepData, which
# turns the codes into fractions. Train on the unscaled ordinal codes kept in
# df_train instead. catSplit uses a fixed random_state and stratifies on the
# same labels, so the rows land in the same partitions as the scaled split.
X_train_cat, X_test_cat, y_train_cat, y_test_cat = catSplit(
    df_train.iloc[:, :-1].to_numpy(), df_train.loc[:, 'Result']
)

# Mask order follows df_train's feature columns:
# AP, AE, AP_HP, AP_STR, AP_WPN, AP_Cards, Faceoff.
gbc = HistGradientBoostingClassifier(random_state=42, categorical_features=[True, True, False, False, True, True, True])
gbc.fit(X_train_cat, y_train_cat)
y_pred = gbc.predict(X_test_cat)
accuracy = accuracy_score(y_test_cat, y_pred)
print(f'Accuracy Score:{accuracy}')

#Batch_3
RFC no params == 0.8087 accuracy
SVM rbf, C=0.8 == 0.7669 accuracy
SVM poly3, C=0.8 == 0.733 accuracy
HistGBC  == ??? accuracy

#Batch_4
RFC no params == 0.8087 accuracy
HistGBC == 0.5851 accuracy

#Batch_5
RFC max_features=None == 0.8115 accuracy
HistGBC == 0.5866 accuracy

def prepDataLite(dataframe):
    """Encode, scale and split a reduced feature set of the match data.

    Same pipeline as the full preparation step but without the
    'Active_Player' and 'Faceoff' inputs.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain the columns 'Active_Enemy', 'AP_HP', 'AP_STR', 'AP_WPN',
        'Played_Cards' and 'Match_Result'.

    Returns
    -------
    X_train, X_test, y_train, y_test
        Partitions produced by ``catSplit`` from the scaled features and the
        encoded 'Result' column.
    df_train : pandas.DataFrame
        The encoded but unscaled frame: the five feature columns followed by
        'Result', with missing values replaced by 0.
    """
    # The encoded columns below are bare arrays and therefore positional.
    # Resetting the index makes the pass-through columns line up by position
    # as well; otherwise pd.concat would pair rows by index label and silently
    # mis-align (or fail) for any frame without a default 0..n-1 index.
    d2 = dataframe.loc[:, ['Active_Enemy', 'AP_HP', 'AP_STR', 'AP_WPN', 'Played_Cards','Match_Result']].reset_index(drop=True)

    #Encode data
    df_train = pd.DataFrame(None)
    df_train['AE'] = catEncode(d2.Active_Enemy)
    df_train = pd.concat([df_train, d2.loc[:, ['AP_HP', 'AP_STR']]], axis=1)
    df_train['AP_WPN'] = catEncode(d2.AP_WPN)
    # Played_Cards holds list text; only the first card of each list is encoded.
    df_train['AP_Cards'] = catEncode(d2.Played_Cards, input_type=list)
    df_train['Result'] = catEncode(d2.Match_Result)
    df_train = df_train.fillna(0) #Replace NaN values with zeros

    #Scale and split data
    # NOTE(review): the scaler is fitted on all rows before the split, so the
    # test rows influence the scaling applied to the training features.
    X = catScale(df_train.loc[:, ['AE', 'AP_HP', 'AP_STR', 'AP_WPN', 'AP_Cards']])
    y = df_train.loc[:, 'Result']
    X_train, X_test, y_train, y_test = catSplit(X, y)
    return X_train, X_test, y_train, y_test, df_train

# Repeat the random-forest run on the reduced feature set; the *2 names keep
# the results of the full-feature run above available for comparison.
X_train2, X_test2, y_train2, y_test2, df_train2 = prepDataLite(data)
feat_imp2 = rndForest(X_train2, X_test2, y_train2, y_test2, df_train2)
feat_imp2