In [1]:
import numpy as np
import pandas as pd
import scipy as scp
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
import os

# The notebook is expected to run from a sub-directory of the project, so the
# project root is the parent of the current working directory.
ROOT = os.path.dirname(os.getcwd())
_pokerhands_dir = os.path.join(ROOT, 'data', 'pokerhands')

# Training hands, unlabeled test hands, and the reference answers for the test hands.
train_data = pd.read_csv(os.path.join(_pokerhands_dir, 'train.csv'))
test_data = pd.read_csv(os.path.join(_pokerhands_dir, 'test.csv'))
test_data_correct = pd.read_csv(os.path.join(_pokerhands_dir, 'test_correct.csv'))

# Evaluate on the first 10000 test rows only; both frames are cut identically
# so that they stay row-aligned.
test_data = test_data.iloc[:10000]
test_data_correct = test_data_correct.iloc[:10000]

In [2]:
def preproc(df):
    """Replace the raw card columns with boolean poker-hand indicator features.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the suit columns ``S1``..``S5`` and the numeric rank
        columns ``C1``..``C5``. Ranks are assumed to be integers without
        missing values (TODO confirm against the data files). Every other
        column (e.g. ``hand`` or ``id``) is passed through untouched.

    Returns
    -------
    pandas.DataFrame
        A new frame with the ten suit/rank columns removed and these boolean
        columns appended, in this order: ``RF``, ``straight flush``,
        ``flush``, ``straight``, ``Nothing in hand``, ``One pair``,
        ``Four of a kind``, ``Full house``, ``Three of a kind``,
        ``Two pairs``. The input frame is not modified.

    Notes
    -----
    A hand counts as a straight when its sorted ranks increase by exactly one,
    or when the ranks are {1, 10, 11, 12, 13} (ace-high, assuming rank 1
    encodes the ace). ``RF`` is that ace-high hand in a single suit.
    """
    suit_cols = ['S1', 'S2', 'S3', 'S4', 'S5']
    rank_cols = ['C1', 'C2', 'C3', 'C4', 'C5']

    # Work on plain arrays so nothing is ever written into the caller's frame.
    suits = df[suit_cols].to_numpy()
    ranks = df[rank_cols].to_numpy()

    # All five suits equal the first one.
    same_suit = (suits == suits[:, [0]]).all(axis=1)

    # Sorting each hand turns "is it a straight" into "are all gaps 1",
    # with the ace-high hand as the one explicit special case.
    sorted_ranks = np.sort(ranks, axis=1)
    gaps = np.diff(sorted_ranks, axis=1)
    consecutive = (gaps == 1).all(axis=1)
    ace_high = (sorted_ranks == np.array([1, 10, 11, 12, 13])).all(axis=1)
    straight_shape = consecutive | ace_high

    # In a sorted hand every non-zero gap starts a new distinct rank.
    n_distinct = 1 + (gaps != 0).sum(axis=1)

    # multiplicity[i, j] = how many cards in hand i share the rank of card j.
    multiplicity = (ranks[:, :, None] == ranks[:, None, :]).sum(axis=2)
    four_of_a_kind = (multiplicity == 4).any(axis=1)
    # With five cards, a triple plus at least one singleton forces the counts
    # to be exactly (3, 1, 1), i.e. three of a kind and not a full house.
    three_of_a_kind = (multiplicity == 3).any(axis=1) & (multiplicity == 1).any(axis=1)

    royal_flush = same_suit & ace_high
    straight_flush = same_suit & straight_shape & ~royal_flush
    flush = same_suit & ~straight_flush & ~royal_flush
    straight = straight_shape & ~straight_flush & ~royal_flush
    nothing = (
        (n_distinct == 5)
        & ~straight
        & ~flush
        & ~royal_flush
        & ~straight_flush
    )
    one_pair = n_distinct == 4
    # Two distinct ranks is either 4+1 or 3+2; three distinct is 3+1+1 or 2+2+1.
    full_house = (n_distinct == 2) & ~four_of_a_kind
    two_pairs = (n_distinct == 3) & ~three_of_a_kind

    # drop() returns a new frame, so the assignments below never touch `df`.
    features = df.drop(columns=suit_cols + rank_cols)
    features['RF'] = royal_flush
    features['straight flush'] = straight_flush
    features['flush'] = flush
    features['straight'] = straight
    features['Nothing in hand'] = nothing
    features['One pair'] = one_pair
    features['Four of a kind'] = four_of_a_kind
    features['Full house'] = full_house
    features['Three of a kind'] = three_of_a_kind
    features['Two pairs'] = two_pairs
    return features

In [3]:
# The models are currently trained on the raw suit/rank columns; enable the
# commented line (and drop the one after it) to train on preproc() features.
#train_df = preproc(train_data)
train_df = train_data

# Separate the label from the features and give the model a plain integer matrix.
y = train_df['hand']
x = train_df.drop(columns='hand').to_numpy().astype(int)

In [5]:
# Fit a random forest on the training matrix.
# random_state is pinned so that Restart & Run All rebuilds the same forest
# and therefore reports the same accuracy; left unset, the bootstrap and
# feature sampling differ on every run.
model = RandomForestClassifier(random_state=42)
model.fit(x, y)

# Test features: the test frame without its row identifier column.
x_test = test_data.drop('id', axis = 1)
#x_test = preproc(x_test)

In [6]:
# Predict a hand class for every test row.
x_test = np.array(x_test).astype(int)
predictions = model.predict(x_test).astype(int)
output = pd.DataFrame({'id': test_data.id, 'hand_predict': predictions})

# Put the reference answers next to the predictions. concat(axis=1) aligns on
# the row index, so this relies on both frames sharing the same index and row
# order. NOTE(review): both frames contribute an 'id' column, so 'id' appears
# twice in `output`.
output = pd.concat([output, test_data_correct], axis=1)

# count() on the matching rows gives the number of correct predictions once
# per column; dividing by the number of evaluated rows (instead of a literal
# 10000) keeps the ratio right if the test slice size ever changes.
correct = output[(output['hand'] == output['hand_predict'])].count()
print(correct / len(output))

id              0.618
hand_predict    0.618
id              0.618
hand            0.618
dtype: float64


In [7]:
# Same experiment as the random forest, with a default-configured XGBoost model.
model = XGBClassifier()
model.fit(x, y)

# Test features: the test frame without its row identifier column.
x_test = test_data.drop('id', axis = 1)
#x_test = preproc(x_test)

x_test = np.array(x_test).astype(int)
predictions = model.predict(x_test).astype(int)
output = pd.DataFrame({'id': test_data.id, 'hand_predict': predictions})

# Put the reference answers next to the predictions. concat(axis=1) aligns on
# the row index, so this relies on both frames sharing the same index and row
# order. NOTE(review): both frames contribute an 'id' column, so 'id' appears
# twice in `output`.
output = pd.concat([output, test_data_correct], axis=1)

# Divide by the number of evaluated rows rather than a literal 10000 so the
# accuracy stays correct if the test slice size ever changes.
correct = output[(output['hand'] == output['hand_predict'])].count()
print("XGBoost:", correct / len(output))





XGBoost: id              0.7298
hand_predict    0.7298
id              0.7298
hand            0.7298
dtype: float64
