# Experiment Draft
## Poker Hand Data Set

[Poker Database](https://archive.ics.uci.edu/ml/datasets/Poker+Hand)


In [12]:
import pandas as pd
import urllib.request

In [3]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/poker/poker-hand-training-true.data'
df = pd.read_csv(url, nrows=1000)
df.shape

(1000, 11)

In [29]:
col_names = []
for number in range(1,6):
    for name in ['Suit_', 'Rank_']:
        col = f'{name}{number}'
        col_names.append(col)

col_names.append('class')
df.columns = col_names

## Get a testing set

In [41]:
from sklearn.model_selection import train_test_split

X = df.copy()
X.drop('class', axis=1, inplace=True)
y = df['class'].copy()

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3, random_state=42, )

## Classification models

In [46]:
from sklearn.linear_model.logistic import LogisticRegression

model = LogisticRegression()
model.fit(X_train, y_train)



LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False)

In [50]:
train_score = model.score(X_train, y_train)
test_score = model.score(X_test, y_test)

0.5228571428571429

## Decision tree

In [61]:
from sklearn.tree import DecisionTreeClassifier

In [66]:
model = DecisionTreeClassifier()
model.fit(X_train, y_train)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')

In [67]:
model.get_params()

{'class_weight': None,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': None,
 'max_leaf_nodes': None,
 'min_impurity_decrease': 0.0,
 'min_impurity_split': None,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'presort': False,
 'random_state': None,
 'splitter': 'best'}

In [63]:
train_score = model.score(X_train, y_train)
test_score = model.score(X_test, y_test)

print(train_score, test_score)

1.0 0.45


In [71]:
def trainer(model, model_name):
    model.fit(X_train, y_train)
    train_score = model.score(X_train, y_train)
    test_score = model.score(X_test, y_test)
    return {'model name': model_name,
            'train_score': train_score,
            'test_score': test_score,
            'parameters': model.get_params()
           }
    

In [72]:
model = LogisticRegression()
results = trainer(model, 'logistic regression')
results



{'model name': 'logistic regression',
 'train_score': 0.5228571428571429,
 'test_score': 0.49,
 'parameters': {'C': 1.0,
  'class_weight': None,
  'dual': False,
  'fit_intercept': True,
  'intercept_scaling': 1,
  'max_iter': 100,
  'multi_class': 'warn',
  'n_jobs': None,
  'penalty': 'l2',
  'random_state': None,
  'solver': 'warn',
  'tol': 0.0001,
  'verbose': 0,
  'warm_start': False}}

In [73]:
model = DecisionTreeClassifier()
results = trainer(model, 'decision tree')
results


{'model name': 'decision tree',
 'train_score': 1.0,
 'test_score': 0.44,
 'parameters': {'class_weight': None,
  'criterion': 'gini',
  'max_depth': None,
  'max_features': None,
  'max_leaf_nodes': None,
  'min_impurity_decrease': 0.0,
  'min_impurity_split': None,
  'min_samples_leaf': 1,
  'min_samples_split': 2,
  'min_weight_fraction_leaf': 0.0,
  'presort': False,
  'random_state': None,
  'splitter': 'best'}}