In [1]:
# Update sklearn to prevent version mismatches
!pip install sklearn --upgrade

Requirement already up-to-date: sklearn in c:\users\brett\anaconda3\lib\site-packages (0.0)


In [2]:
# install joblib. This will be used to save your model. 
# Restart your kernel after installing 
!pip install joblib



In [4]:
import pandas as pd

# Read the CSV and Perform Basic Data Cleaning

In [5]:
df = pd.read_csv("../../data/chess.csv")
df.head()

Unnamed: 0,id,format,victory_status,book_moves,opening_name,winner,turns,white_id,white_rating,black_id,black_rating
0,l1NXvwaE,Blitz,resign,4,Nimzowitsch Defense: Kennedy Variation,black,16,a-00,1322,skinnerua,1261
1,mIICvQHh,Blitz,mate,3,King's Pawn Game: Leonardis Variation,white,61,ischia,1496,a-00,1500
2,kWKvrqYL,Classical,mate,3,Queen's Pawn Game: Zukertort Variation,white,61,daniamurashov,1439,adivanov2009,1454
3,9tXo1AUZ,Classical,mate,5,Philidor Defense,white,95,nik221107,1523,adivanov2009,1469
4,qwU9rasv,Rapid,resign,10,Blackmar-Diemer Gambit: Pietrowsky Defense,white,33,capa_jr,1520,daniel_likes_chess,1423


# Select your features (columns)

In [6]:
# Set features. This will also be used as your x values.
X = df.drop(columns=['id', 'winner'])
X.head()

Unnamed: 0,format,victory_status,book_moves,opening_name,turns,white_id,white_rating,black_id,black_rating
0,Blitz,resign,4,Nimzowitsch Defense: Kennedy Variation,16,a-00,1322,skinnerua,1261
1,Blitz,mate,3,King's Pawn Game: Leonardis Variation,61,ischia,1496,a-00,1500
2,Classical,mate,3,Queen's Pawn Game: Zukertort Variation,61,daniamurashov,1439,adivanov2009,1454
3,Classical,mate,5,Philidor Defense,95,nik221107,1523,adivanov2009,1469
4,Rapid,resign,10,Blackmar-Diemer Gambit: Pietrowsky Defense,33,capa_jr,1520,daniel_likes_chess,1423


In [7]:
# label encode categorical data
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()

from tensorflow.keras.utils import to_categorical

In [8]:
label_encoder.fit(df['format'])
label_encoder.classes_
X['format'] = to_categorical(label_encoder.transform(df['format']))#.astype('int')

In [9]:
label_encoder.fit(df['victory_status'])
label_encoder.classes_
X['victory_status'] = to_categorical(label_encoder.transform(df['victory_status']))#.astype('int')

In [10]:
label_encoder.fit(df['opening_name'])
label_encoder.classes_
X['opening_name'] = to_categorical(label_encoder.transform(df['opening_name']))#.astype('int')

In [11]:
label_encoder.fit(df['white_id'])
label_encoder.classes_
X['white_id'] = to_categorical(label_encoder.transform(df['white_id']))#.astype('int')

In [12]:
label_encoder.fit(df['black_id'])
label_encoder.classes_
X['black_id'] = to_categorical(label_encoder.transform(df['black_id']))#.astype('int')

In [13]:
X.head()

Unnamed: 0,format,victory_status,book_moves,opening_name,turns,white_id,white_rating,black_id,black_rating
0,1.0,0.0,4,0.0,16,0.0,1322,0.0,1261
1,1.0,0.0,3,0.0,61,0.0,1496,0.0,1500
2,0.0,0.0,3,0.0,61,0.0,1439,0.0,1454
3,0.0,0.0,5,0.0,95,0.0,1523,0.0,1469
4,0.0,0.0,10,0.0,33,0.0,1520,0.0,1423


In [14]:
label_encoder.fit(df['winner'])
label_encoder.classes_

array(['black', 'draw', 'white'], dtype=object)

In [15]:
y_values = label_encoder.transform(df['winner'])#.astype('int')
y_values = y_values.reshape(-1, 1)

y = to_categorical(y_values)
type(y)

numpy.ndarray

# Create a Train Test Split


In [16]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [17]:
X_train.head()

Unnamed: 0,format,victory_status,book_moves,opening_name,turns,white_id,white_rating,black_id,black_rating
7971,0.0,0.0,1,0.0,69,0.0,1695,0.0,1702
12751,0.0,0.0,6,0.0,93,0.0,1792,0.0,1688
14657,0.0,0.0,1,0.0,85,0.0,1486,0.0,1350
2347,0.0,0.0,1,0.0,25,0.0,1848,0.0,1186
8856,0.0,0.0,3,0.0,86,0.0,1576,0.0,1638


# Pre-processing

Scale the data using the MinMaxScaler and perform some feature selection

In [18]:
# Scale your data
from sklearn.preprocessing import MinMaxScaler
X_minmax = MinMaxScaler().fit(X_train)
y_minmax = MinMaxScaler().fit(y_train)

X_train_scaled = X_minmax.transform(X_train)
X_test_scaled = X_minmax.transform(X_test)

print(f"Y Shape: {y_train.shape}")
print(f"X Shape: {X_train_scaled.shape}")

Y Shape: (12116, 3)
X Shape: (12116, 9)


# Train the Model



In [19]:
from sklearn.svm import SVC 
model = SVC(kernel='linear')
model

SVC(kernel='linear')

In [20]:
SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

SVC(decision_function_shape=None, gamma='auto', kernel='linear')

# Hyperparameter Tuning

Use `GridSearchCV` to tune the model's parameters

In [21]:
from sklearn.model_selection import GridSearchCV
param_grid = {'C': [1, 5, 10, 50],
              'gamma': [0.0001, 0.0005, 0.001, 0.005]}
grid = GridSearchCV(model, param_grid, verbose=3)

In [23]:
grid.fit(X_train_scaled, y_train)

Fitting 5 folds for each of 16 candidates, totalling 80 fits
[CV 1/5] END ..............................C=1, gamma=0.0001; total time=   0.0s
[CV 2/5] END ..............................C=1, gamma=0.0001; total time=   0.0s
[CV 3/5] END ..............................C=1, gamma=0.0001; total time=   0.0s
[CV 4/5] END ..............................C=1, gamma=0.0001; total time=   0.0s
[CV 5/5] END ..............................C=1, gamma=0.0001; total time=   0.0s
[CV 1/5] END ..............................C=1, gamma=0.0005; total time=   0.0s
[CV 2/5] END ..............................C=1, gamma=0.0005; total time=   0.0s
[CV 3/5] END ..............................C=1, gamma=0.0005; total time=   0.0s
[CV 4/5] END ..............................C=1, gamma=0.0005; total time=   0.0s
[CV 5/5] END ..............................C=1, gamma=0.0005; total time=   0.0s
[CV 1/5] END ...............................C=1, gamma=0.001; total time=   0.0s
[CV 2/5] END ...............................C=1,

Traceback (most recent call last):
  File "C:\Users\brett\anaconda3\envs\PythonAdv\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\brett\anaconda3\envs\PythonAdv\lib\site-packages\sklearn\svm\_base.py", line 171, in fit
    accept_large_sparse=False)
  File "C:\Users\brett\anaconda3\envs\PythonAdv\lib\site-packages\sklearn\base.py", line 433, in _validate_data
    X, y = check_X_y(X, y, **check_params)
  File "C:\Users\brett\anaconda3\envs\PythonAdv\lib\site-packages\sklearn\utils\validation.py", line 63, in inner_f
    return f(*args, **kwargs)
  File "C:\Users\brett\anaconda3\envs\PythonAdv\lib\site-packages\sklearn\utils\validation.py", line 826, in check_X_y
    y = column_or_1d(y, warn=True)
  File "C:\Users\brett\anaconda3\envs\PythonAdv\lib\site-packages\sklearn\utils\validation.py", line 63, in inner_f
    return f(*args, **kwargs)
  File "C:\Users\brett\anaconda3\envs\Pyth

ValueError: y should be a 1d array, got an array of shape (12116, 3) instead.

In [13]:
print(grid.best_params_)
print(grid.best_score_)

{'C': 1, 'gamma': 0.0001}
0.5056265872986996


# Save the Model

In [None]:
# save your model by updating "your_name" with your name
# and "your_model" with your model variable
# be sure to turn this in to BCS
# if joblib fails to import, try running the command to install in terminal/git-bash

# import joblib
# filename = 'LogisticRegression.sav'
# joblib.dump(grid, filename)