# Machine Learning

### K-Fold Cross-Validation

In [1]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_digits
from sklearn.model_selection import StratifiedKFold
import numpy as np

# Load the digits feature matrix (X) and target labels (y) with a single call.
# return_X_y=True makes load_digits return the (data, target) pair directly,
# so the dataset is built once instead of once per attribute access.
X, y = load_digits(return_X_y=True)

In [2]:
def get_score(model, X_train, X_test, y_train, y_test):
    """Fit ``model`` on the training split and return its score on the test split.

    Args:
        model: Object exposing ``fit(X, y)`` and ``score(X, y)``.
        X_train: Features handed to ``model.fit``.
        X_test: Features handed to ``model.score``.
        y_train: Targets handed to ``model.fit``.
        y_test: Targets handed to ``model.score``.

    Returns:
        The value returned by ``model.score(X_test, y_test)``.
    """
    model.fit(X_train, y_train)
    test_score = model.score(X_test, y_test)
    return test_score

In [3]:
# K-fold validation: split the data into several train/test partitions and
# score every candidate model on each one, so the models can be compared on
# more than a single split.
folds = StratifiedKFold(n_splits=3)

# (label, factory) pairs. Each factory is called once per fold so that every
# fold is scored with a fresh, unfitted estimator.
# NOTE: RandomForestClassifier is built without a random_state, so its scores
# are expected to vary slightly between runs.
model_factories = [
    ('Logistic Regression', lambda: LogisticRegression(solver='newton-cg')),
    ('Support Vector Classifier', SVC),
    ('Random Forest Classifier', RandomForestClassifier),
]

# One list of per-fold scores for each model label.
score = {label: [] for label, _ in model_factories}

for train_index, test_index in folds.split(X, y):
    # Same positional order as get_score expects: X_train, X_test, y_train, y_test.
    fold_data = (X[train_index], X[test_index], y[train_index], y[test_index])

    for label, make_model in model_factories:
        score[label].append(get_score(make_model(), *fold_data))

In [4]:
# Display the per-fold test scores collected for each model in the loop above.
score

{'Logistic Regression': [0.9248747913188647,
  0.9382303839732888,
  0.9232053422370617],
 'Support Vector Classifier': [0.9649415692821369,
  0.9799666110183639,
  0.9649415692821369],
 'Random Forest Classifier': [0.9348914858096828,
  0.9632721202003339,
  0.9265442404006677]}

### Easier method to do the same: `cross_val_score`

In [5]:
from sklearn.model_selection import cross_val_score

# cross_val_score performs the splitting, fitting and scoring in one call.
# No `cv` argument is passed, so the number of folds is the library default.
# Each value is a one-element list wrapping the array returned by
# cross_val_score; later code reads it back via index 0.
scores = {
    'lr': [cross_val_score(LogisticRegression(solver='newton-cg'), X, y)],
    'svm': [cross_val_score(SVC(C=100), X, y)],
    'rfc': [cross_val_score(RandomForestClassifier(n_estimators=500), X, y)],
}

In [6]:
# Report the mean cross-validation score of every model.
for model_name, wrapped_scores in scores.items():
    # The score array is the only element of the list stored under each key.
    print(f"{model_name} : {wrapped_scores[0].mean()}")

lr : 0.9143160012380068
svm : 0.9738502011761063
rfc : 0.9393624264933458
