# Classification Algorithm (Cross Validation)

In [10]:
from sklearn import datasets
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

iris = datasets.load_iris()
irisDF = pd.DataFrame(iris.data, columns = iris.feature_names)
irisDF['target'] = iris.target
print(irisDF.shape)
print(irisDF.head(5))

(150, 5)
   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \
0                5.1               3.5                1.4               0.2   
1                4.9               3.0                1.4               0.2   
2                4.7               3.2                1.3               0.2   
3                4.6               3.1                1.5               0.2   
4                5.0               3.6                1.4               0.2   

   target  
0       0  
1       0  
2       0  
3       0  
4       0  


In [11]:
from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(irisDF.drop('target', axis=1), irisDF['target'],
                                                   test_size = 0.2, random_state = 1)


print(X_train.shape, Y_train.shape)
print(X_test.shape, Y_train.shape)

(120, 4) (120,)
(30, 4) (120,)


# Support Vector Classification Approach

In [26]:
from sklearn.svm import SVC


folds = KFold(n_splits = 4, shuffle = True, random_state = 1)


model = SVR()
model.fit(X_train, Y_train)

crossValScore = cross_val_score(model, X_train, Y_train, cv = folds, scoring = 'neg_mean_squared_error')
print('Cross Validation Score with MSE in Training Set: {}'.format(crossValScore.mean()))
crossValScore = cross_val_score(model, X_train, Y_train, cv = folds, scoring = 'r2')
print('Cross Validation Score with R^2 in Training Set: {}'.format(crossValScore.mean()))
print()

crossValScore = cross_val_score(model, X_test, Y_test, cv = folds, scoring = 'neg_mean_squared_error')
print('Cross Validation Score with MSE in Test Set: {}'.format(crossValScore.mean()))
crossValScore = cross_val_score(model, X_test, Y_test, cv = folds, scoring = 'r2')
print('Cross Validation Score with R^2 in Test Set: {}'.format(crossValScore.mean()))
print()

Cross Validation Score with MSE in Training Set: -0.040630256664548815
Cross Validation Score with R^2 in Training Set: 0.9397837060155332

Cross Validation Score with MSE in Test Set: -0.05514936704140071
Cross Validation Score with R^2 in Test Set: 0.876211644516768



# K Nearest Neighbor

In [23]:
from sklearn.neighbors import KNeighborsClassifier

model = KNeighborsClassifier()
model.fit(X_train, Y_train)

crossValScore = cross_val_score(model, X_train, Y_train, cv = folds, scoring = 'neg_mean_squared_error')
print('Cross Validation Score with MSE in Training Set: {}'.format(crossValScore.mean()))
crossValScore = cross_val_score(model, X_train, Y_train, cv = folds, scoring = 'r2')
print('Cross Validation Score with R^2 in Training Set: {}'.format(crossValScore.mean()))
print()

crossValScore = cross_val_score(model, X_test, Y_test, cv = folds, scoring = 'neg_mean_squared_error')
print('Cross Validation Score with MSE in Test Set: {}'.format(crossValScore.mean()))
crossValScore = cross_val_score(model, X_test, Y_test, cv = folds, scoring = 'r2')
print('Cross Validation Score with R^2 in Test Set: {}'.format(crossValScore.mean()))
print()

Cross Validation Score with MSE in Training Set: -0.058333333333333334
Cross Validation Score with R^2 in Training Set: 0.91322507836753

Cross Validation Score with MSE in Test Set: -0.03571428571428571
Cross Validation Score with R^2 in Test Set: 0.9485294117647058



# Decision Tree

In [24]:
from sklearn.tree import DecisionTreeClassifier

model = DecisionTreeRegressor()
model.fit(X_train, Y_train)

crossValScore = cross_val_score(model, X_train, Y_train, cv = folds, scoring = 'neg_mean_squared_error')
print('Cross Validation Score with MSE in Training Set: {}'.format(crossValScore.mean()))
crossValScore = cross_val_score(model, X_train, Y_train, cv = folds, scoring = 'r2')
print('Cross Validation Score with R^2 in Training Set: {}'.format(crossValScore.mean()))
print()

crossValScore = cross_val_score(model, X_test, Y_test, cv = folds, scoring = 'neg_mean_squared_error')
print('Cross Validation Score with MSE in Test Set: {}'.format(crossValScore.mean()))
crossValScore = cross_val_score(model, X_test, Y_test, cv = folds, scoring = 'r2')
print('Cross Validation Score with R^2 in Test Set: {}'.format(crossValScore.mean()))
print()

Cross Validation Score with MSE in Training Set: -0.06666666666666667
Cross Validation Score with R^2 in Training Set: 0.9498516279327749

Cross Validation Score with MSE in Test Set: -0.03571428571428571
Cross Validation Score with R^2 in Test Set: 0.9485294117647058



# Random Forest

In [19]:
from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier()
model.fit(X_train, Y_train)

crossValScore = cross_val_score(model, X_train, Y_train, cv = folds, scoring = 'neg_mean_squared_error')
print('Cross Validation Score with MSE in Training Set: {}'.format(crossValScore.mean()))
crossValScore = cross_val_score(model, X_train, Y_train, cv = folds, scoring = 'r2')
print('Cross Validation Score with R^2 in Training Set: {}'.format(crossValScore.mean()))
print()

crossValScore = cross_val_score(model, X_test, Y_test, cv = folds, scoring = 'neg_mean_squared_error')
print('Cross Validation Score with MSE in Test Set: {}'.format(crossValScore.mean()))
crossValScore = cross_val_score(model, X_test, Y_test, cv = folds, scoring = 'r2')
print('Cross Validation Score with R^2 in Test Set: {}'.format(crossValScore.mean()))
print()

Cross Validation Score with MSE in Training Set: -0.04166666666666667
Cross Validation Score with R^2 in Training Set: 0.9259961693875534

Cross Validation Score with MSE in Test Set: -0.03571428571428571
Cross Validation Score with R^2 in Test Set: 0.8972473604826545



# XGBoost [not implemented]

# Perceptron

In [22]:
from sklearn.linear_model import Perceptron

model = Perceptron()
model.fit(X_train, Y_train)

from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier()
model.fit(X_train, Y_train)

crossValScore = cross_val_score(model, X_train, Y_train, cv = folds, scoring = 'neg_mean_squared_error')
print('Cross Validation Score with MSE in Training Set: {}'.format(crossValScore.mean()))
crossValScore = cross_val_score(model, X_train, Y_train, cv = folds, scoring = 'r2')
print('Cross Validation Score with R^2 in Training Set: {}'.format(crossValScore.mean()))
print()

crossValScore = cross_val_score(model, X_test, Y_test, cv = folds, scoring = 'neg_mean_squared_error')
print('Cross Validation Score with MSE in Test Set: {}'.format(crossValScore.mean()))
crossValScore = cross_val_score(model, X_test, Y_test, cv = folds, scoring = 'r2')
print('Cross Validation Score with R^2 in Test Set: {}'.format(crossValScore.mean()))
print()

Cross Validation Score with MSE in Training Set: -0.04166666666666667
Cross Validation Score with R^2 in Training Set: 0.9134122767701036

Cross Validation Score with MSE in Test Set: -0.06696428571428571
Cross Validation Score with R^2 in Test Set: 0.8972473604826545

