## Use iris flower dataset from sklearn library and use cross_val_score against following
models to measure the performance of each. In the end figure out the model with best performance,
# 1. Logistic Regression
# 2. SVM
# 3. Decision Tree
# 4. Random Forest


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
from sklearn import datasets
iris = datasets.load_iris()

In [3]:
df = pd.DataFrame(iris.data , columns=iris.feature_names)
df['Flower'] = iris.target
df['Flower']=df['Flower'].apply(lambda x : iris.target_names[x])

In [4]:
x = df.iloc[:,0:4]
y = df.iloc[:,-1]

In [5]:
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),Flower
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [6]:
from sklearn.model_selection import train_test_split
x_train ,  x_test ,y_train ,y_test = train_test_split(x,y,test_size=0.3)

In [7]:
from sklearn import svm

In [23]:
model = svm.SVC(kernel='rbf',C=30,gamma='auto')
model.fit(x_train,y_train)
model.score(x_test,y_test)

0.8888888888888888

In [24]:
from sklearn.model_selection import cross_val_score

In [25]:
cvs = cross_val_score
cvs

<function sklearn.model_selection._validation.cross_val_score(estimator, X, y=None, *, groups=None, scoring=None, cv=None, n_jobs=None, verbose=0, fit_params=None, params=None, pre_dispatch='2*n_jobs', error_score=nan)>

In [26]:
cvs(svm.SVC(kernel='linear' , C=10, gamma='auto'),x_train, y_train,cv=5)

array([1.        , 1.        , 0.95238095, 0.95238095, 0.85714286])

In [27]:
cvs(svm.SVC(kernel='rbf' , C=10, gamma='auto'),x_train, y_train,cv=5)

array([1.        , 1.        , 0.95238095, 0.95238095, 0.85714286])

In [28]:
cvs(svm.SVC(kernel='rbf' , C=20, gamma='auto'),x_train, y_train,cv=5)

array([1.        , 1.        , 0.95238095, 0.95238095, 0.85714286])

In [32]:
kernels = ['rbf' , 'linear']
C = [1,10,20]
avg_scores = {}
for kval in kernels:
    for cval in C:
        cv_scores = cvs(svm.SVC(kernel=kval, C=cval , gamma='auto'),x_train , y_train,cv=5)
        avg_scores[kval + '_' + str(cval)] = np.average(cv_scores)
        
         
avg_scores      

{'rbf_1': np.float64(0.9428571428571428),
 'rbf_10': np.float64(0.9523809523809523),
 'rbf_20': np.float64(0.9523809523809523),
 'linear_1': np.float64(0.961904761904762),
 'linear_10': np.float64(0.9523809523809523),
 'linear_20': np.float64(0.9428571428571427)}

In [33]:
from sklearn.model_selection import GridSearchCV

In [36]:
clf = GridSearchCV(svm.SVC(gamma='auto'),{
    'C':[1,10,20],
    'kernel':['rbf','linear']
},cv=5,return_train_score=False)

clf.fit(x_train , y_train)
clf.cv_results_

{'mean_fit_time': array([0.00270195, 0.00893207, 0.00204277, 0.00376163, 0.00323091,
        0.00358143]),
 'std_fit_time': array([0.0023374 , 0.00805177, 0.00269708, 0.00463722, 0.0019299 ,
        0.003756  ]),
 'mean_score_time': array([0.01300745, 0.00470142, 0.00222802, 0.00130343, 0.00193815,
        0.00238528]),
 'std_score_time': array([0.02126774, 0.00308032, 0.00314559, 0.00260687, 0.00262005,
        0.0027784 ]),
 'param_C': masked_array(data=[1, 1, 10, 10, 20, 20],
              mask=[False, False, False, False, False, False],
        fill_value=999999),
 'param_kernel': masked_array(data=['rbf', 'linear', 'rbf', 'linear', 'rbf', 'linear'],
              mask=[False, False, False, False, False, False],
        fill_value=np.str_('?'),
             dtype=object),
 'params': [{'C': 1, 'kernel': 'rbf'},
  {'C': 1, 'kernel': 'linear'},
  {'C': 10, 'kernel': 'rbf'},
  {'C': 10, 'kernel': 'linear'},
  {'C': 20, 'kernel': 'rbf'},
  {'C': 20, 'kernel': 'linear'}],
 'split0_test_s

In [37]:
df = pd.DataFrame(clf.cv_results_)

In [38]:
df

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.002702,0.002337,0.013007,0.021268,1,rbf,"{'C': 1, 'kernel': 'rbf'}",1.0,0.952381,0.904762,0.952381,0.904762,0.942857,0.035635,5
1,0.008932,0.008052,0.004701,0.00308,1,linear,"{'C': 1, 'kernel': 'linear'}",1.0,0.952381,1.0,0.952381,0.904762,0.961905,0.035635,1
2,0.002043,0.002697,0.002228,0.003146,10,rbf,"{'C': 10, 'kernel': 'rbf'}",1.0,1.0,0.952381,0.952381,0.857143,0.952381,0.052164,2
3,0.003762,0.004637,0.001303,0.002607,10,linear,"{'C': 10, 'kernel': 'linear'}",1.0,1.0,0.952381,0.952381,0.857143,0.952381,0.052164,2
4,0.003231,0.00193,0.001938,0.00262,20,rbf,"{'C': 20, 'kernel': 'rbf'}",1.0,1.0,0.952381,0.952381,0.857143,0.952381,0.052164,2
5,0.003581,0.003756,0.002385,0.002778,20,linear,"{'C': 20, 'kernel': 'linear'}",1.0,0.952381,0.952381,0.952381,0.857143,0.942857,0.046657,6
