# Using multiple classification model 

In [1]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris
iris=load_iris()

In [2]:
dir(iris)

['DESCR',
 'data',
 'data_module',
 'feature_names',
 'filename',
 'frame',
 'target',
 'target_names']

In [3]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test= train_test_split(iris.data, iris.target, test_size=0.3)

In [4]:
lr=LogisticRegression(solver='liblinear',multi_class='ovr')
lr.fit(X_train, y_train)
lr.score(X_test, y_test)

0.9333333333333333

In [5]:
svm=SVC(gamma='auto')
svm.fit(X_train, y_train)
svm.score(X_test, y_test)

1.0

In [6]:
rf=RandomForestClassifier(n_estimators=10)
rf.fit(X_train, y_train)
rf.score(X_test, y_test)

1.0

In [7]:
from sklearn.model_selection import KFold
kf=KFold(n_splits=3)   #splitting the data into three folds and in each fold there will be random train&test samples.
kf

KFold(n_splits=3, random_state=None, shuffle=False)

In [8]:
for train_index, test_index in kf.split(iris.data, iris.target):     #there 150 datas in sklearn of iris flower
   print(train_index, test_index)                                    #each data contains petal, sepal width and length

[ 50  51  52  53  54  55  56  57  58  59  60  61  62  63  64  65  66  67
  68  69  70  71  72  73  74  75  76  77  78  79  80  81  82  83  84  85
  86  87  88  89  90  91  92  93  94  95  96  97  98  99 100 101 102 103
 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121
 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139
 140 141 142 143 144 145 146 147 148 149] [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49]
[  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35
  36  37  38  39  40  41  42  43  44  45  46  47  48  49 100 101 102 103
 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121
 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139
 140 141 142 143 144 145 146 147 148 149] [50 51 52 53 54 55 56 57 58 59 6

In [10]:
def get_score(model, X_train, X_test, y_train, y_test):
    model.fit(X_train, y_train)
    return model.score(X_test, y_test)
 

In [11]:
from sklearn.model_selection import StratifiedKFold
folds = StratifiedKFold(n_splits=3)        # "StratifiedKFold" ensures that each fold must have same proportionof data

scores_logistic = []
scores_svm = []
scores_rf = []

for train_index, test_index in folds.split(iris.data,iris.target):
    X_train, X_test, y_train, y_test = iris.data[train_index], iris.data[test_index],iris.target[train_index], iris.target[test_index]
    scores_logistic.append(get_score(LogisticRegression(solver='liblinear',multi_class='ovr'), X_train, X_test, y_train, y_test))  
    scores_svm.append(get_score(SVC(gamma='auto'), X_train, X_test, y_train, y_test))
    scores_rf.append(get_score(RandomForestClassifier(n_estimators=40), X_train, X_test, y_train, y_test))

In [12]:
scores_logistic

[0.96, 0.96, 0.94]

In [13]:
scores_svm

[0.98, 0.98, 0.96]

In [14]:
scores_rf

[0.98, 0.94, 0.96]

## Doing same thing using cross validation score

In [15]:
from sklearn.model_selection import cross_val_score # No loop recquired

In [16]:
l_scores=cross_val_score(LogisticRegression(), iris.data, iris.target, cv=3)
l_scores

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


array([0.98, 0.96, 0.98])

In [17]:
svm_score=cross_val_score(SVC(), iris.data, iris.target, cv=3)
svm_score

array([0.96, 0.98, 0.94])

In [18]:
rf_scores=cross_val_score(RandomForestClassifier(), iris.data, iris.target, cv=3)

In [19]:
rf_scores

array([0.98, 0.94, 0.94])

In [20]:
import numpy as np
np.average(l_scores)   #Logistic regression classifier giving the best result

0.9733333333333333

In [21]:
np.average(svm_score)

0.96

In [22]:
np.average(rf_scores)

0.9533333333333333