In [1]:
import pandas as pd
import numpy as np

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

In [2]:
from sklearn.datasets import load_iris
iris = load_iris()

In [3]:
dir(iris)

['DESCR', 'data', 'feature_names', 'filename', 'target', 'target_names']

In [4]:
X = pd.DataFrame(iris.data, columns=iris.feature_names)

In [5]:
y = iris.target

In [6]:
from sklearn.model_selection import train_test_split

In [7]:
X_train,X_test,y_train,y_test = train_test_split(X, y, test_size=0.3)

In [8]:
len(X_train)

105

In [9]:
len(X_test)

45

### Logistic Regression

In [10]:
lr = LogisticRegression(max_iter=10,solver='liblinear')
lr.fit(X_train,y_train)
lr.score(X_test,y_test)

0.9111111111111111

### SVM

In [11]:
svm = SVC()
svm.fit(X_train, y_train)
svm.score(X_test, y_test)

0.9777777777777777

### Decision Tree

In [12]:
dt = DecisionTreeClassifier(ccp_alpha=0.0)
dt.fit(X_train,y_train)
dt.score(X_test,y_test)

0.9555555555555556

### Random Forest

In [13]:
rf = RandomForestClassifier(n_estimators=40)
rf.fit(X_train, y_train)
rf.score(X_test, y_test)

0.9555555555555556

### K-Fold Validation

In [14]:
from sklearn.model_selection import KFold
kf = KFold(n_splits=3)
kf

KFold(n_splits=3, random_state=None, shuffle=False)

In [15]:
def get_score(model,X_train,X_test,y_train,y_test):
    model.fit(X_train,y_train)
    return model.score(X_test,y_test)

In [16]:
get_score(LogisticRegression(solver='sag'),X_train,X_test,y_train,y_test)



0.9555555555555556

In [17]:
get_score(SVC(),X_train,X_test,y_train,y_test)

0.9777777777777777

### K-Fold Cross Validation

In [18]:
from sklearn.model_selection import cross_val_score

In [19]:
lr_score = cross_val_score(LogisticRegression(solver='liblinear'),X,y,cv=3)
lr_score

array([0.96, 0.96, 0.94])

In [20]:
np.average(lr_score)

0.9533333333333333

In [21]:
svc_score = cross_val_score(SVC(),X,y,cv=3)
svc_score

array([0.96, 0.98, 0.94])

In [22]:
np.average(svc_score)

0.96

In [23]:
dtc_score = cross_val_score(DecisionTreeClassifier(),X,y,cv=3)
dtc_score

array([0.98, 0.94, 1.  ])

In [24]:
np.average(dtc_score)

0.9733333333333333

In [25]:
rfc_score = cross_val_score(RandomForestClassifier(n_estimators=20),X,y,cv=3)
rfc_score

array([0.98, 0.94, 0.96])

In [26]:
np.average(rfc_score)

0.96