In [1]:
import pandas as pd

df = pd.read_csv("./iris.csv")
df.head() # show head part of dataset

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
0,5.1,3.5,1.4,0.2,Setosa
1,4.9,3.0,1.4,0.2,Setosa
2,4.7,3.2,1.3,0.2,Setosa
3,4.6,3.1,1.5,0.2,Setosa
4,5.0,3.6,1.4,0.2,Setosa


In [2]:
data = df.to_numpy()

In [3]:
from sklearn.model_selection import train_test_split

train, test = train_test_split(data, test_size=0.4)
print(train.shape)
print(test.shape)


(90, 5)
(60, 5)


In [4]:
# modify the datasets
train_x = train[:, :4]
train_y = train[:, 4]

test_x = test[:, :4]
test_y = test[:, 4]

In [5]:
# implement some models
# KNN
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(n_neighbors=7)

knn.fit(train_x, train_y) # training
print('Completed')

Completed


In [6]:
pred = knn.predict(test_x) # store the prediction in pred

In [7]:
for i in range(test.shape[0]):
    if pred[i] != test_y[i]:
        print(pred[i], test_y[i])

Virginica Versicolor
Virginica Versicolor


In [8]:
# evaluate the results
from sklearn.metrics import accuracy_score, f1_score, precision_score

acc = accuracy_score(y_pred=pred, y_true=test_y)
f1 = f1_score(y_pred=pred, y_true=test_y, average='weighted') # average for multi-class
pres = precision_score(y_pred=pred, y_true=test_y, average='weighted')

print("Accuracy = %.5f, F1 = %.5f, Precision = %.5f" % (acc, f1, pres))

Accuracy = 0.96667, F1 = 0.96698, Precision = 0.97037


In [9]:
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

In [10]:
from sklearn.ensemble import BaggingClassifier
from sklearn.linear_model import LogisticRegression

bg = BaggingClassifier(n_estimators=10) # add base_estimator
bg.fit(train_x, train_y)

pred = bg.predict(test_x)

acc = accuracy_score(y_pred=pred, y_true=test_y)
f1 = f1_score(y_pred=pred, y_true=test_y, average='weighted') # average for multi-class
pres = precision_score(y_pred=pred, y_true=test_y, average='weighted')

print("Accuracy = %.5f, F1 = %.5f, Precision = %.5f" % (acc, f1, pres))

Accuracy = 0.95000, F1 = 0.95027, Precision = 0.95129


In [11]:
from sklearn.ensemble import AdaBoostClassifier

ab = AdaBoostClassifier(n_estimators=20, learning_rate=0.2)
ab.fit(train_x, train_y)

pred = ab.predict(test_x)

acc = accuracy_score(y_pred=pred, y_true=test_y)
f1 = f1_score(y_pred=pred, y_true=test_y, average='weighted') # average for multi-class
pres = precision_score(y_pred=pred, y_true=test_y, average='weighted')

print("Accuracy = %.5f, F1 = %.5f, Precision = %.5f" % (acc, f1, pres))

Accuracy = 0.90000, F1 = 0.89643, Precision = 0.90556


In [12]:
from sklearn.ensemble import StackingClassifier

estimators = [
    ('knn', KNeighborsClassifier(n_neighbors=3)),
    ('dt', DecisionTreeClassifier()),
    ('svm', SVC())
]
st = StackingClassifier(estimators)
st.fit(train_x, train_y)

pred = st.predict(test_x)

acc = accuracy_score(y_pred=pred, y_true=test_y)
f1 = f1_score(y_pred=pred, y_true=test_y, average='weighted') # average for multi-class
pres = precision_score(y_pred=pred, y_true=test_y, average='weighted')

print("Accuracy = %.5f, F1 = %.5f, Precision = %.5f" % (acc, f1, pres))

Accuracy = 0.95000, F1 = 0.95027, Precision = 0.95129
