In [4]:
import pandas as pd
import numpy as np
from sklearn import model_selection,neighbors,metrics
from scipy import stats

In [6]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
df = pd.read_csv(url, header=None)

In [7]:
k = 10
cv = model_selection.StratifiedKFold(n_splits=k)

In [9]:
data = df.values
X = data[:,:-1]
Y = data[:,-1]

In [12]:
X_train, X_test, Y_train, Y_test = model_selection.train_test_split(X,Y,test_size=.333,random_state=0)

In [16]:
fold = 0
for train, test in cv.split(X_train,Y_train):
    print("Fold %i (%i w TS, %i w VS)" % (fold, len(train), len(test)))
    fold +=1

Fold 0 (88 w TS, 12 w VS)
Fold 1 (89 w TS, 11 w VS)
Fold 2 (89 w TS, 11 w VS)
Fold 3 (89 w TS, 11 w VS)
Fold 4 (90 w TS, 10 w VS)
Fold 5 (91 w TS, 9 w VS)
Fold 6 (91 w TS, 9 w VS)
Fold 7 (91 w TS, 9 w VS)
Fold 8 (91 w TS, 9 w VS)
Fold 9 (91 w TS, 9 w VS)


In [17]:
clf = neighbors.KNeighborsClassifier()
clf.fit(X_train,Y_train)
score = clf.score(X_test,Y_test)

In [19]:
support_vector = clf.predict_proba(X_test)
prediction = np.argmax(support_vector,axis = 1)
prediction = clf.classes_[prediction]

In [20]:
confusion_matrix = metrics.confusion_matrix(Y_test, prediction)
print(confusion_matrix)
accuracy = metrics.accuracy_score(Y_test,prediction)
print(accuracy)

[[16  0  0]
 [ 0 18  1]
 [ 0  0 15]]
0.98


In [21]:
accuracies = []
for train, test in cv.split(X, Y):
    X_train, Y_train = X[train], Y[train]
    X_test, Y_test = X[test], Y[test]

    clf.fit(X_train, Y_train)
    probas = clf.predict_proba(X_test)
    prediction = np.argmax(probas,axis = 1)
    prediction = clf.classes_[prediction]
    accuracy = metrics.accuracy_score(Y_test, prediction)
    accuracies.append(accuracy)

print(accuracies)

[1.0, 0.9333333333333333, 1.0, 1.0, 0.8666666666666667, 0.9333333333333333, 0.9333333333333333, 1.0, 1.0, 1.0]


In [22]:
mean_accuracy = np.mean(accuracies)
std_accuracy = np.std(accuracies)
print("Średnia dokładność %.3f (+- %.2f)" % (mean_accuracy, std_accuracy))

Średnia dokładność 0.967 (+- 0.04)


In [23]:
clfs = [
    neighbors.KNeighborsClassifier(n_neighbors = 1),
    neighbors.KNeighborsClassifier(n_neighbors = 5)
       ]
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
df = pd.read_csv(url, header=None)
data = df.values
X = data[:,:-1]
y = data[:,-1]

In [26]:
k = 10
cv = model_selection.StratifiedKFold(n_splits=k)

results = []
for train, test in cv.split(X,y):
    X_train, y_train = X[train], y[train]
    X_test, y_test = X[test], y[test]
    result = []
    for clf in clfs:
        clf.fit(X_train, y_train)
        probas = clf.predict_proba(X_test)
        prediction = np.argmax(probas,axis = 1)
        prediction = clf.classes_[prediction]
        accuracy = metrics.accuracy_score(y_test, prediction),
        result.append(accuracy[0])
    results.append(result)
results = np.array(results)

print(results)

test_t = stats.ttest_ind(results[:,0],results[:,1])
test_w = stats.wilcoxon(results[:,0],results[:,1])

[[1.         1.        ]
 [0.93333333 0.93333333]
 [1.         1.        ]
 [0.93333333 1.        ]
 [0.86666667 0.86666667]
 [1.         0.93333333]
 [0.86666667 0.93333333]
 [1.         1.        ]
 [1.         1.        ]
 [1.         1.        ]]




In [27]:
print(np.mean(results,axis=0))
print('Test_t: ',test_t)
print('Test_w: ',test_w)

[0.96       0.96666667]
Test_t:  Ttest_indResult(statistic=-0.2873478855663521, pvalue=0.7771278487505224)
Test_w:  WilcoxonResult(statistic=2.0, pvalue=0.5637028616507731)
