In [3]:
from sklearn import datasets # iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from random import randint

import random
import os

from copy import deepcopy
from math import ceil

import numpy as np
from matplotlib import pyplot as plt

# Data loading and its structure

In [4]:
# Load the Iris dataset once; every later cell reads it through `iris`.
iris = datasets.load_iris()
print("Dataset loaded into variable:\t", "iris")
# List the fields the loaded object exposes.
print("\nIris keys:\t\t\t", list(iris.keys()) )

Dataset loaded into variable:	 iris

Iris keys:			 ['data', 'target', 'target_names', 'DESCR', 'feature_names', 'filename']


In [5]:
# Data info structure
# Show the first four entries of each field to illustrate how the dataset is laid out.
print("Iris data example:\n\n", iris.data[0:4], " \n ...")
print("\nIris target example:\t\t", iris.target[0:4], "...")
# The same [0:4] window is applied here; the captured output lists three names.
print("\nIris target_names example:\t", iris.target_names[0:4])

Iris data example:

 [[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]]  
 ...

Iris target example:		 [0 0 0 0] ...

Iris target_names example:	 ['setosa' 'versicolor' 'virginica']


# Environment definition

In [6]:
# Name the module-level globals that the environment helpers below manage.
print('Our environment:\t\t X, Y, l')
def start_env():
    """Reset the working environment to the full Iris dataset.

    Rebinds the module-level globals:
      X -- feature rows (copy of ``iris.data``)
      Y -- labels (copy of ``iris.target``)
      l -- number of samples in X
    """
    global X, Y, l
    # Work on copies: shuffle_XY assigns through X[:] / Y[:], and without a
    # copy those writes would land in iris.data / iris.target, so a later
    # start_env() would hand back already-shuffled data.
    X = iris.data.copy()
    Y = iris.target.copy()
    l = len(X)
    
def start_test_env(n):
    """Shrink the environment to its first ``n`` samples for quick manual checks.

    Rebinds the globals ``l`` (to ``n``) and ``X`` / ``Y`` (to their first
    ``n`` entries), then prints a short notice.
    """
    global X, Y, l
    l = n
    head = slice(None, n)
    X, Y = X[head], Y[head]
    print('Starting test environment:\t\t X, Y,', n)
    
def end_test_env():
    """Leave the reduced test environment: re-run start_env() and print a notice."""
    start_env()
    print('Ending test environment...')
    
def testify(test, params, what, exp=''):
    print(test, ':\t\t\t\t', params, '\t', what, '\t', '->', exp)
    
# Initialise X, Y and l from the full dataset.
start_env()

Our environment:		 X, Y, l


# List of models generation

In [7]:
# Hyper-parameter grid: every neighbour count is combined with every metric.
knn_metrics = ['euclidean', 'manhattan', 'chebyshev']
knn_neightbors = range(1, 10)

# Describe each combination first, then build the matching estimator from its
# description so both lists are guaranteed to share the same order.
models_info = [['KNeighborsClassifier', k, metric] for k in knn_neightbors for metric in knn_metrics]
models = [KNeighborsClassifier(n_neighbors=k, metric=metric) for _name, k, metric in models_info]

### Table of our models list

In [8]:
# Column legend for the nested list displayed as the cell result below.
print('\tclassifier   neightbors  metric\n')
models_info

	classifier   neightbors  metric



[['KNeighborsClassifier', 1, 'euclidean'],
 ['KNeighborsClassifier', 1, 'manhattan'],
 ['KNeighborsClassifier', 1, 'chebyshev'],
 ['KNeighborsClassifier', 2, 'euclidean'],
 ['KNeighborsClassifier', 2, 'manhattan'],
 ['KNeighborsClassifier', 2, 'chebyshev'],
 ['KNeighborsClassifier', 3, 'euclidean'],
 ['KNeighborsClassifier', 3, 'manhattan'],
 ['KNeighborsClassifier', 3, 'chebyshev'],
 ['KNeighborsClassifier', 4, 'euclidean'],
 ['KNeighborsClassifier', 4, 'manhattan'],
 ['KNeighborsClassifier', 4, 'chebyshev'],
 ['KNeighborsClassifier', 5, 'euclidean'],
 ['KNeighborsClassifier', 5, 'manhattan'],
 ['KNeighborsClassifier', 5, 'chebyshev'],
 ['KNeighborsClassifier', 6, 'euclidean'],
 ['KNeighborsClassifier', 6, 'manhattan'],
 ['KNeighborsClassifier', 6, 'chebyshev'],
 ['KNeighborsClassifier', 7, 'euclidean'],
 ['KNeighborsClassifier', 7, 'manhattan'],
 ['KNeighborsClassifier', 7, 'chebyshev'],
 ['KNeighborsClassifier', 8, 'euclidean'],
 ['KNeighborsClassifier', 8, 'manhattan'],
 ['KNeighbo

### Example of learning and prediction

In [9]:
# Fit the first model on the current X / Y and classify one sample.
x = [[5.1, 3.5, 1.4, 0.2]]
print('a( %s ) = ...' % x[0])
models[0].fit(X, Y).predict(x)

a( [5.1, 3.5, 1.4, 0.2] ) = ...


array([0])

### For this purpose we need to write a fit-predict stream

In [10]:
def fit_predict(X, Y, x):
    """Fit every entry of the global ``models`` list on (X, Y) and predict ``x``.

    Returns a list holding one prediction result per model, in ``models``
    order. The models themselves are fitted in place.
    """
    predictions = []
    for estimator in models:
        fitted = estimator.fit(X, Y)
        predictions.append(fitted.predict(x))
    return predictions

### We also need to validate the models
That is, check that every model is able to learn

In [11]:
# Model validation
# x*3 repeats the sample three times; the message only compares how many
# results fit_predict returned with how many models exist.
x = [[5.1, 3.5, 1.4, 0.2]]
print('Result: we tested %d models of %d' % (len(fit_predict(X, Y, x*3)),  len(models)) )

Result: we tested 27 models of 27


# Making a list of criteria

### Define a function that splits the data into train and test parts for validation

In [12]:
def three_d_split(l, r):
    """
    Cut the global X / Y into three consecutive parts: [0, l) [l, r) [r, n).

    Returns a tuple of three dicts, each with the keys 'X' and 'Y' holding the
    matching slices. Note that the parameter ``l`` shadows the global of the
    same name inside this function.
    """
    head, middle, tail = slice(0, l), slice(l, r), slice(r, None)
    return tuple({'X': X[part], 'Y': Y[part]} for part in (head, middle, tail))

def combine(a, c):
    """Join two parts produced by three_d_split into a single part.

    ``a`` and ``c`` are dicts with 'X' and 'Y' entries; the result is a new
    dict whose 'X' and 'Y' are the entries of ``a`` followed by those of ``c``
    (concatenated along the first axis).
    """
    return {key: np.concatenate((a[key], c[key]), axis=0) for key in ('X', 'Y')}

### Testing of 3d split

In [13]:
# Testing
# On a 10-sample environment, splitting at 5 and 7 should give parts of 5, 2 and 3 rows.
start_test_env(10)
a, b, c = three_d_split(5, 7)
# Keep only the feature parts; only their lengths are compared.
a, b, c = a['X'], b['X'], c['X']
print("three_d_split(5, 7):\t\t\t", len(a), len(b), len(c), (5, 2, 3))
end_test_env()

Starting test environment:		 X, Y, 10
three_d_split(5, 7):			 5 2 3 (5, 2, 3)
Ending test environment...


### Shuffle implementation

In [14]:
# -- Pasted
def __shuffle_int(n):
    """Return the integers 0 .. n-1 in uniformly random order.

    Implemented as a Fisher-Yates shuffle: every permutation is equally
    likely. (A fixed number of swaps between two random positions, as used
    before, leaves some positions untouched and is therefore biased.)
    """
    lst = list(range(n))
    # Walk from the back; position i swaps with a uniformly chosen position <= i.
    for i in range(n - 1, 0, -1):
        j = randint(0, i)
        lst[i], lst[j] = lst[j], lst[i]
    return lst

def my_shuffle(lst):
    """Return a new list with the items of ``lst`` in random order.

    ``lst`` itself is never modified and never returned, including for empty
    and single-item input, so callers can always mutate the result safely.
    """
    indexes = __shuffle_int(len(lst))
    return [lst[i] for i in indexes]

    
def shuffle_XY():
    """Shuffle the global X and Y with one shared random permutation.

    The globals are rebound to freshly indexed arrays instead of being
    overwritten in place. This keeps the arrays X / Y were sliced from intact
    and leaves slices of the old X / Y handed out earlier (three_d_split
    returns plain slices) unchanged.

    Returns the new (X, Y) pair; rows and labels stay aligned.
    """
    global X, Y
    # Shuffle row indices once and apply them to both arrays.
    order = my_shuffle(list(range(len(X))))
    X, Y = X[order], Y[order]
    return X, Y

def q_from_size(size):
    """Return how many parts of ``size`` samples are needed to cover all ``l`` samples (rounded up)."""
    folds = l / size
    return ceil(folds)

### Generator definition

In [15]:
# Bosses
def generator_qfold(q):
    """Yield ``q`` (train, test) splits of the global X / Y.

    The ``l`` samples are cut into ``q`` consecutive folds of ``l // q``
    samples; the last fold also takes the remainder. Each fold serves as the
    test part exactly once, and the rest of the data is the train part. Both
    parts are dicts with 'X' and 'Y' entries.

    With ``q == 1`` the single split has an empty train part.

    Raises ValueError (on first iteration) when ``q`` is outside [1, l],
    because folds would then be empty.
    """
    if q < 1 or q > l:
        raise ValueError('q must be between 1 and the number of samples (%d), got %r' % (l, q))
    size = l // q
    for fold in range(q):
        start = fold * size
        # The last fold runs to the end so that no sample is left out.
        stop = l if fold == q - 1 else start + size
        before, test, after = three_d_split(start, stop)
        yield combine(before, after), test

def generator_bootstrap(t):
    """Yield ``t`` random (train, test) splits.

    Each round draws a window length in [1, l-1] and a window start, shuffles
    the global data, and uses the window as the test part and everything
    outside it as the train part. No sample is drawn twice within a split.
    """
    for _round in range(t):
        window = randint(1, l-1)
        start = randint(0, l-window)

        shuffle_XY()
        before, held_out, after = three_d_split(start, start + window)
        yield combine(before, after), held_out
            
# Workers ;)

def generator_ccv():
    """Chain the q-fold splits for every fold count q in [2, l-1].

    q = 1 is skipped on purpose: a single fold cannot provide both a
    non-empty train part and a non-empty test part.
    """
    for q in range(2, l):
        yield from generator_qfold(q)
            
def generator_loo():
    """Return the q-fold generator with one fold per sample (q = l), i.e. the 'LOO' scheme."""
    return generator_qfold(l)
        
def generator_txq(t, q):
    """Repeat q-fold splitting ``t`` times, reshuffling the global data before each repetition."""
    for _repeat in range(t):
        shuffle_XY()
        yield from generator_qfold(q)
        
def generator_randomcv(t):
    """Yield q-fold splits for ``t`` randomly drawn fold sizes.

    Each round draws a fold size, converts it to a fold count ``q = l // size``
    and yields every split of generator_qfold(q).

    The fold size is capped at ``l // 2`` so that q is at least 2 whenever
    l >= 2; larger sizes would give q = 1, whose only split is degenerate.
    """
    for _t in range(t):
        size = randint(1, max(1, l // 2))
        q = l // size
        yield from generator_qfold(q)


### Testing

In [16]:
# Testing
# Same three_d_split check as before, this time reported through testify().
start_test_env(10)
# q and t are module-level settings read by generators_reinit() in a later cell.
q = 2
t = 4
a, b, c = three_d_split(5, 7)
# Keep only the feature parts; only their lengths are compared.
a, b, c = a['X'], b['X'], c['X']
testify('three_d_split', (5, 7), (len(a), len(b), len(c)), (5, 2, 3) )
end_test_env()

Starting test environment:		 X, Y, 10
three_d_split :				 (5, 7) 	 (5, 2, 3) 	 -> (5, 2, 3)
Ending test environment...


In [17]:
# X =np.arange(500).reshape(100, 5)
# print(X)
# Run the generator checks on a 10-sample environment with 3 repetitions and 4 folds.
start_test_env(10)
t=3
q=4

def generators_reinit():
    """Rebuild the global ``generators`` list and its ``generators_info`` labels.

    Generators can be consumed only once, so this creates a fresh one of each
    kind from the current global ``q`` and ``t``. The two lists are aligned by
    position.
    """
    global generators, generators_info
    generators = [
        generator_qfold(q),
        generator_ccv(),
        generator_bootstrap(t),
        generator_loo(),
        generator_txq(t, q),
        generator_randomcv(t),
    ]
    generators_info = [
        ['qfold'],
        ['CCV'],
        ['bootstrap'],
        ['LOO'],
        ['txqfold'],
        ['random CV'],
    ]
    
# Module-level handles to one generator of each kind; the testify() calls
# further down in this cell consume them.
_generator_qfold = generator_qfold(q)
_generator_ccv = generator_ccv()
_generator_bootstrap = generator_bootstrap(t)
_generator_loo = generator_loo()
_generator_txq = generator_txq(t, q)
_generator_randomcv = generator_randomcv(t)

# Also fill the `generators` / `generators_info` globals.
generators_reinit()

def print_first(g, n):
    """Print the train and test feature blocks of the first ``n`` splits from ``g``.

    ``g`` yields (train, test) pairs of dicts with an 'X' entry. Printing
    stops quietly when ``g`` runs out before ``n`` splits were shown, and no
    split beyond the n-th is consumed from ``g``.
    """
    # range() comes first in zip so that nothing extra is pulled from ``g``
    # once n splits have been printed.
    for _i, split in zip(range(n), g):
        print(split[0]['X'])
        print(split[1]['X'], '\n\n')

# print_first(generator_txq(t, q), 12)

# Count how many splits each generator produces and print it next to the expected count.
# NOTE(review): generator_ccv chains generator_qfold(q) for q in range(1, l) and
# generator_qfold(q) yields q splits, which adds up to l*(l-1)/2 -- the formula
# passed below does not match that; confirm which one is intended.
testify('gen_ccv', (), len([x for x in _generator_ccv]), 2**(l-2)-2*l)
testify('gen_qfold', (), len([x for x in _generator_qfold]), q)
testify('gen_bootstrap', (), len([x for x in _generator_bootstrap]), t)
testify('gen_loo', (), len([x for x in _generator_loo]), l)
testify('gen_txq', (), len([x for x in _generator_txq]), t*q)
# No expectation is passed here, so testify falls back to its blank default.
testify('gen_randomcv', (), len([x for x in _generator_randomcv]), )

end_test_env()

Starting test environment:		 X, Y, 10


NameError: name '_generator_ccv' is not defined

# Learning

In [None]:
class Method:
    """Scores one model over a stream of (train, test) splits.

    model     -- object with ``fit(X, Y)`` returning a fitted object that has
                 ``predict(x)``; it is deep-copied for every split, so the
                 instance passed in is never fitted itself.
    generator -- iterable of (train, test) pairs, each a dict with 'X' and
                 'Y' entries; may be omitted here and supplied to
                 fit_predict_count() instead.
    """

    def __init__(self, model, generator=''):
        self.model = model
        # Always define the attribute so that a missing generator is reported
        # clearly by fit_predict() instead of surfacing as AttributeError.
        self.generator = generator if generator else None

    def fit_predict(self):
        """Yield ``(predicted, expected)`` label sequences, one pair per split.

        Raises ValueError (on first iteration) when no generator was supplied.
        """
        if self.generator is None:
            raise ValueError('no split generator was supplied')
        for generated in self.generator:
            train, test = generated[0], generated[1]
            predicted = self.__fit_predict(self.model, train['X'], train['Y'], test['X'])
            yield predicted, test['Y']

    def fit_predict_count(self, generator=''):
        """Return the mean accuracy over all splits.

        For every split the share of correctly predicted labels is computed;
        the result is the average of those shares. Splits with an empty test
        part carry no information and are skipped.

        generator -- optional replacement for the generator given at
                     construction time.

        Raises ValueError when no split with a non-empty test part was seen.
        """
        if generator:
            self.generator = generator
        total = 0.0
        folds = 0
        for predicted, expected in self.fit_predict():
            pairs = list(zip(predicted, expected))
            if not pairs:
                continue
            hits = sum(1 for got, want in pairs if got == want)
            total += hits / len(pairs)
            folds += 1
        if folds == 0:
            raise ValueError('the generator produced no split with a non-empty test part')
        # Average of the per-split accuracies (not just the last split's value).
        return total / folds

    @staticmethod
    def __fit_predict(model, X, Y, x):
        """Fit a private copy of ``model`` on (X, Y) and return its prediction for ``x``."""
        _model = deepcopy(model)
        return _model.fit(X, Y).predict(x)

In [None]:
# Score every model with every validation scheme on the full dataset.
start_env()

# `data` is a flat list: every five consecutive entries form one record
# (classifier name, neighbours, metric, validation scheme, mean accuracy).
data = []
for mi, m in enumerate(models):
    # Generators can be consumed only once, so build a fresh set per model.
    generators_reinit()
    for gi, g in enumerate(generators):
        method = Method(m, g)
        # Label the score with the scheme that produced it (index gi, not mi).
        data += models_info[mi] + generators_info[gi] + [method.fit_predict_count()]


# Resets X, Y and l to the full dataset after the shuffling generators ran.
end_test_env()