In [1]:
from sklearn import model_selection, ensemble, datasets, metrics
import numpy as np

In [2]:
def write_answer_to_file(answer, filename):
    """Echo ``answer`` to stdout and save its string form to ``filename``.

    Parameters
    ----------
    answer : object
        Value to report; it is printed and then written as ``str(answer)``.
    filename : str or path-like
        Destination file. It is opened in ``'w'`` mode, so any existing
        content is replaced.
    """
    print(answer)
    text = str(answer)
    with open(filename, 'w') as handle:
        handle.write(text)

In [3]:
# Load the scikit-learn digits dataset and split it without shuffling:
# 25% of the samples are held out for testing, the rest are used for training.
digits = datasets.load_digits()
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    digits.data,
    digits.target,
    test_size=0.25,
    shuffle=False,
)

In [4]:
class One_NN():
    """Brute-force 1-nearest-neighbour classifier (squared Euclidean distance).

    ``train`` only memorises the data. ``predict`` returns, for every query
    vector, the label of the closest stored sample; when several samples are
    equally close the one that appears first in the training set wins.
    """

    def __init__(self):
        # Empty placeholders: predicting before train() yields the label -1.
        self._X = np.array([])
        self._y = np.array([])

    def train(self, X_tr, y_tr):
        """Store the training set.

        Parameters
        ----------
        X_tr : np.ndarray
            Training samples, one sample per row.
        y_tr : np.ndarray
            Labels, one per row of ``X_tr``.

        Raises
        ------
        AssertionError
            If either argument is not an ``np.ndarray`` or their lengths differ.
        """
        assert(isinstance(X_tr, np.ndarray) and isinstance(y_tr, np.ndarray))
        # A length mismatch would otherwise only show up at prediction time
        # (as an IndexError, or as silently ignored labels).
        assert(len(X_tr) == len(y_tr))
        self._X = X_tr
        self._y = y_tr

    def predict(self, vec):
        """Predict the label(s) for one vector or for a batch of vectors.

        Parameters
        ----------
        vec : np.ndarray
            Either a single 1-D feature vector or an array whose rows are
            feature vectors.

        Returns
        -------
        np.ndarray
            One label per query; a 1-D input gives an array of length 1.

        Raises
        ------
        AssertionError
            If ``vec`` is not an ``np.ndarray`` or its feature count does not
            match the training data.
        """
        assert(isinstance(vec, np.ndarray))
        if vec.ndim == 1:
            return np.array([self._find_neighbor(vec)])
        return np.array([self._find_neighbor(row) for row in vec])

    def _find_neighbor(self, vec):
        """Return the label of the stored sample nearest to ``vec``.

        Returns -1 when no stored sample has a finite distance to ``vec``
        (in particular when nothing has been trained yet).
        """
        if len(self._X) == 0:
            return -1
        # One vectorised pass over all stored rows instead of a Python loop.
        dists = self._dist(vec, self._X)
        # NaN distances must never win the comparison, so treat them as
        # infinitely far away.
        dists = np.where(np.isnan(dists), np.inf, dists)
        # argmin returns the first minimum, so the earliest sample wins ties.
        best = np.argmin(dists)
        if not dists[best] < np.inf:
            return -1
        return self._y[best]

    def _dist(self, vec1, vec2):
        """Squared Euclidean distance, reduced over the last axis.

        For two 1-D vectors the result is a scalar. ``vec2`` may also be a
        2-D array of row vectors, in which case one distance per row is
        returned.

        Raises
        ------
        AssertionError
            If the sizes of the last axes differ.
        """
        # Compute in float64: with small integer dtypes (e.g. uint8 pixel
        # data) both the subtraction and the squaring wrap around and give
        # wrong distances.
        a = np.asarray(vec1, dtype=np.float64)
        b = np.asarray(vec2, dtype=np.float64)
        assert(a.shape[-1] == b.shape[-1])
        return ((a - b) ** 2).sum(axis=-1)

In [5]:
# Fit the hand-written 1-NN classifier, predict on the test samples and store
# the test error rate (1 - accuracy) in '1.txt'.
estim = One_NN()
estim.train(X_train, y_train)
pred = estim.predict(X_test)
one_nn_accuracy = metrics.accuracy_score(y_test, pred)
write_answer_to_file(1 - one_nn_accuracy, '1.txt')

0.0377777777777778


In [6]:
# Train a 1000-tree random forest on the same training data and store its
# test error rate (1 - accuracy) in '2.txt'.
# random_state is pinned so that Restart & Run All rebuilds the same forest
# and therefore reports the same error; without a seed the value drifts
# from run to run.
estim = ensemble.RandomForestClassifier(n_estimators=1000, random_state=0)
estim.fit(X_train, y_train)
pred = estim.predict(X_test)
write_answer_to_file(1 - metrics.accuracy_score(y_test, pred), '2.txt')

0.06222222222222218
