In [20]:
from PIL import Image
import numpy as np
import os

In [21]:
class Predictor:
    def __init__(self):
        self.w = None
        self.metric = None
        self.prev_metric = 0
        self.border = None
    
    def fit(self, class_1_arr, class_2_arr):
        self.w = (np.sum(class_1_arr, axis=0)*(1/len(class_1_arr)) - \
                  np.sum(class_2_arr, axis=0)*(1/len(class_2_arr)))
        self.metric = self.__calculate_metric(class_1_arr, class_2_arr)
        i = 0
                
        while abs(self.prev_metric - self.metric) > 0.0001:
            self.prev_metric = self.metric
            max_class_1 = class_1_arr[self.__find_max_index(class_1_arr)]
            min_class_2 = class_2_arr[self.__find_min_index(class_2_arr)]
            grad = (np.dot(self.w,self.w)*(max_class_1 - min_class_2) - \
                  np.dot(self.w,max_class_1 - min_class_2)*self.w)/(np.dot(self.w,self.w)**(3/2))
            h = 1/(i+1)**(2/3)
            i += 1
            self.w = self.__get_w()-h*grad
            self.metric = self.__calculate_metric(class_1_arr, class_2_arr)


        self.border = ((np.dot(self.w,class_2_arr[self.__find_max_index(class_2_arr)]) + \
                        np.dot(self.w,class_1_arr[self.__find_min_index(class_1_arr)]))/2)
                  
            
                  
    def predict(self,value):
        if np.dot(self.w,value) > self.border:
            print('class_1')
        else:
            print('class_2')
            
            
    def test_score(self,class_1_test, class_2_test):
        right_answers = 0
        for i in class_1_test:
            if np.dot(self.w,i) > self.border:
                right_answers += 1
        for i in class_2_test:
            if np.dot(self.w,i) < self.border:
                right_answers += 1
                
        print('Score:', right_answers*100/(len(class_1_test) + len(class_2_test)))
        
    def __find_min_index(self,vectors):
        _min = 10000
        i = 0
        min_index = 0
        for vector in vectors:
            res = np.dot(vector, self.w)/(np.dot( self.w, self.w)**(1/2))
            if res < _min:
                _min = res
                min_index = i
            i = i + 1
        return min_index
        
    def __find_max_index(self,vectors):
        _max = -10000
        i = 0
        max_index = 0
        for vector in vectors:
            res = np.dot(vector, self.w)/(np.dot( self.w, self.w)**(1/2))
            if res > _max:
                _max = res
                max_index = i
            i = i + 1
        return max_index
    
    def __calculate_metric(self,class_1_arr,class_2_arr):
          return np.dot(self.w,class_1_arr[self.__find_min_index(class_1_arr)])/(np.dot(self.w,self.w)**(1/2)) - \
                  np.dot(self.w,class_2_arr[self.__find_max_index(class_2_arr)])/(np.dot(self.w,self.w)**(1/2))
        
    def __get_w(self):
        return self.w.copy()

In [22]:
def load_imgs(folder):
    imgs = []
    for i in os.listdir(folder):
        im = np.asarray(Image.open(folder+'/'+i)).reshape(28*28)
        imgs.append(im)
    return np.asarray(imgs)

def normalize(arr):
    arr_res = arr.copy()
    for i in range(len(arr)):
        for j in range(len(arr[i])):
            if arr[i,j]:
                arr_res[i,j] = (arr[i,j] - np.mean(arr[i]))/np.std(arr[i])
    return arr_res

In [23]:
train_1 = normalize(load_imgs('1_500'))
train_3 = normalize(load_imgs('3_500'))
test_1 = normalize(load_imgs('test1'))
test_3 = normalize(load_imgs('test3'))

In [24]:
clf = Predictor()
clf.fit(train_1,train_3)
clf.test_score(train_1,train_3)

Score: 87.5


In [25]:
clf.test_score(test_1,test_3)

Score: 92.36111111111111
