In [44]:
import numpy as np
import matplotlib.pyplot as plt
import math
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
import copy
import random
from mnist import MNIST


In [45]:
# Directory handed to the MNIST loader (relative to the notebook's working directory).
x_train_dir = './data'
mndata = MNIST(x_train_dir)
# The gz flag is set before loading; presumably it makes the loader read the
# gzip-compressed files — confirm against the python-mnist documentation.
mndata.gz = True

# Samples and their labels; X is converted to a NumPy array in a later cell.
X, y = mndata.load_training()

In [4]:
# Convert the loaded samples to a NumPy array so .shape and element-wise arithmetic work below.
X = np.array(X)

In [5]:
# Rows = number of samples, columns = features per sample (before the bias column is added).
record_num, features_num = X.shape
print( record_num, features_num)

60000 784


In [6]:
def scale_data(X):
    """Return ``X`` divided by 255 (element-wise when ``X`` is an array).

    The input object is not modified; a new value is returned.
    """
    return X / 255

In [7]:
# Replace X with its scaled version.
# NOTE(review): X is overwritten, so re-running this cell divides by 255 a second time.
X = scale_data(X)

In [8]:
# Prepend a column of ones to X, so the first weight of each model multiplies a constant 1.
# NOTE(review): X is overwritten, so re-running this cell prepends another column.
bias_f = np.ones((record_num, 1))
X = np.hstack((bias_f, X))

# Refresh the dimensions: features_num now counts the prepended column as well.
record_num, features_num = X.shape
print( record_num, features_num)

60000 785


In [10]:
# DATA IS READY (scaled by 1/255, with a column of ones prepended)

In [11]:
def dot_product(features, weights):
    """Return the sum of the element-wise products of ``features`` and ``weights``."""
    products = features * weights
    return np.sum(products)

In [13]:
def init_weights(features_num, a, b):
    """Draw ``features_num`` random weights as ``a + (b - a) * u``.

    ``u`` comes from ``np.random.random``, i.e. it is sampled from [0, 1) using
    NumPy's global random state.
    """
    span = b - a
    unit_samples = np.random.random(features_num)
    return np.array(a + span * unit_samples)

In [14]:
def num_gradient_st(loss_fun, predict_fun, X, y, model_weights, w_delta=0.01, lambd=1):
    """Estimate the loss gradient at one randomly chosen sample by forward differences.

    Parameters
    ----------
    loss_fun : callable
        Called as ``loss_fun(prediction, target, weights)``; returns the loss value.
    predict_fun : callable
        Called as ``predict_fun(sample, weights)``; returns the prediction.
    X, y : sequences of equal length
        Samples and their targets; one index is drawn uniformly at random.
    model_weights : array-like
        Point at which the gradient is estimated. It is never modified.
    w_delta : float
        Step added to one coordinate at a time.
    lambd : number
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    numpy.ndarray
        One forward-difference derivative per weight coordinate.

    Raises
    ------
    ValueError
        If ``X`` is empty.
    """
    if len(X) == 0:
        raise ValueError("X must contain at least one sample")

    # randint's upper bound is exclusive: len(X) (not len(X) - 1) makes the last
    # sample eligible and keeps a single-sample dataset valid.
    i = np.random.randint(0, len(X))
    sample, target = X[i], y[i]

    # Private float copy: slicing a NumPy array only yields a view, which would
    # let the perturbations below leak into the caller's weights.
    weights_delta = np.array(model_weights, dtype=float)
    current_loss = loss_fun(predict_fun(sample, weights_delta), target, weights_delta)

    grad = []
    for coord in range(len(weights_delta)):
        original_value = weights_delta[coord]
        weights_delta[coord] = original_value + w_delta
        delta_loss = loss_fun(predict_fun(sample, weights_delta), target, weights_delta)
        grad.append((delta_loss - current_loss) / w_delta)
        # Restore the exact stored value instead of subtracting, so no rounding
        # error accumulates across coordinates.
        weights_delta[coord] = original_value

    return np.array(grad)

In [15]:
def gradient_descent(loss_fun, predict_fun, X, y, initial_weights, learning_rate, iter_num, verbose=True, lambd=1):
    """Run ``iter_num`` stochastic gradient steps and return the resulting weights.

    Each step asks ``num_gradient_st`` for a numerical gradient at one random
    sample and moves the weights by ``-learning_rate * grad``.

    Parameters
    ----------
    loss_fun, predict_fun : callable
        Forwarded unchanged to ``num_gradient_st``.
    X, y : sequences
        Samples and targets, forwarded to ``num_gradient_st``.
    initial_weights : array-like
        Starting point. It is copied and never modified.
    learning_rate : float
        Step size applied to every gradient.
    iter_num : int
        Number of update steps.
    verbose : bool
        Accepted for interface compatibility; no progress output is produced.
    lambd : number
        Forwarded to ``num_gradient_st``.

    Returns
    -------
    numpy.ndarray
        A new float array holding the updated weights.
    """
    # np.array(...) makes an independent float copy. `initial_weights[:]` on a
    # NumPy array is only a view, so the in-place update below used to
    # overwrite the caller's array.
    model_weights = np.array(initial_weights, dtype=float)

    for _ in range(iter_num):
        grad = num_gradient_st(loss_fun, predict_fun, X, y, model_weights, lambd=lambd)
        model_weights -= learning_rate * grad

    return model_weights

In [16]:
def one_vs_all(y, value):
    """Map labels to +1 where they equal ``value`` and to -1 everywhere else.

    Returns a plain list with one entry per position of ``y``.

    NOTE: an identical ``one_vs_all`` is defined again in a later cell of this
    notebook and shadows this definition.
    """
    return [1 if y[idx] == value else -1 for idx in range(len(y))]

In [18]:
def svm_predict(features, weights):
    """Return the raw score of ``features`` under ``weights``, as computed by ``dot_product``.

    No thresholding or sign is applied; the caller receives the score itself.
    """
    return dot_product(features, weights)

In [19]:
def my_loss(predicted_value, true_value, model_weights):
    """Return a squared-weight penalty plus the hinge term for one sample.

    The penalty is ``sum(w * w) / 100000``; the hinge term is
    ``max(0, 1 - predicted_value * true_value)``.
    """
    penalty = np.sum(model_weights * model_weights) / 100000
    margin = predicted_value * true_value
    hinge = max(0.0, 1.0 - margin)
    return penalty + hinge

In [20]:
def one_vs_all(y, value):
    """Build +1/-1 targets: +1 where the label equals ``value``, -1 otherwise.

    Returns a plain list with one entry per position of ``y``.

    NOTE: this repeats an identical ``one_vs_all`` from an earlier cell; being
    later in the notebook, this is the definition that stays bound.
    """
    labels = []
    for position in range(len(y)):
        labels.append(1 if y[position] == value else -1)
    return labels

In [21]:
def find_10_weights(X, y_bin, learning_rate=0.1, iter_num=100, gradient_descent=gradient_descent, lambd=1):
    """Train one weight vector per row of ``y_bin`` and return them as a list.

    Parameters
    ----------
    X : 2-D array-like
        Samples; the number of columns sets the length of every weight vector.
    y_bin : iterable
        One target sequence per classifier (each is paired with the full ``X``).
    learning_rate, iter_num :
        Forwarded to ``gradient_descent``.
    gradient_descent : callable
        Optimiser called as ``gradient_descent(my_loss, svm_predict, X, yi,
        initial_weights, learning_rate, iter_num, lambd=lambd)``.
    lambd : number
        Forwarded to ``gradient_descent`` by keyword.

    Returns
    -------
    list
        The optimiser's result for each row of ``y_bin``, in order.
    """
    # Size the weights from X itself rather than from the notebook-level
    # `features_num`, which is only right while that global matches this X.
    n_features = np.shape(X)[1]

    weights_vector = []
    for yi in y_bin:
        initial_weights = init_weights(n_features, -0.5, 0.5)
        # lambd goes by keyword: the eighth positional parameter of
        # gradient_descent is `verbose`, so a positional lambd landed there.
        optimal_weights_i = gradient_descent(my_loss, svm_predict, X, yi, initial_weights,
                                             learning_rate, iter_num, lambd=lambd)
        weights_vector.append(optimal_weights_i)
    return weights_vector
        
        
    
    
    

In [22]:
def y_split(y):
    """Build the ten one-vs-all target vectors for the labels 0 through 9.

    Row ``d`` of the returned array is ``one_vs_all(y, d)``.
    """
    per_digit_targets = [one_vs_all(y, digit) for digit in range(10)]
    return np.array(per_digit_targets)

In [39]:
def last_predict(X, weights_vector):
    """Predict a class index for every sample as the best-scoring weight vector.

    Parameters
    ----------
    X : iterable of array-like
        Samples; each is scored against every entry of ``weights_vector``.
    weights_vector : sequence of array-like
        One weight vector per class; the position in the sequence is the class index.

    Returns
    -------
    list of int
        For each sample, the index of the weight vector with the highest
        ``sum(sample * weights)``. Ties go to the lowest index.
    """
    predict_y = []
    for elem in X:
        score = np.array([np.sum(elem * weights) for weights in weights_vector])
        # argmax yields a single index (the first maximum), so tied scores no
        # longer break the tuple-unpacking of np.where(score == max).
        predict_y.append(int(np.argmax(score)))
    return predict_y

In [25]:
# Hold out 20% of the samples for validation; random_state=1 fixes the shuffle so the split is repeatable.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=1)


In [26]:
# One row of +1/-1 targets per digit (0-9), built from the training labels.
y_train_binary = y_split(y_train)

In [27]:
# Same one-vs-all encoding for the validation labels (not referenced again in the visible cells).
y_val_binary = y_split(y_val)

In [28]:
%%time
# Train the ten one-vs-all weight vectors with 350000 stochastic steps each
# (the recorded run took about 8h 44min, see the cell output).
# NOTE(review): no NumPy random seed is set in the visible cells, so a re-run
# will not reproduce these exact weights.
opt_weights_vector350000 = find_10_weights(X_train, y_train_binary, iter_num = 350000, learning_rate=0.01)

Wall time: 8h 44min 9s


In [29]:
# Persist the trained weights; allow_pickle=False makes np.save refuse to fall back to pickling.
np.save('model_weights350000.npy', opt_weights_vector350000, allow_pickle=False)

In [35]:
# Second copy of the same weights under a generic file name.
np.save('model_weights.npy', opt_weights_vector350000, allow_pickle=False)

In [42]:
# NOTE(review): despite the "_train" name this scores the full X, which also
# contains the rows held out as X_val; use X_train if a train-only score is intended.
predicted_value_train = last_predict(X, opt_weights_vector350000)

In [43]:
# Per-class precision/recall/F1 against the full label set y (matches the predictions made on the full X).
print(classification_report(np.array(y), predicted_value_train))

             precision    recall  f1-score   support

          0       0.96      0.97      0.97      5923
          1       0.92      0.98      0.95      6742
          2       0.91      0.90      0.91      5958
          3       0.95      0.83      0.89      6131
          4       0.89      0.95      0.92      5842
          5       0.83      0.90      0.86      5421
          6       0.96      0.94      0.95      5918
          7       0.93      0.94      0.93      6265
          8       0.88      0.85      0.86      5851
          9       0.90      0.86      0.88      5949

avg / total       0.91      0.91      0.91     60000



In [40]:
# Predictions for the held-out validation samples only.
predicted_value_val = last_predict(X_val, opt_weights_vector350000)

In [41]:
# Per-class precision/recall/F1 on the validation split.
print(classification_report(np.array(y_val), predicted_value_val))

             precision    recall  f1-score   support

          0       0.96      0.97      0.96      1158
          1       0.91      0.98      0.95      1373
          2       0.91      0.89      0.90      1194
          3       0.95      0.81      0.88      1237
          4       0.86      0.95      0.90      1157
          5       0.81      0.90      0.85      1110
          6       0.96      0.93      0.94      1187
          7       0.92      0.93      0.93      1265
          8       0.85      0.82      0.83      1172
          9       0.90      0.85      0.87      1147

avg / total       0.91      0.90      0.90     12000

