In [134]:
import cv2
import numpy as np
import pandas as pd

### 1. Загрузка данных

In [80]:
# Read both CSV files into NumPy arrays (np.loadtxt defaults to float);
# skiprows=1 skips the first line of each file (presumably the header row).
train, test = (
    np.loadtxt(csv_path, delimiter=',', skiprows=1)
    for csv_path in (
        r"D:\НЕТОЛОГИЯ\Comp Vision\файлы\train.csv",
        r"D:\НЕТОЛОГИЯ\Comp Vision\файлы\test.csv",
    )
)

In [118]:
# Class labels are stored in the first column of the training array.
train_label = train[:, 0]

# Turn every flat pixel vector into a 28x28 image matrix.
# ndarray.reshape is used instead of np.resize on purpose: np.resize silently
# repeats or truncates data when the element count does not match, whereas
# reshape raises ValueError if a row does not hold exactly 28 * 28 values.
# Note: test_img may share memory with `test` (reshape returns a view when it can).
train_img = train[:, 1:].reshape(train.shape[0], 28, 28)
test_img = test.reshape(test.shape[0], 28, 28)

### 2. Гистограммы градиентов

In [120]:
def _sobel_xy(images):
    """Compute horizontal and vertical Sobel derivatives for a stack of images.

    Parameters
    ----------
    images : array whose first axis indexes the individual images.

    Returns
    -------
    (grad_x, grad_y) : two arrays created with np.zeros_like(images), filled
        image by image with the dx=1 and dy=1 Sobel responses (3x3 kernel).
    """
    grad_x = np.zeros_like(images)
    grad_y = np.zeros_like(images)
    for idx, image in enumerate(images):
        grad_x[idx] = cv2.Sobel(image, cv2.CV_64F, dx=1, dy=0, ksize=3)
        grad_y[idx] = cv2.Sobel(image, cv2.CV_64F, dx=0, dy=1, ksize=3)
    return grad_x, grad_y


# Horizontal and vertical gradient components via the Sobel operator.
train_sobel_x, train_sobel_y = _sobel_xy(train_img)
test_sobel_x, test_sobel_y = _sobel_xy(test_img)

In [122]:
# Convert the (x, y) gradient components into magnitude (g) and angle (theta).
# angleInDegrees=False is the OpenCV default and is spelled out here because the
# histogram step below bins the angles over the range [0, 2*pi].
train_g, train_theta = cv2.cartToPolar(train_sobel_x, train_sobel_y,
                                       angleInDegrees=False)
test_g, test_theta = cv2.cartToPolar(test_sobel_x, test_sobel_y,
                                     angleInDegrees=False)

In [124]:
def _orientation_histograms(theta, magnitude, bins=16):
    """Build one magnitude-weighted histogram of gradient angles per image.

    Parameters
    ----------
    theta : per-image gradient angles; the first axis indexes the images.
    magnitude : per-image gradient magnitudes, used as histogram weights.
    bins : number of equal-width angle bins over the range [0, 2*pi].

    Returns
    -------
    Array of shape (len(theta), bins); row k is the histogram of image k.
    """
    hists = np.zeros((len(theta), bins))
    for idx, (angles, weights) in enumerate(zip(theta, magnitude)):
        # np.histogram returns (counts, bin_edges); only the counts are kept.
        hists[idx], _ = np.histogram(angles,
                                     bins=bins,
                                     range=(0., 2. * np.pi),
                                     weights=weights)
    return hists


# Histograms of gradient orientations (16 bins) for both data sets.
train_hist = _orientation_histograms(train_theta, train_g)
test_hist = _orientation_histograms(test_theta, test_g)

In [125]:
# L2-normalise every histogram row (divide by its Euclidean norm).
# A row whose norm is zero (an image with no gradients at all) would turn into
# NaNs through 0 / 0, and NaN features can make the classifier fail. Such rows
# are divided by 1 instead, so they stay all-zero; all other rows are unchanged.
train_norm = np.linalg.norm(train_hist, axis=1, keepdims=True)
train_hist = train_hist / np.where(train_norm > 0, train_norm, 1.0)

test_norm = np.linalg.norm(test_hist, axis=1, keepdims=True)
test_hist = test_hist / np.where(test_norm > 0, test_norm, 1.0)

### 3. Модель для предсказания

In [126]:
# Hold out 20% of the labelled data for validation.
from sklearn.model_selection import train_test_split

# train_test_split returns a (train, test) pair for each input array, in the
# order the arrays are passed: features first, then labels.
x_train, x_val, y_train, y_val = train_test_split(
    train_hist,
    train_label,
    test_size=0.2,
    random_state=42,
)

In [130]:
# Train a random forest classifier on the histogram features.
from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier(
    max_depth=20,
    min_samples_leaf=2,
    min_samples_split=5,
    random_state=42,  # fixed seed so repeated runs build the same forest
)
rf.fit(x_train, y_train)

In [132]:
# Model accuracy on the validation split (the printed message is in Russian:
# "Accuracy on validation data").
from sklearn.metrics import accuracy_score

pred_val = rf.predict(x_val)
acc_val = accuracy_score(y_true=y_val, y_pred=pred_val)
print(f'Точность на вал. данных: {acc_val:.3f}')

Точность на вал. данных: 0.647


In [158]:
# Predict labels for the test set and export them as a two-column CSV.
pred_test = rf.predict(test_hist)

# ImageId is a 1-based running index over the test predictions.
image_ids = np.arange(1, len(pred_test) + 1, 1)
df = pd.DataFrame({'ImageId': image_ids,
                   'Label': pred_test})

# Cast the Label column to integers before writing.
df = df.astype({'Label': int})
df.to_csv(r"D:\НЕТОЛОГИЯ\Comp Vision\файлы\mnist_prediction.csv", index = False)