In [1]:
import pandas as pd
import numpy as np

In [2]:
test_file_path = './datasets/test.csv'
train_file_path = './datasets/train.csv'
test_df = pd.read_csv(test_file_path)
train_df = pd.read_csv(train_file_path)

# Remove the first column
np_train_df = train_df.to_numpy()[:, 1:]
np_test_df = test_df.to_numpy()[:, 1:]

# CONSTRAINTS
DIGIT = 0
FIRST_PIXEL = 1

In [3]:
def pos_pixel(size, i ,j):
  return i*size + j

def symmetry(image):
  size = 28
  sv = 0
  sh = 0
  
  for i in range(size):
    for j in range(int(size/2)):
      p1 = image[pos_pixel(size, i, j)]
      p2 = image[pos_pixel(size, i, (size-1)-j)]
      sv += abs(p1 - p2)
            
      p1 = image[pos_pixel(size, j, i)]
      p2 = image[pos_pixel(size, (size-1)-j, i)]
      sh += abs(p1 - p2)

  sv = sv/255
  sh = sh/255

  s = sv + sh
  
  return s
  
def intensity(image):
  return (np.sum(image)/255.0)


def get_numbers_1_and_5(df):
  filtered_data = []

  for item in df:
    if(item[0] == 1 or item[0] == 5):
      filtered_data.append(item.tolist())

  return filtered_data

def add_1_column(X):
  ones_list = map(lambda i: [1, i[0], i[1]], X)
  return np.array(list(ones_list))
  

In [4]:
new_train_df = []
images = get_numbers_1_and_5(np_train_df)

for image in images:
  symmetry_ = symmetry(image[FIRST_PIXEL:])
  intensity_ = intensity(image[FIRST_PIXEL:])

  label = -1 if image[DIGIT] == 1 else 1

  new_train_df.append([label, intensity_, symmetry_])

new_train_df = np.array(new_train_df)

In [5]:
new_test_df = []
images = get_numbers_1_and_5(np_test_df)

for image in images:
  symmetry_ = symmetry(image[FIRST_PIXEL:])
  intensity_ = intensity(image[FIRST_PIXEL:])

  label = -1 if image[DIGIT] == 1 else 1

  new_test_df.append([label, intensity_, symmetry_,])

new_test_df = np.array(new_test_df)

In [26]:
class Perceptron:
  def __init__(self, max_iter):
    self.max_iter = max_iter

  def fit(self, _X, _y):
    dimension = len(_X[0])
    self.w = 2 * np.random.random(size=dimension) - 1
    self.best_error = dimension
    best_w = self.w
    
    for i in range(self.max_iter):

      for x_n, y_n in zip(_X, _y):
        y_pred = np.sign(np.dot(x_n[1:], self.w[1:]) + (x_n[0] * self.w[0]))

        if y_pred != y_n:
          self.w = self.w + x_n * y_n
          error = self.__error_in(zip(_X, _y))
          if self.best_error > error:
            self.best_error = error
            best = self.w
      
      self.w = best_w

  def predict(self, x_test, first_digit, second_digit):
    y_pred = np.sign(np.dot(x_test, self.w))
    return np.where(y_pred == -1.0, first_digit, second_digit)

  def get_weights(self):
    return self.w[1:]

  def get_bias(self):
    return self.w[0]

  # Private

  def __error_in(self, points):
    error = 0

    for x_n, y_n in points:
      y_pred = np.sign(np.dot(x_n, self.w))
      error += 1 if y_pred != y_n else 0

    return error

In [None]:
def PLA_predict(X_test,w):

    preds = np.sign(np.dot(X_test,w))

    return np.where(preds == 1.0, 1, 5)

In [27]:
# PERCEPTRON

pla = Perceptron(1)
pla.fit(add_1_column(new_train_df[:, 1:]), new_train_df[:, 0])


In [28]:
pla.get_weights()

array([-0.0679487 ,  0.76251975])

In [29]:
pla.get_bias()

0.3822019509301895

In [13]:
type(pla.w)

numpy.ndarray