In [1]:
import os
import sys
import pickle
import numpy as np
import pandas as pd
from PIL import Image, ImageFilter
from tqdm import tqdm_notebook
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, mean_squared_error, log_loss, confusion_matrix
import matplotlib.pyplot as plt

np.random.seed(100)
LEVEL = 'level_1'

In [2]:
class SigmoidNeuron:
  
  def __init__(self):
    self.w = None
    self.b = None
    
  def perceptron(self, x):
    return np.dot(x, self.w.T) + self.b
  
  def sigmoid(self, x):
    return 1.0/(1.0 + np.exp(-x))
  
  def grad_w_mse(self, x, y):
    y_pred = self.sigmoid(self.perceptron(x))
    return (y_pred - y) * y_pred * (1 - y_pred) * x
  
  def grad_b_mse(self, x, y):
    y_pred = self.sigmoid(self.perceptron(x))
    return (y_pred - y) * y_pred * (1 - y_pred)
  
  def grad_w_ce(self, x, y):
    y_pred = self.sigmoid(self.perceptron(x))
    if y == 0:
      return y_pred * x
    elif y == 1:
      return -1 * (1 - y_pred) * x
    else:
      raise ValueError("y should be 0 or 1")
    
  def grad_b_ce(self, x, y):
    y_pred = self.sigmoid(self.perceptron(x))
    if y == 0:
      return y_pred 
    elif y == 1:
      return -1 * (1 - y_pred)
    else:
      raise ValueError("y should be 0 or 1")
  
  def fit(self, X, Y, epochs=1, learning_rate=1, initialise=True, loss_fn="mse", display_loss=False):
    
    # initialise w, b
    if initialise:
      self.w = np.random.randn(1, X.shape[1])
      self.b = 0
      
    if display_loss:
      loss = {}
    
    for i in tqdm_notebook(range(epochs), total=epochs, unit="epoch"):
      dw = 0
      db = 0
      for x, y in zip(X, Y):
        if loss_fn == "mse":
          dw += self.grad_w_mse(x, y)
          db += self.grad_b_mse(x, y) 
        elif loss_fn == "ce":
          dw += self.grad_w_ce(x, y)
          db += self.grad_b_ce(x, y)
      self.w -= learning_rate * dw
      self.b -= learning_rate * db
      
      if display_loss:
        Y_pred = self.sigmoid(self.perceptron(X))
        if loss_fn == "mse":
          loss[i] = mean_squared_error(Y, Y_pred)
        elif loss_fn == "ce":
          loss[i] = log_loss(Y, Y_pred)
    
    if display_loss:
      plt.plot(loss.values())
      plt.xlabel('Epochs')
      if loss_fn == "mse":
        plt.ylabel('Mean Squared Error')
      elif loss_fn == "ce":
        plt.ylabel('Log Loss')
      plt.show()
      
  def predict(self, X):
    Y_pred = []
    for x in X:
      y_pred = self.sigmoid(self.perceptron(x))
      Y_pred.append(y_pred)
    return np.array(Y_pred)

In [3]:
def read_all(folder_path, key_prefix=""):
    '''
    It returns a dictionary with 'file names' as keys and 'flattened image arrays' as values.
    '''
    print("Reading:")
    images = {}
    files = os.listdir(folder_path)
    for i, file_name in tqdm_notebook(enumerate(files), total=len(files)):
        file_path = os.path.join(folder_path, file_name)
        image_index = key_prefix + file_name[:-4]
        image = Image.open(file_path)
        image = image.convert("L")
        images[image_index] = np.array(image.copy()).flatten()
        image.close()
    return images

In [4]:
languages = ['ta', 'hi', 'en']

images_train = read_all("../input/level_1_train/level_1"+"/background", key_prefix='bgr_') 
for language in languages:
  images_train.update(read_all("../input/level_1_train/level_1/"+language, key_prefix=language+"_" ))
print(len(images_train))

images_test = read_all("../input/level_1_test/kaggle_level_1", key_prefix='') 
print(len(images_test))

Reading:


HBox(children=(IntProgress(value=0, max=450), HTML(value='')))


Reading:


HBox(children=(IntProgress(value=0, max=150), HTML(value='')))


Reading:


HBox(children=(IntProgress(value=0, max=150), HTML(value='')))


Reading:


HBox(children=(IntProgress(value=0, max=150), HTML(value='')))


900
Reading:


HBox(children=(IntProgress(value=0, max=300), HTML(value='')))


300


In [5]:
list(images_test.keys())[:5]

['145', '34', '90', '261', '48']

In [6]:
X_train = []
Y_train = []
for key, value in images_train.items():
    X_train.append(value)
    if key[:4] == "bgr_":
        Y_train.append(0)
    else:
        Y_train.append(1)

ID_test = []
X_test = []
for key, value in images_test.items():
  ID_test.append(int(key))
  X_test.append(value)
  
        
X_train = np.array(X_train)
Y_train = np.array(Y_train)
X_test = np.array(X_test)

print(X_train.shape, Y_train.shape)
print(X_test.shape)

(900, 256) (900,)
(300, 256)


In [7]:
def binarise(X):
    for i in range(len(X)):
        X[i] = 0 if (X[i] == 255) else i
    return X

In [8]:
X_binarised_train = np.array(list(map(binarise, X_train)))
X_binarised_test = np.array(list(map(binarise, X_test)))

In [9]:
class MPNeuron:
  
  def __init__(self):
    self.b = None
  
  def model(self, x):
    return(int(sum(x) >= self.b))
  
  def predict(self, X):
    Y = []
    for x in X:
      result = self.model(x)
      Y.append(result)
    return np.array(Y)
  
  def fit(self, X, Y):
    accuracy = {}
    
    for b in range(X.shape[1] + 1):
      self.b = b
      Y_pred = self.predict(X)
      accuracy[b] = accuracy_score(Y_pred, Y)
      
    best_b = max(accuracy, key = accuracy.get)
    self.b = best_b
    
    print('Optimal value of b is', best_b)
    print('Highest accuracy is', accuracy[best_b])

In [10]:
mpneuron = MPNeuron()

In [11]:
mpneuron.fit(X_binarised_train,Y_train)

Optimal value of b is 1
Highest accuracy is 1.0


## Sample Submission

In [12]:
Y_pred_test = mpneuron.predict(X_binarised_test)

submission = {}
submission['ImageId'] = ID_test
submission['Class'] = Y_pred_test

submission = pd.DataFrame(submission)
submission = submission[['ImageId', 'Class']]
submission = submission.sort_values(['ImageId'])
submission.to_csv("submisision.csv", index=False)

In [13]:
print(submission)

     ImageId  Class
289        0      1
147        1      0
245        2      1
175        3      1
43         4      1
224        5      0
115        6      0
90         7      0
148        8      0
71         9      1
36        10      1
77        11      0
141       12      1
88        13      1
298       14      1
138       15      0
193       16      1
22        17      1
295       18      0
133       19      1
226       20      1
40        21      1
143       22      0
228       23      1
113       24      1
137       25      1
187       26      1
194       27      1
76        28      0
202       29      1
..       ...    ...
60       270      0
243      271      1
44       272      1
17       273      1
11       274      1
145      275      0
196      276      0
34       277      0
181      278      0
52       279      0
247      280      1
14       281      1
172      282      0
155      283      1
233      284      0
112      285      0
151      286      0
29       287      0
