In [1]:
import os
import sys
import pickle
import numpy as np
import pandas as pd
from PIL import Image, ImageFilter
from tqdm import tqdm_notebook
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, mean_squared_error, log_loss, confusion_matrix
import matplotlib.pyplot as plt

np.random.seed(100)
LEVEL = 'level_2'

In [2]:
def read_all(folder_path, key_prefix=""):
    '''
    It returns a dictionary with 'file names' as keys and 'flattened image arrays' as values.
    '''
    print("Reading:")
    images = {}
    files = os.listdir(folder_path)
    for i, file_name in tqdm_notebook(enumerate(files), total=len(files)):
        file_path = os.path.join(folder_path, file_name)
        image_index = key_prefix + file_name[:-4]
        image = Image.open(file_path)
        image = image.convert("L")
        images[image_index] = np.array(image.copy()).flatten()
        image.close()
    return images

In [3]:
languages = ['ta', 'hi', 'en']

images_train = read_all("../input/level_2_train/level_2"+"/background", key_prefix='bgr_') 
for language in languages:
  images_train.update(read_all("../input/level_2_train/level_2/"+language, key_prefix=language+"_" ))
print(len(images_train))

images_test = read_all("../input/level_2_test/kaggle_level_2", key_prefix='') 
print(len(images_test))

Reading:


HBox(children=(IntProgress(value=0, max=450), HTML(value='')))


Reading:


HBox(children=(IntProgress(value=0, max=150), HTML(value='')))


Reading:


HBox(children=(IntProgress(value=0, max=150), HTML(value='')))


Reading:


HBox(children=(IntProgress(value=0, max=150), HTML(value='')))


900
Reading:


HBox(children=(IntProgress(value=0, max=300), HTML(value='')))


300


In [4]:
list(images_test.keys())[:5]

['145', '34', '90', '261', '48']

In [5]:
X_train = []
Y_train = []
for key, value in images_train.items():
    X_train.append(value)
    if key[:4] == "bgr_":
        Y_train.append(0)
    else:
        Y_train.append(1)

ID_test = []
X_test = []
for key, value in images_test.items():
  ID_test.append(int(key))
  X_test.append(value)
  
        
X_train = np.array(X_train)
Y_train = np.array(Y_train)
X_test = np.array(X_test)

print(X_train.shape, Y_train.shape)
print(X_test.shape)

(900, 256) (900,)
(300, 256)


In [6]:
def binarise(X):
    for i in range(len(X)):
        X[i] = 1 if (X[i] == 255) else 0
    return X

In [7]:
X_binarised_train = np.array(list(map(binarise, X_train)))
X_binarised_test = np.array(list(map(binarise, X_test)))

In [8]:
class MPNeuron:
  
  def __init__(self):
    self.b = None
  
  def model(self, x):
    return(int(sum(x) >= self.b))
  
  def predict(self, X):
    Y = []
    for x in X:
      result = self.model(x)
      Y.append(result)
    return np.array(Y)
  
  def fit(self, X, Y):
    accuracy = {}
    
    for b in range(X.shape[1] + 1):
      self.b = b
      Y_pred = self.predict(X)
      accuracy[b] = accuracy_score(Y_pred, Y)
      
    best_b = max(accuracy, key = accuracy.get)
    self.b = best_b
    
    print('Optimal value of b is', best_b)
    print('Highest accuracy is', accuracy[best_b])

In [9]:
mpneuron = MPNeuron()

In [10]:
mpneuron.fit(X_binarised_train,Y_train)

Optimal value of b is 11
Highest accuracy is 1.0


## Sample Submission

In [11]:
Y_pred_test = mpneuron.predict(X_binarised_test)

submission = {}
submission['ImageId'] = ID_test
submission['Class'] = Y_pred_test

submission = pd.DataFrame(submission)
submission = submission[['ImageId', 'Class']]
submission = submission.sort_values(['ImageId'])
submission.to_csv("submisision.csv", index=False)

In [12]:
print(submission)

     ImageId  Class
289        0      1
147        1      0
245        2      0
175        3      1
43         4      0
224        5      1
115        6      0
90         7      1
148        8      0
71         9      1
36        10      1
77        11      1
141       12      1
88        13      0
298       14      1
138       15      0
193       16      0
22        17      0
295       18      1
133       19      1
226       20      0
40        21      1
143       22      0
228       23      1
113       24      1
137       25      1
187       26      0
194       27      0
76        28      0
202       29      1
..       ...    ...
60       270      0
243      271      1
44       272      0
17       273      0
11       274      0
145      275      0
196      276      1
34       277      1
181      278      1
52       279      0
247      280      1
14       281      0
172      282      0
155      283      0
233      284      1
112      285      0
151      286      1
29       287      1
