# 1. Import libraries

In [1]:
import matplotlib.pyplot as plt # plotting
import numpy as np # linear algebra
import os # accessing directory structure
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
# from scipy import misc
# from hmmlearn import hmm

# 2. Load images

## 2.1. Load images into 2 lists: left_iris_l and right_iris_l

In [2]:
path = os.getcwd()
left_iris_l = []
right_iris_l = []

# Walk the dataset tree under <cwd>/data/MMU-Iris-Database. The root is built
# with os.path.join so the separators match the host operating system.
for dirname, _, filenames in os.walk(os.path.join(path, 'data', 'MMU-Iris-Database')):
    # The eye side is the name of the directory that directly contains the file.
    # os.path.basename honours the platform separator; splitting on "/" would
    # never yield 'left'/'right' for backslash-separated paths.
    side = os.path.basename(dirname)
    for filename in filenames:
        # Skip OS metadata and the dataset read-me; they are not images.
        if filename in ('.DS_Store', 'ReadMe.txt', 'Thumbs.db'):
            continue
        dir_file_s = os.path.join(dirname, filename)
        if side == 'left':
            left_iris_l.append(plt.imread(dir_file_s))
        elif side == 'right':
            right_iris_l.append(plt.imread(dir_file_s))
        else:
            # Report files that sit outside a left/right directory instead of
            # silently dropping them.
            print(f'Neither left or right iris in {dir_file_s}')

## 2.2. Check the size of the 2 lists and each file

In [3]:
# Number of images loaded for the left eye, then for the right eye (one per line).
print(len(left_iris_l), len(right_iris_l), sep='\n')

225
225


In [4]:
# Show the container type, the shape and the raw values of the first left-eye
# image; the separator reproduces the " \n" + newline spacing between items.
print(type(left_iris_l[0]), left_iris_l[0].shape, left_iris_l[0], sep=' \n\n')

<class 'numpy.ndarray'> 

(240, 320, 3) 

[[[200 204 200]
  [184 188 184]
  [184 188 184]
  ...
  [ 64  68  64]
  [ 72  76  72]
  [ 80  76  72]]

 [[192 200 192]
  [184 188 184]
  [192 184 184]
  ...
  [ 80  84  80]
  [ 72  76  72]
  [ 80  80  80]]

 [[192 196 192]
  [184 188 184]
  [176 180 176]
  ...
  [ 40  52  40]
  [ 64  60  64]
  [ 80  76  72]]

 ...

 [[240 240 232]
  [184 192 192]
  [184 188 184]
  ...
  [128 124 120]
  [120 128 128]
  [128 124 120]]

 [[248 248 248]
  [248 248 240]
  [232 240 240]
  ...
  [152 156 152]
  [160 160 160]
  [160 156 152]]

 [[  0   4   0]
  [  8   8   8]
  [  0   4   0]
  ...
  [  0   8   0]
  [  8   4   8]
  [  8   8   8]]]


# 3. Slicing

In [14]:
# Work with a single image (the first left-eye one) for the slicing experiments.
one_iris_l = left_iris_l[0]
print(one_iris_l.shape)
# Total number of scalar values in the image (product of all dimensions), read
# from the array itself rather than hard-coding 240 * 320 * 3.
print(one_iris_l.size)

(240, 320, 3)
230400


In [10]:
def flatten(t):
    """Remove one level of nesting from ``t``.

    Every element of ``t`` is iterated and its items are collected, in order,
    into a single new list. Only the outermost level is unpacked; items that
    are themselves containers are kept as they are.
    """
    flat = []
    for inner in t:
        flat.extend(inner)
    return flat

In [12]:
# flatten() unpacks only the outermost axis of the image, so the result is a
# list with one entry per pixel (rows * columns), not one per channel value.
after_flatten = flatten(one_iris_l)
print(len(after_flatten))

76800


In [9]:
def slice_data(data, number_of_slices):
    """Cut an array into equal-width strips along axis 1.

    Parameters
    ----------
    data : numpy.ndarray
        Array with at least two dimensions. Axis 1 is the one that is cut;
        axis 0 and any trailing axes (e.g. colour channels) are kept whole,
        so both 2-D and 3-D images are accepted.
    number_of_slices : int
        How many strips to produce.

    Returns
    -------
    list
        ``number_of_slices`` slices of ``data`` ordered left to right, each
        ``data.shape[1] // number_of_slices`` wide.

    Notes
    -----
    The strip width uses floor division, so when the width of ``data`` is not
    a multiple of ``number_of_slices`` the leftover columns on the right are
    not part of any strip. ``number_of_slices == 0`` raises
    ``ZeroDivisionError``.
    """
    x_slice_interval_i = data.shape[1] // number_of_slices
    # Index only the first two axes so that arrays without a channel axis
    # work as well; for 3-D input this selects exactly the same elements as
    # spelling out the trailing ``:``.
    return [
        data[:, x_slice_interval_i * i : x_slice_interval_i * (i + 1)]
        for i in range(number_of_slices)
    ]

In [90]:
# Cut the sample image into 4 vertical strips, then report how many strips
# there are and the shape of the first three.
number_of_slices = 4
data_sliced_into_4 = slice_data(one_iris_l, number_of_slices)
print(len(data_sliced_into_4))
print(*(strip.shape for strip in data_sliced_into_4[:3]), sep='\n')

4
(240, 80, 3)
(240, 80, 3)
(240, 80, 3)


In [91]:
# Cut the sample image into 5 vertical strips, then report how many strips
# there are and the shape of the first three.
number_of_slices = 5
data_sliced_into_5 = slice_data(one_iris_l, number_of_slices)
print(len(data_sliced_into_5))
print(*(strip.shape for strip in data_sliced_into_5[:3]), sep='\n')

5
(240, 64, 3)
(240, 64, 3)
(240, 64, 3)


In [92]:
# Cut the sample image into 10 vertical strips, then report how many strips
# there are and the shape of the first three.
number_of_slices = 10
data_sliced_into_10 = slice_data(one_iris_l, number_of_slices)
print(len(data_sliced_into_10))
print(*(strip.shape for strip in data_sliced_into_10[:3]), sep='\n')

10
(240, 32, 3)
(240, 32, 3)
(240, 32, 3)


Now we have 3 sliced versions of the same image (`one_iris_l`):
* data_sliced_into_4
  * 4 * (240, 80, 3)
* data_sliced_into_5
  * 5 * (240, 64, 3)
* data_sliced_into_10
  * 10 * (240, 32, 3)

# 4. Modeling: Naive Bayes

## 4.1. data_sliced_into_4

In [104]:
def create_x(data):
    """Build the feature array from every slice except the last one.

    Parameters
    ----------
    data : sequence of numpy.ndarray
        Slices to join. All but the final element are concatenated along
        axis 1, in order.

    Returns
    -------
    numpy.ndarray
        The joined array. When ``data`` holds one or two elements there is
        nothing to join and ``data[0]`` itself is returned (no copy); note
        that for a one-element input this is also the last element.

    Raises
    ------
    IndexError
        If ``data`` is empty.
    """
    first = data[0]
    feature_slices = data[:-1]
    if len(feature_slices) <= 1:
        # Zero or one feature slice: the result is simply the first slice.
        return first
    # Join everything in one call instead of re-copying the growing result on
    # every loop iteration.
    return np.concatenate(feature_slices, axis=1)

In [105]:
def create_y(data):
    """Return the final element of ``data``, used as the target slice."""
    target_slice = data[-1]
    return target_slice

In [106]:
# Features: all strips but the last, joined side by side. Target: the last strip.
x, y = create_x(data_sliced_into_4), create_y(data_sliced_into_4)

In [107]:
# Type and shape of the feature array, then of the target array (one item per line).
print(type(x), x.shape, type(y), y.shape, sep='\n')

<class 'numpy.ndarray'>
(240, 240, 3)
<class 'numpy.ndarray'>
(240, 80, 3)


In [113]:
# Split x and y along their first axis: half for training, half for testing,
# with a fixed seed so the split is repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.5,
    random_state=0,
)

In [116]:
# Shapes of the four arrays produced by the split, one per line.
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape, sep='\n')

(120, 240, 3)
(120, 240, 3)
(120, 80, 3)
(120, 80, 3)


In [109]:
# Gaussian Naive Bayes classifier, created with its default parameters.
gnb = GaussianNB()

In [112]:
# NOTE(review): this call raises ValueError ("Found array with dim 3") because
# x_train is 3-D, shape (120, 240, 3), and the estimator accepts at most 2-D input.
# TODO: reshape the features to (n_samples, n_features) and choose a 1-D target;
# y_train is also 3-D here, (120, 80, 3), and presumably fails the same way - confirm.
gnb.fit(x_train, y_train)

ValueError: Found array with dim 3. Estimator expected <= 2.

In [111]:
# Fit on the training half, then predict the test half in one chained call.
# NOTE(review): fails with the same ValueError as a plain fit, since x_train is
# 3-D; y_pred is therefore never assigned. TODO: fix the input shapes first.
y_pred = gnb.fit(x_train, y_train).predict(x_test)

ValueError: Found array with dim 3. Estimator expected <= 2.