# 1. Import libraries

In [1]:
import matplotlib.pyplot as plt # plotting
import numpy as np # linear algebra
import os # accessing directory structure
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
# from scipy import misc
# from hmmlearn import hmm

# 2. Global Variables and Load images

## 2.1. Global Variables

In [67]:
TOTAL_NUMBER_OF_PEOPLE = 45
NUMBER_OF_EACH_IRIS_PER_PERSON = 5
TOTAL_NUMBER_OF_EACH_IRIS = TOTAL_NUMBER_OF_PEOPLE * NUMBER_OF_EACH_IRIS_PER_PERSON

## 2.2. Load images into 2 lists: left_iris_l and right_iris_l

In [2]:
path = os.getcwd()
left_iris_l = []
right_iris_l = []

for dirname, _, filenames in os.walk(path+'/data/MMU-Iris-Database/'):
    for filename in filenames:
        if filename not in ['.DS_Store', 'ReadMe.txt', 'Thumbs.db']:
            dir_file_s = os.path.join(dirname, filename)
#             print(filename)
            dirname_l = dirname.split("/")
            if dirname_l[-1] == 'left':
                left_iris_l.append(plt.imread(dir_file_s))
            elif dirname_l[-1] == 'right':
                right_iris_l.append(plt.imread(dir_file_s))
            else:
                print(f'Neither left or right iris in {dir_file_s}')

## 2.3. Check the size of the 2 lists and each file

In [3]:
print(len(left_iris_l))
print(len(right_iris_l))

225
225


In [4]:
print(type(left_iris_l[0]), '\n')
print(left_iris_l[0].shape, '\n')
print(left_iris_l[0])

<class 'numpy.ndarray'> 

(240, 320, 3) 

[[[200 204 200]
  [184 188 184]
  [184 188 184]
  ...
  [ 64  68  64]
  [ 72  76  72]
  [ 80  76  72]]

 [[192 200 192]
  [184 188 184]
  [192 184 184]
  ...
  [ 80  84  80]
  [ 72  76  72]
  [ 80  80  80]]

 [[192 196 192]
  [184 188 184]
  [176 180 176]
  ...
  [ 40  52  40]
  [ 64  60  64]
  [ 80  76  72]]

 ...

 [[240 240 232]
  [184 192 192]
  [184 188 184]
  ...
  [128 124 120]
  [120 128 128]
  [128 124 120]]

 [[248 248 248]
  [248 248 240]
  [232 240 240]
  ...
  [152 156 152]
  [160 160 160]
  [160 156 152]]

 [[  0   4   0]
  [  8   8   8]
  [  0   4   0]
  ...
  [  0   8   0]
  [  8   4   8]
  [  8   8   8]]]


# 3. Flatten

## 3.1. flatten()

In [6]:
def flatten(t):
    return [item for sublist in t for item in sublist]

## 3.2. Flatten the 2 lists of iris data: both left and right

In [7]:
flat_left_iris_l = []
for left_iris in left_iris_l:
    flat_left_iris_l.append(flatten(left_iris))

In [8]:
flat_right_iris_l = []
for right_iris in right_iris_l:
    flat_right_iris_l.append(flatten(right_iris))

## 3.3. Check the resulf of flatten

In [13]:
print(len(flat_left_iris_l) == len(left_iris_l))
print(len(flat_right_iris_l) == len(right_iris_l))

True
True


## 3.4. Shape of flat lists

In [17]:
print(len(flat_left_iris_l))
print(len(flat_left_iris_l[0]))

225
76800


# 4. Slicing

## 4.1. slice_data()

In [22]:
def slice_data(data, number_of_slices):
    x_slice_interval_i = len(data) // number_of_slices
    sliced_data_l = []
    for i in range(number_of_slices):
        sliced_data_l.append(data[x_slice_interval_i * i : x_slice_interval_i * (i+1)])
    return sliced_data_l

## 4.2. verification()

In [45]:
def verification(original_data, new_data):
    print(len(original_data) == len(new_data))
    print(len(original_data[0]) == len(new_data[0]) * len(new_data[0][0]))

## 4.3. Slice left iris

In [46]:
number_of_slices = 5
left_iris_sliced_into_5_l = []
for flat_left_iris in flat_left_iris_l:
    left_iris_sliced_into_5_l.append(slice_data(flat_left_iris, number_of_slices))

verification(flat_left_iris_l, left_iris_sliced_into_5_l)

True
True


In [47]:
number_of_slices = 10
left_iris_sliced_into_10_l = []
for flat_left_iris in flat_left_iris_l:
    left_iris_sliced_into_10_l.append(slice_data(flat_left_iris, number_of_slices))

verification(flat_left_iris_l, left_iris_sliced_into_10_l)

True
True


In [48]:
number_of_slices = 100
left_iris_sliced_into_100_l = []
for flat_left_iris in flat_left_iris_l:
    left_iris_sliced_into_100_l.append(slice_data(flat_left_iris, number_of_slices))

verification(flat_left_iris_l, left_iris_sliced_into_100_l)

True
True


## 4.4. Slice right iris

In [54]:
number_of_slices = 5
right_iris_sliced_into_5_l = []
for flat_right_iris in flat_right_iris_l:
    right_iris_sliced_into_5_l.append(slice_data(flat_right_iris, number_of_slices))

verification(flat_right_iris_l, right_iris_sliced_into_5_l)

True
True


In [50]:
number_of_slices = 10
right_iris_sliced_into_10_l = []
for flat_right_iris in flat_right_iris_l:
    right_iris_sliced_into_10_l.append(slice_data(flat_right_iris, number_of_slices))

verification(flat_right_iris_l, right_iris_sliced_into_10_l)

True
True


In [51]:
number_of_slices = 100
right_iris_sliced_into_100_l = []
for flat_right_iris in flat_right_iris_l:
    right_iris_sliced_into_100_l.append(slice_data(flat_right_iris, number_of_slices))

verification(flat_right_iris_l, right_iris_sliced_into_100_l)

True
True


Now we have 3 each sliced datasets for both left and right iris;
* left_iris_sliced_into_5_l
* left_iris_sliced_into_10_l
* left_iris_sliced_into_100_l
* right_iris_sliced_into_5_l
* right_iris_sliced_into_10_l
* right_iris_sliced_into_100_l

# 4. Modeling: Naive Bayes

In [57]:
print(len(left_iris_sliced_into_5_l))
print(len(left_iris_sliced_into_5_l[0]))
print(len(left_iris_sliced_into_5_l[0][0]))

225
5
15360


In [68]:
print(len(left_iris_sliced_into_10_l))
print(len(left_iris_sliced_into_10_l[0]))
print(len(left_iris_sliced_into_10_l[0][0]))

225
10
7680


## 4.1. left_iris_sliced_into_5

In [105]:
def create_x(data):
    return pd.DataFrame(data[:-1], columns = list(range(data[0])))

In [106]:
def create_y(data):
    return pd.DataFrame([data[-1]])

In [107]:
left_iris_sliced_into_5 = left_iris_sliced_into_5_l[0]
print(type(left_iris_sliced_into_5))
print(len(left_iris_sliced_into_5))
print(type(left_iris_sliced_into_5[0]))

<class 'list'>
5
<class 'list'>


In [108]:
x = create_x(left_iris_sliced_into_5)
y = create_y(left_iris_sliced_into_5)

TypeError: 'list' object cannot be interpreted as an integer

In [94]:
x

Unnamed: 0,0,1,2,3
0,"[[200, 204, 200], [184, 188, 184], [184, 188, ...","[[224, 228, 224], [216, 224, 216], [224, 220, ...","[[232, 236, 232], [232, 236, 232], [216, 220, ...","[[232, 240, 232], [224, 228, 224], [232, 232, ..."


In [92]:
print(type(x))
print(len(x))
print(type(y))
print(len(y))

<class 'pandas.core.frame.DataFrame'>
1
<class 'pandas.core.frame.DataFrame'>
1


In [93]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.7, random_state=0)

ValueError: With n_samples=1, test_size=0.7 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.

In [116]:
print(x_train.shape)
print(x_test.shape)
print(y_train.shape)
print(y_test.shape)

(120, 240, 3)
(120, 240, 3)
(120, 80, 3)
(120, 80, 3)


In [109]:
gnb = GaussianNB()

In [112]:
gnb.fit(x_train, y_train)

ValueError: Found array with dim 3. Estimator expected <= 2.

In [111]:
y_pred = gnb.fit(x_train, y_train).predict(x_test)

ValueError: Found array with dim 3. Estimator expected <= 2.