# 1. Import libraries

In [1]:
import matplotlib.pyplot as plt # plotting
import numpy as np # linear algebra
import os # accessing directory structure
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
# from scipy import misc
# from hmmlearn import hmm

# 2. Global Variables and Load images

## 2.1. Global Variables

In [67]:
# Dataset dimensions used throughout the notebook.
TOTAL_NUMBER_OF_PEOPLE = 45
NUMBER_OF_EACH_IRIS_PER_PERSON = 5
# Image count for one eye side across all people (45 * 5 = 225).
TOTAL_NUMBER_OF_EACH_IRIS = NUMBER_OF_EACH_IRIS_PER_PERSON * TOTAL_NUMBER_OF_PEOPLE

## 2.2. Load images into 2 lists: left_iris_l and right_iris_l

In [2]:
# Walk the dataset directory and collect every image into one list per eye side.
# The side is read from the name of the folder that directly contains the file.
path = os.getcwd()
left_iris_l = []
right_iris_l = []

for dirname, _, filenames in os.walk(os.path.join(path, 'data', 'MMU-Iris-Database')):
    # os.path.basename honours the platform's path separator; splitting on "/"
    # never yields 'left'/'right' where os.walk joins paths with a backslash.
    side = os.path.basename(dirname)
    for filename in filenames:
        # Skip known non-image files found in the dataset folders.
        if filename in ('.DS_Store', 'ReadMe.txt', 'Thumbs.db'):
            continue
        dir_file_s = os.path.join(dirname, filename)
        if side == 'left':
            left_iris_l.append(plt.imread(dir_file_s))
        elif side == 'right':
            right_iris_l.append(plt.imread(dir_file_s))
        else:
            # Report files that sit outside a 'left' or 'right' folder.
            print(f'Neither left or right iris in {dir_file_s}')

## 2.3. Check the size of the 2 lists and each file

In [3]:
# Number of images collected for each eye side, one count per line.
print(len(left_iris_l), len(right_iris_l), sep='\n')

225
225


In [4]:
# Inspect the first left-eye image: container type, dimensions, then raw values.
# The separator reproduces the " \n" + newline gap between the three parts.
print(type(left_iris_l[0]), left_iris_l[0].shape, left_iris_l[0], sep=' \n\n')

<class 'numpy.ndarray'> 

(240, 320, 3) 

[[[200 204 200]
  [184 188 184]
  [184 188 184]
  ...
  [ 64  68  64]
  [ 72  76  72]
  [ 80  76  72]]

 [[192 200 192]
  [184 188 184]
  [192 184 184]
  ...
  [ 80  84  80]
  [ 72  76  72]
  [ 80  80  80]]

 [[192 196 192]
  [184 188 184]
  [176 180 176]
  ...
  [ 40  52  40]
  [ 64  60  64]
  [ 80  76  72]]

 ...

 [[240 240 232]
  [184 192 192]
  [184 188 184]
  ...
  [128 124 120]
  [120 128 128]
  [128 124 120]]

 [[248 248 248]
  [248 248 240]
  [232 240 240]
  ...
  [152 156 152]
  [160 160 160]
  [160 156 152]]

 [[  0   4   0]
  [  8   8   8]
  [  0   4   0]
  ...
  [  0   8   0]
  [  8   4   8]
  [  8   8   8]]]


# 3. Flatten

## 3.1. flatten()

In [6]:
def flatten(t):
    """Remove one level of nesting from an iterable of iterables.

    Args:
        t: Iterable whose elements are themselves iterable.

    Returns:
        list: The items of every inner iterable, in encounter order. Any deeper
        nesting inside those items is left untouched.
    """
    flat_items = []
    for inner in t:
        flat_items.extend(inner)
    return flat_items

## 3.2. Flatten the 2 lists of iris data: both left and right

In [7]:
# Collapse the outer axis of every left-eye image into one flat list of items.
flat_left_iris_l = [flatten(left_iris) for left_iris in left_iris_l]

In [8]:
# Collapse the outer axis of every right-eye image into one flat list of items.
flat_right_iris_l = [flatten(right_iris) for right_iris in right_iris_l]

## 3.3. Check the result of flatten

In [13]:
# Flattening must not change the number of images on either side.
print(
    len(flat_left_iris_l) == len(left_iris_l),
    len(flat_right_iris_l) == len(right_iris_l),
    sep='\n',
)

True
True


## 3.4. Shape of flat lists

In [17]:
# Image count, then the item count of the first flattened image.
print(len(flat_left_iris_l), len(flat_left_iris_l[0]), sep='\n')

225
76800


# 4. Slicing

## 4.1. slice_data()

In [22]:
def slice_data(data, number_of_slices):
    """Split a sequence into ``number_of_slices`` consecutive, equally sized chunks.

    Every chunk holds ``len(data) // number_of_slices`` items. When the length is
    not an exact multiple, the trailing remainder is left out so that all chunks
    keep the same size.

    Args:
        data: Sliceable sequence that supports ``len()``.
        number_of_slices: Number of chunks to produce; must be a positive integer.

    Returns:
        list: ``number_of_slices`` slices of ``data``, in original order.

    Raises:
        ValueError: If ``number_of_slices`` is zero or negative.
    """
    # Fail loudly instead of ZeroDivisionError (0) or a silently empty result (< 0).
    if number_of_slices <= 0:
        raise ValueError(
            f"number_of_slices must be a positive integer, got {number_of_slices!r}"
        )
    chunk_size = len(data) // number_of_slices
    return [
        data[chunk_size * i: chunk_size * (i + 1)]
        for i in range(number_of_slices)
    ]

## 4.2. verification()

In [45]:
def verification(original_data, new_data):
    """Print two consistency checks comparing flat data with its sliced form.

    The first printed line tells whether both collections hold the same number
    of samples. The second tells whether the first flat sample's length equals
    the number of slices in the first sliced sample multiplied by the length of
    that sample's first slice.

    Args:
        original_data: Sequence of flat samples.
        new_data: Sequence of sliced samples (each a sequence of slices).
    """
    print(len(new_data) == len(original_data))
    first_flat, first_sliced = original_data[0], new_data[0]
    print(len(first_sliced) * len(first_sliced[0]) == len(first_flat))

## 4.3. Slice left iris

In [46]:
# Cut every flattened left-eye image into 5 equally sized consecutive segments.
number_of_slices = 5
left_iris_sliced_into_5_l = [
    slice_data(flat_left_iris, number_of_slices)
    for flat_left_iris in flat_left_iris_l
]

# Prints whether the sample count and per-sample item count were preserved.
verification(flat_left_iris_l, left_iris_sliced_into_5_l)

True
True


In [47]:
# Cut every flattened left-eye image into 10 equally sized consecutive segments.
number_of_slices = 10
left_iris_sliced_into_10_l = [
    slice_data(flat_left_iris, number_of_slices)
    for flat_left_iris in flat_left_iris_l
]

# Prints whether the sample count and per-sample item count were preserved.
verification(flat_left_iris_l, left_iris_sliced_into_10_l)

True
True


In [48]:
# Cut every flattened left-eye image into 100 equally sized consecutive segments.
number_of_slices = 100
left_iris_sliced_into_100_l = [
    slice_data(flat_left_iris, number_of_slices)
    for flat_left_iris in flat_left_iris_l
]

# Prints whether the sample count and per-sample item count were preserved.
verification(flat_left_iris_l, left_iris_sliced_into_100_l)

True
True


## 4.4. Slice right iris

In [54]:
# Cut every flattened right-eye image into 5 equally sized consecutive segments.
number_of_slices = 5
right_iris_sliced_into_5_l = [
    slice_data(flat_right_iris, number_of_slices)
    for flat_right_iris in flat_right_iris_l
]

# Prints whether the sample count and per-sample item count were preserved.
verification(flat_right_iris_l, right_iris_sliced_into_5_l)

True
True


In [50]:
# Cut every flattened right-eye image into 10 equally sized consecutive segments.
number_of_slices = 10
right_iris_sliced_into_10_l = [
    slice_data(flat_right_iris, number_of_slices)
    for flat_right_iris in flat_right_iris_l
]

# Prints whether the sample count and per-sample item count were preserved.
verification(flat_right_iris_l, right_iris_sliced_into_10_l)

True
True


In [51]:
# Cut every flattened right-eye image into 100 equally sized consecutive segments.
number_of_slices = 100
right_iris_sliced_into_100_l = [
    slice_data(flat_right_iris, number_of_slices)
    for flat_right_iris in flat_right_iris_l
]

# Prints whether the sample count and per-sample item count were preserved.
verification(flat_right_iris_l, right_iris_sliced_into_100_l)

True
True


Now we have three sliced datasets each for the left and the right iris:
* left_iris_sliced_into_5_l
* left_iris_sliced_into_10_l
* left_iris_sliced_into_100_l
* right_iris_sliced_into_5_l
* right_iris_sliced_into_10_l
* right_iris_sliced_into_100_l

# 5. DataFrame and Grayscale

In [214]:
def rgb2gray(rgb):
    """Convert RGB pixel data to a grayscale value.

    Computes the weighted sum ``0.2989*R + 0.5870*G + 0.1140*B`` over the last
    axis, using only the first three channels (any further channel is ignored).

    Args:
        rgb: Array-like whose last axis holds at least three channels, e.g. a
            single ``[R, G, B]`` pixel or a ``(rows, cols, 3)`` image. Plain
            lists and tuples are accepted as well as NumPy arrays.

    Returns:
        A NumPy scalar for a single pixel; otherwise an array shaped like the
        input with the channel axis removed.
    """
    # np.asarray lets plain Python sequences through; the ellipsis index below
    # is only supported by ndarrays.
    channels = np.asarray(rgb)[..., :3]
    return np.dot(channels, [0.2989, 0.5870, 0.1140])

In [None]:
# FIXME: left_iris_sliced_into_5_df is not assigned until a later cell, so this
# line raises NameError when the notebook is run top to bottom. The same
# statement appears again after the DataFrame has been built.
gray_left_iris_sliced_into_5_df = left_iris_sliced_into_5_df.applymap(rgb2gray)

# 6. Modeling: Naive Bayes

In [57]:
# Sample count, slices per sample, items per slice for the 5-slice dataset.
print(
    len(left_iris_sliced_into_5_l),
    len(left_iris_sliced_into_5_l[0]),
    len(left_iris_sliced_into_5_l[0][0]),
    sep='\n',
)

225
5
15360


In [68]:
# Sample count, slices per sample, items per slice for the 10-slice dataset.
print(
    len(left_iris_sliced_into_10_l),
    len(left_iris_sliced_into_10_l[0]),
    len(left_iris_sliced_into_10_l[0][0]),
    sep='\n',
)

225
10
7680


## 6.1. left_iris_sliced_into_5

In [116]:
# Work with the first left-eye sample only: its 5 slices.
left_iris_sliced_into_5 = left_iris_sliced_into_5_l[0]
# Container type, slice count, and type of a single slice, one per line.
print(
    type(left_iris_sliced_into_5),
    len(left_iris_sliced_into_5),
    type(left_iris_sliced_into_5[0]),
    sep='\n',
)

<class 'list'>
5
<class 'list'>


In [130]:
# Build the frame with one row per slice, then transpose so that each slice
# becomes a column and each row is a position inside the slice.
left_iris_sliced_into_5_df = pd.DataFrame(left_iris_sliced_into_5).T
left_iris_sliced_into_5_df.head(10)

Unnamed: 0,0,1,2,3,4
0,"[200, 204, 200]","[224, 228, 224]","[232, 236, 232]","[232, 240, 232]","[136, 140, 136]"
1,"[184, 188, 184]","[216, 224, 216]","[232, 236, 232]","[224, 228, 224]","[136, 140, 136]"
2,"[184, 188, 184]","[224, 220, 216]","[216, 220, 216]","[232, 232, 232]","[144, 144, 136]"
3,"[184, 184, 184]","[216, 220, 216]","[232, 232, 224]","[232, 232, 224]","[144, 144, 144]"
4,"[168, 176, 168]","[224, 224, 224]","[232, 228, 224]","[248, 248, 248]","[136, 144, 136]"
5,"[168, 168, 168]","[216, 224, 216]","[240, 244, 240]","[232, 236, 232]","[144, 144, 144]"
6,"[184, 184, 176]","[216, 220, 216]","[232, 240, 240]","[240, 240, 232]","[152, 152, 144]"
7,"[184, 184, 184]","[224, 220, 216]","[240, 244, 232]","[248, 248, 248]","[144, 148, 152]"
8,"[192, 192, 184]","[216, 224, 224]","[224, 224, 224]","[208, 216, 208]","[152, 152, 144]"
9,"[184, 192, 184]","[224, 224, 216]","[232, 232, 232]","[216, 212, 216]","[152, 156, 152]"


In [146]:
def rgb2gray(rgb):
    """Convert RGB pixel data to a grayscale value.

    Computes the weighted sum ``0.2989*R + 0.5870*G + 0.1140*B`` over the last
    axis, using only the first three channels (any further channel is ignored).

    NOTE: a function with this name is also defined earlier in the notebook;
    this later definition is the one in effect for the cells that follow.

    Args:
        rgb: Array-like whose last axis holds at least three channels, e.g. a
            single ``[R, G, B]`` pixel or a ``(rows, cols, 3)`` image. Plain
            lists and tuples are accepted as well as NumPy arrays.

    Returns:
        A NumPy scalar for a single pixel; otherwise an array shaped like the
        input with the channel axis removed.
    """
    # np.asarray lets plain Python sequences through; the ellipsis index below
    # is only supported by ndarrays.
    channels = np.asarray(rgb)[..., :3]
    return np.dot(channels, [0.2989, 0.5870, 0.1140])

In [149]:
# Apply rgb2gray to every cell of the frame, producing a frame of the same
# layout whose cells are single grayscale values instead of RGB triples.
gray_left_iris_sliced_into_5_df = left_iris_sliced_into_5_df.applymap(rgb2gray)

In [150]:
# Preview the first 10 rows of the grayscale frame.
gray_left_iris_sliced_into_5_df.head(10)

Unnamed: 0,0,1,2,3,4
0,202.328,226.3256,234.3248,236.6728,138.3344
1,186.3296,220.6744,234.3248,226.3256,138.3344
2,186.3296,220.7176,218.3264,231.9768,143.0736
3,183.9816,218.3264,231.0648,231.0648,143.9856
4,172.6792,223.9776,228.7168,247.9752,140.6824
5,167.9832,220.6744,242.324,234.3248,143.9856
6,183.0696,218.3264,237.5848,239.064,151.0728
7,183.9816,220.7176,241.412,247.9752,147.2456
8,191.0688,221.5864,223.9776,212.6752,151.0728
9,188.6776,223.0656,231.9768,213.6304,154.3328


In [138]:
def create_x(data):
    """Return every column of ``data`` except the last one.

    Args:
        data: pandas DataFrame.

    Returns:
        DataFrame holding all rows and all columns but the final one.
    """
    every_row = slice(None)
    all_but_last_column = slice(None, -1)
    return data.iloc[every_row, all_but_last_column]

In [139]:
def create_y(data):
    """Return the last column of ``data``.

    Args:
        data: pandas DataFrame.

    Returns:
        Series holding all rows of the final column.
    """
    every_row = slice(None)
    last_column = -1
    return data.iloc[every_row, last_column]

In [151]:
# Features: all grayscale slice columns but the last; target: the last column.
x, y = (
    create_x(gray_left_iris_sliced_into_5_df),
    create_y(gray_left_iris_sliced_into_5_df),
)

In [152]:
# Type and row count of the feature frame, then of the target column.
print(type(x), len(x), type(y), len(y), sep='\n')

<class 'pandas.core.frame.DataFrame'>
15360
<class 'pandas.core.series.Series'>
15360


In [189]:
# Split rows into train/test with a fixed seed for repeatability.
# NOTE(review): test_size=0.7 sends 70% of the rows to the test split (the
# recorded shapes are 4608 train vs 10752 test) — confirm this is intended
# rather than 0.3.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.7, random_state=0)

In [208]:
def create_index(data):
    """Renumber the index of ``data`` as 0..len(data)-1, in place, and return it.

    Args:
        data: pandas object with an assignable ``index`` (DataFrame or Series).

    Returns:
        The same object that was passed in, now carrying the new index.
    """
    fresh_labels = [position for position in range(len(data))]
    data.index = fresh_labels
    return data

In [209]:
# Give all four splits a fresh 0..n-1 index. create_index returns the object it
# was given, so each name is rebound to its own split.
x_train, x_test, y_train, y_test = map(
    create_index, (x_train, x_test, y_train, y_test)
)

In [210]:
# Shapes of the four splits, one per line.
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape, sep='\n')

(4608, 4)
(10752, 4)
(4608, 1)
(10752, 1)


In [211]:
# Show the training features (pandas abbreviates long frames when printed).
print(x_train)

             0         1         2         3
0     207.9792  199.9800  157.5928   93.5992
1     226.3256  207.9792   68.7328   66.3416
2     175.9824  146.3336   77.6008   98.3384
3     181.5904  167.0712  108.7288  100.6864
4     220.6744  226.3256  231.9768  175.0704
...        ...       ...       ...       ...
4603  218.3264  223.9776  226.3256  157.5928
4604  207.0672  247.9752  181.5904  189.5896
4605  188.7208  169.4192  119.9880  116.6848
4606  191.9808  162.3320  108.7288  119.9880
4607  202.3280  201.4160  151.9848   90.3392

[4608 rows x 4 columns]


In [212]:
# Show the training target values.
# NOTE(review): the recorded output below contains two different renderings
# (a one-column frame and a Series), so it appears to come from more than one
# run — re-execute to refresh.
print(y_train)

             4
0     145.4216
1     207.9792
2       7.9992
3     108.6856
4     183.9816
...        ...
4603  178.3304
4604  167.9832
4605  140.6824
4606  141.5944
4607  125.5960

[4608 rows x 1 columns]


0       145.4216
1       207.9792
2         7.9992
3       108.6856
4       183.9816
          ...   
4603    178.3304
4604    167.9832
4605    140.6824
4606    141.5944
4607    125.5960
Name: 4, Length: 4608, dtype: float64

In [195]:
# Gaussian Naive Bayes classifier with default settings.
gnb = GaussianNB()

In [196]:
# FIXME: the recorded run of this cell raised "ValueError: Unknown label type".
# y_train holds continuous grayscale values (the last slice column), while
# GaussianNB is a classifier and needs discrete class labels. A categorical
# target must be supplied before this can fit.
gnb.fit(x_train, y_train)

  return f(*args, **kwargs)


ValueError: Unknown label type: (array([  2.348 ,   4.696 ,   5.6512,   7.9992,  55.9944,  58.3424,
        63.9936,  66.3416,  68.7328,  71.0808,  71.9496,  71.9928,
        74.3408,  76.6888,  76.732 ,  77.6008,  77.644 ,  79.08  ,
        79.992 ,  81.428 ,  82.34  ,  83.252 ,  84.688 ,  84.7312,
        85.6   ,  85.6432,  87.0792,  87.9912,  89.4272,  90.3392,
        91.2512,  92.6872,  92.7304,  93.5992,  93.6424,  95.0784,
        95.9904,  97.4264,  98.3384,  99.2504, 100.6864, 100.7296,
       101.5984, 101.6416, 103.0776, 103.9464, 103.9896, 105.4256,
       106.3376, 107.2496, 108.6856, 108.7288, 109.5976, 109.6408,
       111.0768, 111.9456, 111.9888, 114.3368, 115.2488, 116.6848,
       116.728 , 117.5968, 117.64  , 119.076 , 119.988 , 121.424 ,
       122.336 , 122.3792, 123.248 , 124.684 , 124.7272, 125.596 ,
       125.6392, 127.032 , 127.0752, 127.944 , 127.9872, 129.4232,
       130.3352, 131.2472, 132.6832, 132.7264, 133.5952, 133.6384,
       135.0744, 135.9864, 137.4224, 138.3344, 138.3776, 139.2464,
       140.6824, 140.7256, 141.5944, 141.6376, 143.0736, 143.9424,
       143.9856, 145.4216, 146.3336, 147.2456, 148.6816, 148.7248,
       149.5936, 149.6368, 151.0728, 151.9416, 151.9848, 153.4208,
       154.3328, 155.2448, 156.6808, 156.724 , 157.5928, 157.636 ,
       159.072 , 159.9408, 159.984 , 161.42  , 162.332 , 162.3752,
       163.244 , 164.68  , 164.7232, 165.592 , 165.6352, 167.0712,
       167.94  , 167.9832, 169.4192, 170.3312, 171.2432, 172.6792,
       172.7224, 173.5912, 173.6344, 175.0704, 175.9392, 175.9824,
       177.4184, 178.3304, 179.2424, 180.6784, 180.7216, 181.5904,
       181.6336, 183.0696, 183.9384, 183.9816, 185.4176, 186.3296,
       186.3728, 187.2416, 188.6776, 188.7208, 189.5896, 189.6328,
       191.0688, 191.9808, 193.4168, 194.3288, 195.2408, 196.6768,
       196.72  , 197.5888, 197.632 , 199.0248, 199.068 , 199.9368,
       199.98  , 201.416 , 202.328 , 203.24  , 204.676 , 204.7192,
       205.588 , 205.6312, 207.0672, 207.936 , 207.9792, 209.4152,
       210.3272, 211.2392, 212.6752, 212.7184, 213.5872, 213.6304,
       215.0664, 215.9784, 217.4144, 218.3264, 219.2384, 220.6744,
       220.7176, 221.5864, 221.6296, 223.0656, 223.9776, 225.4136,
       226.3256, 227.2376, 228.6736, 228.7168, 229.5856, 229.6288,
       231.0216, 231.0648, 231.9768, 233.4128, 234.3248, 236.6728,
       236.716 , 237.5848, 239.064 , 239.976 , 242.324 , 244.672 ,
       245.584 , 247.0632, 247.9752]),)

In [111]:
# Fit on the training split and predict the test split in one chain.
# NOTE(review): the error recorded under this cell ("Found array with dim 3")
# carries an older execution count (In [111]) than the cells above, so it
# presumably predates the grayscale conversion — re-run to refresh the output.
y_pred = gnb.fit(x_train, y_train).predict(x_test)

ValueError: Found array with dim 3. Estimator expected <= 2.