In [25]:
import pandas as pd
import numpy as np
import keras
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import os
from keras.layers import Conv2D, Dense, Activation
from functools import reduce
from keras.models import Sequential
import pickle
from sklearn.model_selection import train_test_split

### Getting directory list

In [26]:
# f = Image.open('./fish/220px-Alligator_Gar_10.JPG.jpg', "r")
# d = 'fish/'
d = 'created-data/'
dirs = [d + x.strip(' ') + '/' for x in os.listdir(d)]
temp = []

# Removing DS store
for d in dirs:
    if d == 'fish/.DS_Store':
        continue
    temp.append(d)
dirs = temp

### Reading each image in a directory


In [27]:
def get_image_dict():
    fish_data = {}

    for directory in dirs:
        try:
            dir_files = [f for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f))]
        except NotADirectoryError:
            continue

        images = []
        print("Start loading images for: " + directory)
        for image_file in dir_files:
            try:
                f = Image.open(directory + image_file)
                img_as_arr = np.array(f.resize((256,256)))
                images.append(img_as_arr)
                f.close()
            except OSError:
                continue
        fish_data[directory.split("/")[1]] = images
    return fish_data

fish_data = get_image_dict()

Start loading images for: created-data/bluegill/
Start loading images for: created-data/smallmouth_bass/
Start loading images for: created-data/blue_catfish/
Start loading images for: created-data/walleye/
Start loading images for: created-data/black_crappie/
Start loading images for: created-data/channel_catfish/
Start loading images for: created-data/alligator_gar/
Start loading images for: created-data/largemouth_bass/
Start loading images for: created-data/rainbow_trout/
Start loading images for: created-data/common_carp/
Start loading images for: created-data/green_sunfish/
Start loading images for: created-data/northern_pike/


In [28]:
def dict_to_vec(fish_data):
    """
    :returns: x_train, y_train
    """
    x_train = []
    y_train = []
    for key in fish_data:
        data = fish_data[key]
        
        samples = []
        for image in data:
            if len(image.shape) != 3 or image.shape[2] != 3:
                continue
            samples.append(image)

        shape = (len(samples),) + data[0].shape

        x_placeholder = np.zeros(shape)
        

        for i, sample in enumerate(samples):
            x_placeholder[i] = sample
        
        x_train.append(x_placeholder)
        y_train.append(key)
        
    return x_train, y_train


In [29]:
x_train_vec, labels = dict_to_vec(fish_data)

In [30]:
# zipped_list = list(zip(x_train_vec, labels))

In [31]:
# , [labels[0]]
# list(zip(x_train_vec, labels))[0]

In [32]:
# with open('training_data.pkl', 'wb') as f:
#     pickle.dump(zippped_list, f)
#     f.close()

# with open('training_data.pkl', 'r') as f:
#     pickle.load(f)

In [33]:
# Removing walleye
if x_train_vec[3].shape[0] < 1000:
    x_train_vec.pop(3)

In [34]:
[x.shape for x in x_train_vec]

[(1000, 256, 256, 3),
 (1000, 256, 256, 3),
 (1000, 256, 256, 3),
 (1000, 256, 256, 3),
 (1000, 256, 256, 3),
 (1000, 256, 256, 3),
 (1000, 256, 256, 3),
 (1000, 256, 256, 3),
 (1000, 256, 256, 3),
 (1000, 256, 256, 3),
 (1000, 256, 256, 3)]

In [35]:
list_num_index = []
for i in range(len(x_train_vec)):
    list_num_index.append(x_train_vec[i].shape[0])
list_num_index

[1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]

In [36]:
for i in range(len(x_train_vec)):
    x_train_vec[i] = x_train_vec[i][0:1000,:,:,:]

In [37]:
list_num_index

[1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]

In [38]:
y = np.zeros((len(x_train_vec)*1000))
for i in range(len(list_num_index)):
    y[i*1000:(i+1)*1000] = i
y.shape

(11000,)

In [39]:
x_train = np.concatenate(x_train_vec)

In [40]:
# X_train_, X_test, y_train, y_test = train_test_split(x_train, y, test_size=0.15, random_state=42)

In [41]:
print(X_train_.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

NameError: name 'X_train_' is not defined

In [None]:
x1 = x_train[0:2]
x2 = x_train[5000:]
y1 = y[0:2]
y2 = y[5000:]
with open('keras_model/train1.pkl', 'wb') as handle:
    pickle.dump((x_train[:5000], y[:5000]), handle, protocol=4)

In [None]:
with open('keras_model/train2.pkl', 'wb') as handle:
    pickle.dump((x_train[5000:], y[5000:]), handle, protocol=4)

In [None]:
with open('keras_model/test.pkl', 'wb') as handle:
    pickle.dump((X_test, y_test), handle)

In [34]:
# pickle.dump(x_train, 'x_train')
print(x_train.shape)
print(y_train.shape)

(880, 256, 256, 3)
(748,)


In [None]:
parameters = {
    'channels': 3,
    'window': 5,
    'stride': 3,
    'padding': 'same',
    'activation': 'relu'
}

channels = parameters['channels']
window = parameters['window']
stride = parameters['stride']
padding = parameters['padding']
activation = parameters['activation']

model = Sequential()

# magic numbers: 256 * 256 * 3 is the total pixels per image
model.add(Dense((256 * 256 * 3), input_shape=x_train.shape))

model.add(Conv2D(filters=channels, kernel_size=window, strides=stride, padding=padding, activation=activation,
    data_format='channels_last'))
