In [36]:
import pandas as pd
import numpy as np
import keras
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import os
from keras.layers import Conv2D, Dense, Activation
from functools import reduce
from keras.models import Sequential, Model
import pickle
from sklearn.model_selection import train_test_split

### Getting directory list

In [2]:
# f = Image.open('./fish/220px-Alligator_Gar_10.JPG.jpg', "r")
d = 'fish/'
# d = 'created-data/'
dirs = [d + x.strip(' ') + '/' for x in os.listdir(d)]
temp = []

# Removing DS store
for d in dirs:
    if d == 'fish/.DS_Store':
        continue
    temp.append(d)
dirs = temp

### Reading each image in a directory


In [3]:
def get_image_dict():
    fish_data = {}

    for directory in dirs:
        try:
            dir_files = [f for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f))]
        except NotADirectoryError:
            continue

        images = []
        print("Start loading images for: " + directory)
        for image_file in dir_files:
            try:
                f = Image.open(directory + image_file)
                img_as_arr = np.array(f.resize((299,299)))
                images.append(img_as_arr)
                f.close()
            except OSError:
                continue
        fish_data[directory.split("/")[1]] = images
    return fish_data

fish_data = get_image_dict()

Start loading images for: fish/bluegill/
Start loading images for: fish/smallmouth_bass/
Start loading images for: fish/blue_catfish/
Start loading images for: fish/walleye/
Start loading images for: fish/black_crappie/
Start loading images for: fish/channel_catfish/
Start loading images for: fish/Green Sunfish/
Start loading images for: fish/alligator_gar/
Start loading images for: fish/rainbow_trout/
Start loading images for: fish/common_carp/
Start loading images for: fish/northern_pike/




Start loading images for: fish/Largemouth Bass/


In [4]:
def dict_to_vec(fish_data):
    """
    :returns: x_train, y_train
    """
    x_train = []
    y_train = []
    for key in fish_data:
        data = fish_data[key]
        
        samples = []
        for image in data:
            if len(image.shape) != 3 or image.shape[2] != 3:
                continue
            samples.append(image)

        shape = (len(samples),) + data[0].shape

        x_placeholder = np.zeros(shape)
        

        for i, sample in enumerate(samples):
            x_placeholder[i] = sample
        
        x_train.append(x_placeholder)
        y_train.append(key)
        
    return x_train, y_train


In [5]:
x_train_vec, labels = dict_to_vec(fish_data)

In [6]:
# zipped_list = list(zip(x_train_vec, labels))

In [7]:
# , [labels[0]]
# list(zip(x_train_vec, labels))[0]

In [8]:
# with open('training_data.pkl', 'wb') as f:
#     pickle.dump(zippped_list, f)
#     f.close()

# with open('training_data.pkl', 'r') as f:
#     pickle.load(f)

In [9]:
# Removing walleye
# if x_train_vec[3].shape[0] < 1000:
#     x_train_vec.pop(3)

In [10]:
[x.shape for x in x_train_vec]

[(50, 299, 299, 3),
 (49, 299, 299, 3),
 (68, 299, 299, 3),
 (32, 299, 299, 3),
 (52, 299, 299, 3),
 (52, 299, 299, 3),
 (56, 299, 299, 3),
 (60, 299, 299, 3),
 (41, 299, 299, 3),
 (42, 299, 299, 3),
 (67, 299, 299, 3),
 (42, 299, 299, 3)]

In [11]:
list_num_index = []
for vec in x_train_vec:
    list_num_index.append(vec.shape[0])

In [12]:
# for i in range(len(x_train_vec)):
#     x_train_vec[i] = x_train_vec[i][0:1000,:,:,:]

In [13]:
sum(list_num_index)

611

In [14]:
y = []

for i, num_examples in enumerate(list_num_index):
    y.append([i] * num_examples)
# flatten

y_train = np.array([item for sublist in y for item in sublist])

In [15]:
x_train = np.concatenate(x_train_vec)

In [16]:
# X_train, X_test, Y_train, Y_test = train_test_split(x_train, y_train, test_size=0.15, random_state=42)

In [17]:
print(X_train.shape)
print(X_test.shape)
print(Y_train.shape)
print(Y_test.shape)

(519, 299, 299, 3)
(92, 299, 299, 3)
(519,)
(92,)


In [18]:
np.save('keras_model/x_train.npy', x_train)
np.save('keras_model/y_train.npy', y)

# with open('keras_model/train100.pkl', 'wb') as handle:
#     pickle.dump((X_train, Y_train), handle)
    
# with open('keras_model/test100.pkl', 'wb') as handle:
#     pickle.dump((X_test, Y_test), handle)

In [19]:
# with open('keras_model/train2.pkl', 'wb') as handle:
#     pickle.dump((x_train[5000:], y[5000:]), handle, protocol=4)

In [20]:
# with open('keras_model/test.pkl', 'wb') as handle:
#     pickle.dump((X_test, y_test), handle)

In [21]:
# pickle.dump(x_train, 'x_train')
print(x_train.shape)
print(y_train.shape)

(611, 299, 299, 3)
(611,)


In [23]:
x_input = keras.engine.topology.Input(shape=x_train.shape[1:])

In [34]:
initial_model = keras.applications.inception_v3.InceptionV3(include_top=False, weights='imagenet', input_tensor=x_input, input_shape=x_train.shape[1:])

In [35]:
last_layer = initial_model.output
x = keras.layers.Flatten()(last_layer)
predictions = Dense(len(labels), activation='softmax')(x)

In [38]:
model = Model(initial_model.input, predictions)

In [43]:
model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])

In [44]:
y = keras.utils.to_categorical(y_train, num_classes=len(labels))

In [42]:
# dense = Dense(len(labels))
# output = keras.layers.concatenate([model.outputs[0], dense], axis=1)
# type()
# x_train.shape

model.fit(x_train, y, batch_size=10, epochs=5, verbose=1)

Epoch 1/1


<keras.callbacks.History at 0x255394c88>

In [None]:
model_json = model.to_json()
with open("model100.json", "w") as json_file:
    json_file.write(model_json)
    
# serialize weights to HDF5
model.save_weights("model100.h5")
print("Saved model to disk")