In [25]:
import os
from glob import glob

import numpy as np
import pandas as pd
import tensorflow as tf
from PIL import Image
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.layers import GlobalAveragePooling2D, Dropout, Dense, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [3]:
# Input image geometry in pixels, plus the number of colour channels.
WIDTH = 1024
HEIGHT = 768
CHANNELS = 3

# Root folder of the dataset, relative to the working directory.
DATA_PATH = 'data'

In [4]:
# Build and compile the model inside the strategy scope so that its variables
# are created under MirroredStrategy.
strategy = tf.distribute.MirroredStrategy()
with strategy.scope():
    # ImageNet-pretrained ResNet50 backbone without its classification head.
    # Keras expects input_shape as (height, width, channels).
    # NOTE(review): the data generator feeds pixels scaled by 1/255; confirm
    # whether the pretrained weights expect resnet50.preprocess_input instead.
    base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(HEIGHT, WIDTH, CHANNELS))

    # Regression head: flatten the backbone feature map into 36 sigmoid outputs.
    # NOTE(review): sigmoid bounds predictions to (0, 1); this assumes the label
    # values are normalised to that range - confirm against the label CSVs.
    x = Flatten()(base_model.output)
    x = Dense(36, activation='sigmoid')(x)
    model = Model(inputs=base_model.input, outputs=x)

    # `lr` is a deprecated alias that newer Keras releases reject;
    # `learning_rate` is the supported argument name.
    model.compile(optimizer=Adam(learning_rate=0.01), loss='mse')

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:CPU:0',)


In [11]:
# Print the layer-by-layer architecture (output shapes and parameter counts).
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_5 (InputLayer)            [(None, 768, 1024, 3 0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 774, 1030, 3) 0           input_5[0][0]                    
__________________________________________________________________________________________________
conv1_conv (Conv2D)             (None, 384, 512, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
conv1_bn (BatchNormalization)   (None, 384, 512, 64) 256         conv1_conv[0][0]                 
____________________________________________________________________________________________

In [None]:
# NOTE: the order of the output labels currently matters.
# TODO: later, build a network for which the label order does not matter.

In [29]:
class DataGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` yielding ``(images, labels)`` batches from a DataFrame.

    Every row of the DataFrame holds an ``image`` file path and a ``label`` CSV
    path (see :meth:`gen_df`). Images are scaled by 1/255; a label is the CSV's
    values with the first column dropped, flattened into a vector of
    ``NUM_OUTPUTS`` numbers.
    """

    # Length of the flattened label vector of one sample.
    NUM_OUTPUTS = 36

    def __init__(self, df, batch_size=32, shuffle=True):
        """
        Args:
            df: DataFrame with ``image`` and ``label`` path columns, as built
                by :meth:`gen_df`. On disk, images live in ``<dir>/images/``
                (e.g. ``00000001.jpg``) and the matching label in
                ``<dir>/labels/`` under the same stem (``00000001.csv``).
            batch_size: number of samples per batch.
            shuffle: if True, rows are reshuffled now and after every epoch.
        """
        super().__init__()
        self.batch_size = batch_size
        # Positional access (iloc) is used throughout, so normalise the index.
        self.df = df.reset_index(drop=True)
        self.indices = range(len(self.df))
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        """Number of batches per epoch; the last batch may be smaller."""
        # Ceiling division: flooring would silently drop the trailing samples
        # and report zero batches whenever len(df) < batch_size.
        return (len(self.df) + self.batch_size - 1) // self.batch_size

    def __getitem__(self, index):
        """Return batch number ``index`` as an ``(X, y)`` pair of arrays."""
        start = index * self.batch_size
        return self.get_data(self.indices[start:start + self.batch_size])

    def on_epoch_end(self):
        """Reshuffle the rows between epochs when shuffling is enabled."""
        if self.shuffle:
            self.df = self.df.sample(frac=1).reset_index(drop=True)

    def get_index(self, idx):
        """Load one sample by its row position in this generator's DataFrame.

        Returns:
            ``(X, y)``: the image as a float32 array scaled by 1/255 (expected
            to be ``(HEIGHT, WIDTH, 3)``), and the flattened label values.
        """
        # Read from self.df (not a global frame) so each generator only ever
        # sees its own, currently shuffled, split.
        row = self.df.iloc[idx]
        # Context manager closes the file handle; convert('RGB') guarantees
        # three channels for grayscale/RGBA inputs.
        with Image.open(row['image']) as img:
            X = np.asarray(img.convert('RGB'), dtype=np.float32) / 255.0
        # Drop the first CSV column and flatten the rest into one vector.
        y = pd.read_csv(row['label']).values[:, 1:].flatten()

        return X, y

    def get_data(self, indices):
        """Assemble the samples at ``indices`` into batch arrays ``(X, y)``."""
        # float32 halves the memory of numpy's float64 default.
        X = np.empty((len(indices), HEIGHT, WIDTH, 3), dtype=np.float32)
        y = np.empty((len(indices), self.NUM_OUTPUTS), dtype=np.float32)

        for i, index in enumerate(indices):
            X[i], y[i] = self.get_index(index)

        return X, y

    @staticmethod
    def gen_df(directory):
        """Index a dataset directory into a DataFrame of file paths.

        Every file in ``<directory>/images`` becomes one row; its label path is
        ``<directory>/labels/<image stem>.csv``.

        Returns:
            DataFrame with string columns ``image`` and ``label``.
        """
        # Sorted so the row order does not depend on filesystem listing order.
        images = sorted(glob(os.path.join(directory, 'images', '*')))
        labels = [
            os.path.join(directory, 'labels', os.path.splitext(os.path.basename(image))[0] + '.csv')
            for image in images
        ]
        return pd.DataFrame({'image': images, 'label': labels})

    @staticmethod
    def splits(df, train_size=0.5, batch_size=32, shuffle=True, random_state=None):
        """Split ``df`` into train and test generators.

        Args:
            df: DataFrame as produced by :meth:`gen_df`.
            train_size: passed to ``train_test_split``.
            batch_size: batch size used by both generators.
            shuffle: whether both generators reshuffle every epoch.
            random_state: optional seed passed to ``train_test_split``.

        Returns:
            ``(train_generator, test_generator)``.
        """
        train, test = train_test_split(df, train_size=train_size, random_state=random_state)
        return (
            DataGenerator(train, batch_size=batch_size, shuffle=shuffle),
            DataGenerator(test, batch_size=batch_size, shuffle=shuffle),
        )

In [None]:
# Index every image/label pair found under DATA_PATH.
df = DataGenerator.gen_df(DATA_PATH)
# Fail early with a clear message instead of an opaque error from the split.
assert len(df) > 0, f"No images found under {os.path.join(DATA_PATH, 'images')}"

# 90% of the rows go to training, the remainder to validation.
train_generator, validation_generator = DataGenerator.splits(df, train_size=0.9)

In [None]:
# Number of passes over the training generator.
NUM_EPOCHS = 20

# Train from the generators; the step counts are the generators' batch counts,
# and batches are prepared by 10 worker processes with a queue of up to 20.
model.fit(
    train_generator,
    epochs=NUM_EPOCHS,
    steps_per_epoch=len(train_generator),
    validation_data=validation_generator,
    validation_steps=len(validation_generator),
    workers=10,
    use_multiprocessing=True,
    max_queue_size=20,
)

# Persist the trained model to 'model.h5'.
model.save('model.h5')