In [1]:
# Google Drive file id of the zip archive that is downloaded below.
zip_id = '1ljLFxboUWZWdSK_pWf0-p3SAzEXMzuqX'

from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from google.colab import drive
from oauth2client.client import GoogleCredentials
import zipfile, os

# 1. Authenticate and create the PyDrive client.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
# Kept under its own name so it is never confused with (or rebound by) the
# `google.colab.drive` module used for mounting at the end of this cell.
drive_client = GoogleDrive(gauth)

# exist_ok avoids the check-then-create race and keeps the cell re-runnable.
os.makedirs('MODEL', exist_ok=True)

# DOWNLOAD ZIP
print("Downloading zip file")
myzip = drive_client.CreateFile({'id': zip_id})
myzip.GetContentFile('screen_type.zip')

# UNZIP ZIP
print("Uncompressing zip file")
# The context manager closes the archive even if extraction fails part-way.
with zipfile.ZipFile('screen_type.zip', 'r') as zip_ref:
    zip_ref.extractall('data/')

##################### MOUNT YOUR GOOGLE DRIVE AS A FOLDER ######################
drive.mount('/content/gdrive', force_remount=True)

ModuleNotFoundError: ignored

In [2]:
# Imports

import os
import os.path

import keras.callbacks
import matplotlib.pyplot as plt
import numpy as np  # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from keras.layers.normalization import BatchNormalization
from keras.models import Sequential
from keras.preprocessing.image import ImageDataGenerator

Using TensorFlow backend.


In [0]:
# Notebook-wide configuration: every tunable value lives in this cell.

# --- Classes / channels ---
n_classes = 11  # "skip" class ignored
weights = []
n_channels = len(weights) + 1

# --- Model ---
batch_size = 64
max_epochs = 20
period_checkpoint = 10

# --- Testing ---
voting_type = 'probabilities'

# --- Image formatting ---
SEED = 200
WIDTH = 128
HEIGTH = 256  # (sic) spelling kept because other cells reference this name

# --- Data locations ---
# NOTE(review): these are '../input/...' style paths, while the download cell
# extracts the archive into 'data/' - confirm both refer to the same dataset.
SCREEN_CSV = "../input/hmmmmm/screen_type_format2.csv"
IMAGE_FOLDER = "../input/mydata/data/data/screen_type"
TRAIN_FOLDER = "/".join([IMAGE_FOLDER, "train_images"])
TEST_FOLDER = "/".join([IMAGE_FOLDER, "test_images"])

In [0]:
def read_screen_labels(csv, image_dir):
    '''
    Map every .jpg image found under ``image_dir`` to an integer class index.

    Parameters
    ----------
    csv : path or buffer accepted by ``pd.read_csv``; it must provide the
        columns ``UI.number`` and ``class``.
    image_dir : directory whose sub-directories hold the ``.jpg`` images. The
        part of each file name before the first dot is used as the image key
        and must be an integer matching a ``UI.number`` value.

    Returns
    -------
    (dictionary, naming_dict)
        ``dictionary`` maps image key (str) -> class index (int);
        ``naming_dict`` maps each distinct ``class`` value -> its index in
        sorted order.

    Raises
    ------
    ValueError
        If an image key is not an integer.
    IndexError
        If an image has no matching row in the CSV.
    '''
    fields = ['UI.number', 'class']
    screen_type_csv = pd.read_csv(csv, usecols=fields, skipinitialspace=True).rename(
        index=str, columns={"UI.number": "UINumber", "class": "Class"})
    screen_type_csv = screen_type_csv.sort_values(by=['Class'])

    # np.unique returns the distinct classes in sorted order, which fixes the
    # class -> index assignment.
    categories = np.unique(screen_type_csv['Class'].values)
    naming_dict = {category: index for index, category in enumerate(categories)}

    # Build the UI-number lookup once instead of scanning the frame per image.
    # keep='first' on the class-sorted frame picks the same row the previous
    # per-image `.loc[...].values[0]` lookup did when a UI number is repeated.
    class_by_ui = (screen_type_csv
                   .drop_duplicates(subset='UINumber', keep='first')
                   .set_index('UINumber')['Class']
                   .to_dict())

    dictionary = {}
    # Walk EVERY class sub-directory; taking only the first one silently drops
    # the images of all remaining classes.
    for entry in os.listdir(image_dir):
        class_dir = os.path.join(image_dir, entry)
        if not os.path.isdir(class_dir):
            # Stray files next to the class folders are not image containers.
            continue
        for image_name in os.listdir(class_dir):
            if not image_name.endswith('.jpg'):
                continue
            key = image_name.split('.')[0]
            ui_number = int(key)
            if ui_number not in class_by_ui:
                raise IndexError(
                    "image %r in %r has no matching 'UI.number' row in the labels CSV"
                    % (image_name, class_dir))
            dictionary[key] = naming_dict[class_by_ui[ui_number]]
    return dictionary, naming_dict

In [0]:
def get_images_labels(image_dir):
    '''
    List the keys of all .jpg images stored in the sub-directories of
    ``image_dir``.

    The key of an image is the part of its file name before the first dot.
    Sub-directories and the files inside them are visited in sorted order so
    the result is deterministic (``os.listdir`` order is arbitrary). Entries
    of ``image_dir`` that are not directories are skipped.

    Returns a flat list of keys (str), grouped by sub-directory.
    '''
    filenames = []
    for entry in sorted(os.listdir(image_dir)):
        class_dir = os.path.join(image_dir, entry)
        if not os.path.isdir(class_dir):
            # A stray file here would make os.listdir(class_dir) raise.
            continue
        filenames.extend(image_name.split('.')[0]
                         for image_name in sorted(os.listdir(class_dir))
                         if image_name.endswith('.jpg'))
    return filenames

In [6]:
# Partition the data
label_dictionary, naming_dictionary = read_screen_labels(SCREEN_CSV, TRAIN_FOLDER)

partition = {}
partition['test'] = get_images_labels(TEST_FOLDER)

# get_images_labels returns the images grouped by class sub-directory, so a
# plain tail slice would put only the last-listed classes into validation.
# Sort (os.listdir order is arbitrary) and then shuffle with the fixed SEED so
# the 80/20 split mixes classes and is reproducible across runs.
all_train_images = sorted(get_images_labels(TRAIN_FOLDER))
np.random.RandomState(SEED).shuffle(all_train_images)

# Validation is the held-out 20% of the training folder.
split = int(0.8 * len(all_train_images))
partition['train'] = all_train_images[:split]
partition['validation'] = all_train_images[split:]

print("UIs in train:", len(partition['train']))
print("UIs in validation:", len(partition['validation']))
print("UIs in test:", len(partition['test']))

FileNotFoundError: ignored

In [0]:
# Creating the model
model = Sequential()

# First convolutional block; it also declares the input shape
# (height, width, 3 colour channels).
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(HEIGTH, WIDTH, 3)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(BatchNormalization())

# The remaining convolutional blocks share one layout (conv -> pool -> batch
# norm) and differ only in their number of filters.
for n_filters in (128, 64, 96, 32):
    model.add(Conv2D(n_filters, kernel_size=(3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(BatchNormalization())

# Classifier head
model.add(Dropout(0.2))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.3))
# One output unit per class, sized from the configuration constant so the
# network cannot drift out of sync with n_classes.
model.add(Dense(n_classes, activation='softmax'))

# NOTE: this stores the still-untrained network; the same file name is written
# again after training at the end of the notebook.
model.save('my_model.h5')
model.summary()

In [0]:
class LossHistory(keras.callbacks.Callback):
    """Callback that records the loss reported at the end of every batch.

    After training, ``losses`` holds one entry per batch, in order. An entry
    is ``None`` when no ``'loss'`` value was supplied for that batch.
    """

    def on_train_begin(self, logs=None):
        # Start from an empty record each time training begins.
        self.losses = []

    def on_batch_end(self, batch, logs=None):
        # `logs` may be None; fall back to an empty dict instead of failing
        # on `.get`. None defaults also avoid shared mutable default arguments.
        self.losses.append((logs or {}).get('loss'))

In [0]:
# Compile with the Adam optimizer and categorical cross-entropy, tracking accuracy.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Data generators.
# Training images are rescaled and augmented with horizontal flips and
# brightness changes.
train_datagen = ImageDataGenerator(
    brightness_range=[0.5, 1.5],
    horizontal_flip=True,
    rescale=1./255,
)

# Test images are only rescaled.
test_datagen = ImageDataGenerator(rescale=1./255)

In [0]:
# Train/Test Generators
# Both generators read one sub-directory per class and yield one-hot labels
# (class_mode='categorical') for RGB images resized to (HEIGTH, WIDTH).
# seed=SEED makes the training shuffle order reproducible between runs.
train_generator = train_datagen.flow_from_directory(
    TRAIN_FOLDER,
    target_size=(HEIGTH, WIDTH),
    color_mode="rgb",
    batch_size=300,
    class_mode='categorical',
    seed=SEED,
)

test_generator = test_datagen.flow_from_directory(
    TEST_FOLDER,
    target_size=(HEIGTH, WIDTH),
    color_mode="rgb",
    batch_size=300,
    class_mode='categorical',
)

In [0]:
# Train the model, recording the per-batch loss in `hhistory` and the
# per-epoch metrics in `history`.
hhistory = LossHistory()
history = model.fit_generator(
    train_generator,
    verbose=1,
    # len(generator) is the number of batches needed to cover every sample
    # once, so the step counts follow the data and batch size instead of
    # hard-coded sample counts (which also dropped the final partial batch).
    steps_per_epoch=len(train_generator),
    epochs=max_epochs,
    # The test generator serves as the validation data during training.
    validation_data=test_generator,
    use_multiprocessing=True,
    validation_steps=len(test_generator),
    callbacks=[hhistory])

In [0]:
def _plot_history_metric(history_dict, metric_key, title, ylabel):
    """Plot one per-epoch metric together with its 'val_'-prefixed counterpart."""
    fig, ax = plt.subplots()
    ax.plot(history_dict[metric_key])
    ax.plot(history_dict['val_' + metric_key])
    ax.set(title=title, ylabel=ylabel, xlabel='epoch')
    ax.legend(['train', 'test'], loc='upper left')
    plt.show()


# Depending on the Keras version, the accuracy metric is recorded under
# 'acc' (older releases) or 'accuracy' (2.3 and later); use whichever exists.
accuracy_key = 'acc' if 'acc' in history.history else 'accuracy'

# summarize history for accuracy
_plot_history_metric(history.history, accuracy_key, 'model accuracy', 'accuracy')

# summarize history for loss
_plot_history_metric(history.history, 'loss', 'model loss', 'loss')

# see loss history (numbers)
print(hhistory.losses)
model.save('my_model.h5')