In [None]:
import gc
import importlib
import random
import pathlib
import os
import time

import numpy as np
import pandas as pd
import sklearn
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
import seaborn as sns

import PIL
import skimage
from skimage import exposure, img_as_float
import cv2
import tensorflow as tf
#tf.enable_eager_execution()

In [None]:
# Render matplotlib figures directly in the notebook output cells.
%matplotlib inline

In [None]:
#tf.enable_eager_execution()

In [None]:
def select_objects(indexes_list, objects_names):
    """Pick the entries of ``objects_names`` found at the given positions.

    Args:
        indexes_list: iterable of positions to look up, in output order.
        objects_names: indexable collection the positions refer to.

    Returns:
        A tuple with one entry per position, in the order the positions
        were given.
    """
    picked = []
    for position in indexes_list:
        picked.append(objects_names[position])
    return tuple(picked)

In [None]:
def select_random_indexses_subset(size, subset_size):
    """Draw ``subset_size`` distinct indexes from ``0 .. size - 1``.

    Args:
        size: number of positions to choose from.
        subset_size: how many positions to draw.

    Returns:
        A list of ``subset_size`` unique integers in random order.

    Raises:
        ValueError: raised by ``random.sample`` when ``subset_size`` is
            negative or larger than ``size``.
    """
    # A range is already a sequence, so it can be sampled directly without
    # first copying every index into a tuple.
    return random.sample(range(size), subset_size)

In [None]:
def random_objects_select(objects_names, subset_size):
    """Return a random selection of entries from ``objects_names``.

    Positions are drawn without repetition by
    ``select_random_indexses_subset`` and then resolved to entries by
    ``select_objects``.

    Args:
        objects_names: sized, indexable collection to draw from.
        subset_size: number of entries to draw.

    Returns:
        A tuple of ``subset_size`` entries taken at distinct positions.
    """
    chosen_positions = select_random_indexses_subset(len(objects_names), subset_size)
    return select_objects(chosen_positions, objects_names)

In [None]:
def compute_whole_image_mean_brightnes(image):
    """Return the mean of all values in ``image``.

    Args:
        image: array-like object exposing a ``mean()`` method; the mean is
            taken over every element, whatever the number of axes.

    Returns:
        Whatever ``image.mean()`` returns (a scalar for numpy arrays).
    """
    mean_brightness = image.mean()
    return mean_brightness

In [None]:
def show_images_grid(images_list, columns_num, figsize, images_names_list=None):
    """Show images in a grid, filled row by row.

    Args:
        images_list: sequence of images accepted by ``Axes.imshow``.
        columns_num: number of grid columns (must be at least 1).
        figsize: figure size passed to ``plt.subplots``.
        images_names_list: optional sequence of titles, aligned with
            ``images_list`` by position.

    Raises:
        ValueError: if ``columns_num`` is smaller than 1.
    """
    if columns_num < 1:
        raise ValueError("columns_num must be a positive integer")
    images_num = len(images_list)
    if images_num == 0:
        # Nothing to draw; plt.subplots would reject a grid with zero rows.
        return
    # Explicit length test: plain truthiness is ambiguous for numpy arrays.
    has_names = images_names_list is not None and len(images_names_list) > 0
    # Round up so a trailing, partially filled row is still shown.
    rows_num = -(-images_num // columns_num)
    # squeeze=False keeps `ax` two-dimensional even for one row or column.
    fig, ax = plt.subplots(rows_num, columns_num, figsize=figsize, squeeze=False)
    # ax.flat walks the grid row by row, matching the order of images_list.
    for cell_index, axis in enumerate(ax.flat):
        if cell_index >= images_num:
            # Hide the unused cells of the last row.
            axis.axis('off')
            continue
        if has_names and cell_index < len(images_names_list):
            axis.set_title(images_names_list[cell_index])
        axis.imshow(images_list[cell_index])
    plt.tight_layout()
    plt.show()

In [None]:
def draw_images_with_histograms(images_list, columns_num, figsize, nbins):
    """Show each image above its value histogram and its cumulative distribution.

    Every image takes three stacked cells in one grid column: the image
    itself, a histogram of its flattened values, and the cumulative
    distribution returned by ``exposure.cumulative_distribution``.

    Args:
        images_list: sequence of numpy image arrays.
        columns_num: number of images per grid row (must be at least 1).
        figsize: figure size passed to ``plt.subplots``.
        nbins: number of histogram bins.

    Raises:
        ValueError: if ``columns_num`` is smaller than 1.
    """
    if columns_num < 1:
        raise ValueError("columns_num must be a positive integer")
    images_num = len(images_list)
    if images_num == 0:
        # Nothing to draw; plt.subplots would reject a grid with zero rows.
        return
    # Number of image rows, rounded up so trailing images are not dropped.
    groups_num = -(-images_num // columns_num)
    # Three axes rows per image row; squeeze=False keeps `ax` two-dimensional.
    fig, ax = plt.subplots(3 * groups_num, columns_num, figsize=figsize, squeeze=False)
    for image_index in range(groups_num * columns_num):
        group, column = divmod(image_index, columns_num)
        image_ax = ax[3 * group, column]
        hist_ax = ax[3 * group + 1, column]
        cdf_ax = ax[3 * group + 2, column]
        if image_index >= images_num:
            # Hide the unused cells of the last, partially filled image row.
            for unused_ax in (image_ax, hist_ax, cdf_ax):
                unused_ax.axis('off')
            continue
        image = images_list[image_index]
        pixels = image.flatten()
        image_ax.imshow(image)
        image_ax.grid(False)
        # Histogram the pixel values themselves. Passing the (counts, centres)
        # tuple from exposure.histogram to Axes.hist would histogram those two
        # arrays instead of the image.
        hist_ax.hist(pixels, bins=nbins)
        img_cdf, cdf_bins = exposure.cumulative_distribution(pixels)
        cdf_ax.plot(cdf_bins, img_cdf, color='green')
    plt.show()

In [None]:
# List the input directory with plain Python so the cell does not depend on
# IPython's automagic `ls` or on a Unix shell being available.
sorted(os.listdir('../input/'))

In [None]:
# Image directories. Both end with a slash so they behave the same under
# os.path.join and under plain string concatenation (used further down).
train_images_dir = '../input/train/'
test_images_dir = '../input/test/'

In [None]:
# Materialise the glob into a sorted list: a bare generator would be exhausted
# by the preview below, and sorting makes the printed order repeatable.
train_images_names = sorted(pathlib.Path().glob('../input/train/*'))
for img_name in train_images_names[:10]:
    print(img_name)

In [None]:
# Load the training annotations table.
train_df = pd.read_csv('../input/train.csv')

In [None]:
# Column dtypes and non-null counts of the annotations table.
train_df.info()

In [None]:
# Summary statistics for the columns of the annotations table.
train_df.describe()

In [None]:
# Preview the first rows of the raw table.
train_df.head()

In [None]:
#image_ids_and_class_ids = train_df['ImageId_ClassId'].map(lambda x: x.split('_'))

In [None]:
#type(image_ids_and_class_ids)

In [None]:
#image_ids_and_class_ids.head()

In [None]:
#type(image_ids_and_class_ids[0])

In [None]:
# image_descriptions_df = pd.DataFrame(
#     {'image_id': image_ids_and_class_ids[:][0], 'class_id': image_ids_and_class_ids[:][1]})

In [None]:
#image_descriptions_df.head()

In [None]:
# Split each 'ImageId_ClassId' value into [image name, class id]. Splitting once
# from the right keeps exactly two parts even if an image name itself contains
# an underscore, which the two-way unpacking in later cells relies on.
image_descriptions_list = [row.rsplit('_', 1) for row in train_df['ImageId_ClassId']]

In [None]:
# Preview the first ten [image name, class id] pairs.
image_descriptions_list[:10]

In [None]:
# Store the image-name half of every pair as its own column.
train_df['image_name'] = [file_name for file_name, _class_id in image_descriptions_list]

In [None]:
# Check the table after adding the image_name column.
train_df.head()

In [None]:
# Store the class-id half of every pair as its own column (kept as text).
train_df['image_class'] = [class_id for _file_name, class_id in image_descriptions_list]

In [None]:
# Check the table after adding the image_class column.
train_df.head()

In [None]:
# Sanity check: every class id is exactly one character long.
all(len(class_id) == 1 for _file_name, class_id in image_descriptions_list)

In [None]:
# Each row of train_df is an (image, class) pair, so image names can repeat.
# Keep each name once, in first-seen order, so later cells load every file a
# single time. Defining the name here also lets the notebook run on a fresh
# kernel, where it was previously undefined.
train_image_names = list(dict.fromkeys(train_df['image_name']))
len(train_image_names)

In [None]:
# Preview the first four image names.
train_image_names[:4]

In [None]:
#full_image_path = pathlib.Path.joinpath(pathlib.Path(train_images_dir), pathlib.Path(train_image_names[0]))

In [None]:
#full_image_path.as_posix()

In [None]:
# Load every training image into memory, keyed by file name. cv2.imread
# reports failure by returning None instead of raising, so stop at the first
# unreadable file rather than letting a None reach the plotting/statistics cells.
train_images_dict = {}
for image_name in train_image_names:
    image_path = os.path.join(train_images_dir, image_name)
    loaded_image = cv2.imread(image_path)
    if loaded_image is None:
        raise IOError("Could not read image: {}".format(image_path))
    train_images_dict[image_name] = loaded_image

In [None]:
train_images_num = train_df.shape[0]
# Sample a quarter of the row count, capped at the number of names available:
# random.sample raises ValueError when asked for more items than exist.
sample_size = min(train_images_num // 4, len(train_image_names))
train_images_names_sample = random_objects_select(tuple(train_image_names), sample_size)
print(type(train_images_names_sample))
print(train_images_names_sample[:5])

In [None]:
# All loaded images as a tuple, in the dictionary's insertion order.
train_images = tuple(train_images_dict.values())

In [None]:
#train_images[0]

In [None]:
# Images for the first 40 sampled names (the grids below are 8 columns wide).
images_subsample = [train_images_dict[img_name] for img_name in train_images_names_sample[:40]]

In [None]:
#images_subsample = [train_images_dict[img_name] for img_name in image_names_subsample]

In [None]:
# Show the subsample in an 8-column grid, titled with the file names.
show_images_grid(images_subsample, 8, (24, 24), images_names_list=train_images_names_sample[:40])

In [None]:
# Same subsample, each image followed by its histogram (20 bins) and CDF.
draw_images_with_histograms(images_subsample, 8, (24, 24), 20)

In [None]:
# Mean pixel value of every loaded training image, in the order of train_images.
train_images_mean_brightness = list(
    map(compute_whole_image_mean_brightnes, train_images)
)

In [None]:
# Histogram of the per-image mean brightness over the training set.
fig, ax = plt.subplots(figsize=(24, 24))
ax.hist(train_images_mean_brightness, bins=600)
ax.set(
    title="Mean brightnes distribution for train images",
    xlabel="Mean brightness",
    ylabel="Num of images",
)
plt.show()

In [None]:
# Smallest and largest per-image mean brightness.
print(min(train_images_mean_brightness))
print(max(train_images_mean_brightness))

In [None]:
# Number of brightness values (one per loaded image).
print(len(train_images_mean_brightness))

In [None]:
# Distinct brightness values; compared with the full count in the next cell.
set_train_images_mean_brightness = set(train_images_mean_brightness)

In [None]:
# Number of distinct brightness values.
print(len(set_train_images_mean_brightness))

In [None]:
# Recheck the table before building the model inputs.
train_df.head()

In [None]:
# Columns used below as the inputs (image file names) and labels (class ids).
images_name = train_df['image_name']
images_class = train_df['image_class']

In [None]:
# First image name, used for the single-file TensorFlow read below.
images_name[0]

In [None]:
# Read the raw bytes of the first training image; the path is built by plain
# string concatenation, so it relies on train_images_dir ending with a slash.
img_raw = tf.io.read_file(train_images_dir + images_name[0])

In [None]:
# Inspect the kind of object tf.io.read_file returned.
type(img_raw)

In [None]:
# Decode the raw bytes into an image tensor.
img_tensor = tf.image.decode_image(img_raw)

In [None]:
# Shape and dtype of the decoded tensor.
print(img_tensor.shape)
print(img_tensor.dtype)

In [None]:
# Run a garbage collection pass. Nothing is deleted first (the `del` idea
# below was left disabled), so objects that are still referenced stay alive.
#el X, train_images_dict
gc.collect()

In [None]:
def preprocess_image(image):
    """Decode JPEG bytes into a single-channel float32 tensor scaled to [0, 1].

    Args:
        image: scalar string tensor holding the encoded JPEG bytes.

    Returns:
        The decoded image as a float32 tensor with one channel.
    """
    image = tf.image.decode_jpeg(image, channels=1)
    # decode_jpeg yields uint8, and dividing a uint8 tensor by a Python float
    # is a dtype error. convert_image_dtype casts to float32 and rescales
    # integer input to the [0, 1] range in one step.
    image = tf.image.convert_image_dtype(image, tf.float32)
    return image

In [None]:
# Hold out 25% of the (image name, class id) rows; the fixed random_state makes
# the split repeatable.
# NOTE(review): the split is row-wise, so rows sharing an image name can land
# on both sides of the split.
img_names_train, img_names_holdout, img_clasess_train, img_classes_holdout = train_test_split(
    images_name,
    images_class,
    test_size=0.25,
    random_state=42
)

In [None]:
def read_image(filename, image_class):
    """Load one image as a grayscale float32 array scaled to [0, 1].

    Written for use through ``tf.py_func``, which hands the file name over
    as ``bytes``; a plain ``str`` is accepted as well.

    Args:
        filename: path of the image file, as ``bytes`` or ``str``.
        image_class: label of the image, returned unchanged.

    Returns:
        Tuple ``(image, image_class)`` where ``image`` is a 2-D float32
        numpy array.

    Raises:
        IOError: if OpenCV cannot read the file.
    """
    image_path = filename.decode() if isinstance(filename, bytes) else filename
    image_decoded = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image_decoded is None:
        # cv2.imread does not raise on a missing or unreadable file.
        raise IOError("Could not read image: {}".format(image_path))
    # Callers declare the image output as tf.float32, so return float32 values
    # rather than the uint8 array OpenCV produces.
    return image_decoded.astype(np.float32) / 255.0, image_class

In [None]:
def read_image(filename, label):
    """Load one JPEG as a single-channel float32 tensor using TensorFlow ops.

    This definition replaces the cv2-based ``read_image`` from the previous
    cell. It builds graph operations, so it is meant to be passed directly to
    ``Dataset.map``.

    Args:
        filename: scalar string tensor with the path of the image file.
        label: label of the image, returned unchanged.

    Returns:
        Tuple ``(image, label)`` where ``image`` is a float32 tensor with
        values in [0, 1] and a single channel.
    """
    image_string = tf.io.read_file(filename)

    # Don't use tf.image.decode_image, or the output shape will be undefined
    image = tf.image.decode_jpeg(image_string, channels=1)

    # This will convert to float values in [0, 1]
    image = tf.image.convert_image_dtype(image, tf.float32)

    # No resizing is applied, so the decoded image itself is returned.
    return image, label

In [None]:
# Pair full image paths with integer labels. The name column holds bare file
# names, so the directory is prepended here. Labels are cast to int64 because
# the `y` placeholder and the sparse cross-entropy loss expect integers.
train_dataset_names = tf.data.Dataset.from_tensor_slices((
    (train_images_dir + img_names_train).values,
    img_clasess_train.astype('int64').values,
))

In [None]:
# Pair full image paths with integer labels for the holdout rows. The name
# column holds bare file names, so the directory is prepended here. Labels are
# cast to int64 because the `y` placeholder expects integers.
validation_dataset_names = tf.data.Dataset.from_tensor_slices((
    (train_images_dir + img_names_holdout).values,
    img_classes_holdout.astype('int64').values,
))

In [None]:
# The read_image in effect (the later of the two definitions) is built from
# TensorFlow ops, so it is mapped directly. Wrapping it in tf.py_func would
# hand it numpy values and expect numpy results, which it does not produce.
train_dataset = train_dataset_names.map(read_image)

In [None]:
# The read_image in effect (the later of the two definitions) is built from
# TensorFlow ops, so it is mapped directly. Wrapping it in tf.py_func would
# hand it numpy values and expect numpy results, which it does not produce.
validation_dataset = validation_dataset_names.map(read_image)

In [None]:
# Take the input width from a real training image instead of hard-coding it:
# the first dense layer must match the number of pixels in a flattened
# grayscale image exactly.
# NOTE(review): assumes every training image has the same size - confirm.
sample_image_path = os.path.join(train_images_dir, images_name.iloc[0])
sample_image = cv2.imread(sample_image_path, cv2.IMREAD_GRAYSCALE)
if sample_image is None:
    raise IOError("Could not read image: {}".format(sample_image_path))
n_inputs = int(sample_image.size)
n_hidden1 = 300
n_hidden2 = 100
n_outputs = 5

In [None]:
# Graph inputs: X takes one row of n_inputs float32 values per example, y one
# int64 class id per example. The leading None leaves the batch size open.
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name='X')
y = tf.placeholder(tf.int64, shape=(None, ), name='y')

In [None]:
# Fully connected network: two ReLU hidden layers followed by a linear layer
# that produces n_outputs logits.
with tf.name_scope('dnn'):
    hidden1 = tf.layers.dense(X, n_hidden1, name='hidden1', activation=tf.nn.relu)
    hidden2 = tf.layers.dense(hidden1, n_hidden2, name='hidden2', activation=tf.nn.relu)
    logits = tf.layers.dense(hidden2, n_outputs, name='outputs')

In [None]:
# Loss: softmax cross-entropy against the integer labels, averaged over the batch.
with tf.name_scope('loss'):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name='loss')

In [None]:
# Plain gradient descent on the loss with a fixed learning rate.
learning_rate = 0.01
with tf.name_scope('train'):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)

In [None]:
# An example counts as correct when its label is the top-1 prediction;
# accuracy is the mean of those hits over the batch.
with tf.name_scope('eval'):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

In [None]:
# Variable initialiser and checkpoint saver, both used in the training session.
init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [None]:
# Training schedule. The batch counts use integer division, so a trailing
# partial batch is not counted in either split.
n_epoch = 10
batch_size = 64
batch_nums = len(img_names_train) // batch_size
valid_batch_nums = len(img_names_holdout) // batch_size

In [None]:
# Group both datasets into batches of batch_size examples.
batched_train_dataset = train_dataset.batch(batch_size)
batched_valid_dataset = validation_dataset.batch(batch_size)

In [None]:
# Initialisable iterators: running their initializer op restarts the dataset,
# which the training loop does at the start of every epoch.
train_iterator = batched_train_dataset.make_initializable_iterator()
valid_iterator = batched_valid_dataset.make_initializable_iterator()

In [None]:
# Tensors that yield the next batch each time they are evaluated in a session.
next_train_element = train_iterator.get_next()
next_valid_element = valid_iterator.get_next()

In [None]:
# Train for n_epoch passes, report mean batch accuracy on both splits after
# each pass, then save a checkpoint.
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epoch):
        # Reset the running sums every epoch so the printed values are
        # per-epoch means rather than totals that grow across epochs.
        acc_train = 0
        acc_valid = 0
        sess.run(train_iterator.initializer)
        sess.run(valid_iterator.initializer)
        for iteration in range(batch_nums):
            # Fetch exactly one batch per step and keep it for the feeds below.
            X_batch, y_batch = sess.run(next_train_element)
            # The network takes one flat row per image.
            X_batch = X_batch.reshape(len(X_batch), -1)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
            acc_train += accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        for _ in range(valid_batch_nums):
            X_valid, y_valid = sess.run(next_valid_element)
            X_valid = X_valid.reshape(len(X_valid), -1)
            acc_valid += accuracy.eval(feed_dict={X: X_valid, y: y_valid})
        # max(..., 1) avoids dividing by zero when a split is smaller than one batch.
        print(epoch, "Train accuracy:", acc_train / max(batch_nums, 1), "Test accuracy:", acc_valid / max(valid_batch_nums, 1))
    save_path = saver.save(sess, "./simple_nn.ckpt")