In [17]:

# Common imports

# To plot pretty figures
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12
from PIL import Image

from tensorflow.python.framework import random_seed


from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import os
import tensorflow as tf
from numpy import array


# to make this notebook's output stable across runs
def reset_graph(seed=42):
    """Start from a fresh default graph with reproducible random state.

    Seeds NumPy's global generator and, after replacing TensorFlow's default
    graph, sets TensorFlow's random seed to the same value.

    Args:
        seed: Integer seed shared by NumPy and TensorFlow (default 42).
    """
    np.random.seed(seed)
    # Seed TensorFlow only after the reset so the seed lands on the new graph.
    tf.reset_default_graph()
    tf.set_random_seed(seed)

# Where to save the figures: save_fig() writes to
# <PROJECT_ROOT_DIR>/images/<CHAPTER_ID>/<fig_id>.png.
# NOTE: "." is resolved against the working directory at call time, and this
# cell changes the working directory (os.chdir) right after defining save_fig.
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "cnn"

def save_fig(fig_id, tight_layout=True):
    """Save the current matplotlib figure as a 300-dpi PNG.

    The file is written to
    ``<PROJECT_ROOT_DIR>/images/<CHAPTER_ID>/<fig_id>.png``; the target
    directory is created if it does not exist yet.

    Args:
        fig_id: File name (without extension) for the figure.
        tight_layout: If true, call ``plt.tight_layout()`` before saving.
    """
    out_dir = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)
    # savefig() does not create missing directories, so make sure the target
    # folder exists instead of failing on the first call.
    os.makedirs(out_dir, exist_ok=True)
    path = os.path.join(out_dir, fig_id + ".png")
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format='png', dpi=300)
# Work from the dataset directory so the relative "train_data/..." paths used
# by later cells resolve. The location can be overridden with the ISD_DATA_DIR
# environment variable; the default is the original hard-coded path.
DATA_DIR = os.environ.get("ISD_DATA_DIR", "/data/MIDS/w207/final/data")
os.chdir(DATA_DIR)

In [5]:


# Read the label table (path is relative to the working directory) and keep
# only the 'invasive' column as a NumPy array.
labels_table = pd.read_csv("train_data/train_labels.csv")
train_labels = labels_table['invasive'].values


2295

In [54]:
def load_images(indices):
    """Load training images by zero-based index and scale them to [0, 1].

    Image ``i`` is read from ``train_data/<i + 1>.jpg`` (file names are
    one-based), relative to the current working directory.

    Args:
        indices: Sequence of zero-based integer image indices.

    Returns:
        float32 array of shape ``(len(indices), 866, 1154, 3)`` whose row ``p``
        holds the pixels of image ``indices[p]`` divided by 255.

    Raises:
        ValueError: If a decoded image cannot be stored in an 866x1154x3 slot
            (raised by NumPy on assignment).
    """
    # Fill a float32 buffer directly and scale it in place: this avoids holding
    # a uint8 copy plus two float32 copies of the whole batch at once.
    images = np.zeros(shape=(len(indices), 866, 1154, 3), dtype=np.float32)
    for p, i in enumerate(indices):
        # Close each file as soon as its pixels are copied so large batches do
        # not accumulate open file handles.
        with Image.open("train_data/" + str(i + 1) + ".jpg") as im:
            images[p] = np.array(im)
    images *= 1.0 / 255.0
    return images


In [70]:
class DataSet(object):
  """Mini-batch iterator over image indices and their labels.

  Only the indices and labels are kept in memory; pixel data is read from
  disk each time a batch is requested.
  """

  def __init__(self,
               image_indices,
               labels,
               seed=None):
    """Store the index/label arrays and seed NumPy's global generator.

    Args:
      image_indices: NumPy array of zero-based image indices (it is indexed
        with permutation arrays when shuffling).
      labels: NumPy array of labels, aligned with `image_indices`.
      seed: Optional op-level seed handed to TensorFlow's `get_seed`.
    """
    seed1, seed2 = random_seed.get_seed(seed)
    # If op level seed is not set, use whatever graph level seed is returned
    np.random.seed(seed1 if seed is None else seed2)
    self._num_examples = len(image_indices)
    self._image_indices = image_indices
    self._labels = labels
    self._epochs_completed = 0
    self._index_in_epoch = 0

  @property
  def image_indices(self):
    """Image indices in their current (possibly shuffled) order."""
    return self._image_indices

  @property
  def labels(self):
    """Labels in the same order as `image_indices`."""
    return self._labels

  @property
  def num_examples(self):
    """Number of examples in the data set."""
    return self._num_examples

  @property
  def epochs_completed(self):
    """Number of epoch boundaries crossed by `next_batch` so far."""
    return self._epochs_completed

  @staticmethod
  def load_images(indices):
    """Load images by zero-based index and scale them to [0, 1].

    Image ``i`` is read from ``train_data/<i + 1>.jpg`` relative to the
    current working directory.

    Args:
      indices: Sequence of zero-based integer image indices.

    Returns:
      float32 array of shape ``(len(indices), 866, 1154, 3)``; row ``p`` holds
      image ``indices[p]`` divided by 255.
    """
    images = np.zeros(shape=(len(indices), 866, 1154, 3), dtype=np.float32)
    # Rows are addressed by position in the batch, not by the image index:
    # the index can be far larger than the batch size.
    for p, i in enumerate(indices):
      with Image.open("train_data/" + str(i + 1) + ".jpg") as im:
        images[p] = np.array(im)
    images *= 1.0 / 255.0
    return images

  def _shuffle(self):
    """Apply one random permutation to indices and labels together."""
    perm = np.arange(self._num_examples)
    np.random.shuffle(perm)
    self._image_indices = self.image_indices[perm]
    self._labels = self.labels[perm]

  def next_batch(self, batch_size, shuffle=True):
    """Return the next `batch_size` examples from this data set.

    When a batch crosses an epoch boundary it is made of the unread tail of
    the finished epoch followed by the head of the next (re-shuffled) epoch.

    Args:
      batch_size: Number of examples requested.
      shuffle: If true, shuffle before the first batch and at every epoch
        boundary.

    Returns:
      Tuple ``(images, labels)``: the loaded image array for the batch and the
      matching slice of labels.
    """
    start = self._index_in_epoch
    # Shuffle for the first epoch
    if self._epochs_completed == 0 and start == 0 and shuffle:
      self._shuffle()
    # Go to the next epoch
    if start + batch_size > self._num_examples:
      # Finished epoch
      self._epochs_completed += 1
      # Get the rest examples in this epoch (taken before re-shuffling)
      rest_num_examples = self._num_examples - start
      indices_rest_part = self._image_indices[start:self._num_examples]
      # Labels must come from the label array, not from the image indices.
      labels_rest_part = self._labels[start:self._num_examples]
      # Shuffle the data
      if shuffle:
        self._shuffle()
      # Start next epoch
      start = 0
      self._index_in_epoch = batch_size - rest_num_examples
      end = self._index_in_epoch
      indices_new_part = self._image_indices[start:end]
      labels_new_part = self._labels[start:end]
      batch_indices = np.concatenate((indices_rest_part, indices_new_part), axis=0)
      batch_labels = np.concatenate((labels_rest_part, labels_new_part), axis=0)
      return self.load_images(batch_indices), batch_labels
    else:
      self._index_in_epoch += batch_size
      end = self._index_in_epoch
      return self.load_images(self._image_indices[start:end]), self._labels[start:end]

   
        

In [71]:
# The first 1600 examples are used for training; everything after that is held
# out as the dev ("test") split.
n_train = 1600
y_train, y_dev = train_labels[:n_train], train_labels[n_train:]
train_data = DataSet(np.arange(0, n_train), y_train)
test_data = DataSet(np.arange(n_train, len(train_labels)), y_dev)

In [72]:
# Shape of the raw input images fed to the X placeholder.
height = 866
width = 1154
channels = 3
# Pixels per channel; not referenced by any cell visible here.
n_inputs = height * width
batch_size=100

# First convolution: 32 feature maps, 3x3 kernel, stride 1.
conv1_fmaps = 32
conv1_ksize = 3
conv1_stride = 1
conv1_pad = "SAME"

# Second convolution: 64 feature maps, 3x3 kernel, stride 2.
conv2_fmaps = 64
conv2_ksize = 3
conv2_stride = 2
conv2_pad = "SAME"

# Max pooling keeps the number of feature maps unchanged.
pool3_fmaps = conv2_fmaps

# Width of the fully connected hidden layer and of the output (logit) layer.
# NOTE(review): labels are read from the 'invasive' column; if that column is
# binary, 2 outputs would suffice -- confirm before changing.
n_fc1 = 64
n_outputs = 10

reset_graph()

# Spatial size [height, width] the inputs are resized to before conv1.
resized_size = [800, 800]

with tf.name_scope("inputs"):
    X = tf.placeholder(tf.float32, shape=[None, height, width, channels], name="X")
    X_resized = tf.image.resize_images(X, resized_size)

    y = tf.placeholder(tf.int32, shape=[None], name="y")

conv1 = tf.layers.conv2d(X_resized, filters=conv1_fmaps, kernel_size=conv1_ksize,
                         strides=conv1_stride, padding=conv1_pad,
                         activation=tf.nn.relu, name="conv1")
conv2 = tf.layers.conv2d(conv1, filters=conv2_fmaps, kernel_size=conv2_ksize,
                         strides=conv2_stride, padding=conv2_pad,
                         activation=tf.nn.relu, name="conv2")

with tf.name_scope("pool3"):
    pool3 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="VALID")
    # Derive the flattened size from pool3's static shape instead of
    # hard-coding it. With the settings above this is 200 * 200 * pool3_fmaps,
    # and it stays correct if the resize target, strides or padding change.
    pool3_flat_size = int(np.prod(pool3.get_shape().as_list()[1:]))
    pool3_flat = tf.reshape(pool3, shape=[-1, pool3_flat_size])

with tf.name_scope("fc1"):
    fc1 = tf.layers.dense(pool3_flat, n_fc1, activation=tf.nn.relu, name="fc1")

with tf.name_scope("output"):
    logits = tf.layers.dense(fc1, n_outputs, name="output")
    Y_proba = tf.nn.softmax(logits, name="Y_proba")

with tf.name_scope("train"):
    # Sparse cross-entropy takes integer class ids (y) rather than one-hot rows.
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y)
    loss = tf.reduce_mean(xentropy)
    optimizer = tf.train.AdamOptimizer()
    training_op = optimizer.minimize(loss)

with tf.name_scope("eval"):
    # A prediction counts as correct when the true class has the top logit.
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

with tf.name_scope("init_and_save"):
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()



In [55]:
# Draw one batch of 300 examples from the held-out set; the training loop
# reuses it for the "Test accuracy" printed after every epoch.
X_test, y_test = test_data.next_batch(300)


In [None]:
n_epochs = 25
batch_size = 100

# Restrict TensorFlow to a single thread for both inter-op and intra-op work.
session_config = tf.ConfigProto(inter_op_parallelism_threads=1,
                                intra_op_parallelism_threads=1)

with tf.Session(config=session_config) as sess:

    sess.run(init)
    for epoch in range(n_epochs):
        for iteration in range(train_data.num_examples // batch_size):
            X_batch, y_batch = train_data.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        # Train accuracy is measured on the last mini-batch of the epoch only;
        # test accuracy uses the fixed X_test / y_test batch.
        acc_train = sess.run(accuracy, feed_dict={X: X_batch, y: y_batch})
        acc_test = sess.run(accuracy, feed_dict={X: X_test, y: y_test})
        print(epoch, "Train accuracy:", acc_train, "Test accuracy:", acc_test)

        # Write the checkpoint again at the end of every epoch.
        save_path = saver.save(sess, "./my_isd_model")

0 Train accuracy: 0.62 Test accuracy: 0.64
1 Train accuracy: 0.73 Test accuracy: 0.713333
2 Train accuracy: 0.8 Test accuracy: 0.713333
3 Train accuracy: 0.84 Test accuracy: 0.82
4 Train accuracy: 0.85 Test accuracy: 0.793333
