In [1]:
%load_ext autoreload
%autoreload 2

In [5]:
import pathlib
import os
import json
import tensorflow as tf
import numpy as np
import IPython.display as display
import matplotlib.pyplot as plt
from PIL import Image

In [13]:
# https://www.tensorflow.org/tutorials/load_data/images
AUTOTUNE = tf.data.experimental.AUTOTUNE

# data
data_dir = pathlib.Path("../data/processed")

In [22]:
metadata_file_path = str(data_dir.joinpath("Corn", "metadata.json"))
record_file_path = str(data_dir.joinpath("Corn", "images.tfrecord"))

In [23]:
with open(metadata_file_path) as json_file:
    metadata = json.load(json_file)

In [24]:
metadata

{'species': 'Corn',
 'num_classes': 4,
 'num_files': 2529,
 'class_names': ['Corn_(maize)___Cercospora_leaf_spot Gray_leaf_spot',
  'Corn_(maize)___Common_rust_',
  'Corn_(maize)___Northern_Leaf_Blight',
  'Corn_(maize)___healthy'],
 'created_date': '2020-01-25 23:37:28'}

In [25]:
# dataset
image_count = metadata["num_files"]
BATCH_SIZE = 32
IMG_HEIGHT = 256
IMG_WIDTH = 256
STEPS_PER_EPOCH = np.ceil(image_count / BATCH_SIZE)

In [26]:
# Decoder
def _parse_function(example_proto):
    # define your tfrecord again. Remember that you saved your image as a string.
    image_feature_description = {
        'image_raw': tf.io.FixedLenFeature([], tf.string),
        'label': tf.io.FixedLenFeature([], tf.int64),
        'label_text': tf.io.FixedLenFeature([], tf.string),
    }
    
    # Load one example
    parsed_features = tf.io.parse_single_example(example_proto, image_feature_description)
    
    # Parse image
    parsed_features["image"] = tf.image.decode_jpeg(parsed_features["image_raw"])
    #parsed_features["image"] = tf.reshape(image, [image.shape[2], image.shape[0], image.shape[1], 1])
    return parsed_features["image"], parsed_features["label"]


# Preprocessing: https://docs.databricks.com/applications/deep-learning/data-prep/tfrecords-to-tensorflow.html
def normalize(image, label):
    """Convert `image` from [0, 255] -> [-0.5, 0.5] floats."""
    image = tf.cast(image, tf.float32) * (1. / 255) - 0.5
    return image, label


def create_dataset(files, batch_size):
    dataset = tf.data.TFRecordDataset(files)
    dataset = dataset.map(_parse_function, num_parallel_calls=4)
    dataset = dataset.map(normalize)
    dataset = dataset.repeat()
    dataset = dataset.batch(batch_size)
    iterator = dataset.make_one_shot_iterator()
    image, label = iterator.get_next()
    image = tf.reshape(image, [batch_size, 300, 300, 3])
    label = tf.one_hot(label, num_classes)
    return image, label

In [27]:
dataset = tf.data.TFRecordDataset(record_file_path)
dataset = dataset.map(_parse_function)
# Apply the normalize function
dataset = dataset.map(normalize)

In [28]:
help(tf.keras.Sequential)

Help on class Sequential in module tensorflow.python.keras.engine.sequential:

class Sequential(tensorflow.python.keras.engine.training.Model)
 |  Sequential(layers=None, name=None)
 |  
 |  Linear stack of layers.
 |  
 |  Arguments:
 |      layers: list of layers to add to the model.
 |  
 |  Example:
 |  
 |  ```python
 |  # Optionally, the first layer can receive an `input_shape` argument:
 |  model = Sequential()
 |  model.add(Dense(32, input_shape=(500,)))
 |  # Afterwards, we do automatic shape inference:
 |  model.add(Dense(32))
 |  
 |  # This is identical to the following:
 |  model = Sequential()
 |  model.add(Dense(32, input_dim=500))
 |  
 |  # And to the following:
 |  model = Sequential()
 |  model.add(Dense(32, batch_input_shape=(None, 500)))
 |  
 |  # Note that you can also omit the `input_shape` argument:
 |  # In that case the model gets built the first time you call `fit` (or other
 |  # training and evaluation methods).
 |  model = Sequential()
 |  model.add(Dense

In [29]:
help(tf.keras.layers.Conv2D)

Help on class Conv2D in module tensorflow.python.keras.layers.convolutional:

class Conv2D(Conv)
 |  Conv2D(filters, kernel_size, strides=(1, 1), padding='valid', data_format=None, dilation_rate=(1, 1), activation=None, use_bias=True, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None, **kwargs)
 |  
 |  2D convolution layer (e.g. spatial convolution over images).
 |  
 |  This layer creates a convolution kernel that is convolved
 |  with the layer input to produce a tensor of
 |  outputs. If `use_bias` is True,
 |  a bias vector is created and added to the outputs. Finally, if
 |  `activation` is not `None`, it is applied to the outputs as well.
 |  
 |  When using this layer as the first layer in a model,
 |  provide the keyword argument `input_shape`
 |  (tuple of integers, does not include the sample axis),
 |  e.g. `input_shape=(128, 128, 3)` for 128x

In [33]:
pathlib.Path(record_file_path).parent.joinpath("metadata.json")

PosixPath('../data/processed/Corn/metadata.json')

In [65]:
import src.data.io as io

In [38]:
metadata = io.read_metadata(metadata_file_path)

In [97]:
dataset = io.read_dataset(record_file_path, 32, metadata["num_classes"])

In [98]:
dataset

<BatchDataset shapes: ((None, 256, 256, 3), (None, 4)), types: (tf.float32, tf.float32)>

In [99]:
from src.models.train_model import model as create_model

In [105]:
model = create_model(metadata["num_classes"])

Model: "sequential_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_30 (Conv2D)           (None, 256, 256, 32)      896       
_________________________________________________________________
leaky_re_lu_40 (LeakyReLU)   (None, 256, 256, 32)      0         
_________________________________________________________________
max_pooling2d_30 (MaxPooling (None, 128, 128, 32)      0         
_________________________________________________________________
dropout_40 (Dropout)         (None, 128, 128, 32)      0         
_________________________________________________________________
conv2d_31 (Conv2D)           (None, 128, 128, 64)      18496     
_________________________________________________________________
leaky_re_lu_41 (LeakyReLU)   (None, 128, 128, 64)      0         
_________________________________________________________________
max_pooling2d_31 (MaxPooling (None, 64, 64, 64)      

In [106]:
BATCH_SIZE

32

In [107]:
model.fit(dataset, steps_per_epoch=10, epochs=EPOCHS)

Train for 10 steps
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7efb57dbddd0>

In [109]:
from sklearn.model_selection import train_test_split

In [110]:
from src.data import utils

In [120]:
image_file_paths = utils.get_image_paths("../data/raw", "Strawberry")
image_file_paths

['../data/raw/Strawberry___healthy/caa95a07-527a-4ceb-923a-40c717c872b8___RS_HL 1916.JPG',
 '../data/raw/Strawberry___healthy/275f8963-f4f4-4903-962b-1da716725d08___RS_HL 4780.JPG',
 '../data/raw/Strawberry___healthy/bb37a7ff-af9b-4db2-ab44-ccd9452e6bf6___RS_HL 2074.JPG',
 '../data/raw/Strawberry___healthy/3d28c3ea-8419-4e09-addd-211e3828e39f___RS_HL 1942.JPG',
 '../data/raw/Strawberry___healthy/6b871851-ec5f-4e79-b850-bfdaed9759c5___RS_HL 4446.JPG',
 '../data/raw/Strawberry___healthy/36fbdab5-6c76-4f04-8252-bfb944c20ddf___RS_HL 4788.JPG',
 '../data/raw/Strawberry___healthy/7cc70e2f-3727-428a-a414-02dd59f96ae3___RS_HL 4755.JPG',
 '../data/raw/Strawberry___healthy/3cffaab3-eb47-4ba7-8596-35445be017fc___RS_HL 2217.JPG',
 '../data/raw/Strawberry___healthy/1788478b-7aac-4ec3-b249-e1d145a39980___RS_HL 2172.JPG',
 '../data/raw/Strawberry___healthy/bf156be7-0764-4fdf-822a-568a93501825___RS_HL 1886.JPG',
 '../data/raw/Strawberry___healthy/dc05a60e-5939-4e59-bfee-890db52986cc___RS_HL 2014.JPG',

In [121]:
class_names = utils.get_class_names("../data/raw", "Strawberry")
class_names

array(['Strawberry___Leaf_scorch', 'Strawberry___healthy'], dtype='<U24')

In [123]:
labels = [utils.get_label(file_path, class_names) for file_path in image_file_paths]

In [124]:
train_files, test_files, train_labels, test_labels = train_test_split(image_file_paths, labels, test_size=0.2, random_state=13)

In [125]:
train_files, eval_files, train_labels, eval_labels = train_test_split(train_files, train_labels, test_size=0.2, random_state=13)

In [128]:
print(f"train: {len(train_files)}, eval: {len(eval_files)}, test: {len(test_files)}, total: {len(image_file_paths)}")

train: 1000, eval: 251, test: 313, total: 1564


In [129]:
testdata = {}

In [130]:
blah = {"name": 1}

In [131]:
{
    **blah
}

{'name': 1}

In [133]:
io.read_metadata(metadata_file_path)

{'species': 'Corn',
 'num_classes': 4,
 'class_names': ['Corn_(maize)___Cercospora_leaf_spot Gray_leaf_spot',
  'Corn_(maize)___Common_rust_',
  'Corn_(maize)___Northern_Leaf_Blight',
  'Corn_(maize)___healthy'],
 'created_date': '2020-01-26 04:07:55',
 'file_counts': {'train': 1618, 'test': 506, 'eval': 405}}