# Image classification training on a DEBIAI project with a dataset generator

This tutorial shows how to classify images of flowers after inserting the project's contextual data into DEBIAI.

Based on the TensorFlow tutorial: https://www.tensorflow.org/tutorials/images/classification

In [14]:
# Import TensorFlow and other libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
import PIL
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential

# The pythonModule folder needs to be in the same folder
from debiai import debiai

## Download and explore the dataset

This tutorial uses a dataset of about 3,700 photos of flowers. The dataset contains 5 sub-directories, one per class:

daisy, dandelion, roses, sunflowers and tulips


In [15]:
import pathlib

# Archive holding the flower photos used throughout this tutorial.
dataset_url = "https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz"

# Download the archive (untar=True asks Keras to extract it as well) and keep
# the returned location as a Path so the later cells can glob inside it.
data_dir = pathlib.Path(
    tf.keras.utils.get_file('flower_photos', origin=dataset_url, untar=True)
)


## Create a dataset

In [16]:
# Loader parameters shared by the following cells:
# - batch_size: number of images per batch produced by the directory loader
# - img_height / img_width: size every image is resized to (square, 180 px)

batch_size = 32
img_height = img_width = 180

In [17]:
# Training subset of the images found under data_dir (one sub-directory per
# class). The split fraction and seed are the same as in the validation cell.
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    directory=data_dir,
    validation_split=0.2,
    subset="training",
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size,
)

Found 3670 files belonging to 5 classes.
Using 2936 files for training.


In [18]:
# Validation subset of the images found under data_dir. The split fraction and
# seed are the same as in the training cell.
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    directory=data_dir,
    validation_split=0.2,
    subset="validation",
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size,
)


Found 3670 files belonging to 5 classes.
Using 734 files for validation.


In [19]:
# Class labels exposed by the training dataset; they are reused later to build
# the DEBIAI dataframe, to size the model output and to build the DEBIAI dataset.
# NOTE(review): train_ds is rebound to a cached/prefetched pipeline in a later
# cell; presumably that object no longer exposes class_names, so keep this
# cell before it - confirm.
class_names = train_ds.class_names
print(class_names)

['daisy', 'dandelion', 'roses', 'sunflowers', 'tulips']


In [20]:
# Let tf.data pick the prefetch buffer size.
AUTOTUNE = tf.data.AUTOTUNE

# Training pipeline: cache, shuffle with a 1000-element buffer, then prefetch.
train_ds = train_ds.cache()
train_ds = train_ds.shuffle(1000)
train_ds = train_ds.prefetch(buffer_size=AUTOTUNE)

# Validation pipeline: same caching and prefetching, without shuffling.
val_ds = val_ds.cache()
val_ds = val_ds.prefetch(buffer_size=AUTOTUNE)

## Insert the project contextual data in DEBIAI

In [34]:
# DEBIAI project block structure: a single block named "image_id" that carries
# the flower class as ground truth and the image path as context.
DEBIAI_block_structure = [
    dict(
        name="image_id",
        groundTruth=[
            dict(name="class", type="text"),
        ],
        contexts=[
            dict(name="img_path", type="text"),
        ],
    )
]

#### Converting some of the project data into a dataframe

In this example, the samples are prepared by creating a dataframe.

More details here:
https://git.irt-systemx.fr/ML/DEBIAI/pythonModule#adding-samples

In [35]:
# Creation of a dataframe with the same columns as the block structure:
# one row per image file, grouped by class in the order of class_names.
#
# Each class listing is sorted because Path.glob yields entries in an
# arbitrary, filesystem-dependent order; sorting keeps the
# image_id <-> img_path mapping identical across runs and machines.
labelled_paths = [
    (class_name, str(image))
    for class_name in class_names
    for image in sorted(data_dir.glob(class_name + '/*'))
]

data = {
    # Sequential ids starting at 0, in listing order
    "image_id": list(range(len(labelled_paths))),
    "class": [class_name for class_name, _ in labelled_paths],
    "img_path": [img_path for _, img_path in labelled_paths],
}

df = pd.DataFrame(data=data)
df

Unnamed: 0,image_id,class,img_path
0,0,daisy,/home/tomansion/.keras/datasets/flower_photos/...
1,1,daisy,/home/tomansion/.keras/datasets/flower_photos/...
2,2,daisy,/home/tomansion/.keras/datasets/flower_photos/...
3,3,daisy,/home/tomansion/.keras/datasets/flower_photos/...
4,4,daisy,/home/tomansion/.keras/datasets/flower_photos/...
...,...,...,...
3665,3665,tulips,/home/tomansion/.keras/datasets/flower_photos/...
3666,3666,tulips,/home/tomansion/.keras/datasets/flower_photos/...
3667,3667,tulips,/home/tomansion/.keras/datasets/flower_photos/...
3668,3668,tulips,/home/tomansion/.keras/datasets/flower_photos/...


In [36]:
# Settings for this demo: the name of the project to use and the address of
# the DEBIAI backend (a local instance on port 3000).
DEBIAI_PROJECT_NAME = 'Image classification demo'
DEBIAI_BACKEND_URL = 'http://localhost:3000/'

# DEBIAI instance bound to that backend; the project is fetched from it below.
my_debiai = debiai.Debiai(DEBIAI_BACKEND_URL)

In [37]:
# Reuse the DEBIAI project when it already exists; otherwise create it.
# create_project is only called when get_project returns a falsy value.
debiai_project = (
    my_debiai.get_project(DEBIAI_PROJECT_NAME)
    or my_debiai.create_project(DEBIAI_PROJECT_NAME)
)

debiai_project

DEBIAI project : 'Image classification demo'

In [38]:
# Set the project block structure only if the project does not have one yet,
# so that re-running this cell on an existing project skips the call.
if not debiai_project.block_structure_defined():
    debiai_project.set_blockstructure(DEBIAI_block_structure)
# Display the block structure currently stored for the project.
debiai_project.get_block_structure()

[{'contexts': [{'name': 'img_path', 'type': 'text'}],
  'groundTruth': [{'name': 'class', 'type': 'text'}],
  'name': 'image_id'}]

In [39]:
# Add the dataframe rows to the DEBIAI project as samples; the call's return
# value is displayed as the cell output.
# NOTE(review): the meaning of get_hash=False is not visible here - confirm
# against the DEBIAI python module documentation.
debiai_project.add_samples_pd(df, get_hash=False)

True

## Create the model

In [28]:
# One output unit per flower class.
num_classes = len(class_names)

# Recent Keras releases expose Rescaling directly on `layers` and no longer
# ship `layers.experimental`; older TensorFlow 2.x releases only provide the
# experimental path. Use whichever one is available.
try:
    rescaling_layer_cls = layers.Rescaling
except AttributeError:
    rescaling_layer_cls = layers.experimental.preprocessing.Rescaling

# Small CNN: pixel rescaling to [0, 1], three Conv2D + MaxPooling2D stages,
# then a dense head. The last Dense layer has no activation, so the model
# outputs raw logits (one per class).
model = Sequential([
  rescaling_layer_cls(1./255, input_shape=(img_height, img_width, 3)),
  layers.Conv2D(16, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Conv2D(32, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Conv2D(64, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Flatten(),
  layers.Dense(128, activation='relu'),
  layers.Dense(num_classes)
])


In [29]:
# Compile the model with the Adam optimizer and track accuracy.
# from_logits=True because the model's last Dense layer has no activation.
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

# Print the layer-by-layer summary of the compiled model.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
rescaling (Rescaling)        (None, 180, 180, 3)       0         
_________________________________________________________________
conv2d (Conv2D)              (None, 180, 180, 16)      448       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 90, 90, 16)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 90, 90, 32)        4640      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 45, 45, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 45, 45, 64)        18496     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 22, 22, 64)        0

## Train the model with the DEBIAI Dataset generator

In [31]:
# DEBIAI doesn't have the images needed to train the model, so we provide a
# function that builds the model input from a sample's information, as laid
# out by the block structure.

def model_input_from_debiai_sample(debiai_sample: dict):
    """Load the image of a DEBIAI sample and return it as a one-image batch.

    Args:
        debiai_sample: sample information; only its 'img_path' entry is read
            here (the block structure also defines "image_id" and "class").

    Returns:
        The image, resized to (img_height, img_width) on load and converted to
        an array, with a new leading axis of size 1 added by tf.expand_dims.
    """
    image_path = debiai_sample['img_path']
    target_size = (img_height, img_width)

    pil_image = keras.preprocessing.image.load_img(image_path, target_size=target_size)
    pixel_array = keras.preprocessing.image.img_to_array(pil_image)

    # Add the batch axis in front of the image axes
    single_image_batch = tf.expand_dims(pixel_array, 0)
    return single_image_batch

In [32]:
# TF dataset generated by DEBIAI: model_input_from_debiai_sample supplies the
# model input for each sample. The declared types/shapes are float32 image
# batches of shape [None, img_height, img_width, 3] and int32 labels of shape [1].
train_dataset_imported = debiai_project.get_tf_dataset_with_provided_inputs(
    model_input_from_debiai_sample,
    output_types=(tf.float32, tf.int32),
    output_shapes=([None, img_height, img_width, 3], [1]),
    classes=class_names,
)

# Same input pipeline as for the directory-based datasets:
# cache, shuffle with a 1000-element buffer, then prefetch.
AUTOTUNE = tf.data.AUTOTUNE
train_dataset_imported = train_dataset_imported.cache()
train_dataset_imported = train_dataset_imported.shuffle(1000)
train_dataset_imported = train_dataset_imported.prefetch(buffer_size=AUTOTUNE)

# get_tf_dataset_with_provided_inputs also works with a selection

In [33]:
# Train the model on the dataset generated by DEBIAI.
# epochs is the number of passes over train_dataset_imported; the History
# object returned by fit() is displayed as the cell output.
epochs = 3
model.fit(train_dataset_imported, epochs=epochs)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x7febfc637a30>