<a href="https://colab.research.google.com/github/agiagoulas/page-stream-segmentation/blob/master/model_training/ImageModel_Training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Setup & Imports

Connect to Google Drive when working in Google Colab

In [1]:
# Mount Google Drive at /content/gdrive when running inside Google Colab.
# The google.colab package is only importable inside Colab, so on any other
# kernel the mount is skipped instead of aborting the notebook with ImportError.
try:
    from google.colab import drive
except ImportError:
    print("google.colab is not available - skipping Google Drive mount")
else:
    drive.mount('/content/gdrive')

Mounted at /content/gdrive


Set working directory 

In [2]:
# Base directory (with trailing slash) that the cells below prefix to the
# Tobacco800 split files, the image path template and the saved model weights.
# NOTE(review): Google Drive is mounted at /content/gdrive, but this path is not
# under that mount point - confirm it is correct for your environment.
working_dir = "/Tobacco800/"

Imports

In [3]:
import csv, re, math
import sklearn.metrics as sklm
import numpy as np
import requests
import tensorflow as tf

from keras.callbacks import ModelCheckpoint
from importlib import reload
from keras.utils import Sequence
from keras.models import Sequential, Model
from keras.layers import *
from keras.utils import *
from keras.callbacks import ModelCheckpoint, Callback

Private Imports

In [4]:
# Download the project's model_img helper module from GitHub, store it next to
# the notebook and import it.
model_img_request = requests.get(
    "https://raw.githubusercontent.com/agiagoulas/page-stream-segmentation/master/app/pss/model_img.py",
    timeout=30,  # do not hang the notebook forever if the host is unreachable
)
# Fail right here on an HTTP error (e.g. 404): otherwise the error page would be
# written to model_img.py and only surface later as a confusing error on import.
model_img_request.raise_for_status()
# Python source files are decoded as UTF-8 on import, so write them as UTF-8
# regardless of the platform's default encoding.
with open("model_img.py", "w", encoding="utf-8") as f:
    f.write(model_img_request.text)
import model_img

Set model_img properties

In [6]:
# Image dimensions passed to the model_img model builders and generators below.
img_dim = (224, 224)
# Path template for the page images; the "%s" placeholder is kept literally and is
# presumably filled in by model_img with an image identifier - verify in model_img.
model_img.img_path_template = f"{working_dir}Tobacco800_Small/%s.png"

Load Tobacco800 Data

In [7]:
# Read the train and test split files (in that order) through
# model_img.read_csv_data.
data_train, data_test = (
    model_img.read_csv_data(working_dir + split_file)
    for split_file in ("tobacco800.train", "tobacco800.test")
)

# Single Page Model Training

Model Training

In [None]:
# Settings for the repeated single-page training runs.
n_repeats = 10
n_epochs = 20
optimize_for = 'kappa'  # metric name handed to the ValidationCheckpoint callback
single_page_metric_history = []  # collects checkpoint.max_metrics, one entry per repeat

with tf.device('/GPU:0'):
    for i in range(n_repeats):
        print(f"Repeat {i + 1} of {n_repeats}")
        print("-------------------------")

        # Every repeat starts from a freshly compiled model and gets its own weights file.
        model_singlepage = model_img.compile_model_singlepage(img_dim)
        model_file = f"{working_dir}tobacco800_image_single-page_{i:02d}.hdf5"

        # NOTE(review): the checkpoint callback is given data_test; if it selects the
        # best epoch on that split, the reported metrics are optimistic - confirm.
        checkpoint = model_img.ValidationCheckpoint(
            model_file, data_test, img_dim, metric=optimize_for
        )
        train_batches = model_img.ImageFeatureGenerator(
            data_train, img_dim, prevpage=False, train=True
        )
        model_singlepage.fit(train_batches, callbacks=[checkpoint], epochs=n_epochs)

        single_page_metric_history.append(checkpoint.max_metrics)

print(single_page_metric_history)

Show metric results from different models

In [None]:
# One line per repeat: index, kappa, accuracy, micro F1, macro F1 and the
# weights file that repeat wrote. print() joins its arguments with single
# spaces after converting each one with str().
for i, r in enumerate(single_page_metric_history):
    model_file = f"{working_dir}tobacco800_image_single-page_{i:02d}.hdf5"
    print(i, r['kappa'], r['accuracy'], r['f1_micro'], r['f1_macro'], model_file)

Load model and generate prediction

In [None]:
# Rebuild the single-page model and restore the weights saved for repeat 00.
model_singlepage = model_img.compile_model_singlepage(img_dim)
model_singlepage.load_weights(f"{working_dir}tobacco800_image_single-page_00.hdf5")

# NOTE(review): the test generator is created with train=True - confirm in
# model_img that this does not reorder samples, since y_true below follows the
# order of data_test.
test_batches = model_img.ImageFeatureGenerator(data_test, img_dim, prevpage=False, train=True)
y_predict = np.round(model_singlepage.predict(test_batches))
y_true = [model_img.LABEL2IDX[x[3]] for x in data_test]

# (label, scorer, extra keyword arguments); each score is computed right
# before its line is printed.
report = (
    ("Accuracy: ", sklm.accuracy_score, {}),
    ("Kappa: ", sklm.cohen_kappa_score, {}),
    ("F1 Micro ", sklm.f1_score, {"average": "micro"}),
    ("F1 Macro ", sklm.f1_score, {"average": "macro"}),
)
for label, scorer, extra in report:
    print(label + str(scorer(y_true, y_predict, **extra)))

# Current & Prev Page Model Training

Model Training

In [None]:
# Settings for the repeated current-and-previous-page training runs.
n_repeats = 10
n_epochs = 20
optimize_for = 'kappa'  # metric name handed to the ValidationCheckpoint callback
prev_page_metric_history = []  # collects checkpoint.max_metrics, one entry per repeat

with tf.device('/GPU:0'):
    for i in range(n_repeats):
        print(f"Repeat {i + 1} of {n_repeats}")
        print("-------------------------")

        # Every repeat starts from a freshly compiled model and gets its own weights file.
        model_prevpage = model_img.compile_model_prevpage(img_dim)
        model_file = f"{working_dir}tobacco800_image_prev-page_{i:02d}.hdf5"

        # NOTE(review): the checkpoint callback is given data_test; if it selects the
        # best epoch on that split, the reported metrics are optimistic - confirm.
        checkpoint = model_img.ValidationCheckpoint(
            model_file, data_test, img_dim, prev_page_generator=True, metric=optimize_for
        )
        train_batches = model_img.ImageFeatureGenerator(
            data_train, img_dim, prevpage=True, train=True
        )
        model_prevpage.fit(train_batches, callbacks=[checkpoint], epochs=n_epochs)

        prev_page_metric_history.append(checkpoint.max_metrics)

print(prev_page_metric_history)

Show metric results from different models

In [None]:
# One line per repeat: index, kappa, accuracy, micro F1, macro F1 and the
# weights file that repeat wrote. print() joins its arguments with single
# spaces after converting each one with str().
for i, r in enumerate(prev_page_metric_history):
    model_file = f"{working_dir}tobacco800_image_prev-page_{i:02d}.hdf5"
    print(i, r['kappa'], r['accuracy'], r['f1_micro'], r['f1_macro'], model_file)

Load model and generate prediction

In [None]:
# Rebuild the current-and-previous-page model, restore the weights of one
# training repeat and score its predictions on the test split.
model_prevpage = model_img.compile_model_prevpage(img_dim)

# Index of the training repeat whose weights file is loaded. The file name
# template must be filled in: the unformatted "..._%02d.hdf5" string does not
# name any file written by the training loop.
repeat_to_load = 0
model_prevpage.load_weights(working_dir + "tobacco800_image_prev-page_%02d.hdf5" % repeat_to_load)

# NOTE(review): the test generator is created with train=True - confirm in
# model_img that this does not reorder samples, since y_true below follows the
# order of data_test.
y_predict = np.round(model_prevpage.predict(model_img.ImageFeatureGenerator(data_test, img_dim, prevpage=True, train=True)))
# Column 3 of each data row holds the label name, mapped to its index via LABEL2IDX.
y_true = [model_img.LABEL2IDX[x[3]] for x in data_test]
print("Accuracy: " + str(sklm.accuracy_score(y_true, y_predict)))
print("Kappa: " + str(sklm.cohen_kappa_score(y_true, y_predict)))
print("F1 Micro " + str(sklm.f1_score(y_true, y_predict, average='micro')))
print("F1 Macro " + str(sklm.f1_score(y_true, y_predict, average='macro')))