# Evaluation of Images of Lines from Writer

1. Set the DATA_FOLDER
2. Set the OUTPUT_FOLDER, OUTPUT_FILE_NAME and OUTPUT_PRED_FILE_NAME
3. Set the CUDA properties
4. Run the jupyter cells.

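Output is written to OUTPUT_FOLDER: OUTPUT_FILE_NAME holds one `file,predicted_class` row per image, and OUTPUT_PRED_FILE_NAME holds the mean per-class scores for each image.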

In [1]:
# Folder that is walked (including sub-folders) for the *.png / *.jpg images to score.
DATA_FOLDER = '/data/ta2_transcripts_drops/all_images/'
# Folder that receives both result files named below.
OUTPUT_FOLDER = '/data/ta2_transcripts_drops/'
# Result file with one "<image file>,<predicted class index>" row per image.
OUTPUT_FILE_NAME = 'm18_baseline.txt'
# Result file with one row per image holding the mean per-class scores.
OUTPUT_PRED_FILE_NAME = 'm18_baseline_predictions.txt'

In [2]:
import os
# Select the GPU for this kernel. These variables are set here, before keras is
# imported in the following cells: devices are ordered by PCI bus id and only
# device "1" is made visible.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "1",
})

In [3]:
from __future__ import division
import numpy as np
import os
from keras.utils import to_categorical, np_utils
import pandas as pd



In [4]:
from model import build_model
# Number of output classes handed to the model builder; presumably the number of
# writers the checkpoint was trained on — TODO confirm against the training setup.
num_classes = 50
# Build the network using the project-local `model` module.
model = build_model(num_classes)

In [5]:
# Load the weights from the checkpoint file. The path is relative, so it is
# resolved against the kernel's current working directory.
model.load_weights('writer_id_checkpoint.hdf5')

In [6]:
from data_prep import prep_sample

# get slices of normalized image
def generate_predict_data(filename, slice_size=113):
    """Turn one image file into a batch of normalized slices for the model.

    Parameters
    ----------
    filename : str
        Path of the image; passed unchanged to ``prep_sample``.
    slice_size : int, optional
        Side length of the square slices collected by ``prep_sample``.
        Defaults to 113, the value that used to be hard-coded here.

    Returns
    -------
    numpy.ndarray
        ``float32`` array of shape ``(n_slices, slice_size, slice_size, 1)``
        whose values have been divided by 255.
    """
    images = []
    # prep_sample fills the lists it is given; only `images` is used afterwards.
    # The label argument (0) is a dummy, presumably stored in `targets`.
    targets = []
    prep_sample(filename, 0, images, targets)

    image_arrays = np.array(images)

    # Add the trailing single-channel axis expected when feeding the model.
    image_arrays = image_arrays.reshape(
        image_arrays.shape[0], slice_size, slice_size, 1
    )

    # Convert to float32 and divide by 255 (presumably 8-bit pixels -> [0, 1]).
    return image_arrays.astype('float32') / 255

In [7]:
from scipy import stats
folder = DATA_FOLDER
pred_path = os.path.join(OUTPUT_FOLDER, OUTPUT_PRED_FILE_NAME)
best_path = os.path.join(OUTPUT_FOLDER, OUTPUT_FILE_NAME)

# Do not reprocess existing predictions. This allows a restart if the program
# stops mid-processing OR new files are added to the source folder.
#
# Both files are opened in append mode: a missing file is created, an existing
# file is never truncated, and every write lands at the end of the file. The
# open mode is therefore no longer derived from the existence of only one of
# the two files, which could either fail ('r+' on a missing file) or wipe the
# results of a previous run ('w' on an existing file).
with open(pred_path, 'a') as fp_pred, open(best_path, 'a+') as fp:
    # 'a+' may start positioned at the end of the file; rewind to read the
    # names of the images that were already processed.
    fp.seek(0)
    ids = set(l.split(',')[0] for l in fp.readlines())
    print(len(ids))

    # NOTE: `root` and `file` are deliberately kept as the loop variable names
    # because later cells in this notebook read them.
    for root, dirs, files in os.walk(folder, topdown=False):
        for file in files:
            if file in ids:
                continue
            if not file.endswith(('png', 'jpg')):
                continue

            slices = generate_predict_data(os.path.join(root, file))
            if len(slices) == 0:
                # Nothing to average: the mean over zero rows would be NaN.
                print(f'skipping {file}: no image slices')
                continue

            # One prediction row per sliding-window slice of the image.
            # Model.predict replaces the deprecated Model.predict_generator.
            predictions = model.predict(slices, verbose=0)

            # Two approaches to combine the rows: take the mean or take the
            # most frequent argmax. Subtle differences; the mean is used.
            mu = np.mean(predictions, axis=0)
            mustring = ','.join(f'{x:0.3f}' for x in mu)
            fp_pred.write(f'{file},{mustring},{0.0}\n')

            best = np.argmax(mu)
            fp.write(f'{file},{best}\n')

            # Flush per image so an interrupted run loses at most the image
            # in flight and the resume logic above sees everything finished.
            fp_pred.flush()
            fp.flush()

155270
Instructions for updating:
Please use Model.predict, which supports generators.


In [22]:
# Spot-check the raw per-slice predictions for a single image.
# NOTE: `root` is left over from the os.walk loop in the processing cell (the
# directory it visited last), so that cell must have been run first.
# Model.predict replaces the deprecated Model.predict_generator.
predictions = model.predict(
    generate_predict_data(os.path.join(root, '5f475c8c-4969-4323-9385-d7aaca7bfaf4.png')),
    verbose=0,
)
predictions

array([[1.11110801e-08, 5.07360312e-07, 2.15013701e-11, ...,
        1.79138375e-08, 1.23029267e-08, 3.20049026e-06],
       [6.78752012e-16, 1.84718647e-12, 4.39766967e-26, ...,
        1.33067632e-16, 2.37447284e-20, 9.19422050e-09],
       [3.30698385e-11, 3.65128994e-09, 6.36942643e-20, ...,
        2.47365791e-11, 1.21281205e-17, 1.00410728e-06],
       ...,
       [1.58392666e-07, 1.15191324e-04, 6.84207913e-10, ...,
        1.67082835e-05, 2.45408867e-08, 1.96581264e-03],
       [2.84861240e-12, 1.46530574e-08, 4.21933431e-19, ...,
        3.84590363e-12, 2.25955330e-16, 3.58878096e-05],
       [1.18628503e-08, 8.58587157e-07, 7.26094261e-13, ...,
        5.19203258e-09, 1.38276973e-10, 3.16180594e-05]], dtype=float32)

In [14]:
# Debug: `file` is the loop variable left over from the processing cell, i.e.
# the most recent file name that loop iterated over.
file

'8f0d5a82-5e2e-4ee1-9f89-3080654ffec4.png'