In [None]:
import os
import csv

from PIL import Image, ImageDraw, ImageFont
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import numpy as np

from random import shuffle

# Absolute path of the TrueType font used when label text is drawn onto images.
font_file = '/root/bryton/aquabyte_sealice/Keyboard.ttf'
# Load the font once at size 20; the image-annotation code passes it to draw.text.
font = ImageFont.truetype(font_file, 20)

In [None]:
# Load the raw annotation records.
#
# The file is read with csv.reader, but only the first comma-separated field
# of each line is kept and then split on whitespace, so every record ends up
# as a list of whitespace-separated string fields.

annotations_file = '/root/data/lice_data/annotations/training/annotations.csv'

# 'rb' is the mode the Python 2 csv module expects. The with-block guarantees
# the handle is closed even if parsing raises (it was previously left open).
with open(annotations_file, 'rb') as f:
    reader = csv.reader(f)
    # csv.reader yields an empty list for a blank line; skip those instead of
    # failing with IndexError on row[0].
    lice_data = [row[0].split() for row in reader if row]

# Display name for every integer class id that classifyLice can return.
# Negative ids are the "Not_*" (negative example) classes.
labelsToTextDict = dict([
    (0, 'AFLice'),
    (1, 'OLice'),
    (2, 'Lice'),
    (-1, 'Not_AFLice'),
    (-2, 'Not_OLice'),
])

def labelsToText(label):
    """Return the display name for an integer class id.

    Raises KeyError when `label` is not one of the ids in labelsToTextDict.
    """
    label_name = labelsToTextDict[label]
    return label_name

def classifyLice(label):
    """Map a raw annotation label string to its integer class id.

    Matching is by substring and the order matters: a marker that contains
    another marker (e.g. 'Not_AFLice' contains 'AFLice', which contains
    'Lice') is tested before the shorter one. Both the 'OLice' and 'Olice'
    spellings are accepted.

    Returns the class id, or None after printing the label when no marker
    matches.
    """
    ordered_markers = (
        ('Not_AFLice', -1),
        ('Not_OLice', -2),
        ('Not_Olice', -2),
        ('AFLice', 0),
        ('OLice', 1),
        ('Olice', 1),
        ('Lice', 2),
    )

    for marker, class_id in ordered_markers:
        if marker in label:
            return class_id

    # Unrecognised label: report it and fall through to None.
    print(label)
    return None
        
# Turn each raw record into (image path, x1, y1, x2, y2, class id): the four
# box coordinates are parsed as ints and the label string is mapped to a
# class id by classifyLice.
typed_rows = []
for fields in lice_data:
    typed_rows.append((
        fields[0],
        int(fields[1]),
        int(fields[2]),
        int(fields[3]),
        int(fields[4]),
        classifyLice(fields[5]),
    ))
lice_data = typed_rows

# Show the first converted record as a sanity check.
print(lice_data[0])

In [None]:
# Bounding-box extents for every annotation, kept as two parallel lists.
# Each record is (image path, x1, y1, x2, y2, class id).
widths = [box[3] - box[1] for box in lice_data]
heights = [box[4] - box[2] for box in lice_data]

# One (width, height) row per annotation.
dimensions = np.array(list(zip(widths, heights)))

# Print the width/height distribution in 5-point percentile steps (0..100).
for percentile in range(0, 105, 5):
    print('%i: [%i, %i]' % (
        percentile,
        np.percentile(widths, percentile),
        np.percentile(heights, percentile),
    ))

# Cut-offs used to pick out the largest boxes.
width_percentile = np.percentile(widths, 95)
height_percentile = np.percentile(heights, 95)

# Indices of the annotations at or above the 95th percentile in BOTH
# dimensions; np.where returns a 1-tuple holding the index array.
is_wide = dimensions[:, 0] >= width_percentile
is_tall = dimensions[:, 1] >= height_percentile
subset = np.where(is_wide & is_tall)

print('Only %i annotations that are in the 95th percentile for width and height' % (len(subset[0]), ))

# One histogram per dimension.
for values, title in ((widths, 'Widths'), (heights, 'Heights')):
    plt.hist(values)
    plt.title(title)
    plt.show()

In [None]:
# Annotations selected by `subset`: boxes at or above the 95th percentile in
# both width and height.
sample_annotations = [ lice_data[index] for index in subset[0] ]

# This is the one I had sent to Preben previously
#print lice_data[100]

# Two images per row. The grid is sized from the data: a fixed 7x2 grid
# raised IndexError as soon as there were more than 14 samples.
n_cols = 2
n_rows = max(1, (len(sample_annotations) + n_cols - 1) // n_cols)

# squeeze=False keeps `ax` two-dimensional even when there is only one row.
f, ax = plt.subplots(n_rows, n_cols, figsize = (20, 10), squeeze = False)

for index, sample_annotation in enumerate(sample_annotations):
    image_filename, x1, y1, x2, y2, label = sample_annotation

    # Floor division: plain `/` only yields an integer index on Python 2.
    panel = ax[index // n_cols, index % n_cols]

    panel.imshow(np.array(Image.open(image_filename)))

    # Outline the annotated box in red on top of the image.
    rect = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=1, edgecolor='r', facecolor='none')
    panel.add_patch(rect)

    labelText = labelsToText(label)
    panel.set_title(labelText)

    print('%s %s' % (sample_annotation, labelText))

# Hide the empty panel left over when the sample count is odd (or zero).
for unused_panel in ax.ravel()[len(sample_annotations):]:
    unused_panel.axis('off')

plt.tight_layout()
plt.show()

In [None]:
output_directory = '/root/bryton/aquabyte_sealice/preben_output'

# Create the output directory. An already-existing directory is fine; any
# other OSError is re-raised.
try:
    os.makedirs(output_directory)
except OSError:
    if not os.path.isdir(output_directory):
        raise

for sample_annotation in sample_annotations:
    image_filename, x1, y1, x2, y2, label = sample_annotation

    labelText = labelsToText(label)

    # File name up to its first '.', without directories. It is taken from
    # the basename rather than a fixed position in the path (previously
    # component 6), so it no longer depends on how deep the image directory is.
    image_stem = os.path.basename(image_filename).split('.')[0]
    print(image_stem)

    image = Image.open(image_filename)

    draw = ImageDraw.Draw(image)

    # Label goes 10px above the box corner, clamped so it is not drawn off the
    # top edge of the image for boxes that start near y = 0.
    draw.text((x1, max(0, y1 - 10)), labelText, (255,255,0), font = font)
    draw.rectangle(((x1, y1), (x2, y2)), outline="red")

    # NOTE(review): the output name depends only on the image name, so two
    # sample annotations on the same image overwrite each other's file.
    output_file = os.path.join(output_directory, image_stem + '.jpg')

    image.save(output_file)

In [None]:
# Shuffle the annotations in place so the split below is random.
# NOTE(review): no seed is set, so the split differs on every run, and index
# arrays computed before this point (e.g. `subset`) no longer line up with
# lice_data afterwards.
shuffle(lice_data)

# Class balance. The printed value is a fraction in [0, 1], not a percentage.
for label in [-2, -1, 0, 1, 2]:
    print('Percent %s: %0.2f' % (labelsToText(label), np.average([1 if row[5] == label else 0 for row in lice_data])))

percent_train = 0.7
threshold = int(percent_train * len(lice_data))

print('%i out of %i' % (threshold, len(lice_data)))

# The first `threshold` rows train and the rest test. The test slice starts
# at `threshold` itself: starting at threshold + 1 silently dropped one row
# from both sets.
trainSet = lice_data[:threshold]
testSet = lice_data[threshold:]

for annotation in trainSet:
    pass # train the model here

'''
Assume we train something here
'''

def classifyPatch(x1, y1, x2, y2):
    """Placeholder classifier: ignores the box corners and always predicts 1.

    Class id 1 is 'OLice' in labelsToTextDict.
    """
    predicted_label = 1
    return predicted_label


In [None]:
# Validate it on the test set: run the classifier on every held-out box and
# record whether the prediction equals the annotated class id.

results = [
    classifyPatch(x1, y1, x2, y2) == true_label
    for (image_filename, x1, y1, x2, y2, true_label) in testSet
]

# Mean of the booleans, i.e. the fraction of test boxes classified correctly.
print(np.mean(results))