In [None]:
from research.utils.data_access_utils import S3AccessUtils, RDSAccessUtils
import json
import os
import  pandas as pd
import matplotlib.patches as patches
from tqdm import tqdm
from utils import utils, data_prep

import matplotlib.pyplot as plt
# Let long frames and wide JSON-like columns render in full in cell output.
for display_option in ('display.max_rows', 'display.max_colwidth'):
    pd.set_option(display_option, 500)

In [None]:
%matplotlib inline

from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
from random import randint, seed

In [None]:
# Seed for Python's `random` module, so the sampled crop offsets are repeatable.
SEED = 33

# Size of the square window cut out of each image.
CROP_WIDTH = CROP_HEIGHT = 512

# Output folders: label files and cropped images are written under matching names.
LABEL_PATH = 'data/lice_crop_labels'
IMAGE_PATH = 'data/lice_crop_images'

In [None]:
# Data-warehouse credentials. The location can be overridden through the
# DATA_WAREHOUSE_SQL_CREDENTIALS environment variable; the default is the path
# this notebook has always used.
credentials_path = os.environ.get(
    "DATA_WAREHOUSE_SQL_CREDENTIALS",
    "/root/jane/cv_research/jane/deploy/data_warehouse_sql_credentials.json",
)
# Use a context manager so the file handle is closed (the previous
# `json.load(open(...))` left it open until garbage collection).
with open(credentials_path) as credentials_file:
    credentials = json.load(credentials_file)
rds_access_utils = RDSAccessUtils(credentials)

# Crop annotations captured between 2019-10-01 and 2019-12-01 for group 56,
# restricted to annotation_state_id 7 and service_id 1.
get_annotation_data = """
    SELECT 
        * 
    FROM 
        prod.crop_annotation 
    WHERE 
        (captured_at BETWEEN '2019-10-01' AND '2019-12-01') AND 
        (group_id = '56') AND
        (annotation_state_id IN (7)) AND
        (service_id = 1);
"""
annotation_data = rds_access_utils.extract_from_database(get_annotation_data)

# The crop-export cells further down call `s3_access_utils.download_from_url`,
# but no cell ever created the object, so a fresh-kernel run died with a
# NameError. Create it here, next to the other data-access client.
# NOTE(review): the constructor argument is assumed to be the local directory
# downloads are stored in - TODO confirm against research.utils.data_access_utils.
s3_access_utils = S3AccessUtils('/root/data')

In [None]:
# (rows, columns) of the frame returned by the warehouse query.
annotation_data.shape

In [None]:
# Size of the subset whose `annotation` column is populated (has lice).
has_annotation = annotation_data['annotation'].notnull()
annotation_data[has_annotation].shape

In [None]:
# Peek at the first two rows that carry an annotation.
annotation_data.dropna(subset=['annotation']).head(2)

In [None]:
def add_box(sf, ax):
    """Outline every ADULT_FEMALE annotation of one row on a matplotlib axes.

    Args:
        sf: mapping-like row whose 'annotation' entry is either None or an
            iterable of dicts with 'category' and 'position' keys, where
            'position' holds 'left', 'top', 'width' and 'height'.
        ax: axes object that receives one blue, unfilled rectangle per
            matching annotation via `ax.add_patch`.

    Returns:
        None. Rows without annotations, and annotations of any other
        category, draw nothing.
    """
    annotations = sf['annotation']
    if annotations is None:
        return

    for louse in annotations:
        position = louse['position']
        if louse['category'] != 'ADULT_FEMALE':
            continue
        left, top = position["left"], position["top"]
        box_w, box_h = position["width"], position["height"]
        outline = patches.Rectangle(
            (left, top), box_w, box_h,
            linewidth=1, edgecolor='b', facecolor='none',
        )
        ax.add_patch(outline)

In [None]:
# Derive the working frame with the project helper `data_prep.get_df_ad`.
# NOTE(review): presumably this keeps only rows with adult-female lice
# annotations - confirm in utils/data_prep.py; later cells index
# `annotation[0]` on every row and rely on it being non-empty.
df_ad = data_prep.get_df_ad(annotation_data)

In [None]:
# (rows, columns) of the frame returned by `data_prep.get_df_ad`.
df_ad.shape

In [None]:
# Preview the first ten rows of the working frame.
df_ad.head(10)

In [None]:
# Image dimensions, one entry per row, read from the left crop's metadata.
left_crop_metadata = df_ad['left_crop_metadata']
im_width_list = [meta['width'] for meta in left_crop_metadata]
im_height_list = [meta['height'] for meta in left_crop_metadata]

# Flatten the bounding boxes of every annotation on every row, then split them
# into one list per coordinate. A row with several annotations contributes
# several entries.
lice_positions = [
    louse['position']
    for row_annotations in df_ad['annotation']
    for louse in row_annotations
]
lice_width_list = [box['width'] for box in lice_positions]
lice_height_list = [box['height'] for box in lice_positions]

lice_top_list = [box['top'] for box in lice_positions]
lice_left_list = [box['left'] for box in lice_positions]

In [None]:


# One histogram per measurement on a 3x2 grid, filled row by row:
#   row 0: image width / image height
#   row 1: lice width  / lice height
#   row 2: lice left   / lice top
# The grid used to be created with nrows=2 while row index 2 was drawn on,
# which raised an IndexError, and the 'lice left' / 'lice top' titles sat on
# the opposite histograms. Pairing each title with its data keeps them in sync.
hist_panels = [
    ('image width', im_width_list),
    ('image height', im_height_list),
    ('lice width', lice_width_list),
    ('lice height', lice_height_list),
    ('lice left', lice_left_list),
    ('lice top', lice_top_list),
]

figure, axes = plt.subplots(nrows=3, ncols=2, figsize=(20, 20))

for panel_ax, (panel_title, panel_values) in zip(axes.flat, hist_panels):
    panel_ax.hist(panel_values, bins=10)
    panel_ax.set_title(panel_title)

figure.tight_layout()

In [None]:
import statistics

# Medians, printed in this order: image width, image height, lice box width,
# lice box height.
for values in (im_width_list, im_height_list, lice_width_list, lice_height_list):
    print(statistics.median(values))

# Widest annotated box.
print(max(lice_width_list))

In [None]:

figure, axes = plt.subplots(nrows=3, ncols=2, figsize=(20, 20))
for idx, sf_haslice in tqdm(df_ad.iterrows()):

    left_image_f, bucket, left_image_key = s3_access_utils.download_from_url(sf_haslice["left_crop_url"])
    image_f = left_image_f 

    #fig, ax = plt.subplots(figsize=(10, 10)) 
    image = plt.imread(image_f)
    
    image_w = sf_haslice['left_crop_metadata']['width']
    image_h = sf_haslice['left_crop_metadata']['height']
    
    lp = sf_haslice['annotation'][0]['position'] # only take the first female adult
    x, y, w, h = lp["left"], lp["top"], lp["width"], lp["height"]
    
    crop_left_offset = randint(max(0, x + CROP_WIDTH - image_w), min(x, CROP_WIDTH - w))
    crop_top_offset = randint(max(0, y + CROP_HEIGHT- image_h), min(y, CROP_HEIGHT - h))
    
    crop_left = x - crop_left_offset
    crop_top = y - crop_top_offset
    

    
    cropped_image = image[crop_top:(crop_top + CROP_HEIGHT), crop_left:(crop_left + CROP_WIDTH)]
    #ax.imshow( cropped_image)
    
    start, end = "left_frame_crop_", ".jpg"
    s = sf_haslice["left_crop_url"]
    file_name = "left_" + s[s.find(start)+ len(start):s.find(end)]
    
    utils.write_image(cropped_image, file_name, IMAGE_PATH)
    
    #rect = patches.Rectangle((crop_left_offset, crop_top_offset), w, h,linewidth=1,edgecolor="b",facecolor='none')
    
    
    
    xywh = [crop_left_offset, crop_top_offset, w, h]
    bbox = utils.xywh2yolobbox(xywh, [CROP_WIDTH, CROP_HEIGHT])

            
    utils.write_label(0, bbox, file_name, LABEL_PATH)
    
    
    #plt.scatter(bbox[0] * CROP_WIDTH, bbox[1] * CROP_HEIGHT, s=50)
    #ax.add_patch(rect)
plt.show()

In [None]:
seed(SEED)
for idx, sf_haslice in tqdm(df_ad.iterrows()):

    left_image_f, bucket, left_image_key = s3_access_utils.download_from_url(sf_haslice["left_crop_url"])
    image_f = left_image_f 

    #fig, ax = plt.subplots(figsize=(10, 10)) 
    image = plt.imread(image_f)
    
    image_w = sf_haslice['left_crop_metadata']['width']
    image_h = sf_haslice['left_crop_metadata']['height']
    
    lp = sf_haslice['annotation'][0]['position'] # only take the first female adult
    x, y, w, h = lp["left"], lp["top"], lp["width"], lp["height"]
    
    crop_left_offset = randint(max(0, x + CROP_WIDTH - image_w), min(x, CROP_WIDTH - w))
    crop_top_offset = randint(max(0, y + CROP_HEIGHT- image_h), min(y, CROP_HEIGHT - h))
    
    crop_left = x - crop_left_offset
    crop_top = y - crop_top_offset
    

    
    cropped_image = image[crop_top:(crop_top + CROP_HEIGHT), crop_left:(crop_left + CROP_WIDTH)]
    #ax.imshow( cropped_image)
    
    start, end = "left_frame_crop_", ".jpg"
    s = sf_haslice["left_crop_url"]
    file_name = "left_" + s[s.find(start)+ len(start):s.find(end)]
    
    utils.write_image(cropped_image, file_name, IMAGE_PATH)
    
    #rect = patches.Rectangle((crop_left_offset, crop_top_offset), w, h,linewidth=1,edgecolor="b",facecolor='none')
    
    
    
    xywh = [crop_left_offset, crop_top_offset, w, h]
    bbox = utils.xywh2yolobbox(xywh, [CROP_WIDTH, CROP_HEIGHT])

            
    utils.write_label(0, bbox, file_name, LABEL_PATH)
    
    
    #plt.scatter(bbox[0] * CROP_WIDTH, bbox[1] * CROP_HEIGHT, s=50)
    #ax.add_patch(rect)
plt.show()

In [None]:
# Development convenience: re-import `data_prep` after editing it on disk,
# without restarting the kernel. No cell visible after this one uses the
# module, so this has no effect on a top-to-bottom run.
import importlib
importlib.reload(data_prep)
