# SUMMARY 

## approx 0 - 8 lice annotations per fish crop

## sorted lice categories are
['ADULT_FEMALE', 'MOVING', 'SCOTTISH_ADULT_FEMALE', 'UNSURE']

In [None]:
#!pip install -r requirements.txt

In [None]:
from research.utils.data_access_utils import RDSAccessUtils, S3AccessUtils
import json
import os
import  pandas as pd
import matplotlib.patches as patches
from tqdm import tqdm

import matplotlib.pyplot as plt
# Raise pandas' display limits: show up to 500 rows and up to 500 characters per cell.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_colwidth', 500)
%matplotlib inline
from utils import utils, data_prep


In [None]:
import cv2
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
from random import randint, seed

In [None]:
import importlib
# Re-execute the local helper modules so that edits made to them on disk are
# picked up without restarting the kernel.
importlib.reload(utils)
importlib.reload(data_prep)

In [None]:
# Notebook-wide configuration.
SEED = 33          # seed passed to random.seed() before crops are generated
CROP_WIDTH = 512   # crop size in pixels
CROP_HEIGHT = 512

# LICE_BBOX_COLOR[i] is the bbox edge color used for LICE_CATEGORY[i];
# the position in LICE_CATEGORY is also the class index written to the label files.
LICE_BBOX_COLOR = ['b', 'r'] # bbox edge color
LICE_CATEGORY = ['ADULT_FEMALE', 'MOVING']

# Output directories handed to data_prep.write_labels / data_prep.write_image by
# the export cells. They must be defined, otherwise those cells raise NameError
# on a fresh kernel.
# NOTE(review): both export cells (all lice and MOVING only) write into these
# same directories - point them somewhere else before running the all-lice
# export if the two datasets have to stay separate.
LABEL_PATH = 'data/moving_lice_crop_labels'
IMAGE_PATH = 'data/moving_lice_crop_images'

# load annotation data

In [None]:
# Location of the JSON file with the data-warehouse SQL credentials.
# `docker-compose up` provides it through DATA_WAREHOUSE_SQL_CREDENTIALS;
# under a plain `docker run` the variable is unset and the path inside the
# container is used instead.
credentials_path = os.environ.get(
    "DATA_WAREHOUSE_SQL_CREDENTIALS",
    "/root/jane/cv_research/jane/deploy/data_warehouse_sql_credentials.json",
)
# Context manager so the credentials file is closed as soon as it is parsed.
with open(credentials_path) as credentials_file:
    credentials = json.load(credentials_file)

rds_access_utils = RDSAccessUtils(credentials)

# Left-crop annotations captured between 2020-01-01 and 2020-02-01 for three group ids.
get_annotation_data = """
    SELECT 
        group_id,
        left_crop_metadata,
        left_crop_url,
        annotation 
    FROM 
        prod.crop_annotation 
    WHERE 
        (captured_at BETWEEN '2020-01-01' AND '2020-02-01') AND 
        (group_id IN ('56', '65', '37')) AND
        (annotation_state_id IN (7)) AND
        (service_id = 1);
"""
annotation_data = rds_access_utils.extract_from_database(get_annotation_data)

In [None]:
# (number of rows, number of columns) returned by the query
annotation_data.shape

In [None]:
# Shape of the subset of fish crops whose `annotation` column is not null,
# i.e. how many fish have lice annotated.
has_annotation = annotation_data['annotation'].notna()
annotation_data.loc[has_annotation].shape

In [None]:
# Preview the first three rows of the query result
annotation_data.head(3)

In [None]:
# Raw content of the `annotation` column for the second row
annotation_data.iloc[1]['annotation']

# Histogram of lice count in each fish crop

In [None]:
# Number of lice annotations per fish crop; a missing or empty annotation counts as 0.
lice_count = np.array([
    len(sf['annotation']) if sf['annotation'] else 0
    for _, sf in tqdm(annotation_data.iterrows())
])

_ = plt.hist(lice_count, bins = 25)  # arguments are passed to np.histogram
plt.title("Histogram of lice count per fish image")
plt.xlabel('lice count')
plt.show()

# Summary statistics, with and without the fish that carry no lice
fish_with_lice = lice_count[lice_count > 0]
print("avg of lice/fish for fish with lice: {}".format(np.mean(fish_with_lice)))
print("avg of lice/fish for all fish: {}".format(np.mean(lice_count)))
print("max lice count per image: {}".format(np.max(lice_count)))

In [None]:
#np.mean(lice_count[(lice_count > 0) & (lice_count <= 10)])
# Mean lice count over the fish that have at least one lice
has_lice = lice_count > 0
lice_count[has_lice].mean()

# get lice df

In [None]:
# Flatten the per-fish annotation lists into one row per lice bounding box.
# Only the categories listed in LICE_CATEGORY are kept.
LICE_COLUMNS = [
    "category", "fish_image_url", "location",
    "left", "top", "width", "height",
    "fish_image_width", "fish_image_height",
]

# Rows are collected in a list and turned into a frame once: DataFrame.append
# copied the whole frame on every call and no longer exists in pandas 2.x.
lice_records = []
for _, sf in tqdm(annotation_data.iterrows()):
    if not sf['annotation']:
        continue
    for lice in sf['annotation']:
        if lice['category'] not in LICE_CATEGORY:
            continue
        position = lice['position']
        crop_metadata = sf['left_crop_metadata']
        lice_records.append({"category": lice['category'],
                             "fish_image_url": sf['left_crop_url'],
                             "location": lice['location'],
                             "left": position['left'],
                             "top": position['top'],
                             "width": position['width'],
                             "height": position['height'],
                             "fish_image_width": crop_metadata['width'],
                             "fish_image_height": crop_metadata['height'],
                            })

# Passing the column list keeps the schema stable even when no lice were found.
df = pd.DataFrame(lice_records, columns=LICE_COLUMNS)

In [None]:
# (number of lice bounding boxes, number of columns)
df.shape

In [None]:
# Preview the first three lice rows
df.head(3)

In [None]:
# Summary statistics of the bbox height, per lice category.
categories = df.groupby('category')
# Columns are selected with a list: a set is not a valid indexer in current pandas.
categories.describe()[["height"]]

 # bar chart of lice by category, location or both

In [None]:
# Count lice per (category, location) and pivot the categories into columns,
# so every location gets one horizontal bar per category.
lice_per_location = (
    df.groupby('category')['location']
      .value_counts()
      .unstack(level=0)
)
lice_per_location.plot.barh(legend=True,
                            color=LICE_BBOX_COLOR,
                            title="Bar Chart of Lice by Location")




In [None]:
# bbox dimension plot

In [None]:
# groups = df.groupby(['category', 'location'])

# for name, group in groups:
#     #class_index = LICE_CATEGORY.index(name[0])
#     #ec = LICE_BBOX_COLOR[class_index] 
    
#     plt.plot(group.left/group.fish_image_width,
#              group.top/group.fish_image_height, 
#              marker=".", linestyle="", label=name)
    
#     plt.legend()
#     plt.axvline(x=0.5)

#     plt.axis('square')
#     plt.xlabel('width')
#     plt.ylabel('height')
#     plt.title("heat map of {}".format(name))
#     plt.show()

In [None]:
# One panel per annotated body location. Every lice bbox is drawn in
# coordinates normalised by the width/height of its own fish crop, so boxes
# from differently sized crops can share the same unit square.
LOCATIONS = ["TOP", "MIDDLE", "BOTTOM"]

figure, axes = plt.subplots(nrows=1, ncols=len(LOCATIONS), figsize=(24, 10))
figure.suptitle('Rescaled Location of Lice', fontsize=26)

# Legend entries come from the same category/color tables used for the boxes.
legend_handles = [
    patches.Patch(facecolor='none', edgecolor=color, label=category)
    for category, color in zip(LICE_CATEGORY, LICE_BBOX_COLOR)
]
figure.legend(loc='upper left', bbox_to_anchor=(0.34,0.73), handles=legend_handles, fontsize='16')

# Titles and aspect ratio are per-panel settings; set them once, not per box.
for ax, location in zip(axes, LOCATIONS):
    ax.set(title=location, aspect='equal')

for i, lice in tqdm( df.iterrows()):
    #if i > 200: break
    ec = LICE_BBOX_COLOR[LICE_CATEGORY.index(lice.category)]

    w = lice.width / lice.fish_image_width
    h = lice.height / lice.fish_image_height
    x = lice.left / lice.fish_image_width
    # Image rows grow downwards while the plot's y axis grows upwards.
    # Rectangle is anchored at its lower-left corner, so the flipped anchor
    # is the *bottom* edge of the box (top + height), not its top edge.
    y = 1 - (lice.top + lice.height) / lice.fish_image_height
    rect = patches.Rectangle((x, y), w, h,linewidth=1,edgecolor=ec,facecolor='none', label = lice.category)

    axes[LOCATIONS.index(lice['location'])].add_patch(rect)

# Lay out the figure created in this cell; the rect leaves head room for the suptitle.
figure.tight_layout(rect=(0, 0, 1, 0.95))

plt.show()

# Dimension of bbox

In [None]:
# Scatter plot of bbox width vs. height, one color per lice category,
# followed by the per-category summary statistics of both dimensions.
import statistics

categories = df.groupby('category')

for name, group in categories:
    class_index = LICE_CATEGORY.index(name)
    ec = LICE_BBOX_COLOR[class_index]
    print("{}  median width: {} height: {}".format(name, statistics.median(group.width), statistics.median(group.height)))
    plt.plot(group.width, group.height, color =ec, marker=".", linestyle="", label=name)

plt.legend()
plt.axis('square')
plt.xlabel('width (pixel)')
plt.ylabel('height (pixel)')
plt.title("Dimension of Lice Bbox")
plt.show()

# A list keeps the column order fixed (width, then height); a set has no
# defined order and is not a valid indexer in current pandas.
categories.describe(percentiles=[.5])[['width', 'height']]

In [None]:

# Per-category summary of bbox width and height. The columns are selected with
# a list because a set has no defined order and is not a valid pandas indexer.
categories.describe(percentiles=[.5])[['width', 'height']]

In [None]:
# MOVING lice whose bbox is taller than 25 pixels and wider than 70 pixels
is_moving = df['category'] == "MOVING"
is_large = (df['height'] > 25) & (df['width'] > 70)
df.loc[is_moving & is_large]

In [None]:
#s3_access_utils = S3AccessUtils('/root/data')

import boto3
from urllib.parse import urlparse


# Read the AWS key pair from the deploy folder; the context manager closes the
# file as soon as it has been parsed.
with open('/root/jane/cv_research/jane/deploy/aws_credentials.json') as aws_credentials_file:
    aws_credentials = json.load(aws_credentials_file)

# S3 client shared by the download helpers defined below.
s3_client = boto3.client(
    's3',
    aws_access_key_id=aws_credentials["aws_access_key_id"],
    aws_secret_access_key=aws_credentials["aws_secret_access_key"],
    region_name="eu-west-1",
)

def recursive_mkdir(dirname):
    """Create `dirname` and any missing parent directories.

    Does nothing when the directory already exists, or when `dirname` is empty
    (which is what os.path.dirname returns for a bare file name).

    Args:
        dirname: directory path to create.

    Returns:
        None
    """
    if dirname:
        # exist_ok avoids the check-then-create race of a separate exists() test
        os.makedirs(dirname, exist_ok=True)
    return
    
def download_from_s3(bucket, key, custom_location=None):
    """Download an S3 object with the module-level `s3_client` and return its local path.

    Args:
        bucket: name of the S3 bucket.
        key: object key inside the bucket.
        custom_location: optional local file path. When given, the object is
            always downloaded to that path. When omitted, the object is stored
            under /root/data/s3/<bucket>/<key> and the download is skipped if
            that file already exists.

    Returns:
        The local path of the downloaded (or already present) file.
    """
    if custom_location:
        parent_dir = os.path.dirname(custom_location)
        if parent_dir:  # a bare file name has no directory to create
            recursive_mkdir(parent_dir)
        s3_client.download_file(bucket, key, custom_location)
        # Return the path on this branch too, so both branches behave alike
        return custom_location

    s3_base_dir = os.path.join('/root/data', 's3')
    f = os.path.join(s3_base_dir, bucket, key)
    if not os.path.exists(f):
        recursive_mkdir(os.path.dirname(f))
        s3_client.download_file(bucket, key, f)
    return f
    
def download_from_url(url):
    """Download the S3 object an URL points to and return where it was stored.

    Two URL layouts are recognised:
      * host starts with "s3": the first path segment is the bucket and the
        rest of the path is the key;
      * any other host: the first dot-separated label of the host is the
        bucket and the whole path is the key.

    Args:
        url: URL of the S3 object.

    Returns:
        Tuple (local_file_path, bucket, key).

    Raises:
        ValueError: if no bucket or no key can be extracted from `url`.
    """
    parsed_url = urlparse(url, allow_fragments=False)
    path = parsed_url.path.lstrip('/')
    if parsed_url.netloc.startswith('s3'):
        # S3 keys always use '/' as separator, so the path is split textually
        # instead of being rebuilt with the platform-dependent os.path.join.
        bucket, _, key = path.partition('/')
    else:
        bucket = parsed_url.netloc.split('.')[0]
        key = path
    if not bucket or not key:
        raise ValueError("cannot extract S3 bucket and key from url: {}".format(url))
    image_f = download_from_s3(bucket, key)
    return image_f, bucket, key

In [None]:
# low-res
#fig, ax = plt.subplots(figsize=(12, 12)) 
#image = plt.imread(image_f)
#img = Image.open(image_f)
#img = img.resize((200,200)).resize((image.shape[1], image.shape[0]))

In [None]:
# for confluence  documentation
# Grid of contrast-boosted fish crops that carry at least one lice annotated at
# location "BOTTOM"; only those BOTTOM boxes are drawn.
nrows = 30
ncols = 2
figure, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(20, nrows * 6))

alpha = 3 # Contrast control (1.0-3.0)
beta = 20 # Brightness control (0-100)

num_pic = 0  # number of grid cells filled so far
for idx, sf in tqdm(annotation_data.iterrows()):
    if num_pic >= nrows * ncols:
        break
    if not sf['annotation']:
        continue

    # Annotations can carry categories that are not in LICE_CATEGORY; those
    # have no color assigned and would make LICE_CATEGORY.index() raise, so
    # they are left out here.
    bottom_lice = [
        lice for lice in sf['annotation']
        if lice['location'] == "BOTTOM" and lice['category'] in LICE_CATEGORY
    ]
    if not bottom_lice:
        continue

    image_f, bucket, left_image_key = download_from_url(sf["left_crop_url"])

    # Load the pixels inside the context manager so the file handle is released.
    with Image.open(image_f) as image_file:
        img = np.asarray(image_file)
    adjusted = cv2.convertScaleAbs(img, alpha=alpha, beta=beta)

    ax = axes[num_pic // ncols, num_pic % ncols]
    ax.imshow(adjusted)

    for lice in bottom_lice:
        lp = lice['position']
        x, y, w, h = lp["left"], lp["top"], lp["width"], lp["height"]
        ec = LICE_BBOX_COLOR[LICE_CATEGORY.index(lice['category'])]
        rect = patches.Rectangle((x, y), w, h,linewidth=3,edgecolor=ec,facecolor='none')
        ax.add_patch(rect)

    num_pic += 1

figure.tight_layout()

# Pipeline

1. Crop region detector: Full fish image, low resolution

   -> Bbox of Crop that covers the fish part

    Network: Yolo

2. Detector on crop, high resolution

   -> Bbox of lice
   
   Network: SSD (Mask R-CNN is too slow)
   
3. Map to fish image

# prepare training output image of step1

1. bbox/RoI/Receptive field

2. fixed size (512 * 512) or different scales (128, 256, 300, etc.)

3. force to cover the fish by lice location

4. just 1 crop/ multiple

5. new generation of camera

# lice crop

## visualize random lice crop

In [None]:
# for confluence documentation
# Grid of CROP_WIDTH x CROP_HEIGHT crops that contain at least one large MOVING
# lice. Boxes wider than 70 pixels are outlined thicker and listed in the title.
seed(SEED)

nrows = 2
ncols = 2
max_pics = nrows * ncols
figure, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(20, nrows * 6))

alpha = 3 # Contrast control (1.0-3.0)
beta = 20 # Brightness control (0-100)


def _is_large_moving(lice):
    """Return True for a MOVING lice whose bbox is taller than 25 and wider than 70 pixels."""
    lp = lice['position']
    return lice['category'] == 'MOVING' and lp["height"] > 25 and lp["width"] > 70


num_pic = 0  # number of grid cells filled so far
for idx, sf in tqdm(annotation_data.iterrows()):
    # The grid only has nrows * ncols cells; indexing past it raises IndexError.
    if num_pic >= max_pics:
        break
    if not sf['annotation']:
        continue
    if not any(_is_large_moving(lice) for lice in sf['annotation']):
        continue
    print(sf['annotation'])

    image_f, bucket, left_image_key = download_from_url(sf["left_crop_url"])

    iw = sf['left_crop_metadata']['width']
    ih = sf['left_crop_metadata']['height']
    crops = data_prep.generate_crops_smart(sf['annotation'], [iw, ih], [CROP_WIDTH, CROP_HEIGHT])

    image = None  # decoded lazily, and only once per fish image
    for crop in crops:
        if num_pic >= max_pics:
            break
        if not any(_is_large_moving(lice) for lice in crops[crop]):
            continue

        if image is None:
            image = plt.imread(image_f)

        # crop lice image
        crop_left, crop_top = crop
        print("crop {} {}".format(crop_left, crop_top))
        cropped_image = image[crop_top:(crop_top + CROP_HEIGHT), crop_left:(crop_left + CROP_WIDTH)]

        # adjust image contrast and brightness
        adjusted = cv2.convertScaleAbs(cropped_image, alpha=alpha, beta=beta)

        ax = axes[num_pic // ncols, num_pic % ncols]
        ax.imshow(adjusted)

        title = "large MOVING: "
        for lice in crops[crop]:
            lp = lice['position']
            x, y, w, h = lp["left"], lp["top"], lp["width"], lp["height"]

            ec = LICE_BBOX_COLOR[LICE_CATEGORY.index(lice['category'])]
            # bbox position relative to the crop's top-left corner
            crop_left_offset = x - crop_left
            crop_top_offset = y - crop_top
            is_wide = w > 70
            if is_wide:
                title += "{}, {}".format(w, h)
            rect = patches.Rectangle((crop_left_offset, crop_top_offset), w, h,
                                     linewidth=3 if is_wide else 1,
                                     edgecolor=ec, facecolor='none')
            ax.add_patch(rect)

        ax.set_title(title)
        num_pic += 1

figure.tight_layout()


In [None]:
# Preview the crops produced by data_prep.generate_crops for the annotated fish
# whose index lies between 20 and 50, one figure per crop.
seed(SEED)
# NOTE(review): this rebinds the notebook-wide crop size. Every cell executed
# after this one (including the export cells, which read CROP_WIDTH and
# CROP_HEIGHT) works with 256 x 256 crops - confirm that this is intended.
CROP_WIDTH = 256
CROP_HEIGHT = 256

alpha = 3 # Contrast control (1.0-3.0)
beta = 20 # Brightness control (0-100)

for idx, sf in tqdm(annotation_data.iterrows()):
    if not sf['annotation'] or idx < 20:
        continue
    if idx > 50:
        break
    image_f, bucket, left_image_key = download_from_url(sf["left_crop_url"])

    iw = sf['left_crop_metadata']['width']
    ih = sf['left_crop_metadata']['height']
    print(sf['annotation'])
    crops = data_prep.generate_crops(sf['annotation'], [iw, ih], [CROP_WIDTH, CROP_HEIGHT])

    print("num of crops {}".format(len(crops)))
    if not crops:
        continue

    # Decode the fish image once; every crop below is a slice of it.
    image = plt.imread(image_f)

    for crop in crops:
        fig, ax = plt.subplots(figsize=(10, 10))

        # crop lice image
        crop_left, crop_top = crop
        print("crop {} {}".format(crop_left, crop_top))
        cropped_image = image[crop_top:(crop_top + CROP_HEIGHT), crop_left:(crop_left + CROP_WIDTH)]

        # adjust image contrast and brightness
        adjusted = cv2.convertScaleAbs(cropped_image, alpha=alpha, beta=beta)

        ax.imshow(adjusted)

        for lice in crops[crop]:
            lp = lice['position']
            x, y, w, h = lp["left"], lp["top"], lp["width"], lp["height"]

            ec = LICE_BBOX_COLOR[LICE_CATEGORY.index(lice['category'])]
            # bbox position relative to the crop's top-left corner
            crop_left_offset = x - crop_left
            crop_top_offset = y - crop_top
            rect = patches.Rectangle((crop_left_offset, crop_top_offset), w, h,linewidth=1,edgecolor=ec,facecolor='none')
            ax.add_patch(rect)

        plt.show()
        # Release the figure so a long run does not accumulate open figures.
        plt.close(fig)

## generate and write data for model

In [None]:
# Export the training data: for every annotated fish, cut the crops proposed by
# data_prep.generate_crops out of the image and write each crop to IMAGE_PATH
# together with a label file (one [class_index] + yolo bbox row per lice) in
# LABEL_PATH.
seed(SEED)
for idx, sf in tqdm(annotation_data.iterrows()):
    if not sf['annotation']:
        continue

    image_f, bucket, left_image_key = download_from_url(sf["left_crop_url"])

    iw = sf['left_crop_metadata']['width']
    ih = sf['left_crop_metadata']['height']

    # randomly generate crops around lice
    crops = data_prep.generate_crops(sf['annotation'], [iw, ih], [CROP_WIDTH, CROP_HEIGHT ])

    print("num of crops {}, out of {} lice".format(len(crops), len(sf['annotation'])))
    if not crops:
        continue

    # Decode the fish image once; every crop below is a slice of it.
    image = plt.imread(image_f)

    # get file name: the part of the url between "left_frame_crop_" and ".jpg"
    start, end = "left_frame_crop_", ".jpg"
    s = sf["left_crop_url"]
    base_name = "left_" + s[s.find(start)+ len(start):s.find(end)]

    for i, crop in enumerate(crops):
        # crop lice image
        crop_left, crop_top = crop
        cropped_image = image[crop_top:(crop_top + CROP_HEIGHT), crop_left:(crop_left + CROP_WIDTH)]

        file_name = base_name + "_CROP_{}_{}".format(i, len(crops))

        # save image
        data_prep.write_image(cropped_image, file_name, IMAGE_PATH)

        # save labels
        labels = []
        for lice in crops[crop]:
            lp = lice['position']
            x, y, w, h = lp["left"], lp["top"], lp["width"], lp["height"]
            class_index = LICE_CATEGORY.index(lice['category'])

            # bbox position relative to the crop's top-left corner
            crop_left_offset = x - crop_left
            crop_top_offset = y - crop_top

            xywh = [crop_left_offset, crop_top_offset, w, h]
            bbox = utils.xywh2yolobbox(xywh, [CROP_WIDTH, CROP_HEIGHT])

            labels.append([class_index] + bbox)

        data_prep.write_labels(labels, file_name, LABEL_PATH)


## ONLY MOVING LICE

In [None]:
# Export MOVING-only training data for the annotated fish with index <= 200:
# crops come from data_prep.generate_crops called with ['MOVING'], every crop
# is written to IMAGE_PATH and its labels (class index 0) to LABEL_PATH.
seed(SEED)
for idx, sf in tqdm(annotation_data.iterrows()):
    if not sf['annotation']:
        continue
    if idx > 200:
        break
    image_f, bucket, left_image_key = download_from_url(sf["left_crop_url"])

    iw = sf['left_crop_metadata']['width']
    ih = sf['left_crop_metadata']['height']

    # randomly generate crops around lice
    crops = data_prep.generate_crops(sf['annotation'], [iw, ih], [CROP_WIDTH, CROP_HEIGHT], ['MOVING'])

    print("num of crops {}, out of {} lice".format(len(crops), len(sf['annotation'])))
    if not crops:
        continue

    # Decode the fish image once; every crop below is a slice of it.
    image = plt.imread(image_f)

    # get file name: the part of the url between "left_frame_crop_" and ".jpg"
    start, end = "left_frame_crop_", ".jpg"
    s = sf["left_crop_url"]
    base_name = "left_" + s[s.find(start)+ len(start):s.find(end)]

    for i, crop in enumerate(crops):
        # crop lice image
        crop_left, crop_top = crop
        # print("crop {} {}".format(crop_left, crop_top))
        cropped_image = image[crop_top:(crop_top + CROP_HEIGHT), crop_left:(crop_left + CROP_WIDTH)]

        file_name = base_name + "_CROP_{}_{}".format(i, len(crops))

        # save image
        data_prep.write_image(cropped_image, file_name, IMAGE_PATH)

        # save labels
        labels = []
        for lice in crops[crop]:
            lp = lice['position']
            x, y, w, h = lp["left"], lp["top"], lp["width"], lp["height"]
            print(lice['category'])
            # NOTE(review): every lice in the crop is labelled as class 0,
            # whatever its category. Presumably generate_crops only returns
            # MOVING lice when called with ['MOVING'] - confirm against the
            # categories printed above.
            class_index = 0

            # bbox position relative to the crop's top-left corner
            crop_left_offset = x - crop_left
            crop_top_offset = y - crop_top

            xywh = [crop_left_offset, crop_top_offset, w, h]
            bbox = utils.xywh2yolobbox(xywh, [CROP_WIDTH, CROP_HEIGHT])

            labels.append([class_index] + bbox)

        data_prep.write_labels(labels, file_name, LABEL_PATH)
