In [None]:
import os
import sys
from sqlalchemy import Column, ForeignKey, Integer, String
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import relationship
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy import MetaData
from sqlalchemy import Table, select, func, and_, insert, delete, update, or_

In [None]:
import boto3

In [None]:
import json
# Load the database credentials from disk. The context manager closes the file
# handle as soon as the JSON has been parsed instead of leaving it to the GC.
with open("/root/thomas/sqlcredentials.json") as sql_credentials_file:
    sql_credentials = json.load(sql_credentials_file)

In [None]:
# Load the AWS key pair from disk; the context manager closes the file handle
# right after parsing instead of leaving it open.
with open("/root/thomas/aws_credentials.json") as aws_credentials_file:
    aws_credentials = json.load(aws_credentials_file)

# S3 client used by the download cells further down.
s3_client = boto3.client('s3', aws_access_key_id=aws_credentials["aws_access_key_id"],
                         aws_secret_access_key=aws_credentials["aws_secret_access_key"],
                         region_name="eu-west-1")

In [None]:
# Build the PostgreSQL connection URL from the loaded credentials.
# The user name and password are percent-encoded so that characters such as
# "@", ":", "/" or "%" inside them cannot be mistaken for URL delimiters.
# NOTE(review): this assumes the values in the JSON file are stored raw
# (not already percent-encoded) - confirm.
from urllib.parse import quote

sql_engine = create_engine(
    "postgresql://{user}:{password}@{host}:{port}/{database}".format(
        user=quote(str(sql_credentials["user"]), safe=""),
        password=quote(str(sql_credentials["password"]), safe=""),
        host=sql_credentials["host"],
        port=sql_credentials["port"],
        database=sql_credentials["database"],
    )
)

Query images

In [None]:
# metadata = MetaData()
# # step 1 - download crops + json
# fish_crops = Table('lati_fish_detections', metadata, autoload=True, autoload_with=sql_engine)
# lice_crops = Table('lati_fish_detections_lice_annotations', metadata, autoload=True,
#                    autoload_with=sql_engine)

# # inner join on fish crop id
# query = select([fish_crops.c.image_key, 
#                 lice_crops.c.is_blurry,
#                 lice_crops.c.is_too_dark,
#                 lice_crops.c.is_obstructed,
#                 lice_crops.c.is_bad_crop,
#                 ]) \
#     .select_from(lice_crops.join(fish_crops, lice_crops.c.lati_fish_detections_id == fish_crops.c.id)) \
#     .where(and_(fish_crops.c.site_id == 23,
#                 lice_crops.c.is_skipped == True,
#                 ))

In [None]:
# Reflect the two tables used below from the live database schema.
metadata = MetaData()
fish_crops = Table(
    'lati_fish_detections',
    metadata,
    autoload=True,
    autoload_with=sql_engine,
)
lice_crops = Table(
    'lati_fish_detections_lice_annotations',
    metadata,
    autoload=True,
    autoload_with=sql_engine,
)

# Inner join: every lice annotation row is paired with its fish crop via the crop id.
annotations_with_crops = lice_crops.join(
    fish_crops,
    lice_crops.c.lati_fish_detections_id == fish_crops.c.id,
)

# Select the image key plus the four quality flags, restricted to site 23 and
# to annotations that were marked as skipped.
query = (
    select([
        fish_crops.c.image_key,
        lice_crops.c.is_blurry,
        lice_crops.c.is_too_dark,
        lice_crops.c.is_obstructed,
        lice_crops.c.is_bad_crop,
    ])
    .select_from(annotations_with_crops)
    .where(
        and_(
            fish_crops.c.site_id == 23,
            # "== True" is intentional: it builds a SQL comparison, not a Python bool.
            lice_crops.c.is_skipped == True,
        )
    )
)

In [None]:
# Run the query and pull every row into memory immediately, so that
#  - the database connection is released as soon as the block exits, and
#  - `q` is a plain list that can be iterated more than once (a live result
#    cursor is exhausted after the first pass, which breaks cell re-runs).
# `connection` stays bound after the block, but it is closed.
with sql_engine.connect() as connection:
    q = connection.execute(query).fetchall()

In [None]:
def _row_to_record(row):
    """Convert one query row into a dict with named fields.

    The row is read positionally: index 0 is the image key and indices 1-4
    are the blurry / dark / obstructed / bad flags. The "year" entry is the
    text before the first "-" in the fourth "/"-separated segment of the key.
    """
    image_key = row[0]
    return {
        "key": image_key,
        "blurry": row[1],
        "dark": row[2],
        "obstructed": row[3],
        "bad": row[4],
        "year": image_key.split("/")[3].split("-")[0],
    }


# One record per row returned by the query.
results = list(map(_row_to_record, q))
print(len(results))

Crop dictionary stuff
(for each crop, write down the associated classes)

In [None]:
# import os
# key_results = {}
# for r in results:
#     key_results[os.path.basename(r["key"])] = [r["blurry"], r["dark"], r["obstructed"], r["bad"]]
    
# import json
# with open("/root/data/priority_queue/images/image_classes.json", "w") as f:
#     json.dump(key_results, f)

Frame dictionary stuff
(for each frame, write down whether any of its crops is blurry or dark)

In [None]:
# import os
# import json
# import numpy as np

# frame_classes = {}
# for r in results:
#     typ, farm, penid, date, crop_name = r["key"].split("/")
#     frame_name = "_".join(crop_name.split("_")[:4]) + ".jpg"
#     if frame_name not in frame_classes:
#         frame_classes[frame_name] = np.zeros((4))
#     if r["blurry"]:
#         frame_classes[frame_name][0] += 1
#     if r["dark"]:
#         frame_classes[frame_name][1] += 1
# #     if r["obstructed"]:
# #         frame_classes[frame_name][2] += 1
# #     if r["bad"]:
# #         frame_classes[frame_name][3] += 1
# no_consensus = 0
# for (k,v) in frame_classes.items():
#     if v[0] > 1 and v[0] != v[1]:
#         no_consensus += 1
        
# print(no_consensus / len(list(frame_classes.keys())))

# to_json = {}
# for (k,v) in frame_classes.items():
#     to_json[k] = [int(k) for k in (v[:2] > 0)]
    
# with open("/root/data/priority_queue/frames/image_classes.json", "w") as f:
#     json.dump(to_json, f)        

In [None]:
# Narrow the records down to those whose key carries the year "2019".
# Note: this rebinds `results`, so the unfiltered list is no longer available.
results = list(filter(lambda record: record["year"] == "2019", results))
print(len(results))

In [None]:
# Records whose "blurry" flag is truthy.
blurry = list(filter(lambda record: record["blurry"], results))
print(
    "Number of blurry images: {}".format(len(blurry))
)

In [None]:
# Records whose "dark" flag is truthy.
dark = list(filter(lambda record: record["dark"], results))
print(
    "Number of dark images: {}".format(len(dark))
)

In [None]:
# Records whose "bad" (bad crop) flag is truthy.
bad = list(filter(lambda record: record["bad"], results))
print(
    "Number of bad images: {}".format(len(bad))
)

In [None]:
# Records whose "obstructed" flag is truthy.
obstructed = list(filter(lambda record: record["obstructed"], results))
print(
    "Number of obstructed images: {}".format(len(obstructed))
)

In [None]:
# let's download a few k images

In [None]:
import numpy as np

Let's download a random sample of images for each class (5k to 10k draws per class, see the cells below).

In [None]:
from tqdm import tqdm

In [None]:
# Download switch: when `frame` is True the download cells rewrite each crop
# key into a frame key and use the bucket / local folder configured here.
# NOTE(review): nothing is assigned when `frame` is False, so `bucket` and
# `destination_folder` must then come from somewhere else - confirm.
frame = True
if frame:
    bucket, destination_folder = "aquabyte-images-raw-resized", "frames-resized"

In [None]:
# T = np.random.choice(blurry)
# typ, farm, penid, date, crop_name = T["key"].split("/")
# frame_name = "_".join(crop_name.split("_")[:4]) + ".jpg"
# key = "/".join([typ, farm, penid, date, frame_name])
# print(key)

In [None]:
# # download blurry image
# blurry_subset = np.random.choice(blurry, 10000)
# for img in tqdm(blurry_subset):
#     key = img["key"]
#     if frame:
#         typ, farm, penid, date, crop_name = key.split("/")
#         frame_name = "_".join(crop_name.split("_")[:4]) + ".jpg"
#         key = "/".join([typ, farm, penid, date, frame_name])
#     destination = os.path.join("/root/data/priority_queue/{}/blurry/".format(destination_folder), 
#                                os.path.basename(key))
#     try:
#         s3_client.download_file(bucket, key, destination)
#     except:
#         continue

In [None]:
# # download dark image
# dark_subset = np.random.choice(dark, 10000)
# for img in tqdm(dark_subset):
#     key = img["key"]
#     if frame:
#         typ, farm, penid, date, crop_name = key.split("/")
#         frame_name = "_".join(crop_name.split("_")[:4]) + ".jpg"
#         key = "/".join([typ, farm, penid, date, frame_name])
#     destination = os.path.join("/root/data/priority_queue/{}/dark/".format(destination_folder), 
#                                os.path.basename(key))
#     try:
#         s3_client.download_file(bucket, key, destination)
#     except:
#         continue

In [None]:
# download bad image
# np.random.choice samples with replacement by default, so the same record can
# be drawn several times, and several crops can map to the same frame key.
bad_subset = np.random.choice(bad, 10000)

# Make sure the target folder exists; otherwise every download would fail.
bad_dir = "/root/data/priority_queue/{}/bad/".format(destination_folder)
os.makedirs(bad_dir, exist_ok=True)

bad_fetched = set()   # keys that were downloaded successfully
bad_failed = {}       # key -> last error raised while downloading it
for img in tqdm(bad_subset):
    key = img["key"]
    if frame:
        # Rewrite the crop key into the key of the frame it was cut from:
        # keep the first four "_"-separated parts of the file name.
        typ, farm, penid, date, crop_name = key.split("/")
        frame_name = "_".join(crop_name.split("_")[:4]) + ".jpg"
        key = "/".join([typ, farm, penid, date, frame_name])
    if key in bad_fetched:
        # Already on disk from an earlier draw; no need to fetch it again.
        continue
    destination = os.path.join(bad_dir, os.path.basename(key))
    try:
        s3_client.download_file(bucket, key, destination)
    except Exception as err:
        # Best effort: remember the failure and keep going. Catching Exception
        # (not a bare except) lets Ctrl-C / kernel interrupt still stop the loop.
        bad_failed[key] = err
    else:
        bad_fetched.add(key)
        bad_failed.pop(key, None)

print("bad: downloaded {} distinct files, {} keys failed".format(len(bad_fetched), len(bad_failed)))

In [None]:
# download obstructed image
# np.random.choice samples with replacement by default, so the same record can
# be drawn several times, and several crops can map to the same frame key.
obstructed_subset = np.random.choice(obstructed, 5000)

# Make sure the target folder exists before downloading into it.
obstructed_dir = "/root/data/priority_queue/{}/obstructed/".format(destination_folder)
os.makedirs(obstructed_dir, exist_ok=True)

obstructed_fetched = set()   # keys that were downloaded successfully
obstructed_failed = {}       # key -> last error raised while downloading it
for img in tqdm(obstructed_subset):
    key = img["key"]
    if frame:
        # Same crop-key -> frame-key rewrite as the other download cells;
        # without it a crop key would be looked up in the frames bucket.
        typ, farm, penid, date, crop_name = key.split("/")
        frame_name = "_".join(crop_name.split("_")[:4]) + ".jpg"
        key = "/".join([typ, farm, penid, date, frame_name])
    if key in obstructed_fetched:
        # Already on disk from an earlier draw; no need to fetch it again.
        continue
    destination = os.path.join(obstructed_dir, os.path.basename(key))
    try:
        s3_client.download_file(bucket, key, destination)
    except Exception as err:
        # Best effort: one missing object must not abort the whole run.
        obstructed_failed[key] = err
    else:
        obstructed_fetched.add(key)
        obstructed_failed.pop(key, None)

print("obstructed: downloaded {} distinct files, {} keys failed".format(
    len(obstructed_fetched), len(obstructed_failed)))

In [None]:
from datetime import datetime

In [None]:
# Sanity check: read an epoch timestamp given in milliseconds as a naive UTC datetime.
epoch_ms = 1545914338476
datetime.utcfromtimestamp(epoch_ms / 1000.0)

In [None]:
from PIL import Image
from time import time
import cv2

In [None]:
# Time how long Pillow needs to read one frame from disk.
start = time()
image = Image.open("/root/data/priority_queue/frames/good/left_blom-kjeppevikholmen_2_1543143094624.jpg")
# Image.open is lazy: it identifies the file but does not read the pixel data.
# Force the read here so the measurement covers the full image load and can be
# compared with the cv2.imread timing.
image.load()
# image = image.resize((224, 224))
# image.save("/tmp/test.jpg")
end = time()
print(end-start)

In [None]:
# Time a cv2.imread call on one frame file and print the elapsed seconds.
frame_path = "/root/data/priority_queue/frames/good/left_blom-kjeppevikholmen_2_1543143094624.jpg"
start = time()
image = cv2.imread(frame_path)
# image = cv2.resize(image, (224, 224))
# cv2.imwrite("/tmp/test.jpg/", image)
end = time()
elapsed_seconds = end - start
print(elapsed_seconds)