In [None]:
from PIL import Image
import pandas as pd

from nircoloring.config import get_dataset_temp_image_file, DATASET_TEMP_IMAGES
from nircoloring.dataset.caltech import fetch_file_from_blob, load_metadata, load_filenames

In [None]:
filename = "5a2c8488-23d2-11e8-a6a3-ec086b02610b.jpg"
await fetch_file_from_blob(filename, DATASET_TEMP_IMAGES, in_place_transformation=None)
path = get_dataset_temp_image_file(filename)
img = Image.open(path)

print(img.height)
print(img.width)

img = img.crop((0, 30, img.width, img.height - 90))

crop_size = min(img.width, img.height)
print(crop_size)

left = int((img.width - crop_size) / 2)
top = int((img.height - crop_size) / 2)
right = int((img.width + crop_size) / 2)
bottom = int((img.height + crop_size) / 2)

img = img.crop((left, top, right, bottom))

img.resize((1024, 1024), Image.LANCZOS)

In [None]:
# Load the dataset metadata and list every image whose size is not 800x584.
metadata = load_metadata()

df = pd.DataFrame(metadata["images"])
df[(df["width"] != 800) | (df["height"] != 584)]

In [None]:
# Non-null value count of every column, grouped by `location`.
df.groupby("location").count()

In [None]:
# Non-null column counts per `seq_id`, largest `seq_num_frames` count first.
df.groupby("seq_id").count().sort_values("seq_num_frames", ascending=False)

In [None]:
# Restrict the image metadata to the files listed by load_filenames().
filenames = set(load_filenames())
df_ds = pd.DataFrame(
    [entry for entry in metadata["images"] if entry["file_name"].strip() in filenames]
)
df_ds

In [None]:
# Number of dataset images per `location`.
df_ds.groupby("location").size()

In [None]:
for file_name in df_ds[df_ds.location == "96"]["file_name"][1:6]:
    await fetch_file_from_blob(file_name, DATASET_TEMP_IMAGES)
    image = Image.open(get_dataset_temp_image_file(file_name))
    display(image)

In [None]:
# Category table from the metadata, indexed by category `id`.
categories = pd.DataFrame(metadata["categories"]).set_index("id")

In [None]:
# Annotation table. The merge below attaches the category name for
# inspection only: its result is displayed, `annotations` is not modified.
annotations = pd.DataFrame(metadata["annotations"])
annotations.merge(
    categories["name"],
    left_on="category_id",
    right_index=True,
    how="left",
)

In [None]:
# Flag every annotation whose category id is not 30 (presumably the id of
# the "empty" category -- TODO confirm against `categories`).
annotations["has_animal"] = annotations["category_id"].ne(30)
annotations

In [None]:
# One flag per image: True when at least one of its annotations is flagged.
animal_occurrences = annotations["has_animal"].groupby(annotations["image_id"]).any()
animal_occurrences

In [None]:
# Attach the per-image animal flag to the image table.
#
# The flag is assigned as a column instead of being merged in, so re-running
# this cell overwrites `has_animal` rather than producing `has_animal_x` /
# `has_animal_y` duplicates. Images without any annotation get False instead
# of NaN, which keeps the column boolean and usable as a mask.
animal_image_ids = animal_occurrences[animal_occurrences].index
df = df.assign(has_animal=df["id"].isin(animal_image_ids))
df

In [None]:
# Keep only the images that contain an animal.
#
# Comparing with True (instead of masking with the raw column) drops rows
# whose flag is missing (NaN); masking with a column that contains NaN
# would raise ValueError.
df = df[df["has_animal"].eq(True)]
df

In [None]:
# Row count for each `has_animal` value.
df.groupby(by="has_animal").size()

In [None]:
# Weight every image by the inverse of its location's image count, so that
# each location carries the same total weight in weighted sampling.
location_occurrences = df.groupby("location").size()
weights = (1 / location_occurrences).rename("weight")
# Drop a `weight` column left over from a previous run of this cell. Without
# this, a re-run would produce `weight_x` / `weight_y`, and any later lookup
# of the "weight" column would fail.
df = df.drop(columns="weight", errors="ignore").merge(weights, how="left", on="location")
df.head()

In [None]:
# Weighted sampling with replacement, then a histogram of how many samples
# each location received. The fixed seed makes the figure reproducible.
(
    df.sample(50000, weights="weight", replace=True, random_state=42)
    .groupby("location")
    .size()
    .plot.hist(bins=15, title="Samples per location (weighted, with replacement)")
)

In [None]:
# Weighted sampling without replacement, then a histogram of how many
# samples each location received. The sample size is capped at the number of
# rows, because sampling more rows than exist without replacement raises
# ValueError. The fixed seed makes the figure reproducible.
(
    df.sample(min(50000, len(df)), weights="weight", replace=False, random_state=42)
    .groupby("location")
    .size()
    .plot.hist(bins=15, title="Samples per location (weighted, without replacement)")
)

In [None]:
# Number of remaining images per `location`.
df.groupby(by="location").size()

In [None]:
await fetch_file_from_blob("598f7588-23d2-11e8-a6a3-ec086b02610b.jpg", DATASET_TEMP_IMAGES)
image = Image.open(get_dataset_temp_image_file("598f7588-23d2-11e8-a6a3-ec086b02610b.jpg"))
display(image)