<a href="https://colab.research.google.com/github/artem-istranin/gdensity/blob/master/fathomnet_2023_first_cnn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [18]:
import numpy as np
import pandas as pd
import json
from PIL import Image
import requests
from io import BytesIO
from glob import glob
import os

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer

import tensorflow as tf

In [19]:
import sys

# Pick the dataset root for the current runtime: the mounted Google Drive
# folder when the google.colab module is loaded, the /kaggle/input path otherwise.
if 'google.colab' not in sys.modules:
    DATA_DIR = '/kaggle/input/fathomnet-out-of-sample-detection/'
else:
    from google.colab import drive

    # The competition files are read from Drive, so it has to be mounted first.
    drive.mount('/content/drive')
    DATA_DIR = '/content/drive/MyDrive/FathomNet23/fathomnet-out-of-sample-detection/'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Training dataset

In [20]:
# Load the category lookup table and give each distinct 'supercat' value an
# integer code taken from its pandas categorical encoding.
category_key = pd.read_csv(os.path.join(DATA_DIR, 'category_key.csv'))
category_key['supercat_label'] = category_key['supercat'].astype('category').cat.codes

# Map category id -> supercategory code for quick lookups while parsing labels.
supercat_label_per_category_id = dict(
    zip(category_key['id'].tolist(), category_key['supercat_label'].tolist())
)

In [22]:
# Image-level labels: the 'categories' column holds a JSON-encoded list per row.
train_csv = pd.read_csv(os.path.join(DATA_DIR, 'multilabel_classification', 'train.csv'))

# Decode every JSON string and coerce the ids to int so they can key the
# category -> supercategory lookup.
train_csv['categories'] = train_csv['categories'].apply(
    lambda raw: [int(category_id) for category_id in json.loads(raw)])

# Reduce each row's categories to its distinct supercategory codes.
train_csv['supercats'] = train_csv['categories'].apply(
    lambda category_ids: list({supercat_label_per_category_id[category_id]
                               for category_id in category_ids}))
train_csv.head()

Unnamed: 0,id,categories,supercats
0,3b6f01ae-5bde-434d-9b06-79b269421ed6,[1],[0]
1,dce21f7c-20e5-482b-bd90-c038f8464c03,[1],[0]
2,4a7f2199-772d-486d-b8e2-b651246316b5,[1],[0]
3,3bddedf6-4ff8-4e81-876a-564d2b03b364,"[1, 9, 11, 88]","[0, 13, 6]"
4,3f735021-f5de-4168-b139-74bf2859d12a,"[1, 37, 51, 119]","[0, 13, 6]"


In [24]:
# Per-image metadata is stored under the 'images' key of the detection JSON.
with open(os.path.join(DATA_DIR, 'object_detection', 'train.json')) as f:
    train_json = json.load(f)

# Flatten the image records, swap the original 'id' for the part of the file
# name before '.png', and drop the columns that are not used afterwards.
train_json_images = (
    pd.json_normalize(train_json['images'])
    .drop(columns=['license', 'flickr_url', 'id'])
    .assign(id=lambda images: images['file_name'].apply(lambda name: name.split('.png')[0]))
    .drop(columns=['file_name'])
)

In [25]:
# Left join on the shared 'id' column: every image from the JSON metadata is
# kept and the label columns from train_csv are attached where an id matches.
train_data = train_json_images.merge(train_csv, on='id', how='left')
train_data.head()

Unnamed: 0,width,height,coco_url,date_captured,id,categories,supercats
0,720,368,https://fathomnet.org/static/m3/framegrabs/Tib...,2007-08-17 17:50:34,3b6f01ae-5bde-434d-9b06-79b269421ed6,[1],[0]
1,720,368,https://fathomnet.org/static/m3/framegrabs/Tib...,2007-08-17 17:50:38,dce21f7c-20e5-482b-bd90-c038f8464c03,[1],[0]
2,720,368,https://fathomnet.org/static/m3/framegrabs/Tib...,2007-08-17 17:50:44,4a7f2199-772d-486d-b8e2-b651246316b5,[1],[0]
3,720,368,https://fathomnet.org/static/m3/framegrabs/Ven...,2007-08-28 17:54:14,3bddedf6-4ff8-4e81-876a-564d2b03b364,"[1, 9, 11, 88]","[0, 13, 6]"
4,720,369,https://fathomnet.org/static/m3/framegrabs/Ven...,2007-08-28 19:00:00,3f735021-f5de-4168-b139-74bf2859d12a,"[1, 37, 51, 119]","[0, 13, 6]"


# Evaluation data

In [27]:
# Per-image metadata for the evaluation split, read from the detection JSON.
with open(os.path.join(DATA_DIR, 'object_detection', 'eval.json')) as f:
    eval_json = json.load(f)

# Same shaping as the training metadata: the new 'id' is the part of the file
# name before '.png'; unused columns are removed.
eval_data = (
    pd.json_normalize(eval_json['images'])
    .drop(columns=['license', 'flickr_url', 'id'])
    .assign(id=lambda images: images['file_name'].apply(lambda name: name.split('.png')[0]))
    .drop(columns=['file_name'])
)
eval_data.head()

Unnamed: 0,width,height,coco_url,date_captured,id
0,1920,1079,https://fathomnet.org/static/m3/framegrabs/Min...,2019-09-24 12:11:05,4172b65a-8b4f-4175-94e4-00187e2d5f33
1,713,425,https://fathomnet.org/static/m3/framegrabs/Ven...,2007-08-15 18:44:36,9d263fd4-f786-42bf-8e0c-2d5be7c81b49
2,720,368,https://fathomnet.org/static/m3/framegrabs/Tib...,2007-09-08 14:41:18,c114ccb9-cf89-4d98-807b-f8636477c6ca
3,718,365,https://fathomnet.org/static/m3/framegrabs/Ven...,2007-11-07 18:52:01,95ba3acc-859b-4a99-8c30-b38a91a1be28
4,720,368,https://fathomnet.org/static/m3/framegrabs/Ven...,2007-12-06 20:00:00,0c4bdb90-c715-4c01-81d1-aa7f49534ac7


# Preprocessing

In [28]:
# Collect every supercategory code that occurs in the merged training metadata.
all_training_categories = set(np.concatenate(train_data.supercats))
# Hold out 20% of the supercategory codes; preprocess() sets osd=1.0 for images
# that contain a code missing from train_supercats.
train_supercats, val_supercats = train_test_split(list(all_training_categories), test_size=0.2, random_state=12441)
# Split the images 80/20 with a separate seed. This rebinds train_data to the
# training part, so re-running the cell splits the already reduced frame again.
train_data, val_data = train_test_split(train_data, test_size=0.2, random_state=93465)

In [29]:
def preprocess(dataset: pd.DataFrame, known_supercats=None, supercat_by_category=None):
    """Restrict labels to the known supercategories and add the ``osd`` target.

    For every row the ``supercats`` list is reduced to the codes contained in
    ``known_supercats`` and the ``categories`` list is reduced to the ids whose
    supercategory (looked up in ``supercat_by_category``) is known. ``osd`` is
    ``1.0`` when at least one supercategory of the row had to be removed and
    ``0.0`` otherwise.

    The columns are built in one pass and assigned as whole columns instead of
    writing cell by cell while iterating the frame, so the function also works
    on frames with a non-unique index.

    Args:
        dataset: Frame with list-valued ``supercats`` and ``categories``
            columns. It is modified in place.
        known_supercats: Iterable of supercategory codes to keep. Defaults to
            the module-level ``train_supercats``.
        supercat_by_category: Mapping from category id to supercategory code.
            Defaults to the module-level ``supercat_label_per_category_id``.

    Returns:
        The same ``dataset`` object, with ``supercats`` and ``categories``
        filtered and a float ``osd`` column added (or overwritten).

    Raises:
        KeyError: If a category id is missing from ``supercat_by_category``.
    """
    if known_supercats is None:
        known_supercats = train_supercats
    if supercat_by_category is None:
        supercat_by_category = supercat_label_per_category_id
    # A set gives constant-time membership checks inside the per-row filters.
    known = set(known_supercats)

    osd_labels = []
    kept_supercats = []
    kept_categories = []
    for supercats, categories in zip(dataset['supercats'], dataset['categories']):
        retained = [supercat for supercat in supercats if supercat in known]
        # Anything filtered out means the image shows an unknown supercategory.
        osd_labels.append(0.0 if len(retained) == len(supercats) else 1.0)
        kept_supercats.append(retained)
        kept_categories.append(
            [category for category in categories if supercat_by_category[category] in known])

    # Explicit Series keep each list as a single cell and pin the osd dtype even
    # for an empty frame.
    dataset['osd'] = pd.Series(osd_labels, index=dataset.index, dtype='float64')
    dataset['supercats'] = pd.Series(kept_supercats, index=dataset.index, dtype=object)
    dataset['categories'] = pd.Series(kept_categories, index=dataset.index, dtype=object)

    return dataset

In [30]:
# Filter the training labels down to train_supercats and add the osd column.
# preprocess() writes into the frame it receives and returns that same frame.
train_data = preprocess(train_data)
train_data.head()

Unnamed: 0,width,height,coco_url,date_captured,id,categories,supercats,osd
2764,716,486,https://fathomnet.org/static/m3/framegrabs/Ven...,2008-04-01 16:23:39,bd5f4456-478d-4e7e-a89b-0016ba3d6276,[103],[13],0.0
1295,1920,1080,https://fathomnet.org/static/m3/framegrabs/Ven...,2019-11-22 19:05:11,c84b6444-325a-423a-849f-c3cc6c771815,[],[],1.0
5178,720,366,https://fathomnet.org/static/m3/framegrabs/Doc...,2013-12-12 00:47:14,59a78356-616f-4432-a75b-8a6a6a8891d6,[160],[18],0.0
4667,714,368,https://fathomnet.org/static/m3/framegrabs/Doc...,2011-09-23 23:07:35,c29b61d1-137d-4f45-9ecf-50db2efc9247,[160],[18],0.0
4892,714,368,https://fathomnet.org/static/m3/framegrabs/Doc...,2011-10-22 01:05:41,2b0b96cb-b41a-45e8-8ee7-bc3b1836ceb6,[160],[18],0.0


In [31]:
# Apply the same filtering to the validation images; rows containing a
# supercategory outside train_supercats get osd = 1.0.
val_data = preprocess(val_data)
val_data.head()

Unnamed: 0,width,height,coco_url,date_captured,id,categories,supercats,osd
821,720,368,https://fathomnet.org/static/m3/framegrabs/Ven...,2009-10-09 18:18:03,444a07c4-a33d-4e8a-a7c8-41f96fbf25ad,[160],[18],1.0
409,1920,1080,https://fathomnet.org/static/m3/framegrabs/Ven...,2014-09-18 16:05:26,131c66a4-43cf-4d9a-830b-2b3d2b414396,[10],[0],0.0
477,1920,1080,https://fathomnet.org/static/m3/framegrabs/Ven...,2017-01-23 18:43:00,dbae89e4-f9c3-4c21-908e-daceb5dcf650,[10],[0],0.0
1908,1920,1080,https://fathomnet.org/static/m3/framegrabs/Ven...,2011-10-03 20:27:27,3cec5e67-9c18-46f2-88e9-240d2b522e3d,[],[],1.0
1390,712,368,https://fathomnet.org/static/m3/framegrabs/Ven...,2010-11-22 19:43:32,93478933-2106-4ab0-b68a-1858c5f69b06,[],[],1.0


In [32]:
# Both binarizers are fitted on the training split's filtered label lists.
categories_encoder = MultiLabelBinarizer()
categories_encoder.fit(train_data.categories)

supercats_encoder = MultiLabelBinarizer()
supercats_encoder.fit(train_data.supercats)


def encode_sample_categories(x):
    """Return the multi-hot vector for one sample's list of category ids."""
    return categories_encoder.transform([x])[0]


def encode_sample_supercats(x):
    """Return the multi-hot vector for one sample's list of supercategory codes."""
    return supercats_encoder.transform([x])[0]

In [33]:
# Replace the label lists of the training split with their multi-hot vectors.
train_data['categories'] = train_data['categories'].apply(encode_sample_categories)
train_data['supercats'] = train_data['supercats'].apply(encode_sample_supercats)
train_data.head()

Unnamed: 0,width,height,coco_url,date_captured,id,categories,supercats,osd
2764,716,486,https://fathomnet.org/static/m3/framegrabs/Ven...,2008-04-01 16:23:39,bd5f4456-478d-4e7e-a89b-0016ba3d6276,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0]",0.0
1295,1920,1080,https://fathomnet.org/static/m3/framegrabs/Ven...,2019-11-22 19:05:11,c84b6444-325a-423a-849f-c3cc6c771815,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",1.0
5178,720,366,https://fathomnet.org/static/m3/framegrabs/Doc...,2013-12-12 00:47:14,59a78356-616f-4432-a75b-8a6a6a8891d6,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]",0.0
4667,714,368,https://fathomnet.org/static/m3/framegrabs/Doc...,2011-09-23 23:07:35,c29b61d1-137d-4f45-9ecf-50db2efc9247,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]",0.0
4892,714,368,https://fathomnet.org/static/m3/framegrabs/Doc...,2011-10-22 01:05:41,2b0b96cb-b41a-45e8-8ee7-bc3b1836ceb6,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]",0.0


In [34]:
# Encode the validation labels with the encoders fitted on the training split.
val_data['categories'] = val_data['categories'].apply(encode_sample_categories)
val_data['supercats'] = val_data['supercats'].apply(encode_sample_supercats)
val_data.head()



Unnamed: 0,width,height,coco_url,date_captured,id,categories,supercats,osd
821,720,368,https://fathomnet.org/static/m3/framegrabs/Ven...,2009-10-09 18:18:03,444a07c4-a33d-4e8a-a7c8-41f96fbf25ad,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]",1.0
409,1920,1080,https://fathomnet.org/static/m3/framegrabs/Ven...,2014-09-18 16:05:26,131c66a4-43cf-4d9a-830b-2b3d2b414396,"[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0.0
477,1920,1080,https://fathomnet.org/static/m3/framegrabs/Ven...,2017-01-23 18:43:00,dbae89e4-f9c3-4c21-908e-daceb5dcf650,"[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0.0
1908,1920,1080,https://fathomnet.org/static/m3/framegrabs/Ven...,2011-10-03 20:27:27,3cec5e67-9c18-46f2-88e9-240d2b522e3d,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",1.0
1390,712,368,https://fathomnet.org/static/m3/framegrabs/Ven...,2010-11-22 19:43:32,93478933-2106-4ab0-b68a-1858c5f69b06,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",1.0


# Make TFRecord files

* https://www.tensorflow.org/api_docs/python/tf/data/TFRecordDataset
* https://www.tensorflow.org/api_docs/python/tf/data/Dataset#interleave

In [35]:
target_image_size = [224, 224]

# Label shapes are captured from the first sample and reused by the parsing
# schema when the records are read back.
CATEGORIES_SHAPE = None
SUPERCATS_SHAPE = None
OSD_SHAPE = None

num_samples_per_file = 10
# Cap on the number of shards written, for quick iteration (replaces the old
# hard-coded debug break). Set to None to export the whole training split.
max_tfrecord_files = 2
# Seconds to wait for an image download before giving up.
request_timeout_seconds = 30


def _download_image(image_url):
    """Fetch one image and return it as a uint8 RGB array of ``target_image_size``.

    Raises ``requests.HTTPError`` for non-2xx responses so an error page is
    never handed to the image decoder.
    """
    response = requests.get(image_url, timeout=request_timeout_seconds)
    response.raise_for_status()
    with Image.open(BytesIO(response.content)) as image:
        # Force three channels so every stored PNG has the same layout.
        return np.array(image.convert('RGB').resize(target_image_size), dtype='uint8')


def _serialize_sample(sample):
    """Serialize one sample dict into ``tf.train.Example`` bytes."""
    return tf.train.Example(features=tf.train.Features(feature=dict(
        image=tf.train.Feature(bytes_list=tf.train.BytesList(value=[sample['image']])),
        categories=tf.train.Feature(float_list=tf.train.FloatList(value=sample['categories'])),
        supercats=tf.train.Feature(float_list=tf.train.FloatList(value=sample['supercats'])),
        osd=tf.train.Feature(float_list=tf.train.FloatList(value=sample['osd'])),
    ))).SerializeToString()


def _write_tfrecord_shard(samples, shard_index):
    """Write ``samples`` to ``train_<shard_index>.tfrecord`` in the working directory."""
    tfrecord_outpath = f'train_{shard_index:03d}.tfrecord'
    with tf.io.TFRecordWriter(tfrecord_outpath) as tfrecord_writer:
        for sample in samples:
            tfrecord_writer.write(_serialize_sample(sample))


data_buffer = []
num_tfrecords_saved = 0
for index, row in train_data.iterrows():
    # Stop before downloading anything else once the shard cap is reached.
    if max_tfrecord_files is not None and num_tfrecords_saved >= max_tfrecord_files:
        break

    sample = dict(
        image=tf.image.encode_png(_download_image(row['coco_url'])).numpy(),
        categories=np.array(row['categories'], dtype=np.float32),
        supercats=np.array(row['supercats'], dtype=np.float32),
        osd=np.array([row['osd']], dtype=np.float32),
    )
    data_buffer.append(sample)

    if CATEGORIES_SHAPE is None:
        CATEGORIES_SHAPE = sample['categories'].shape
    if SUPERCATS_SHAPE is None:
        SUPERCATS_SHAPE = sample['supercats'].shape
    if OSD_SHAPE is None:
        OSD_SHAPE = sample['osd'].shape

    if len(data_buffer) == num_samples_per_file:
        _write_tfrecord_shard(data_buffer, num_tfrecords_saved)
        num_tfrecords_saved += 1
        data_buffer = []  # reset data buffer

# Flush the final, partially filled shard; without this the trailing samples
# would be dropped. The buffer is always empty when the loop stopped at the cap.
if data_buffer:
    _write_tfrecord_shard(data_buffer, num_tfrecords_saved)
    num_tfrecords_saved += 1
    data_buffer = []

In [36]:
# Read the data back out.
def decode_fn(record_bytes):
    """Parse one serialized ``tf.train.Example`` into a dict of tensors.

    The schema lists the four features written by the export cell. ``image``
    is parsed as a scalar string feature and is not decoded here; the label
    features are fixed-length float32 with the shapes captured during the
    export (``CATEGORIES_SHAPE``, ``SUPERCATS_SHAPE``, ``OSD_SHAPE``).
    """
    feature_schema = {
        'image': tf.io.FixedLenFeature([], dtype=tf.string),
        'categories': tf.io.FixedLenFeature(CATEGORIES_SHAPE, dtype=tf.float32),
        'supercats': tf.io.FixedLenFeature(SUPERCATS_SHAPE, dtype=tf.float32),
        'osd': tf.io.FixedLenFeature(OSD_SHAPE, dtype=tf.float32),
    }
    return tf.io.parse_single_example(record_bytes, feature_schema)

In [41]:
# List the shard files in a seeded shuffled order, then interleave the parsed
# examples read from each shard.
train_dataset = (
    tf.data.Dataset
    .list_files('train_*.tfrecord', shuffle=True, seed=28796)
    .interleave(lambda shard_path: tf.data.TFRecordDataset(shard_path).map(decode_fn))
)

In [44]:
# The dataset is not batched, so each element is one parsed example dict.
for example in train_dataset:
    print(example.keys())
    print(example['categories'])

dict_keys(['categories', 'image', 'osd', 'supercats'])
tf.Tensor(
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.], shape=(89,), dtype=float32)
dict_keys(['categories', 'image', 'osd', 'supercats'])
tf.Tensor(
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.], shape=(89,), dtype=float32)
dict_keys(['categories', 'image', 'osd', 'supercats'])
tf.Tensor(
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.

# Submission

In [45]:
# identify animals in a target image
np.random.seed(283947)


def predict_random_categories():
    """Return a space-separated string of distinct, randomly drawn category ids.

    The number of ids comes from ``np.random.randint(1, 20)`` and the ids are
    sampled without replacement from ``category_key.id``.
    """
    num_categories = np.random.randint(1, 20)
    sampled_ids = np.random.choice(category_key.id, size=num_categories, replace=False)
    return ' '.join(str(category_id) for category_id in sampled_ids)


eval_data['categories'] = [predict_random_categories() for _ in eval_data.index]

# assess if the image is from a different distribution relative to the training data
np.random.seed(243)
eval_data['osd'] = [np.random.random() for _ in eval_data.index]

# Assemble the three submission columns in the order id, categories, osd.
random_preds_submission = {column: eval_data[column] for column in ('id', 'categories', 'osd')}
submission = pd.DataFrame(random_preds_submission)
submission.head()

Unnamed: 0,id,categories,osd
0,4172b65a-8b4f-4175-94e4-00187e2d5f33,117 56 245 69 168 138 184 220 221 234 156,0.207635
1,9d263fd4-f786-42bf-8e0c-2d5be7c81b49,102 74 182 52,0.781992
2,c114ccb9-cf89-4d98-807b-f8636477c6ca,116 231,0.606899
3,95ba3acc-859b-4a99-8c30-b38a91a1be28,163 265 84 160 217 257 81 12 97 107,0.346029
4,0c4bdb90-c715-4c01-81d1-aa7f49534ac7,92 77 169 289 88 94 48,0.40428


In [46]:
# Write the submission table to submission.csv in the working directory,
# leaving out the DataFrame index column.
submission.to_csv('submission.csv', index=False)