In [1]:
from torchvision.transforms.v2.functional import to_pil_image

from lib import to_rgb
%matplotlib inline
%load_ext autoreload

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from PIL import Image
from tqdm import tqdm

import torch
import torchvision
from torch.utils.data import Dataset
from torchvision import transforms
from torchvision.transforms import InterpolationMode
from torchvision.transforms import v2

import lib

## Load the data

In [2]:
train_features = pd.read_csv("data/train_features.csv", index_col="id")
test_features = pd.read_csv("data/test_features.csv", index_col="id")
train_labels = pd.read_csv("data/train_labels.csv", index_col="id")

In [3]:
train_features

Unnamed: 0_level_0,filepath,site
id,Unnamed: 1_level_1,Unnamed: 2_level_1
ZJ000000,train_features/ZJ000000.jpg,S0120
ZJ000001,train_features/ZJ000001.jpg,S0069
ZJ000002,train_features/ZJ000002.jpg,S0009
ZJ000003,train_features/ZJ000003.jpg,S0008
ZJ000004,train_features/ZJ000004.jpg,S0036
...,...,...
ZJ016483,train_features/ZJ016483.jpg,S0093
ZJ016484,train_features/ZJ016484.jpg,S0043
ZJ016485,train_features/ZJ016485.jpg,S0089
ZJ016486,train_features/ZJ016486.jpg,S0095


In [4]:
test_features

Unnamed: 0_level_0,filepath,site
id,Unnamed: 1_level_1,Unnamed: 2_level_1
ZJ016488,test_features/ZJ016488.jpg,S0082
ZJ016489,test_features/ZJ016489.jpg,S0040
ZJ016490,test_features/ZJ016490.jpg,S0040
ZJ016491,test_features/ZJ016491.jpg,S0041
ZJ016492,test_features/ZJ016492.jpg,S0040
...,...,...
ZJ020947,test_features/ZJ020947.jpg,S0086
ZJ020948,test_features/ZJ020948.jpg,S0152
ZJ020949,test_features/ZJ020949.jpg,S0162
ZJ020950,test_features/ZJ020950.jpg,S0055


In [87]:
train_labels

Unnamed: 0_level_0,antelope_duiker,bird,blank,civet_genet,hog,leopard,monkey_prosimian,rodent
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ZJ000000,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
ZJ000001,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
ZJ000002,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
ZJ000003,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
ZJ000004,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...
ZJ016483,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
ZJ016484,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
ZJ016485,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
ZJ016486,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0


### Make sure one-hot encoding is correct

In [6]:
assert train_labels[train_labels.sum(axis=1) != 1].shape[0] == 0

In [7]:
species_labels = sorted(train_labels.columns.unique())
species_labels

['antelope_duiker',
 'bird',
 'blank',
 'civet_genet',
 'hog',
 'leopard',
 'monkey_prosimian',
 'rodent']

## Explore the data

Now let's see what some of the actual images look like. The code below iterates through a list of species and selects a single random image from each species to display, along with its image ID and label. You can try changing the `random_state` variable to display a new set of images.

In [80]:
%autoreload 2
from torchvision.transforms.functional import to_pil_image
# %matplotlib notebook

import photo

random_state = 111

# we'll create a grid with 8 positions, one for each label (7 species, plus blanks)
fig, axes = plt.subplots(nrows=10, ncols=4, figsize=(15, 25))


lab = lib.LabCLAHE()

rows = train_features.sample(10, random_state=random_state)

for idx, ax in enumerate(axes):
    img = Image.open('data/' + rows.loc[ rows.index[idx], 'filepath' ]).convert('RGB')

    ax[0].imshow(img)
    ax[1].imshow(to_pil_image(lab(img)))
    ax[2].imshow(to_pil_image(lab(lab(img))))
    ax[3].imshow(to_pil_image(lab(lab(lab(img)))))

    # for ax_ in ax:
    #     ax_.imshow(img)
    #     ax_.set_title(f"{img_id} | {species}")

    # ax.imshow(to_pil_image(lab(lab(img))))
    # # ax.imshow(img)

plt.tight_layout()


<IPython.core.display.Javascript object>

In [86]:
%autoreload 2
from torchvision.transforms.functional import to_pil_image

import photo

random_state = 11

# we'll create a grid with 8 positions, one for each label (7 species, plus blanks)
fig, axes = plt.subplots(nrows=2, ncols=4, figsize=(15, 12))

lab = lib.LabCLAHE()

# iterate through each species
for species, ax in zip(species_labels, axes.flat):
    # get an image ID for this species
    img_id = (
        train_labels[train_labels.loc[:,species] == 1]
        .sample(1, random_state=random_state)
        .index[0]
    )
    img = Image.open('data/' + train_features.loc[img_id].filepath)

    # res = photo.detect_day_night(img)
    # print(res)

    ax.imshow(to_pil_image(lab(lab(img))))
    # ax.imshow(img)
    ax.set_title(f"{img_id} | {species}")

  fig, axes = plt.subplots(nrows=2, ncols=4, figsize=(15, 12))


<IPython.core.display.Javascript object>

### Distribution of species across the training set
Let's look at the distribution of species across the training set, first in terms of overall counts and then in percentage terms.

In [10]:
train_labels.sum().sort_values(ascending=False)

monkey_prosimian    2492.0
antelope_duiker     2474.0
civet_genet         2423.0
leopard             2254.0
blank               2213.0
rodent              2013.0
bird                1641.0
hog                  978.0
dtype: float64

In [11]:
train_labels.sum().divide(train_labels.shape[0]).sort_values(ascending=False)

monkey_prosimian    0.151140
antelope_duiker     0.150049
civet_genet         0.146955
leopard             0.136705
blank               0.134219
rodent              0.122089
bird                0.099527
hog                 0.059316
dtype: float64

In case you're curious, this distribution is not exactly what we find in the wild. The competition dataset has been curated a little bit to produce a more uniform distribution than we would see in the actual data.

There's a lot more data exploration to do. For example, you might also want to look at the distribution of image dimensions or camera trap sites. But since our primary goal here is to develop a benchmark, let's move on to the modeling!

In [12]:
train_features['resolution'] = train_features['filepath'].apply(lambda filename: lib.get_resolution('data/' + filename))

train_features['site_plus_resolution'] = train_features['site'] + '_' + train_features['resolution']

train_features

Unnamed: 0_level_0,filepath,site,resolution,site_plus_resolution
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ZJ000000,train_features/ZJ000000.jpg,S0120,960x540,S0120_960x540
ZJ000001,train_features/ZJ000001.jpg,S0069,960x540,S0069_960x540
ZJ000002,train_features/ZJ000002.jpg,S0009,640x360,S0009_640x360
ZJ000003,train_features/ZJ000003.jpg,S0008,640x360,S0008_640x360
ZJ000004,train_features/ZJ000004.jpg,S0036,640x335,S0036_640x335
...,...,...,...,...
ZJ016483,train_features/ZJ016483.jpg,S0093,960x540,S0093_960x540
ZJ016484,train_features/ZJ016484.jpg,S0043,640x360,S0043_640x360
ZJ016485,train_features/ZJ016485.jpg,S0089,640x360,S0089_640x360
ZJ016486,train_features/ZJ016486.jpg,S0095,960x540,S0095_960x540


In [13]:
resolutions = train_features.groupby(['resolution']).agg(count=('resolution', 'count')).sort_values(by='count', ascending=False)

resolutions

Unnamed: 0_level_0,count
resolution,Unnamed: 1_level_1
640x360,7490
960x540,6345
640x335,970
360x240,864
960x515,458
160x120,293
360x215,67
160x95,1


In [14]:
sites = train_features.groupby(['site']).agg(count=('site', 'count')).sort_values(by='count', ascending=False)

sites

Unnamed: 0_level_0,count
site,Unnamed: 1_level_1
S0060,1132
S0009,664
S0063,557
S0008,541
S0036,456
...,...
S0092,3
S0078,2
S0079,2
S0178,2


In [15]:
resolutions_by_sites = train_features.groupby(['site', 'resolution']).agg(count=('site', 'count'))

resolutions_by_sites# .drop(columns='count')

Unnamed: 0_level_0,Unnamed: 1_level_0,count
site,resolution,Unnamed: 2_level_1
S0001,640x360,85
S0002,640x360,171
S0003,640x335,3
S0003,640x360,55
S0004,640x360,109
...,...,...
S0192,640x360,6
S0193,360x240,38
S0196,360x240,15
S0197,640x360,17


In [16]:
# # train_features['site'] = train_features['site'].str.strip().str.upper()
# # test_features['site'] = test_features['site'].str.strip().str.upper()

sites_train = train_features[['site']].drop_duplicates()
sites_test = test_features[['site']].drop_duplicates()

# sites_test.merge(sites_train, left_on='site', right_on='site', how='left', indicator=True)
#

train_sites = set(train_features['site'])
test_sites  = set(test_features['site'])

print(len(train_sites & test_sites))          # сколько общих
test_only = sorted(list(test_sites - train_sites))  # примеры, что есть в test, но нет в train

print(len(test_only))



0
51


### Mark over-exposed photos as such

In [17]:
# def to_tensor(filename):
#     return lib.to_rgb(torchvision.io.read_image(filename))
#
# def is_overexposed(row):
#     tensor = lib.to_rgb(torchvision.io.read_image('data/' + row['filepath']))
#     return lib.is_overexposed_torchvision(tensor.to('cuda'))
#
# # train_features['tensor'] = train_features['filepath'].apply(lambda filename: to_tensor('data/' + filename))


In [18]:
# train_features['is_overexposed_full'] = train_features.apply(is_overexposed, axis=1)
# train_features['is_overexposed'] = train_features['is_overexposed_full'].str[0]
#
# train_features

In [19]:
# %autoreload 2
# # ZJ015089 = train_all.loc['ZJ015089']
# # ZJ015089 = train_all.loc['ZJ000007']
# # ZJ015089 = train_all.loc['ZJ004820']
# over_exposed_photo = train_all.loc['ZJ008741']
#
# tensor = torchvision.io.read_image('data/' + over_exposed_photo['filepath'])
# if tensor.shape[0] == 1:
#     tensor = tensor.repeat(3, 1, 1)
# dec, f = lib.is_overexposed_torchvision(tensor.to('cuda'))
#
# dec, f

In [20]:
# from torchvision.transforms.functional import to_pil_image
#
# to_pil_image(torchvision.io.read_image('data/' + over_exposed_photo['filepath']))

In [21]:
train_all = train_features.merge(train_labels, on='id')
train_all

Unnamed: 0_level_0,filepath,site,resolution,site_plus_resolution,antelope_duiker,bird,blank,civet_genet,hog,leopard,monkey_prosimian,rodent
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
ZJ000000,train_features/ZJ000000.jpg,S0120,960x540,S0120_960x540,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
ZJ000001,train_features/ZJ000001.jpg,S0069,960x540,S0069_960x540,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
ZJ000002,train_features/ZJ000002.jpg,S0009,640x360,S0009_640x360,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
ZJ000003,train_features/ZJ000003.jpg,S0008,640x360,S0008_640x360,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
ZJ000004,train_features/ZJ000004.jpg,S0036,640x335,S0036_640x335,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
ZJ016483,train_features/ZJ016483.jpg,S0093,960x540,S0093_960x540,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
ZJ016484,train_features/ZJ016484.jpg,S0043,640x360,S0043_640x360,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
ZJ016485,train_features/ZJ016485.jpg,S0089,640x360,S0089_640x360,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
ZJ016486,train_features/ZJ016486.jpg,S0095,960x540,S0095_960x540,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0


In [22]:
train_all[train_all['blank'] == 1].groupby(['site', 'resolution']).agg(count=('filepath', 'count'))

Unnamed: 0_level_0,Unnamed: 1_level_0,count
site,resolution,Unnamed: 2_level_1
S0001,640x360,6
S0002,640x360,36
S0003,640x335,3
S0003,640x360,12
S0004,640x360,4
...,...,...
S0192,640x360,1
S0193,360x240,14
S0196,360x240,7
S0197,640x360,4


In [23]:
# %autoreload 2
# import lib
# from torchvision.transforms.functional import to_pil_image
#
# # blanks = train_all[(train_all['blank'] == 1) & (train_all['site'] == 'S0002') & (train_all['resolution'] == '640x360')]
# blanks = train_all[(train_all['resolution'] == '640x360')]
#
# blanks = blanks.sample(n=15, random_state=1)
#
# blanks = blanks[blanks['is_overexposed'] != True]
#
# blanks = blanks.copy()
#
# # blanks = train_all.loc[['ZJ008001', 'ZJ011856', 'ZJ015487']]
#
# blanks['tensor'] = blanks['filepath'].apply(lambda filename: to_tensor('data/' + filename))
#
# blanks_ = torch.stack(list(blanks['tensor']), dim=0).to(torch.float32) / 255.0
#
# bg_template = lib.background_template(blanks_)
#
# to_pil_image(bg_template)

In [24]:
# # rand = train_all[train_all['resolution'] == '160x120'].iloc[10]['tensor'].to(torch.float32)
# rand = blanks_[0]
# to_pil_image(rand)

In [25]:
# alpha, beta = lib.affine_params_to_background(rand, bg_template)
# x_aligned = (alpha[:, None, None] * rand + beta[:, None, None]).clamp(0, 1.0)
#
# to_pil_image(x_aligned)
#
# # alpha, beta

In [26]:
# from itertools import zip_longest
# import math
#
# # we'll create a grid with 8 positions, one for each label (7 species, plus blanks)
# fig, axes = plt.subplots(nrows=math.ceil(len(blanks) / 3), ncols=3, figsize=(15, 45))
#
# for img_tensor, ax in zip_longest(blanks_, axes.flatten()):
#     if img_tensor is None:
#         if ax is not None:
#             ax.remove()
#         continue
#     if ax is None:
#         break
#
#     alpha, beta = lib.affine_params_to_background(img_tensor, bg_template)
#     x_aligned = (alpha[:, None, None] * img_tensor + beta[:, None, None]).clamp(0, 1.0)
#
#     ax.imshow(to_pil_image(x_aligned))
#
#     m_val = (img_tensor - bg_template).abs().sum(dim=0) / 3
#
#     std = m_val.std(dim=(0, 1)).mean()
#
#     koef = torch.where(m_val > 4 * std, 1.0, 0).mean(dim=(0, 1))
#
#     ax.set_title(f"{koef}")

### Examine photos (put your condition)

In [27]:
# from itertools import zip_longest
#
# random_state = 411
#
# # rows = train_all[(train_all['blank'] == 1) & (train_all['site'] == 'S0159') & (train_all['resolution'] == '160x120')]
# # rows = train_all.loc[['ZJ008001', 'ZJ011856', 'ZJ015487']]
#
# rows = blanks
#
# # rows = train_features[train_features['is_overexposed'] == True]
#
# # rows = train_features[(train_features['is_overexposed_mu'] > 240)]
#
# # rows = rows.sample(min(axes.size, len(rows)), random_state=random_state)
#
# fig, axes = plt.subplots(nrows=math.ceil(len(rows) / 3), ncols=3, figsize=(15, 18 * 2))
#
#
# # iterate through each species
# print(f'Total rows: {len(rows)}')
#
# for img_tensor, ax in zip_longest(list(rows.iterrows()), axes.flatten()):
#     if img_tensor is None:
#         if ax is not None:
#             ax.remove()
#         continue
#     if ax is None:
#         break
#     img = Image.open('data/' + img_tensor[1]['filepath'])
#     ax.imshow(img)
#     ax.set_title(f"{img_tensor[1].name} {img_tensor[1]['resolution']}")

## Split into train and evaluation sets
First, we'll need to split the images into train and eval sets. We'll put aside 25% of the data for evaluation and stratify by the target labels to ensure we have similar relative frequencies of each class in the train and eval sets.

In [28]:
from sklearn.model_selection import train_test_split

y = train_labels
x = train_features.loc[y.index]

# # note that we are casting the species labels to an indicator/dummy matrix
# x_train, x_eval, y_train, y_eval = train_test_split(
#     x, y, stratify=y, test_size=0.20
# )

In [29]:
val_sites = ['S0060','S0063','S0043','S0038','S0120','S0014']

mask_val  = x['site'].isin(val_sites)

x_train = x[ ~mask_val ]
y_train = y[ ~mask_val ]

x_eval = x[ mask_val ]
y_eval = y[ mask_val ]



Here's what `x_train` and `y_train` look like now:

In [30]:
x_train

Unnamed: 0_level_0,filepath,site,resolution,site_plus_resolution
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ZJ000001,train_features/ZJ000001.jpg,S0069,960x540,S0069_960x540
ZJ000002,train_features/ZJ000002.jpg,S0009,640x360,S0009_640x360
ZJ000003,train_features/ZJ000003.jpg,S0008,640x360,S0008_640x360
ZJ000004,train_features/ZJ000004.jpg,S0036,640x335,S0036_640x335
ZJ000005,train_features/ZJ000005.jpg,S0019,960x540,S0019_960x540
...,...,...,...,...
ZJ016482,train_features/ZJ016482.jpg,S0146,640x360,S0146_640x360
ZJ016483,train_features/ZJ016483.jpg,S0093,960x540,S0093_960x540
ZJ016485,train_features/ZJ016485.jpg,S0089,640x360,S0089_640x360
ZJ016486,train_features/ZJ016486.jpg,S0095,960x540,S0095_960x540


In [31]:
y_train.head()

Unnamed: 0_level_0,antelope_duiker,bird,blank,civet_genet,hog,leopard,monkey_prosimian,rodent
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ZJ000001,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
ZJ000002,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
ZJ000003,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
ZJ000004,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
ZJ000005,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [32]:
x_train.shape, y_train.shape, x_eval.shape, y_eval.shape

((13190, 4), (13190, 8), (3298, 4), (3298, 8))

Next, let's validate that our split has resulted in roughly similar relative distributions of species across the train and eval sets (because of how we passed `stratify=y` above).

In [33]:
split_pcts = pd.DataFrame(
    {
        "train": y_train.idxmax(axis=1).value_counts(normalize=True),
        "eval": y_eval.idxmax(axis=1).value_counts(normalize=True),
    }
)
print("Species percentages by split")
(split_pcts.fillna(0) * 100).astype(int)

Species percentages by split


Unnamed: 0,train,eval
antelope_duiker,15,14
bird,11,4
blank,15,4
civet_genet,10,29
hog,5,6
leopard,11,21
monkey_prosimian,17,6
rodent,12,11
