In [None]:
# default_exp leaderboard

# Leaderboard

> Current leaderboard computed on the validation set. The final leaderboard will be generated at the end of the contest using the test dataset and will probably differ from this one because of overfitting to the validation set.

In [None]:
#hide
# Start in "test mode": every cell below that is guarded by `if not in_test:` is skipped.
in_test = True

In [None]:
#skip
# Re-enable the cells guarded by `if not in_test:` (S3 download, evaluation, display).
# NOTE(review): presumably the `#skip` flag on this cell keeps the test runner from
# executing it, so `in_test` stays True during automated tests — confirm.
in_test = False

In [None]:
#exporti

import pandas as pd
import numpy as np
import boto3
from pathlib import Path

In [None]:
#exporti

import zipfile
import shutil
import torch
import tempfile

from dolphins_recognition_challenge.datasets import get_dataset
from dolphins_recognition_challenge.instance_segmentation.model import *

In [None]:
# exporti


def parse_filename(fname):
    """Extract submission metadata from the name of an uploaded submission archive.

    The expected layout is
    ``uploaded-<upload timestamp>-submission-iou=<iou>-<alias>-<email>-<timestamp>.zip``
    with ISO formatted timestamps (``YYYY-MM-DDTHH:MM:SS.ffffff``).

    The alias must not contain ``-`` (it is the single field after ``iou=...``);
    the e-mail address may contain ``-``.

    Args:
        fname: File name of the submission archive (without directory part).

    Returns:
        dict with the keys ``file_name`` (the input), ``date`` (upload time as a
        pandas Timestamp), ``alias``, ``email``, ``submitted_iou`` (the text after
        ``iou=``, kept as a string) and ``calculated_iou`` (always NaN here).

    Raises:
        IndexError: if the name has fewer dash-separated fields than expected.
    """
    import re

    tmp = fname.split("-")
    # The ISO date is itself cut apart by the split, so glue year, month and
    # day+time back together before parsing.
    date = pd.to_datetime(tmp[1] + tmp[2] + tmp[3])
    submitted_iou = tmp[5].split("=")[1]
    alias = tmp[6]

    # Everything after the 7th dash is "<email>-<timestamp>.zip". Taking only the
    # next dash-separated field would truncate addresses such as
    # "jane-doe@my-lab.org", so strip the trailing timestamp instead.
    email_and_tail = fname.split("-", 7)[7]
    email = re.sub(
        r"-\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?(?:\.zip)?$",
        "",
        email_and_tail,
    )

    return {
        "file_name": fname,
        "date": date,
        "alias": alias,
        "email": email,
        "submitted_iou": submitted_iou,
        "calculated_iou": np.nan,
    }

In [None]:
# hide

expected = dict(
    file_name="uploaded-2020-12-22T15:35:15.513570-submission-iou=0.46613-dolphin123-name.surname@gmail.com-2020-12-22T15:35:04.875962.zip",
    date=pd.to_datetime("2020-12-22 15:35:15.513570"),
    alias="dolphin123",
    email="name.surname@gmail.com",
    submitted_iou="0.46613",
    calculated_iou=np.nan,
)
# Parse exactly the file name stored in the expectation.
actual = parse_filename(expected["file_name"])

# The NaN entries compare equal only because both dicts hold the same `np.nan`
# object (dict comparison checks identity before equality).
assert actual == expected

In [None]:
# exporti

# Handle on the S3 bucket that is scanned for uploaded submission archives.
# The resource is created as soon as this cell runs; no credentials are
# configured anywhere in this notebook.
s3 = boto3.resource("s3")
my_bucket = s3.Bucket("ai-league.cisex.org")
# CSV with every parsed field of every submission (including file name and e-mail).
private_leaderboard_path = Path("private_leaderboard.csv")
# CSV with only the column subset selected by `public` (see `save_public_leaderboard`).
public_leaderboard_path = Path("leaderboard.csv")


def get_submissions_from_s3(private_leaderboard_path=private_leaderboard_path):
    """Download submission archives from S3 that are not yet in the private leaderboard.

    Every object in ``my_bucket`` whose key matches ``*submission*.zip`` and whose
    file name is not listed in the ``file_name`` column of the private leaderboard
    CSV is downloaded into the ``models_for_evaluation`` directory.

    Args:
        private_leaderboard_path: Path of the private leaderboard CSV. When the
            file does not exist, every matching object is treated as new.

    Returns:
        DataFrame with one row per newly downloaded archive, built with
        ``parse_filename``. When nothing is new it has the same columns and
        zero rows.
    """
    if private_leaderboard_path.exists():
        # A set built once replaces rebuilding a list for every S3 object.
        known_names = set(pd.read_csv(private_leaderboard_path)["file_name"])
    else:
        known_names = set()

    new_keys = [
        s3_object.key
        for s3_object in my_bucket.objects.all()
        if Path(s3_object.key).match("*submission*.zip")
        and Path(s3_object.key).name not in known_names
    ]
    new_names = [Path(key).name for key in new_keys]

    if len(new_keys) > 0:
        # The download fails if the target directory is missing, so create it first.
        Path("models_for_evaluation").mkdir(parents=True, exist_ok=True)
        for i, (key, name) in enumerate(zip(new_keys, new_names)):
            print(f"Downloading {i+1}/{len(new_keys)} from S3...")
            my_bucket.download_file(key, f"models_for_evaluation/{name}")

        # return new entries
        new_entries = pd.Series(new_names).apply(parse_filename).apply(pd.Series)
    else:
        # Parse a dummy file name and drop its row: this yields an empty frame
        # with the same columns as a real result.
        x = "uploaded-2020-12-22T15:35:15.513570-submission-iou=0.46613-dolphin123-name.surname@gmail.com-2020-12-22T15:35:04.875962.zip"
        new_entries = pd.Series([x]).apply(parse_filename).apply(pd.Series).iloc[:0, :]

    return new_entries
    

In [None]:
#exporti

def public(private_leaderboard):
    """Select the columns that go on the public leaderboard.

    Keeps ``alias``, ``date``, ``submitted_iou`` and ``calculated_iou`` (in that
    order) and thereby leaves out ``file_name`` and ``email``.

    Raises:
        KeyError: if one of the four columns is missing.
    """
    public_columns = ["alias", "date", "submitted_iou", "calculated_iou"]
    return private_leaderboard.loc[:, public_columns]

In [None]:
#hide

# Outside test runs: fetch the submissions that are not yet in the private leaderboard.
if not in_test:
    new_entries = get_submissions_from_s3()
    # The result is discarded (an expression inside an `if` block is not rendered
    # as cell output); the call only checks that the public columns can be selected.
    public(new_entries)

In [None]:
# exporti

def merge_with_private_leaderboard(
    new_entries, private_leaderboard_path=private_leaderboard_path
):
    """Append new submissions to the private leaderboard CSV and return the result.

    Args:
        new_entries: DataFrame of freshly parsed submissions. It is not modified.
        private_leaderboard_path: Path of the private leaderboard CSV. It is
            created when missing and overwritten otherwise.

    Returns:
        The merged leaderboard as written to disk. New rows carry NaN in
        ``calculated_iou``.
    """
    # `assign` returns a new frame, so the caller's DataFrame (possibly a slice)
    # is left untouched.
    new_entries = new_entries.assign(calculated_iou=np.nan)

    if private_leaderboard_path.exists():
        existing = pd.read_csv(private_leaderboard_path)
        # Existing rows come first, so `drop_duplicates` (keep="first" by default)
        # preserves an IoU that was already calculated for a file.
        private_leaderboard = pd.concat(
            [existing, new_entries], axis=0
        ).drop_duplicates(subset="file_name")
    else:
        private_leaderboard = new_entries

    private_leaderboard.to_csv(private_leaderboard_path, index=False)

    return private_leaderboard

In [None]:
#hide

# Outside test runs: persist the entries downloaded above (`new_entries` comes from
# the earlier guarded cell, so that cell must have run first).
if not in_test:
    private_leaderboard = merge_with_private_leaderboard(new_entries)
    # Result discarded; the call only checks that the public columns can be selected.
    public(private_leaderboard)

In [None]:
#exporti

def evaluate_model(model_path) -> float:
    """Compute the IoU of a zipped submission.

    The archive is unpacked into a temporary directory, ``model.pt`` is loaded
    from the first extracted entry matching ``submiss*``, and the model is scored
    with ``iou_metric`` on the dataset of the second loader returned by
    ``get_dataset("segmentation", batch_size=4)``.

    Args:
        model_path: Path (or path string) of the submission zip archive.

    Returns:
        The first element of the pair returned by ``iou_metric``.

    Raises:
        FileNotFoundError: if the archive has no top-level entry matching ``submiss*``.
    """
    with tempfile.TemporaryDirectory() as d:
        with zipfile.ZipFile(model_path, "r") as zip_ref:
            zip_ref.extractall(path=d)

        unzipped_path = next(Path(d).glob("submiss*"), None)
        if unzipped_path is None:
            raise FileNotFoundError(
                f"No 'submiss*' entry found inside {model_path}"
            )

        # SECURITY: torch.load unpickles the file, and unpickling can execute
        # arbitrary code. These archives are uploaded by participants, so run
        # this only in an isolated environment.
        model = torch.load(unzipped_path / "model.pt")
        # Only the second loader is used for scoring.
        _, data_loader_test = get_dataset("segmentation", batch_size=4)
        iou, _ = iou_metric(model, data_loader_test.dataset)

    return iou

In [None]:
#hide

# Outside test runs: re-evaluate one known submission and compare the result with the
# IoU stated in its file name.
# NOTE(review): the saved output of this cell is
# "AttributeError: 'RegionProposalNetwork' object has no attribute 'score_thresh'",
# i.e. the check did not pass when the notebook was last saved — presumably the pickled
# model was created with a different library version; confirm.
if not in_test:
    actual = evaluate_model(
        Path(
            "models_for_evaluation/uploaded-2021-01-05T15:01:23.563795-submission-iou=0.44003-dolphin123-name.surname@gmail.com-2021-01-05T15:01:21.655750.zip"
        )
    )
    expected = 0.44003
    np.testing.assert_almost_equal(actual, expected, decimal=5)

AttributeError: 'RegionProposalNetwork' object has no attribute 'score_thresh'

In [None]:
#exporti

def evaluate_private_leaderboard(private_leaderboard_path=private_leaderboard_path):
    """Evaluate every submission that has no calculated IoU yet.

    Rows of the private leaderboard CSV whose ``calculated_iou`` is NaN are
    evaluated with ``evaluate_model`` on the matching archive in
    ``models_for_evaluation``; the CSV is then rewritten.

    If an evaluation raises, the results computed so far are still written to
    the CSV before the exception propagates, so a rerun does not repeat them.

    Args:
        private_leaderboard_path: Path of the private leaderboard CSV
            (read and overwritten).

    Returns:
        The updated private leaderboard DataFrame.
    """
    private_leaderboard = pd.read_csv(private_leaderboard_path)
    pending = private_leaderboard.loc[private_leaderboard["calculated_iou"].isna()]

    n = pending.shape[0]
    try:
        for i, (ix, row) in enumerate(pending.iterrows()):
            file_name, alias, dt = row["file_name"], row["alias"], row["date"]
            print(f"Evaluating model {i+1}/{n} for {alias} submitted at {dt}...")
            calculated_iou = evaluate_model(f"models_for_evaluation/{file_name}")
            private_leaderboard.loc[ix, "calculated_iou"] = calculated_iou
    finally:
        # Save in all cases so one broken submission does not throw away the
        # evaluations that already finished.
        private_leaderboard.to_csv(private_leaderboard_path, index=False)

    return private_leaderboard

In [None]:
#hide

# Outside test runs: intended to verify that every submission has a calculated IoU.
# NOTE(review): this assertion cannot fail. `in` compares by identity and then by
# equality; NaN values read from the CSV are separate float objects and NaN != NaN,
# so `np.nan` is never found in the list. A check that really detects missing values
# would be `assert not private_leaderboard["calculated_iou"].isna().any()` — but no
# cell in this notebook calls `evaluate_private_leaderboard`, so decide first whether
# missing values should stop the notebook.
if not in_test:
    private_leaderboard = pd.read_csv("private_leaderboard.csv")
    assert np.nan not in list(private_leaderboard["calculated_iou"])

In [None]:
#exporti

def save_public_leaderboard(private_leaderboard_path=private_leaderboard_path, public_leaderboard_path=public_leaderboard_path):
    """Write the public column subset of the private leaderboard to its own CSV.

    Args:
        private_leaderboard_path: CSV to read the full leaderboard from.
        public_leaderboard_path: CSV to write (without index) after the columns
            have been reduced by ``public``.
    """
    full_board = pd.read_csv(private_leaderboard_path)
    public(full_board).to_csv(public_leaderboard_path, index=False)

In [None]:
#hide

# Outside test runs: regenerate the public CSV from the private one (default paths).
if not in_test:
    save_public_leaderboard()

In [None]:
# export


def get_leaderboard(
    public_leaderboard_path=public_leaderboard_path,
    excluded_aliases=("dolphin123", "malimedo", "prvi_pokušaj"),
):
    """Build the ranked public leaderboard with one row per alias.

    Args:
        public_leaderboard_path: CSV written by ``save_public_leaderboard``.
        excluded_aliases: Aliases that are left out of the ranking. The default
            reproduces the three aliases that were previously hard-coded.

    Returns:
        DataFrame sorted by ``submitted_iou`` in descending order that keeps, for
        each alias, only its highest ``submitted_iou`` row. The index is the rank
        and starts at 1.
    """
    submissions = pd.read_csv(public_leaderboard_path)
    keep = ~submissions["alias"].isin(list(excluded_aliases))

    leaderboard = (
        submissions[keep]
        # A stable sort keeps tied scores in file order, so the ranking is deterministic.
        .sort_values(by=["submitted_iou"], ascending=False, kind="mergesort")
        # After the descending sort the first row of each alias is its best one.
        # Dropping rows preserves the order, so no second sort is needed.
        .drop_duplicates(subset="alias", keep="first")
        .reset_index(drop=True)
    )
    leaderboard.index = leaderboard.index + 1
    return leaderboard

This is a temporary leaderboard calculated daily using validation data. The final leaderboard will be calculated using a test dataset that is unavailable to participants and will most likely differ from the one shown here because of overfitting to the validation dataset. For details, see: https://en.wikipedia.org/wiki/Training,_validation,_and_test_sets

In [None]:
#hide_input

# Outside test runs: render the ranking. `display` is called explicitly because an
# expression inside an `if` block is not shown as cell output.
if not in_test:
    display(get_leaderboard())

Unnamed: 0,alias,date,submitted_iou,calculated_iou
1,tekashi,2021-04-19 15:03:11.966772,0.51546,
2,dolphinSantiago,2021-03-28 21:13:52.740719,0.48156,
3,Orka,2021-04-02 16:21:59.548029,0.47683,
4,alias,2021-05-02 10:38:49.415151,0.46234,
5,Dupin,2021-03-31 14:35:22.171495,0.46228,
6,dupincek,2021-03-29 17:04:06.266327,0.44912,
7,Boto,2021-03-31 22:54:40.595595,0.44678,
8,stokic,2021-02-21 18:51:53.232539,0.43552,
9,dolphin_rovinj,2021-05-01 20:06:35.372099,0.43487,
10,firstML,2021-02-28 09:55:59.155951,0.42529,


In [None]:
# hide
# Show which file the public leaderboard is read from and written to.
public_leaderboard_path

PosixPath('leaderboard.csv')