# Backtest Results

## Apply Model to all available Ground-truth data

In [1]:
import argparse
import json
import logging
import os
from traceback import print_exc
from tqdm import tqdm

# Log line layout: timestamp (left-justified, min width 15), level name
# (left-justified, min width 5), then the message text.
FORMAT = '%(asctime)-15s %(levelname)-5s %(message)s'
# Configure the root logger to use that layout and emit INFO and above.
logging.basicConfig(format=FORMAT, level=logging.INFO)

#import numpy as np
import pandas as pd

from research_lib.utils.data_access_utils import S3AccessUtils
from research_lib.utils.datetime_utils import add_days
from filter_optimization.filter_optimization_task import generate_global_optimum_filter, _add_date_hour_columns
from report_generation.report_generator import generate_ts_data, SamplingFilter
from weight_estimation.weight_estimator import WeightEstimator
from weight_estimation.utils import CameraMetadata



# Alternative construction that passes credentials loaded from the JSON file
# named by the AWS_CREDENTIALS environment variable (currently disabled).
#s3 = S3AccessUtils('/tmp', json.load(open(os.environ['AWS_CREDENTIALS'])))
# Shared S3 helper used for all downloads/uploads below.
# NOTE(review): '/tmp' is presumably the local download/cache directory — confirm against S3AccessUtils.
s3 = S3AccessUtils('/tmp')
# Local root under which main() writes <batch_name>/<cohort_name>/... outputs.
ROOT_DIR = '/tmp/batches'
# Rows with akpd_score below this value are dropped in main(); the same value
# is handed to SamplingFilter in generate_report_task().
AKPD_SCORE_CUTOFF = 0.99
# Model files passed to WeightEstimator in estimate_weight_task()
# (first argument: weight model, second argument: k-factor model).
weight_model_f = '/root/alok/repos/internal-tools/biomass-backtest/.data/nn_epoch_798_v2.pb'
kf_model_f = '/root/alok/repos/internal-tools/biomass-backtest/.data//kf_predictor_v2.pb'


def estimate_weight_task(df: pd.DataFrame) -> pd.DataFrame:
    """Generate individual weight, length, and k-factor estimates for a dataset.

    Args:
        df: Frame with ``annotation`` and ``camera_metadata`` columns. Each
            cell is either a dict or a string that becomes valid JSON once
            single quotes are swapped for double quotes (the form these
            columns take after a CSV round trip).

    Returns:
        A copy of ``df`` with three added columns: ``estimated_weight_g``,
        ``estimated_length_mm`` and ``estimated_k_factor``. The input frame
        is not modified. An empty input yields an empty frame that still
        carries the three output columns.
    """

    logging.info(f"estimate_weight_task data: {len(df)} rows")

    # Work on a copy: callers typically pass a filtered slice, and writing
    # columns into it would mutate (or warn about) the caller's data.
    df = df.copy()

    output_columns = ('estimated_weight_g', 'estimated_length_mm', 'estimated_k_factor')
    if df.empty:
        # Nothing to predict: skip model loading and avoid indexing row 0 below.
        for out_col in output_columns:
            df[out_col] = pd.Series(dtype=float)
        return df

    for col in ('annotation', 'camera_metadata'):
        # Only the first cell is inspected; the column is assumed homogeneous.
        if not isinstance(df[col].iloc[0], dict):
            # NOTE: quote swapping is fragile (breaks on values that contain
            # quotes); kept because it matches how the datasets were written.
            df[col] = df[col].apply(lambda x: json.loads(x.replace("'", '"')))

    weight_estimator = WeightEstimator(weight_model_f, kf_model_f)
    weights, lengths, kfs = [], [], []

    # Iterate the two needed columns directly; `total` lets tqdm show progress.
    rows = zip(df['annotation'], df['camera_metadata'])
    for annotation, camera_metadata in tqdm(rows, total=len(df)):
        camera_metadata_obj = CameraMetadata(
            focal_length=camera_metadata['focalLength'],
            focal_length_pixel=camera_metadata['focalLengthPixel'],
            baseline_m=camera_metadata['baseline'],
            pixel_count_width=camera_metadata['pixelCountWidth'],
            pixel_count_height=camera_metadata['pixelCountHeight'],
            image_sensor_width=camera_metadata['imageSensorWidth'],
            image_sensor_height=camera_metadata['imageSensorHeight']
        )

        weight, length, kf = weight_estimator.predict(annotation, camera_metadata_obj)
        weights.append(weight)
        lengths.append(length)
        kfs.append(kf)

    df['estimated_weight_g'] = weights
    df['estimated_length_mm'] = lengths
    df['estimated_k_factor'] = kfs
    return df


def compute_filter_task(df: pd.DataFrame, last_feeding_date: str) -> dict:
    """Find the best sampling filter over a 14-day window of the dataset.

    The window ends one day after ``last_feeding_date`` when that is given;
    otherwise it ends at the ``date`` value of the last row. Rows whose
    ``date`` falls inside the window (both ends inclusive) are passed to
    ``generate_global_optimum_filter``.

    Returns:
        Dict with keys ``best_start_hr``, ``best_end_hr`` and
        ``best_kf_cutoff`` taken from the optimiser's result.
    """

    dated_df = _add_date_hour_columns(df)

    if last_feeding_date:
        end_date = add_days(last_feeding_date, 1)
    else:
        # No feeding date supplied: fall back to the date of the final row.
        end_date = dated_df.date.iloc[-1]
    start_date = add_days(end_date, -14)

    in_window = (dated_df.date >= start_date) & (dated_df.date <= end_date)
    window_df = dated_df[in_window]

    print(f"compute_filter_task {start_date}-{end_date} -> {len(window_df)} records")
    best_filter = generate_global_optimum_filter(window_df)

    # NOTE: a fixed filter (start hour 7, end hour 15, kf cutoff 0.0) was
    # previously hard-coded here as a manual override; it is no longer used.
    return {
        'best_start_hr': best_filter.start_hour,
        'best_end_hr': best_filter.end_hour,
        'best_kf_cutoff': best_filter.kf_cutoff
    }


def generate_report_task(df, sampling_filter):
    """Build the time-series report frame for ``df`` under ``sampling_filter``.

    ``sampling_filter`` is the dict produced by ``compute_filter_task``
    (keys ``best_start_hr``, ``best_end_hr``, ``best_kf_cutoff``). The
    module-level ``AKPD_SCORE_CUTOFF`` is added as the AKPD score cutoff.

    Returns:
        A DataFrame wrapping whatever ``generate_ts_data`` produces.
    """
    start_hr = sampling_filter['best_start_hr']
    end_hr = sampling_filter['best_end_hr']
    kf_cutoff = sampling_filter['best_kf_cutoff']

    report_filter = SamplingFilter(
        start_hour=start_hr,
        end_hour=end_hr,
        kf_cutoff=kf_cutoff,
        akpd_score_cutoff=AKPD_SCORE_CUTOFF
    )

    dated_df = _add_date_hour_columns(df)
    return pd.DataFrame(generate_ts_data(dated_df, report_filter))


def _upload_backtest_artifact(local_path, key_base):
    """Upload ``local_path`` to the ``aquabyte-images-adhoc`` bucket as ``backtest/<key_base>``."""
    s3.s3_client.upload_file(local_path, 'aquabyte-images-adhoc',
                             os.path.join('backtest', key_base))


def main(cohort_names, batch_name, use_dw_values):
    """Run the backtest pipeline for each cohort and upload its artifacts.

    For every cohort this downloads the ground-truth metadata and the
    annotation dataset, produces biomass computations, computes the sampling
    filter and generates the report. Each artifact is written to
    ``ROOT_DIR/<batch_name>/<cohort_name>/`` and uploaded under
    ``backtest/<batch_name>/<cohort_name>/``.

    A cohort whose ground-truth metadata cannot be fetched is skipped. Any
    other failure is logged with its traceback and the loop moves on to the
    next cohort.

    Args:
        cohort_names: Iterable of cohort directory names to process.
        batch_name: Name of this run; used as the output sub-directory.
        use_dw_values: When true, the downloaded dataset is used as-is instead
            of running ``estimate_weight_task`` (presumably it already carries
            the estimate columns — confirm against the dataset).
    """

    for cohort_name in cohort_names:
        print('Processing:{}'.format(cohort_name))

        try:
            s3_dir = os.path.join(
                'https://aquabyte-images-adhoc.s3-eu-west-1.amazonaws.com/alok/production_datasets',
                cohort_name
            )

            ground_truth_metadata_url = os.path.join(s3_dir, 'ground_truth_metadata.json')
            ground_truth_key_base = os.path.join(batch_name, cohort_name, 'ground_truth_metadata.json')
            ground_truth_f = os.path.join(ROOT_DIR, ground_truth_key_base)
            try:
                logging.info(f"Loading ground_truth_metadata_url: {ground_truth_metadata_url}")
                s3.download_from_url(ground_truth_metadata_url, custom_location=ground_truth_f)
                _upload_backtest_artifact(ground_truth_f, ground_truth_key_base)
            except Exception:
                # Catch Exception (not a bare except) so Ctrl-C still stops the run.
                logging.warning('Couldnt download groundtruth for cohort: {}'.format(cohort_name),
                                exc_info=True)
                continue

            # Make sure the local output directory exists before writing into it.
            os.makedirs(os.path.join(ROOT_DIR, batch_name, cohort_name), exist_ok=True)

            # generate biomass computations
            data_url = os.path.join(s3_dir, 'annotation_dataset.csv')
            logging.info(f"Loading {data_url}")
            data_f, _, _ = s3.download_from_url(data_url)
            df = pd.read_csv(data_f)
            df = df[df.akpd_score >= AKPD_SCORE_CUTOFF]

            if not use_dw_values:
                biomass_computations_df = estimate_weight_task(df)
            else:
                biomass_computations_df = df

            out_key_base = os.path.join(batch_name, cohort_name, 'biomass_computations.csv')
            out_f = os.path.join(ROOT_DIR, out_key_base)
            biomass_computations_df.to_csv(out_f)
            logging.info(f"estimate_weight_task -> {out_key_base}")
            _upload_backtest_artifact(out_f, out_key_base)

            # generate optimal filters
            with open(ground_truth_f) as ground_truth_fh:
                ground_truth_metadata = json.load(ground_truth_fh)
            # compute_filter_task falls back to the data's last date when this is missing.
            last_feeding_date = ground_truth_metadata.get('last_feeding_date')
            sampling_filter = compute_filter_task(biomass_computations_df, last_feeding_date)
            logging.info(f"compute_filter_task -> {sampling_filter}")

            out_key_base = os.path.join(batch_name, cohort_name, 'sampling_filter.json')
            out_f = os.path.join(ROOT_DIR, out_key_base)
            logging.info(f"compute_filter_task -> {out_key_base}")
            # Close the file before uploading so the content is fully flushed.
            with open(out_f, 'w') as sampling_filter_fh:
                json.dump(sampling_filter, sampling_filter_fh)
            _upload_backtest_artifact(out_f, out_key_base)

            # generate preliminary report
            ts_data = generate_report_task(biomass_computations_df, sampling_filter)

            # The key must stay relative (no ROOT_DIR): an absolute component
            # would make os.path.join drop the 'backtest' prefix of the S3 key.
            out_key_base = os.path.join(batch_name, cohort_name, 'report.csv')
            logging.info(f"generate_report_task -> {out_key_base}")

            out_f = os.path.join(ROOT_DIR, out_key_base)
            ts_data.to_csv(out_f)
            _upload_backtest_artifact(out_f, out_key_base)
        except Exception:
            # Log with traceback and carry on with the remaining cohorts.
            logging.exception('Got error for {}, skipping...'.format(cohort_name))



2021-01-15 15:07:32,861 INFO  Found credentials in shared credentials file: ~/.aws/credentials


In [2]:
BATCH_NAME = 'sidtest3'
p = 'alok/production_datasets/'
gt = '/ground_truth_metadata.json'

# Every key under the dataset prefix that ends with the ground-truth file name
# marks one cohort; removing the prefix and that file name leaves the cohort name.
ground_truth_keys = (
    key
    for key in s3.get_matching_s3_keys('aquabyte-images-adhoc', p)
    if key.endswith(gt)
)
cohort_names = [key.replace(p, '').replace(gt, '') for key in ground_truth_keys]
cohort_names

alok/production_datasets/


['aplavika_pen_id_95_2020-07-10_2020-07-26',
 'bolaks_pen_id_88_2020-02-28_2020-03-10',
 'dale_pen_id_143_2020-10-07_2020-10-21',
 'djubawik_pen_id_153_2020-11-10_2020-11-26',
 'eldviktaren_pen_id_164_2020-09-21_2020-10-08',
 'habranden_pen_id_100_2020-08-10_2020-08-31',
 'kjeppevikholmen_pen_id_5_2019-06-18_2019-07-02',
 'langoy_pen_id_108_2020-05-07_2020-05-17',
 'leivsethamran_pen_id_165_2020-10-18_2020-11-13',
 'movikodden_pen_id_114_2020-11-03_2020-11-25',
 'movikodden_pen_id_167_2020-10-13_2020-10-30',
 'seglberget_pen_id_66_2020-05-13_2020-06-13',
 'silda_pen_id_86_2020-07-02_2020-07-19',
 'slapoya_pen_id_116_2020-10-18_2020-11-08',
 'tittelsnes_pen_id_37_2020-06-10_2020-06-24',
 'varholmen_pen_id_131_2020-08-15_2020-08-30',
 'varholmen_pen_id_151_2020-10-02_2020-10-17',
 'varholmen_pen_id_186_2020-10-18_2020-11-02',
 'vikane_pen_id_60_2020-08-10_2020-08-30']

In [3]:
# Run the backtest for the cohorts from index 5 onward, re-estimating weights
# rather than using the values already in the dataset.
# NOTE(review): the first five cohorts are skipped — presumably handled in an
# earlier run; confirm before re-running the whole batch.
main(
    cohort_names[5:],
    batch_name = BATCH_NAME,
    use_dw_values = False)

2021-01-15 15:08:13,648 INFO  Loading ground_truth_metadata_url: https://aquabyte-images-adhoc.s3-eu-west-1.amazonaws.com/alok/production_datasets/habranden_pen_id_100_2020-08-10_2020-08-31/ground_truth_metadata.json


Processing:habranden_pen_id_100_2020-08-10_2020-08-31


2021-01-15 15:08:14,343 INFO  Loading https://aquabyte-images-adhoc.s3-eu-west-1.amazonaws.com/alok/production_datasets/habranden_pen_id_100_2020-08-10_2020-08-31/annotation_dataset.csv
2021-01-15 15:08:16,382 INFO  estimate_weight_task data: 36357 rows
36357it [00:37, 980.07it/s] 
2021-01-15 15:09:00,648 INFO  estimate_weight_task -> sidtest3/habranden_pen_id_100_2020-08-10_2020-08-31/biomass_computations.csv


compute_filter_task 2020-08-11-2020-08-25 -> 27577 records
Performing coarse grid search...
Start hour: 0, End hour: 13, KF cutoff: 0.9
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 0.9500000000000001
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.0
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.0500000000000003
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.1
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.1500000000000004
Insufficient data to compute distribution consist

Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 13, KF cutoff: 1.1
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 13, KF cutoff: 1.1500000000000004
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 13, KF cutoff: 1.2000000000000002
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 13, KF cutoff: 1.2500000000000004
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 13, KF cutoff: 1.3000000000000003
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 13, KF cutoff: 1.3500000000000005
Insufficient data to compute distribution consistency!
I

Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 7, End hour: 13, KF cutoff: 1.2000000000000002
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 7, End hour: 13, KF cutoff: 1.2500000000000004
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 7, End hour: 13, KF cutoff: 1.3000000000000003
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 7, End hour: 13, KF cutoff: 1.3500000000000005
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 7, End hour: 13, KF cutoff: 1.4000000000000004
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 7, End hour: 13, KF cutoff: 1.450

Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 13, KF cutoff: 1.3500000000000005
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 13, KF cutoff: 1.4000000000000004
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 13, KF cutoff: 1.4500000000000006
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 14, KF cutoff: 0.9
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 14, KF cutoff: 0.9500000000000001
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 14, KF cutoff: 1.0
Insufficient data to compute distribution consistency!
Insufficient dat

Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.2049999999999992
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.209999999999999
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.214999999999999
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.2199999999999989
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.2249999999999988
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 

2021-01-15 15:09:34,749 INFO  compute_filter_task -> {'best_start_hr': 0.0, 'best_end_hr': 23.0, 'best_kf_cutoff': 1.2349999999999985}
2021-01-15 15:09:34,750 INFO  compute_filter_task -> sidtest3/habranden_pen_id_100_2020-08-10_2020-08-31/sampling_filter.json


Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
2020-08-07 2020-08-13
['2020-08-07', '2020-08-08', '2020-08-09', '2020-08-10', '2020-08-11', '2020-08-12', '2020-08-13']
2020-08-07 2020-08-13
['2020-08-07', '2020-08-08', '2020-08-09', '2020-08-10', '2020-08-11', '2020-08-12', '2020-08-13']
2020-08-07 2020-08-13
['2020-08-07', '2020-08-08', '2020-08-09', '2020-08-10', '2020-08-11', '2020-08-12', '2020-08-13']
2020-08-07 2020-08-13
['2020-08-07', '2020-08-08', '2020-08-09', '2020-08-10', '2020-08-11', '2020-08-12', '2020-08-13']
2020-08-08 2020-08-14
['2020-08-08', '2020-08-09', '2020-08-10', '2020-08-11', '2020-08-12', '2020-08-13', '2020-08-14']
2020-08-08 2020-08-14
['2020-08-08', '2020-08-09', '2020-08-10', '2020-08-11', '2020-08-12', '2020-08-13', '2020-08-14']
2020-08-08 2020-08-14
['2020-08-08', '2020-08-09', '20

2020-08-23 2020-08-29
['2020-08-23', '2020-08-24', '2020-08-25', '2020-08-26', '2020-08-27', '2020-08-28', '2020-08-29']
2020-08-24 2020-08-30
['2020-08-24', '2020-08-25', '2020-08-26', '2020-08-27', '2020-08-28', '2020-08-29', '2020-08-30']
2020-08-24 2020-08-30
['2020-08-24', '2020-08-25', '2020-08-26', '2020-08-27', '2020-08-28', '2020-08-29', '2020-08-30']
2020-08-24 2020-08-30
['2020-08-24', '2020-08-25', '2020-08-26', '2020-08-27', '2020-08-28', '2020-08-29', '2020-08-30']
2020-08-24 2020-08-30
['2020-08-24', '2020-08-25', '2020-08-26', '2020-08-27', '2020-08-28', '2020-08-29', '2020-08-30']
2020-08-25 2020-08-31
['2020-08-25', '2020-08-26', '2020-08-27', '2020-08-28', '2020-08-29', '2020-08-30', '2020-08-31']
2020-08-25 2020-08-31
['2020-08-25', '2020-08-26', '2020-08-27', '2020-08-28', '2020-08-29', '2020-08-30', '2020-08-31']
2020-08-25 2020-08-31
['2020-08-25', '2020-08-26', '2020-08-27', '2020-08-28', '2020-08-29', '2020-08-30', '2020-08-31']
2020-08-25 2020-08-31
['2020-08-

2021-01-15 15:09:38,471 INFO  generate_report_task -> /tmp/batches/sidtest3/habranden_pen_id_100_2020-08-10_2020-08-31/report.csv
2021-01-15 15:09:40,247 INFO  Loading ground_truth_metadata_url: https://aquabyte-images-adhoc.s3-eu-west-1.amazonaws.com/alok/production_datasets/kjeppevikholmen_pen_id_5_2019-06-18_2019-07-02/ground_truth_metadata.json


Processing:kjeppevikholmen_pen_id_5_2019-06-18_2019-07-02


2021-01-15 15:09:40,912 INFO  Loading https://aquabyte-images-adhoc.s3-eu-west-1.amazonaws.com/alok/production_datasets/kjeppevikholmen_pen_id_5_2019-06-18_2019-07-02/annotation_dataset.csv
2021-01-15 15:09:41,449 INFO  estimate_weight_task data: 15241 rows
15241it [00:16, 916.88it/s] 
2021-01-15 15:10:01,378 INFO  estimate_weight_task -> sidtest3/kjeppevikholmen_pen_id_5_2019-06-18_2019-07-02/biomass_computations.csv


compute_filter_task 2019-06-18-2019-07-02 -> 15241 records
Performing coarse grid search...
Start hour: 0, End hour: 13, KF cutoff: 0.9
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 0.9500000000000001
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.0
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.0500000000000003
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.1
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.1500000000000004
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.2000000000000002
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.2500000000000004
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1

Start hour: 6, End hour: 15, KF cutoff: 1.0500000000000003
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 15, KF cutoff: 1.1
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 15, KF cutoff: 1.1500000000000004
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 15, KF cutoff: 1.2000000000000002
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 15, KF cutoff: 1.2500000000000004
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 15, KF cutoff: 1.3000000000000003
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 15, KF cutoff: 1.3500000000000005
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 15, KF cutoff: 1.4000000000000004
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 15, KF cutoff: 1.4500000000000006
Insufficient data to compute distribution co

Start hour: 8, End hour: 13, KF cutoff: 1.3500000000000005
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 13, KF cutoff: 1.4000000000000004
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 13, KF cutoff: 1.4500000000000006
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 14, KF cutoff: 0.9
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 14, KF cutoff: 0.9500000000000001
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 14, KF cutoff: 1.0
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 14, KF cutoff: 1.0500000000000003
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 14, KF cutoff: 1.1
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 14, KF cutoff: 1.1500000000000004
Insufficient data to compute distribution consistency!
Start hour: 8, End 

Start hour: 0.0, End hour: 23.0, KF cutoff: 1.2099999999999969
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.2149999999999967
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.2199999999999966
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.2249999999999965
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.2299999999999964
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.2349999999999963
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.2399999999999962
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.244999999999996
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.24999999999

2021-01-15 15:10:25,787 INFO  compute_filter_task -> {'best_start_hr': 0.0, 'best_end_hr': 23.0, 'best_kf_cutoff': 1.0899999999999994}
2021-01-15 15:10:25,788 INFO  compute_filter_task -> sidtest3/kjeppevikholmen_pen_id_5_2019-06-18_2019-07-02/sampling_filter.json


Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
2019-06-15 2019-06-21
['2019-06-15', '2019-06-16', '2019-06-17', '2019-06-18', '2019-06-19', '2019-06-20', '2019-06-21']
2019-06-15 2019-06-21
['2019-06-15', '2019-06-16', '2019-06-17', '2019-06-18', '2019-06-19', '2019-06-20', '2019-06-21']
2019-06-15 2019-06-21
['2019-06-15', '2019-06-16', '2019-06-17', '2019-06-18', '2019-06-19', '2019-06-20', '2019-06-21']
2019-06-15 2019-06-21
['2019-06-15', '2019-06-16', '2019-06-17', '2019-06-18', '2019-06-19', '2019-06-20', '2019-06-21']
2019-06-16 2019-06-22
['2019-06-16', '2019-06-17', '2019-06-18', '2019-06-19', '2019-06-20', '2019-06-21', '2019-06-22']
2019-06-16 2019-06-22
['2019-06-16', '2019-06-17', '2019-06-18', '2019-06-19', '2019-06-20', '2019-06-21', '2019-06-22']
2019-06-16 2019-06-22
['2019-06-16', '2019-06-17', '2019-06-18', '2019-06-19', '2019-06-20', '2019-06-21', '2019-06-22']
2019-06-16 2019-06-22
['2019-06-16', '2019-

2021-01-15 15:10:28,410 INFO  generate_report_task -> /tmp/batches/sidtest3/kjeppevikholmen_pen_id_5_2019-06-18_2019-07-02/report.csv


2019-06-25 2019-07-01
['2019-06-25', '2019-06-26', '2019-06-27', '2019-06-28', '2019-06-29', '2019-06-30', '2019-07-01']
2019-06-25 2019-07-01
['2019-06-25', '2019-06-26', '2019-06-27', '2019-06-28', '2019-06-29', '2019-06-30', '2019-07-01']
2019-06-25 2019-07-01
['2019-06-25', '2019-06-26', '2019-06-27', '2019-06-28', '2019-06-29', '2019-06-30', '2019-07-01']
2019-06-25 2019-07-01
['2019-06-25', '2019-06-26', '2019-06-27', '2019-06-28', '2019-06-29', '2019-06-30', '2019-07-01']
2019-06-26 2019-07-02
['2019-06-26', '2019-06-27', '2019-06-28', '2019-06-29', '2019-06-30', '2019-07-01', '2019-07-02']
2019-06-26 2019-07-02
['2019-06-26', '2019-06-27', '2019-06-28', '2019-06-29', '2019-06-30', '2019-07-01', '2019-07-02']
2019-06-26 2019-07-02
['2019-06-26', '2019-06-27', '2019-06-28', '2019-06-29', '2019-06-30', '2019-07-01', '2019-07-02']
2019-06-26 2019-07-02
['2019-06-26', '2019-06-27', '2019-06-28', '2019-06-29', '2019-06-30', '2019-07-01', '2019-07-02']
2019-06-27 2019-07-03
['2019-06-

2021-01-15 15:10:29,788 INFO  Loading ground_truth_metadata_url: https://aquabyte-images-adhoc.s3-eu-west-1.amazonaws.com/alok/production_datasets/langoy_pen_id_108_2020-05-07_2020-05-17/ground_truth_metadata.json


Processing:langoy_pen_id_108_2020-05-07_2020-05-17


2021-01-15 15:10:30,526 INFO  Loading https://aquabyte-images-adhoc.s3-eu-west-1.amazonaws.com/alok/production_datasets/langoy_pen_id_108_2020-05-07_2020-05-17/annotation_dataset.csv
2021-01-15 15:10:30,703 INFO  estimate_weight_task data: 3427 rows
3427it [00:03, 1094.62it/s]
2021-01-15 15:10:34,580 INFO  estimate_weight_task -> sidtest3/langoy_pen_id_108_2020-05-07_2020-05-17/biomass_computations.csv


compute_filter_task 2020-04-27-2020-05-11 -> 2491 records
Performing coarse grid search...
Start hour: 0, End hour: 13, KF cutoff: 0.9
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 0.9500000000000001
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.0
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.0500000000000003
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.1
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.1500000000000004
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.2000000000000002
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.2500000000000004
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.

Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 15, KF cutoff: 1.3500000000000005
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 15, KF cutoff: 1.4000000000000004
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 15, KF cutoff: 1.4500000000000006
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 24, KF cutoff: 0.9
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 24, KF cutoff: 0.9500000000000001
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 24, KF cutoff: 1.0
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 24, KF cutoff: 1.0500000000000003
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 24, KF cutoff: 1.1
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 24, KF cutoff: 1.1500000000000004
Insufficient data t

Start hour: 8, End hour: 15, KF cutoff: 1.2000000000000002
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 15, KF cutoff: 1.2500000000000004
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 15, KF cutoff: 1.3000000000000003
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 15, KF cutoff: 1.3500000000000005
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 15, KF cutoff: 1.4000000000000004
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 15, KF cutoff: 1.4500000000000006
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 24, KF cutoff: 0.9
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 24, KF cutoff: 0.9500000000000001
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 24, KF cutoff: 1.0
Insufficient data to compute distribution consistency!
Star

Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 14.0, KF cutoff: 1.2849999999999984
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 14.0, KF cutoff: 1.2899999999999983
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 14.0, KF cutoff: 1.2949999999999982
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 14.0, KF cutoff: 1.299999999999998
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 14.0, KF cutoff: 1.304999999999998
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 14.0, KF cutoff: 1.3099999999999978
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 14.0, KF cutoff: 1.3149999999999977
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 14.0, KF cutoff: 1.3199999999999976
Insufficient data to compute distribution consistency!
Sta

2021-01-15 15:10:41,315 INFO  compute_filter_task -> {'best_start_hr': 0.0, 'best_end_hr': 14.0, 'best_kf_cutoff': 1.2949999999999982}
2021-01-15 15:10:41,316 INFO  compute_filter_task -> sidtest3/langoy_pen_id_108_2020-05-07_2020-05-17/sampling_filter.json


Start hour: 0.0, End hour: 14.0, KF cutoff: 1.3749999999999964
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 14.0, KF cutoff: 1.3799999999999963
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 14.0, KF cutoff: 1.3849999999999962
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 14.0, KF cutoff: 1.3899999999999961
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 14.0, KF cutoff: 1.394999999999996
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 14.0, KF cutoff: 1.399999999999996
Insufficient data to compute distribution consistency!


2021-01-15 15:10:41,897 INFO  generate_report_task -> /tmp/batches/sidtest3/langoy_pen_id_108_2020-05-07_2020-05-17/report.csv


Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
2020-05-04 2020-05-10
['2020-05-04', '2020-05-05', '2020-05-06', '2020-05-07', '2020-05-08', '2020-05-09', '2020-05-10']
2020-05-04 2020-05-10
['2020-05-04', '2020-05-05', '2020-05-06', '2020-05-07', '2020-05-08', '2020-05-09', '2020-05-10']
2020-05-04 2020-05-10
['2020-05-04', '2020-05-05', '2020-05-06', '2020-05-07', '2020-05-08', '2020-05-09', '2020-05-10']
2020-05-04 2020-05-10
['2020-05-04', '2020-05-05', '2020-05-06', '2020-05-07', '2020-05-08', '2020-05-09', '2020-05-10']
2020-05-05 2020-05-11
['2020-05-05', '2020-05-06', '2020-05-07', '2020-05-08', '2020-05-09', '2020-05-10', '2020-05-11']
2020-05-05 2020-05-11
['2020-05-05', '2020-05-06', '2020-05-07', '2020-05-08', '2020-05-09', '2020-05-10', '2020-05-11']
2020-05-05 2020-05-11
['2020-05-05', '2020-05-06', '2020-05-07', '2020-05-08', '2020-05-09', '2020-05-10', '2020-05-11']
2020-05-05 2020-05-11
['2020-05-05', '2020-

2021-01-15 15:10:42,852 INFO  Loading ground_truth_metadata_url: https://aquabyte-images-adhoc.s3-eu-west-1.amazonaws.com/alok/production_datasets/leivsethamran_pen_id_165_2020-10-18_2020-11-13/ground_truth_metadata.json


Processing:leivsethamran_pen_id_165_2020-10-18_2020-11-13


2021-01-15 15:10:43,535 INFO  Loading https://aquabyte-images-adhoc.s3-eu-west-1.amazonaws.com/alok/production_datasets/leivsethamran_pen_id_165_2020-10-18_2020-11-13/annotation_dataset.csv
2021-01-15 15:10:44,001 INFO  estimate_weight_task data: 9743 rows
9743it [00:10, 927.92it/s] 
2021-01-15 15:10:56,479 INFO  estimate_weight_task -> sidtest3/leivsethamran_pen_id_165_2020-10-18_2020-11-13/biomass_computations.csv


compute_filter_task 2020-10-15-2020-10-29 -> 9743 records
Performing coarse grid search...
Start hour: 0, End hour: 13, KF cutoff: 0.9
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 0.9500000000000001
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.0
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.0500000000000003
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.1
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.1500000000000004
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.2000000000000002
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.2500000000000004
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.

Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 14, KF cutoff: 1.4000000000000004
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 14, KF cutoff: 1.4500000000000006
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 15, KF cutoff: 0.9
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 15, KF cutoff: 0.9500000000000001
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 15, KF cutoff: 1.0
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 15, KF cutoff: 1.0500000000000003
Insufficient data to compute distribution consistency!
Insufficient dat

Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 7, End hour: 15, KF cutoff: 1.1
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 7, End hour: 15, KF cutoff: 1.1500000000000004
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 7, End hour: 15, KF cutoff: 1.2000000000000002
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 7, End hour: 15, KF cutoff: 1.2500000000000004
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 7, End hour: 15, KF cutoff: 1.3000000000000003
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 7, End hour: 15, KF cutoff: 1.3500000000000005
I

Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 15, KF cutoff: 1.2000000000000002
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 15, KF cutoff: 1.2500000000000004
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 15, KF cutoff: 1.3000000000000003
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 15, KF cutoff: 1.3500000000000005
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 15, KF cutoff: 1.4000000000000004
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 15, KF cutoff: 1.450

Start hour: 0.0, End hour: 15.0, KF cutoff: 1.06
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 15.0, KF cutoff: 1.065
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 15.0, KF cutoff: 1.0699999999999998
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 15.0, KF cutoff: 1.0749999999999997
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 15.0, KF cutoff: 1.0799999999999996
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 15.0, KF cutoff: 1.0849999999999995
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 15.0, KF cutoff: 1.0899999999999994
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 15.0, KF cutoff: 1.0949999999999993
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 15.0, KF cutoff: 1.0999999999999992
Insufficient data to

2021-01-15 15:11:16,607 INFO  compute_filter_task -> {'best_start_hr': 0.0, 'best_end_hr': 15.0, 'best_kf_cutoff': 1.1799999999999975}
2021-01-15 15:11:16,608 INFO  compute_filter_task -> sidtest3/leivsethamran_pen_id_165_2020-10-18_2020-11-13/sampling_filter.json


Start hour: 0.0, End hour: 15.0, KF cutoff: 1.2399999999999962
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 15.0, KF cutoff: 1.244999999999996
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 15.0, KF cutoff: 1.249999999999996
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
2020-10-15 2020-10-21
['2020-10-15', '2020-10-16', '2020-10-17', '2020-10-18', '2020-10-19', '2020-10-20', '2020-10-21']
2020-10-15 2020-10-21
['2020-10-15', '2020-10-16', '2020-10-17', '2020-10-18', '2020-10-19', '2020-10-20', '2020-10-21']
2020-10-15 2020-10-21
['2020-10-15', '2020-10-16', '2020-10-17', '2020-10-18', '2020-10-19', '2020-10-20', '2020-10-21']
2020-10-15 2020-10-21
['2020-10-15', '2020-10-16', '2020-10-17', '2020-10-18', '2020-10-19', '2020-10-20', '2020-10-21']
2020-10-16 2020-10-22
['2020-10-16', '2020-10-17', '20

2021-01-15 15:11:18,997 INFO  generate_report_task -> /tmp/batches/sidtest3/leivsethamran_pen_id_165_2020-10-18_2020-11-13/report.csv


2020-10-21 2020-10-27
['2020-10-21', '2020-10-22', '2020-10-23', '2020-10-24', '2020-10-25', '2020-10-26', '2020-10-27']
2020-10-21 2020-10-27
['2020-10-21', '2020-10-22', '2020-10-23', '2020-10-24', '2020-10-25', '2020-10-26', '2020-10-27']
2020-10-21 2020-10-27
['2020-10-21', '2020-10-22', '2020-10-23', '2020-10-24', '2020-10-25', '2020-10-26', '2020-10-27']
2020-10-21 2020-10-27
['2020-10-21', '2020-10-22', '2020-10-23', '2020-10-24', '2020-10-25', '2020-10-26', '2020-10-27']
2020-10-22 2020-10-28
['2020-10-22', '2020-10-23', '2020-10-24', '2020-10-25', '2020-10-26', '2020-10-27', '2020-10-28']
2020-10-22 2020-10-28
['2020-10-22', '2020-10-23', '2020-10-24', '2020-10-25', '2020-10-26', '2020-10-27', '2020-10-28']
2020-10-22 2020-10-28
['2020-10-22', '2020-10-23', '2020-10-24', '2020-10-25', '2020-10-26', '2020-10-27', '2020-10-28']
2020-10-22 2020-10-28
['2020-10-22', '2020-10-23', '2020-10-24', '2020-10-25', '2020-10-26', '2020-10-27', '2020-10-28']
2020-10-23 2020-10-29
['2020-10-

2021-01-15 15:11:20,319 INFO  Loading ground_truth_metadata_url: https://aquabyte-images-adhoc.s3-eu-west-1.amazonaws.com/alok/production_datasets/movikodden_pen_id_114_2020-11-03_2020-11-25/ground_truth_metadata.json


Processing:movikodden_pen_id_114_2020-11-03_2020-11-25


2021-01-15 15:11:20,976 INFO  Loading https://aquabyte-images-adhoc.s3-eu-west-1.amazonaws.com/alok/production_datasets/movikodden_pen_id_114_2020-11-03_2020-11-25/annotation_dataset.csv
2021-01-15 15:11:21,530 INFO  estimate_weight_task data: 16275 rows
16275it [00:17, 954.60it/s] 
2021-01-15 15:11:42,056 INFO  estimate_weight_task -> sidtest3/movikodden_pen_id_114_2020-11-03_2020-11-25/biomass_computations.csv


compute_filter_task 2020-10-31-2020-11-14 -> 12746 records
Performing coarse grid search...
Start hour: 0, End hour: 13, KF cutoff: 0.9
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 0.9500000000000001
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.0
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.0500000000000003
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.1
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.1500000000000004
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.2000000000000002
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.2500000000000004
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1

Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 15, KF cutoff: 1.2000000000000002
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 15, KF cutoff: 1.2500000000000004
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 15, KF cutoff: 1.3000000000000003
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 15, KF cutoff: 1.3500000000000005
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 15, KF cutoff: 1.4000000000000004
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 15, KF cutoff: 1.4500000000000006
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 24, KF cutoff: 0.9
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 24, KF cutoff: 0.9500000000000001
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 24, KF cutoff: 1.0
Insu

Start hour: 8, End hour: 14, KF cutoff: 1.0500000000000003
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 14, KF cutoff: 1.1
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 14, KF cutoff: 1.1500000000000004
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 14, KF cutoff: 1.2000000000000002
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 14, KF cutoff: 1.2500000000000004
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 14, KF cutoff: 1.3000000000000003
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 14, KF cutoff: 1.3500000000000005
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 14, KF cutoff: 1.4000000000000004
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 14, KF cutoff: 1.4500000000000006
Insufficient data to compute distribution co

2021-01-15 15:11:59,088 INFO  compute_filter_task -> {'best_start_hr': 0.0, 'best_end_hr': 23.0, 'best_kf_cutoff': 1.0849999999999982}
2021-01-15 15:11:59,089 INFO  compute_filter_task -> sidtest3/movikodden_pen_id_114_2020-11-03_2020-11-25/sampling_filter.json


Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
2020-10-31 2020-11-06
['2020-10-31', '2020-11-01', '2020-11-02', '2020-11-03', '2020-11-04', '2020-11-05', '2020-11-06']
2020-10-31 2020-11-06
['2020-10-31', '2020-11-01', '2020-11-02', '2020-11-03', '2020-11-04', '2020-11-05', '2020-11-06']
2020-10-31 2020-11-06
['2020-10-31', '2020-11-01', '2020-11-02', '2020-11-03', '2020-11-04', '2020-11-05', '2020-11-06']
2020-10-31 2020-11-06
['2020-10-31', '2020-11-01', '2020-11-02', '2020-11-03', '2020-11-04', '2020-11-05', '2020-11-06']
2020-11-01 2020-11-07
['2020-11-01', '2020-11-02', '2020-11-03', '2020-11-04', '2020-11-05', '2020-11-06', '2020-11-07']
2020-11-01 2020-11-07
['2020-11-01', '2020-11-02', '2020-11-03', '2020-11-04', '2020-11-05', '2020-11-06', '2020-11-07']
2020-11-01 2020-11-07
['2020-11-01', '2020-11-02', '2020-11-03', '2020-11-04', '2020-11-05', '2020-11-06', '2

2021-01-15 15:12:01,262 INFO  generate_report_task -> /tmp/batches/sidtest3/movikodden_pen_id_114_2020-11-03_2020-11-25/report.csv


2020-11-15 2020-11-21
['2020-11-15', '2020-11-16', '2020-11-17', '2020-11-18', '2020-11-19', '2020-11-20', '2020-11-21']
2020-11-15 2020-11-21
['2020-11-15', '2020-11-16', '2020-11-17', '2020-11-18', '2020-11-19', '2020-11-20', '2020-11-21']
2020-11-15 2020-11-21
['2020-11-15', '2020-11-16', '2020-11-17', '2020-11-18', '2020-11-19', '2020-11-20', '2020-11-21']
2020-11-15 2020-11-21
['2020-11-15', '2020-11-16', '2020-11-17', '2020-11-18', '2020-11-19', '2020-11-20', '2020-11-21']
2020-11-16 2020-11-22
['2020-11-16', '2020-11-17', '2020-11-18', '2020-11-19', '2020-11-20', '2020-11-21', '2020-11-22']
2020-11-16 2020-11-22
['2020-11-16', '2020-11-17', '2020-11-18', '2020-11-19', '2020-11-20', '2020-11-21', '2020-11-22']
2020-11-16 2020-11-22
['2020-11-16', '2020-11-17', '2020-11-18', '2020-11-19', '2020-11-20', '2020-11-21', '2020-11-22']
2020-11-16 2020-11-22
['2020-11-16', '2020-11-17', '2020-11-18', '2020-11-19', '2020-11-20', '2020-11-21', '2020-11-22']


2021-01-15 15:12:02,768 INFO  Loading ground_truth_metadata_url: https://aquabyte-images-adhoc.s3-eu-west-1.amazonaws.com/alok/production_datasets/movikodden_pen_id_167_2020-10-13_2020-10-30/ground_truth_metadata.json


Processing:movikodden_pen_id_167_2020-10-13_2020-10-30


2021-01-15 15:12:03,457 INFO  Loading https://aquabyte-images-adhoc.s3-eu-west-1.amazonaws.com/alok/production_datasets/movikodden_pen_id_167_2020-10-13_2020-10-30/annotation_dataset.csv
2021-01-15 15:12:03,715 INFO  estimate_weight_task data: 5428 rows
5428it [00:06, 781.63it/s] 
2021-01-15 15:12:11,807 INFO  estimate_weight_task -> sidtest3/movikodden_pen_id_167_2020-10-13_2020-10-30/biomass_computations.csv


compute_filter_task 2020-10-10-2020-10-24 -> 4309 records
Performing coarse grid search...
Start hour: 0, End hour: 13, KF cutoff: 0.9
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 0.9500000000000001
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.0
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.0500000000000003
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.1
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.1500000000000004
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.2000000000000002
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.2500000000000004
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.

Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 15, KF cutoff: 0.9500000000000001
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 15, KF cutoff: 1.0
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 15, KF cutoff: 1.0500000000000003
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 15, KF cutoff: 1.1
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 15, KF cutoff: 1.1500000000000004
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 15, KF cutoff: 1.2000000000000002
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 15, KF cutoff: 1.2500000000000004
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 15, KF cutoff: 1.3000000000000003
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 15, KF cutoff: 1.3500000000000005
Insu

Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 13, KF cutoff: 1.3000000000000003
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 13, KF cutoff: 1.3500000000000005
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 13, KF cutoff: 1.4000000000000004
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 13, KF cutoff: 1.4500000000000006
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 14, KF cutoff: 0.9
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 14, KF cutoff: 0.9500000000000001
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 14, KF cutoff: 1.0
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 14, KF cutoff: 1.0500000000000003
Insuffic

Start hour: 0.0, End hour: 23.0, KF cutoff: 1.144999999999997
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.1499999999999968
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.1549999999999967
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.1599999999999966
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.1649999999999965
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.1699999999999964
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.1749999999999963
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.1799999999999962
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.18499999999

2021-01-15 15:12:21,901 INFO  compute_filter_task -> {'best_start_hr': 0.0, 'best_end_hr': 23.0, 'best_kf_cutoff': 1.1249999999999973}
2021-01-15 15:12:21,901 INFO  compute_filter_task -> sidtest3/movikodden_pen_id_167_2020-10-13_2020-10-30/sampling_filter.json


Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.189999999999996
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.1949999999999958
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.1999999999999957
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
2020-10-14 2020-10-20
['2020-10-14', '2020-10-15', '2020-10-16', '2020-10-17', '2020-10-18', '2020-10-19', '2020-10-20']
2020-10-14 2020-10-20
['2020-10-14', '2020-10-15', '2020-10-16', '2020-10-17', '2020-10-18', '2020-10-19', '2020-10-20']
2020-10-14 2020-10-20
['2020-10-14', '2020-10-15', '2020-10-16', '2020-10-17', '2020-10-18', '2020-10-19', '2020-10-20']
2020-10-14 2020-10-20
['2020-10-14', '2020-10-15', '2020-10-16', '2020-10-17', '2020-10-18', '2020-10-19', '2020-10-20'

2021-01-15 15:12:22,609 INFO  generate_report_task -> /tmp/batches/sidtest3/movikodden_pen_id_167_2020-10-13_2020-10-30/report.csv


2020-10-20 2020-10-26
['2020-10-20', '2020-10-21', '2020-10-22', '2020-10-23', '2020-10-24', '2020-10-25', '2020-10-26']
2020-10-21 2020-10-27
['2020-10-21', '2020-10-22', '2020-10-23', '2020-10-24', '2020-10-25', '2020-10-26', '2020-10-27']
2020-10-21 2020-10-27
['2020-10-21', '2020-10-22', '2020-10-23', '2020-10-24', '2020-10-25', '2020-10-26', '2020-10-27']
2020-10-21 2020-10-27
['2020-10-21', '2020-10-22', '2020-10-23', '2020-10-24', '2020-10-25', '2020-10-26', '2020-10-27']
2020-10-21 2020-10-27
['2020-10-21', '2020-10-22', '2020-10-23', '2020-10-24', '2020-10-25', '2020-10-26', '2020-10-27']
2020-10-22 2020-10-28
['2020-10-22', '2020-10-23', '2020-10-24', '2020-10-25', '2020-10-26', '2020-10-27', '2020-10-28']
2020-10-22 2020-10-28
['2020-10-22', '2020-10-23', '2020-10-24', '2020-10-25', '2020-10-26', '2020-10-27', '2020-10-28']
2020-10-22 2020-10-28
['2020-10-22', '2020-10-23', '2020-10-24', '2020-10-25', '2020-10-26', '2020-10-27', '2020-10-28']
2020-10-22 2020-10-28
['2020-10-

2021-01-15 15:12:23,730 INFO  Loading ground_truth_metadata_url: https://aquabyte-images-adhoc.s3-eu-west-1.amazonaws.com/alok/production_datasets/seglberget_pen_id_66_2020-05-13_2020-06-13/ground_truth_metadata.json


Processing:seglberget_pen_id_66_2020-05-13_2020-06-13


2021-01-15 15:12:24,401 INFO  Loading https://aquabyte-images-adhoc.s3-eu-west-1.amazonaws.com/alok/production_datasets/seglberget_pen_id_66_2020-05-13_2020-06-13/annotation_dataset.csv
2021-01-15 15:12:24,714 INFO  estimate_weight_task data: 7442 rows
7442it [00:08, 868.39it/s] 
2021-01-15 15:12:34,843 INFO  estimate_weight_task -> sidtest3/seglberget_pen_id_66_2020-05-13_2020-06-13/biomass_computations.csv


compute_filter_task 2020-05-27-2020-06-10 -> 2557 records
Performing coarse grid search...
Start hour: 0, End hour: 13, KF cutoff: 0.9
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 0.9500000000000001
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.0
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.0500000000000003
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.1
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.1500000000000004
Insufficient data to compute distribution consiste

Start hour: 6, End hour: 13, KF cutoff: 1.2000000000000002
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 13, KF cutoff: 1.2500000000000004
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 13, KF cutoff: 1.3000000000000003
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 13, KF cutoff: 1.3500000000000005
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 13, KF cutoff: 1.4000000000000004
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 13, KF cutoff: 1.4500000000000006
Insufficient data to compute distribution

Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 7, End hour: 13, KF cutoff: 1.3000000000000003
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 7, End hour: 13, KF cutoff: 1.3500000000000005
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 7, End hour: 13, KF cutoff: 1.4000000000000004
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 7, End hour: 13, KF cutoff: 1.4500000000000006
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consist

Start hour: 8, End hour: 13, KF cutoff: 1.4500000000000006
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 14, KF cutoff: 0.9
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 14, KF cutoff: 0.9500000000000001
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 14, KF cutoff: 1.0
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 14, KF cutoff: 1.0500000000000003
Insufficient data to

Start hour: 0.0, End hour: 23.0, KF cutoff: 1.0849999999999982
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.089999999999998
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.094999999999998
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.0999999999999979
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.1049999999999978
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.1099999999999977
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.1149999999999975
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.1199999999999974
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.124999999999

2021-01-15 15:12:43,757 INFO  compute_filter_task -> {'best_start_hr': 0.0, 'best_end_hr': 23.0, 'best_kf_cutoff': 1.094999999999998}
2021-01-15 15:12:43,758 INFO  compute_filter_task -> sidtest3/seglberget_pen_id_66_2020-05-13_2020-06-13/sampling_filter.json


Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.1699999999999964
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.1749999999999963
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.1799999999999962
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.184999999999996
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.189999999999996
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.1949999999999958
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 0.0, En

2020-05-26 2020-06-01
['2020-05-26', '2020-05-27', '2020-05-28', '2020-05-29', '2020-05-30', '2020-05-31', '2020-06-01']
2020-05-26 2020-06-01
['2020-05-26', '2020-05-27', '2020-05-28', '2020-05-29', '2020-05-30', '2020-05-31', '2020-06-01']
2020-05-26 2020-06-01
['2020-05-26', '2020-05-27', '2020-05-28', '2020-05-29', '2020-05-30', '2020-05-31', '2020-06-01']
2020-05-27 2020-06-02
['2020-05-27', '2020-05-28', '2020-05-29', '2020-05-30', '2020-05-31', '2020-06-01', '2020-06-02']
2020-05-27 2020-06-02
['2020-05-27', '2020-05-28', '2020-05-29', '2020-05-30', '2020-05-31', '2020-06-01', '2020-06-02']
2020-05-27 2020-06-02
['2020-05-27', '2020-05-28', '2020-05-29', '2020-05-30', '2020-05-31', '2020-06-01', '2020-06-02']
2020-05-27 2020-06-02
['2020-05-27', '2020-05-28', '2020-05-29', '2020-05-30', '2020-05-31', '2020-06-01', '2020-06-02']
2020-05-28 2020-06-03
['2020-05-28', '2020-05-29', '2020-05-30', '2020-05-31', '2020-06-01', '2020-06-02', '2020-06-03']
2020-05-28 2020-06-03
['2020-05-

2021-01-15 15:12:44,900 INFO  generate_report_task -> /tmp/batches/sidtest3/seglberget_pen_id_66_2020-05-13_2020-06-13/report.csv


 2020-06-10
['2020-06-04', '2020-06-05', '2020-06-06', '2020-06-07', '2020-06-08', '2020-06-09', '2020-06-10']
2020-06-05 2020-06-11
['2020-06-05', '2020-06-06', '2020-06-07', '2020-06-08', '2020-06-09', '2020-06-10', '2020-06-11']
2020-06-05 2020-06-11
['2020-06-05', '2020-06-06', '2020-06-07', '2020-06-08', '2020-06-09', '2020-06-10', '2020-06-11']
2020-06-05 2020-06-11
['2020-06-05', '2020-06-06', '2020-06-07', '2020-06-08', '2020-06-09', '2020-06-10', '2020-06-11']
2020-06-05 2020-06-11
['2020-06-05', '2020-06-06', '2020-06-07', '2020-06-08', '2020-06-09', '2020-06-10', '2020-06-11']
2020-06-06 2020-06-12
['2020-06-06', '2020-06-07', '2020-06-08', '2020-06-09', '2020-06-10', '2020-06-11', '2020-06-12']
2020-06-06 2020-06-12
['2020-06-06', '2020-06-07', '2020-06-08', '2020-06-09', '2020-06-10', '2020-06-11', '2020-06-12']
2020-06-06 2020-06-12
['2020-06-06', '2020-06-07', '2020-06-08', '2020-06-09', '2020-06-10', '2020-06-11', '2020-06-12']
2020-06-06 2020-06-12
['2020-06-06', '2020

2021-01-15 15:12:46,128 INFO  Loading ground_truth_metadata_url: https://aquabyte-images-adhoc.s3-eu-west-1.amazonaws.com/alok/production_datasets/silda_pen_id_86_2020-07-02_2020-07-19/ground_truth_metadata.json


Processing:silda_pen_id_86_2020-07-02_2020-07-19


2021-01-15 15:12:46,779 INFO  Loading https://aquabyte-images-adhoc.s3-eu-west-1.amazonaws.com/alok/production_datasets/silda_pen_id_86_2020-07-02_2020-07-19/annotation_dataset.csv
2021-01-15 15:12:47,511 INFO  estimate_weight_task data: 14070 rows
14070it [00:14, 941.92it/s] 
2021-01-15 15:13:05,203 INFO  estimate_weight_task -> sidtest3/silda_pen_id_86_2020-07-02_2020-07-19/biomass_computations.csv


compute_filter_task 2020-07-02-2020-07-16 -> 14070 records
Performing coarse grid search...
Start hour: 0, End hour: 13, KF cutoff: 0.9500000000000001
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.0
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.0499999999999998
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.0999999999999999
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.15
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.1999999999999997
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.2499999999999996
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, 

Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 14, KF cutoff: 1.15
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 14, KF cutoff: 1.1999999999999997
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 14, KF cutoff: 1.2499999999999996
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 14, KF cutoff: 1.2999999999999996
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 14, KF cutoff: 1.3499999999999996
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 14, KF cutoff: 1.3999999999999995


Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 7, End hour: 15, KF cutoff: 1.0
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 7, End hour: 15, KF cutoff: 1.0499999999999998
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 7, End hour: 15, KF cutoff: 1.0999999999999999
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 7, End hour: 15, KF cutoff: 1.15
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 7, End hour: 15, KF cutoff: 1.1999999999999997
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 7, End hour: 15, KF cutoff: 1.2499999999999996
Insufficient da

Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 15, KF cutoff: 1.3499999999999996
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 15, KF cutoff: 1.3999999999999995
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 15, KF cutoff: 1.4499999999999993
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 24, KF cutoff: 0.9500000000000001
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 24, KF cutoff: 1.0
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 24, KF cutoff: 1.0499999999999998
Insufficient data to compute distribution consistency!
I

2021-01-15 15:13:24,582 INFO  compute_filter_task -> {'best_start_hr': 0.0, 'best_end_hr': 23.0, 'best_kf_cutoff': 1.1599999999999975}
2021-01-15 15:13:24,583 INFO  compute_filter_task -> sidtest3/silda_pen_id_86_2020-07-02_2020-07-19/sampling_filter.json


Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
2020-06-29 2020-07-05
['2020-06-29', '2020-06-30', '2020-07-01', '2020-07-02', '2020-07-03', '2020-07-04', '2020-07-05']
2020-06-29 2020-07-05
['2020-06-29', '2020-06-30', '2020-07-01', '2020-07-02', '2020-07-03', '2020-07-04', '2020-07-05']
2020-06-29 2020-07-05
['2020-06-29', '2020-06-30', '2020-07-01', '2020-07-02', '2020-07-03', '2020-07-04', '2020-07-05']
2020-06-29 2020-07-05
['2020-06-29', '2020-06-30', '2020-07-01', '2020-07-02', '2020-07-03', '2020-07-04', '2020-07-05']
2020-06-30 2020-07-06
['2020-06-30', '2020-07-01', '2020-07-02', '2020-07-03', '2020-07-04', '2020-07-05', '2020-07-06']
2020-06-30 2020-07-06
['2020-06-30', '2020-07-01', '2020-07-02', '2020-07-03', '2020-07-04', '2020-07-05', '2020-07-06']
2020-06-30 2020-07-06
['2020-06-30', '2020-07-01', '2020-07-02', '2020-07-03', '2020-07-04', '2020-07-05', '2

2021-01-15 15:13:26,837 INFO  generate_report_task -> /tmp/batches/sidtest3/silda_pen_id_86_2020-07-02_2020-07-19/report.csv


2020-07-07 2020-07-13
['2020-07-07', '2020-07-08', '2020-07-09', '2020-07-10', '2020-07-11', '2020-07-12', '2020-07-13']
2020-07-07 2020-07-13
['2020-07-07', '2020-07-08', '2020-07-09', '2020-07-10', '2020-07-11', '2020-07-12', '2020-07-13']
2020-07-07 2020-07-13
['2020-07-07', '2020-07-08', '2020-07-09', '2020-07-10', '2020-07-11', '2020-07-12', '2020-07-13']
2020-07-07 2020-07-13
['2020-07-07', '2020-07-08', '2020-07-09', '2020-07-10', '2020-07-11', '2020-07-12', '2020-07-13']
2020-07-08 2020-07-14
['2020-07-08', '2020-07-09', '2020-07-10', '2020-07-11', '2020-07-12', '2020-07-13', '2020-07-14']
2020-07-08 2020-07-14
['2020-07-08', '2020-07-09', '2020-07-10', '2020-07-11', '2020-07-12', '2020-07-13', '2020-07-14']
2020-07-08 2020-07-14
['2020-07-08', '2020-07-09', '2020-07-10', '2020-07-11', '2020-07-12', '2020-07-13', '2020-07-14']
2020-07-08 2020-07-14
['2020-07-08', '2020-07-09', '2020-07-10', '2020-07-11', '2020-07-12', '2020-07-13', '2020-07-14']
2020-07-09 2020-07-15
['2020-07-

2021-01-15 15:13:28,192 INFO  Loading ground_truth_metadata_url: https://aquabyte-images-adhoc.s3-eu-west-1.amazonaws.com/alok/production_datasets/slapoya_pen_id_116_2020-10-18_2020-11-08/ground_truth_metadata.json


Processing:slapoya_pen_id_116_2020-10-18_2020-11-08


2021-01-15 15:13:28,927 INFO  Loading https://aquabyte-images-adhoc.s3-eu-west-1.amazonaws.com/alok/production_datasets/slapoya_pen_id_116_2020-10-18_2020-11-08/annotation_dataset.csv
2021-01-15 15:13:29,047 INFO  estimate_weight_task data: 1580 rows
1580it [00:01, 1096.93it/s]
2021-01-15 15:13:30,955 INFO  estimate_weight_task -> sidtest3/slapoya_pen_id_116_2020-10-18_2020-11-08/biomass_computations.csv


compute_filter_task 2020-10-15-2020-10-29 -> 1580 records
Performing coarse grid search...
Start hour: 0, End hour: 13, KF cutoff: 0.9500000000000001
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.0
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.0499999999999998
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.0999999999999999
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.15
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.1999999999999997
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.2499999999999996
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.2999999999999996
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 1

Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 24, KF cutoff: 1.0499999999999998
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 24, KF cutoff: 1.0999999999999999
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 24, KF cutoff: 1.15
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 24, KF cutoff: 1.1999999999999997
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 24, KF cutoff: 1.2499999999999996
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 24, KF cutoff: 1.2999999999999996
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 24, KF cutoff: 1.3499999999999996
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 24, KF cutoff: 1.3999999999999995
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 24, KF cutoff: 1.44999

Start hour: 8, End hour: 15, KF cutoff: 1.0999999999999999
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 15, KF cutoff: 1.15
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 15, KF cutoff: 1.1999999999999997
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 15, KF cutoff: 1.2499999999999996
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 15, KF cutoff: 1.2999999999999996
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 15, KF cutoff: 1.3499999999999996
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 15, KF cutoff: 1.3999999999999995
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 15, KF cutoff: 1.4499999999999993
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 24, KF cutoff: 0.9500000000000001
Insufficient data to compute distribution c

Insufficient data to compute distribution consistency!
Start hour: 5.0, End hour: 13.0, KF cutoff: 1.1299999999999981
Insufficient data to compute distribution consistency!
Start hour: 5.0, End hour: 13.0, KF cutoff: 1.134999999999998
Insufficient data to compute distribution consistency!
Start hour: 5.0, End hour: 13.0, KF cutoff: 1.139999999999998
Insufficient data to compute distribution consistency!
Start hour: 5.0, End hour: 13.0, KF cutoff: 1.1449999999999978
Insufficient data to compute distribution consistency!
Start hour: 5.0, End hour: 13.0, KF cutoff: 1.1499999999999977
Insufficient data to compute distribution consistency!
Start hour: 5.0, End hour: 13.0, KF cutoff: 1.1549999999999976
Insufficient data to compute distribution consistency!
Start hour: 5.0, End hour: 13.0, KF cutoff: 1.1599999999999975
Insufficient data to compute distribution consistency!
Start hour: 5.0, End hour: 13.0, KF cutoff: 1.1649999999999974
Insufficient data to compute distribution consistency!
Sta

Insufficient data to compute distribution consistency!
Start hour: 6.0, End hour: 13.0, KF cutoff: 1.089999999999999
Insufficient data to compute distribution consistency!
Start hour: 6.0, End hour: 13.0, KF cutoff: 1.0949999999999989
Insufficient data to compute distribution consistency!
Start hour: 6.0, End hour: 13.0, KF cutoff: 1.0999999999999988
Insufficient data to compute distribution consistency!
Start hour: 6.0, End hour: 13.0, KF cutoff: 1.1049999999999986
Insufficient data to compute distribution consistency!
Start hour: 6.0, End hour: 13.0, KF cutoff: 1.1099999999999985
Insufficient data to compute distribution consistency!
Start hour: 6.0, End hour: 13.0, KF cutoff: 1.1149999999999984
Insufficient data to compute distribution consistency!
Start hour: 6.0, End hour: 13.0, KF cutoff: 1.1199999999999983
Insufficient data to compute distribution consistency!
Start hour: 6.0, End hour: 13.0, KF cutoff: 1.1249999999999982
Insufficient data to compute distribution consistency!
St

2021-01-15 15:13:36,260 INFO  compute_filter_task -> {'best_start_hr': 5.0, 'best_end_hr': 13.0, 'best_kf_cutoff': 1.139999999999998}
2021-01-15 15:13:36,261 INFO  compute_filter_task -> sidtest3/slapoya_pen_id_116_2020-10-18_2020-11-08/sampling_filter.json


Insufficient data to compute distribution consistency!
Start hour: 6.0, End hour: 13.0, KF cutoff: 1.1749999999999972
Insufficient data to compute distribution consistency!
Start hour: 6.0, End hour: 13.0, KF cutoff: 1.179999999999997
Insufficient data to compute distribution consistency!
Start hour: 6.0, End hour: 13.0, KF cutoff: 1.184999999999997
Insufficient data to compute distribution consistency!
Start hour: 6.0, End hour: 13.0, KF cutoff: 1.1899999999999968
Insufficient data to compute distribution consistency!
Start hour: 6.0, End hour: 13.0, KF cutoff: 1.1949999999999967
Insufficient data to compute distribution consistency!
Start hour: 6.0, End hour: 13.0, KF cutoff: 1.1999999999999966
Insufficient data to compute distribution consistency!
Start hour: 6.0, End hour: 13.0, KF cutoff: 1.2049999999999965
Insufficient data to compute distribution consistency!
Start hour: 6.0, End hour: 13.0, KF cutoff: 1.2099999999999964
Insufficient data to compute distribution consistency!
Sta

2021-01-15 15:13:36,722 INFO  generate_report_task -> /tmp/batches/sidtest3/slapoya_pen_id_116_2020-10-18_2020-11-08/report.csv


Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
2020-10-19 2020-10-25
['2020-10-19', '2020-10-20', '2020-10-21', '2020-10-22', '2020-10-23', '2020-10-24', '2020-10-25']
2020-10-19 2020-10-25
['2020-10-19', '2020-10-20', '2020-10-21', '2020-10-22', '2020-10-23', '2020-10-24', '2020-10-25']
2020-10-19 2020-10-25
['2020-10-19', '2020-10-20', '2020-10-21', '2020-10-22', '2020-10-23', '2020-10-24', '2020-10-25']
2020-10-19 2020-10-25
['2020-10-19', '2020-10-20', '2020-10-21', '2020-10-22', '2020-10-23', '2020-10-24', '2020-10-25']
2020-10-20 2020-10-26
['2020-10-20', '2020-10-21', '2020-10-22', '2020-10-23', '2020-10-24', '2020-10-25', '2020-10-26']
2020-10-20 2020-10-26
['2020-10-20', '2020-10-21', '2020-10-22', '2020-10-23', '2020-10-24', '2020-10-25', '2020-10-26']
2020-10-20 2020-10-26
['2020-10-20', '2020-10-21', '2020-10-22', '2020-10-23', '2020-10-24', '2020-10-25', '2020-10-26']
2020-10-20 2020-10-26
['2020-10-20', '2020-

2021-01-15 15:13:37,533 INFO  Loading ground_truth_metadata_url: https://aquabyte-images-adhoc.s3-eu-west-1.amazonaws.com/alok/production_datasets/tittelsnes_pen_id_37_2020-06-10_2020-06-24/ground_truth_metadata.json


Processing:tittelsnes_pen_id_37_2020-06-10_2020-06-24


2021-01-15 15:13:38,188 INFO  Loading https://aquabyte-images-adhoc.s3-eu-west-1.amazonaws.com/alok/production_datasets/tittelsnes_pen_id_37_2020-06-10_2020-06-24/annotation_dataset.csv
2021-01-15 15:13:38,512 INFO  estimate_weight_task data: 6988 rows
6988it [00:08, 858.14it/s] 
2021-01-15 15:13:47,986 INFO  estimate_weight_task -> sidtest3/tittelsnes_pen_id_37_2020-06-10_2020-06-24/biomass_computations.csv


compute_filter_task 2020-06-04-2020-06-18 -> 3242 records
Performing coarse grid search...
Start hour: 0, End hour: 13, KF cutoff: 0.8500000000000001
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 0.9000000000000001
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 0.9500000000000002
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.0000000000000002
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.0500000000000003
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.1000000000000003
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.1500000000000004
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.2000000000000004
Insufficient data to compute distribution consistenc

Start hour: 6, End hour: 15, KF cutoff: 1.0500000000000003
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 15, KF cutoff: 1.1000000000000003
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 15, KF cutoff: 1.1500000000000004
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 15, KF cutoff: 1.2000000000000004
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 15, KF cutoff: 1.2500000000000004
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 15, KF cutoff: 1.3000000000000005
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 15, KF cutoff: 1.3500000000000005
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 15, KF cutoff: 1.4000000000000006
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 15, KF cutoff: 1.4500000000000006
Insufficient data to compute 

Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 13, KF cutoff: 1.3500000000000005
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 13, KF cutoff: 1.4000000000000006
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 13, KF cutoff: 1.4500000000000006
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 14, KF cutoff: 0.8500000000000001
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 14, KF cutoff: 0.9000000000000001
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 14, KF cutoff: 0.9500000000000002
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 14, KF cutoff: 1.0000000000000002
Insufficient data to compute distribu

2021-01-15 15:13:58,548 INFO  compute_filter_task -> {'best_start_hr': 0.0, 'best_end_hr': 23.0, 'best_kf_cutoff': 1.0649999999999988}
2021-01-15 15:13:58,549 INFO  compute_filter_task -> sidtest3/tittelsnes_pen_id_37_2020-06-10_2020-06-24/sampling_filter.json


Start hour: 0.0, End hour: 23.0, KF cutoff: 1.1699999999999966
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.1749999999999965
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.1799999999999964
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.1849999999999963
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.1899999999999962
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.194999999999996
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.199999999999996
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
2020-06-07 2020-06-13
['2020-06-07', '2020-06-08', '2020-06-09', '

2021-01-15 15:13:59,302 INFO  generate_report_task -> /tmp/batches/sidtest3/tittelsnes_pen_id_37_2020-06-10_2020-06-24/report.csv


2020-06-15 2020-06-21
['2020-06-15', '2020-06-16', '2020-06-17', '2020-06-18', '2020-06-19', '2020-06-20', '2020-06-21']
2020-06-15 2020-06-21
['2020-06-15', '2020-06-16', '2020-06-17', '2020-06-18', '2020-06-19', '2020-06-20', '2020-06-21']
2020-06-15 2020-06-21
['2020-06-15', '2020-06-16', '2020-06-17', '2020-06-18', '2020-06-19', '2020-06-20', '2020-06-21']
2020-06-15 2020-06-21
['2020-06-15', '2020-06-16', '2020-06-17', '2020-06-18', '2020-06-19', '2020-06-20', '2020-06-21']
2020-06-16 2020-06-22
['2020-06-16', '2020-06-17', '2020-06-18', '2020-06-19', '2020-06-20', '2020-06-21', '2020-06-22']
2020-06-16 2020-06-22
['2020-06-16', '2020-06-17', '2020-06-18', '2020-06-19', '2020-06-20', '2020-06-21', '2020-06-22']
2020-06-16 2020-06-22
['2020-06-16', '2020-06-17', '2020-06-18', '2020-06-19', '2020-06-20', '2020-06-21', '2020-06-22']
2020-06-16 2020-06-22
['2020-06-16', '2020-06-17', '2020-06-18', '2020-06-19', '2020-06-20', '2020-06-21', '2020-06-22']
2020-06-17 2020-06-23
['2020-06-

2021-01-15 15:14:00,419 INFO  Loading ground_truth_metadata_url: https://aquabyte-images-adhoc.s3-eu-west-1.amazonaws.com/alok/production_datasets/varholmen_pen_id_131_2020-08-15_2020-08-30/ground_truth_metadata.json


Processing:varholmen_pen_id_131_2020-08-15_2020-08-30


2021-01-15 15:14:01,075 INFO  Loading https://aquabyte-images-adhoc.s3-eu-west-1.amazonaws.com/alok/production_datasets/varholmen_pen_id_131_2020-08-15_2020-08-30/annotation_dataset.csv
2021-01-15 15:14:02,208 INFO  estimate_weight_task data: 21411 rows
21411it [00:22, 946.81it/s] 
2021-01-15 15:14:29,146 INFO  estimate_weight_task -> sidtest3/varholmen_pen_id_131_2020-08-15_2020-08-30/biomass_computations.csv


compute_filter_task 2020-08-12-2020-08-26 -> 16274 records
Performing coarse grid search...
Start hour: 0, End hour: 13, KF cutoff: 0.9500000000000001
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.0
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.0499999999999998
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.0999999999999999
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.15
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.1999999999999997
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.2499999999999996
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.2999999999999996
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 

Start hour: 6, End hour: 15, KF cutoff: 1.2999999999999996
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 15, KF cutoff: 1.3499999999999996
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 15, KF cutoff: 1.3999999999999995
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 15, KF cutoff: 1.4499999999999993
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 24, KF cutoff: 0.9500000000000001
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 24, KF cutoff: 1.0
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 24, KF cutoff: 1.0499999999999998
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 24, KF cutoff: 1.0999999999999999
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 24, KF cutoff: 1.15
Insufficient data to compute distribution consistency!
Sta

Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 14, KF cutoff: 1.1999999999999997
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 14, KF cutoff: 1.2499999999999996
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 14, KF cutoff: 1.2999999999999996
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 14, KF cutoff: 1.3499999999999996
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 14, KF cutoff: 1.3999999999999995
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 14, KF cutoff: 1.4499999999999993
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 15, KF cutoff: 0.9500000000000001
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 15, KF cutoff: 1.0
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 15, KF cutoff: 1.049999

2021-01-15 15:14:55,604 INFO  compute_filter_task -> {'best_start_hr': 0.0, 'best_end_hr': 23.0, 'best_kf_cutoff': 1.1449999999999978}
2021-01-15 15:14:55,605 INFO  compute_filter_task -> sidtest3/varholmen_pen_id_131_2020-08-15_2020-08-30/sampling_filter.json


Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
2020-08-12 2020-08-18
['2020-08-12', '2020-08-13', '2020-08-14', '2020-08-15', '2020-08-16', '2020-08-17', '2020-08-18']
2020-08-12 2020-08-18
['2020-08-12', '2020-08-13', '2020-08-14', '2020-08-15', '2020-08-16', '2020-08-17', '2020-08-18']
2020-08-12 2020-08-18
['2020-08-12', '2020-08-13', '2020-08-14', '2020-08-15', '2020-08-16', '2020-08-17', '2020-08-18']
2020-08-12 2020-08-18
['2020-08-12', '2020-08-13', '2020-08-14', '2020-08-15', '2020-08-16', '2020-08-17', '2020-08-18']
2020-08-13 2020-08-19
['2020-08-13', '2020-08-14', '2020-08-15', '2020-08-16', '2020-08-17', '2020-08-18', '2020-08-19']
2020-08-13 2020-08-19
['2020-08-13', '2020-08-14', '2020-08-15', '2020-08-16', '2020-08-17', '2020-08-18', '2020-08-19']
2020-08-13 2020-08-19
['2020-08-13', '2020-08-14', '2020-08-15', '2020-08-16', '2020-08-17', '2020-08-18', '2020-08-19']
2020-08-13 2020-08-19
['2020-08-13', '2020-

2021-01-15 15:14:58,576 INFO  generate_report_task -> /tmp/batches/sidtest3/varholmen_pen_id_131_2020-08-15_2020-08-30/report.csv
2021-01-15 15:15:00,057 INFO  Loading ground_truth_metadata_url: https://aquabyte-images-adhoc.s3-eu-west-1.amazonaws.com/alok/production_datasets/varholmen_pen_id_151_2020-10-02_2020-10-17/ground_truth_metadata.json


Processing:varholmen_pen_id_151_2020-10-02_2020-10-17


2021-01-15 15:15:00,730 INFO  Loading https://aquabyte-images-adhoc.s3-eu-west-1.amazonaws.com/alok/production_datasets/varholmen_pen_id_151_2020-10-02_2020-10-17/annotation_dataset.csv
2021-01-15 15:15:01,406 INFO  estimate_weight_task data: 13475 rows
13475it [00:15, 872.24it/s] 
2021-01-15 15:15:19,594 INFO  estimate_weight_task -> sidtest3/varholmen_pen_id_151_2020-10-02_2020-10-17/biomass_computations.csv


compute_filter_task 2020-09-29-2020-10-13 -> 12075 records
Performing coarse grid search...
Start hour: 0, End hour: 13, KF cutoff: 0.8500000000000001
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 0.9000000000000001
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 0.9500000000000002
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.0000000000000002
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.0500000000000003
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.1000000000000003
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.1500000000000004
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.2000000000000004
Insufficient data to compute distribution consisten

Start hour: 6, End hour: 14, KF cutoff: 1.1000000000000003
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 14, KF cutoff: 1.1500000000000004
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 14, KF cutoff: 1.2000000000000004
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 14, KF cutoff: 1.2500000000000004
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 14, KF cutoff: 1.3000000000000005
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 14, KF cutoff: 1.3500000000000005
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 14, KF cutoff: 1.4000000000000006
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 14, KF cutoff: 1.4500000000000006
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 15, KF c

Start hour: 7, End hour: 15, KF cutoff: 1.4000000000000006
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 7, End hour: 15, KF cutoff: 1.4500000000000006
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 7, End hour: 24, KF cutoff: 0.8500000000000001
Insufficient data to compute distribution consistency!
Start hour: 7, End hour: 24, KF cutoff: 0.9000000000000001
Insufficient data to compute distribution consistency!
Start hour: 7, End hour: 24, KF cutoff: 0.9500000000000002
Insufficient data to compute distribution consistency!
Start hour: 7, End hour: 24, KF cutoff: 1.0000000000000002
Insufficient data to compute distribution consistency!
Start hour: 7, End hour: 24, KF cutoff: 1.0500000000000003
Insufficient data to compute distribution consistency!
Start hour: 7, End hour: 24, KF cutoff: 1.1000000000000003
Insufficient data to compute dist

Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.06
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.065
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.0699999999999998
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.0749999999999997
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.0799999999999996
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.0849999999999995
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.0899999999999994
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.0949999999999993
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 2

2021-01-15 15:15:37,998 INFO  compute_filter_task -> {'best_start_hr': 0.0, 'best_end_hr': 23.0, 'best_kf_cutoff': 1.159999999999998}
2021-01-15 15:15:37,999 INFO  compute_filter_task -> sidtest3/varholmen_pen_id_151_2020-10-02_2020-10-17/sampling_filter.json


Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
2020-09-29 2020-10-05
['2020-09-29', '2020-09-30', '2020-10-01', '2020-10-02', '2020-10-03', '2020-10-04', '2020-10-05']
2020-09-29 2020-10-05
['2020-09-29', '2020-09-30', '2020-10-01', '2020-10-02', '2020-10-03', '2020-10-04', '2020-10-05']
2020-09-29 2020-10-05
['2020-09-29', '2020-09-30', '2020-10-01', '2020-10-02', '2020-10-03', '2020-10-04', '2020-10-05']
2020-09-29 2020-10-05
['2020-09-29', '2020-09-30', '2020-10-01', '2020-10-02', '2020-10-03', '2020-10-04', '2020-10-05']
2020-09-30 2020-10-06
['2020-09-30', '2020-10-01', '2020-10-02', '2020-10-03', '2020-10-04', '2020-10-05', '2020-10-06']
2020-09-30 2020-10-06
['2020-09-30', '2020-10-01', '2020-10-02', '2020-10-03', '2020-10-04', '2020-10-05', '2020-10-06']
2020-09-30 2020-10-06
['2020-09-30', '2020-10-01', '2020-10-02', '2020-10-03', '2020-10-04', '2020-10-05', '2020-10-06']
2020-09-30 2020-10-06
['2020-09-30', '2020-

2021-01-15 15:15:40,701 INFO  generate_report_task -> /tmp/batches/sidtest3/varholmen_pen_id_151_2020-10-02_2020-10-17/report.csv


2020-10-10 2020-10-16
['2020-10-10', '2020-10-11', '2020-10-12', '2020-10-13', '2020-10-14', '2020-10-15', '2020-10-16']
2020-10-10 2020-10-16
['2020-10-10', '2020-10-11', '2020-10-12', '2020-10-13', '2020-10-14', '2020-10-15', '2020-10-16']
2020-10-10 2020-10-16
['2020-10-10', '2020-10-11', '2020-10-12', '2020-10-13', '2020-10-14', '2020-10-15', '2020-10-16']
2020-10-10 2020-10-16
['2020-10-10', '2020-10-11', '2020-10-12', '2020-10-13', '2020-10-14', '2020-10-15', '2020-10-16']
2020-10-11 2020-10-17
['2020-10-11', '2020-10-12', '2020-10-13', '2020-10-14', '2020-10-15', '2020-10-16', '2020-10-17']
2020-10-11 2020-10-17
['2020-10-11', '2020-10-12', '2020-10-13', '2020-10-14', '2020-10-15', '2020-10-16', '2020-10-17']
2020-10-11 2020-10-17
['2020-10-11', '2020-10-12', '2020-10-13', '2020-10-14', '2020-10-15', '2020-10-16', '2020-10-17']
2020-10-11 2020-10-17
['2020-10-11', '2020-10-12', '2020-10-13', '2020-10-14', '2020-10-15', '2020-10-16', '2020-10-17']
2020-10-12 2020-10-18
['2020-10-

2021-01-15 15:15:42,148 INFO  Loading ground_truth_metadata_url: https://aquabyte-images-adhoc.s3-eu-west-1.amazonaws.com/alok/production_datasets/varholmen_pen_id_186_2020-10-18_2020-11-02/ground_truth_metadata.json


Processing:varholmen_pen_id_186_2020-10-18_2020-11-02


2021-01-15 15:15:42,799 INFO  Loading https://aquabyte-images-adhoc.s3-eu-west-1.amazonaws.com/alok/production_datasets/varholmen_pen_id_186_2020-10-18_2020-11-02/annotation_dataset.csv
2021-01-15 15:15:43,401 INFO  estimate_weight_task data: 13343 rows
13343it [00:13, 984.94it/s] 
2021-01-15 15:15:59,819 INFO  estimate_weight_task -> sidtest3/varholmen_pen_id_186_2020-10-18_2020-11-02/biomass_computations.csv


compute_filter_task 2020-10-15-2020-10-29 -> 13343 records
Performing coarse grid search...
Start hour: 0, End hour: 13, KF cutoff: 0.8
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 0.8500000000000001
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 0.9000000000000001
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 0.9500000000000002
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.0000000000000002
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.0500000000000003
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.1000000000000003
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.1500000000000004
Insufficient data to compute distribution consistency!
Start hour:

Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 14, KF cutoff: 0.9000000000000001
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 14, KF cutoff: 0.9500000000000002
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 14, KF cutoff: 1.0000000000000002
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 14, KF cutoff: 1.0500000000000003
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 14, KF cutoff: 1.1000000000000003
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 14, KF cutoff: 1.1500000000000004
Insufficient data to compute distribution

Insufficient data to compute distribution consistency!
Start hour: 7, End hour: 13, KF cutoff: 1.3000000000000005
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 7, End hour: 13, KF cutoff: 1.3500000000000005
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 7, End hour: 13, KF cutoff: 1.4000000000000006
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 7, End hour: 13, KF cutoff: 1.4500000000000006
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 7, End hour: 14, KF cutoff: 0.8
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 7, End hour: 14, KF cutoff: 0.8500000000000001
Insufficient data to compute distribution consistency!
I

Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 13, KF cutoff: 1.1500000000000004
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 13, KF cutoff: 1.2000000000000004
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 13, KF cutoff: 1.2500000000000004
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 13, KF cutoff: 1.3000000000000005
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 13, KF cutoff: 1.3500000000000005
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 13, KF cutoff: 1.4000000000000006
Insufficient data to compute distribution

Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.165
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.17
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.1749999999999998
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.1799999999999997
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.1849999999999996
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.1899999999999995
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.1949999999999994
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.1999999999999993
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 2

2021-01-15 15:16:22,038 INFO  compute_filter_task -> {'best_start_hr': 0.0, 'best_end_hr': 23.0, 'best_kf_cutoff': 1.2449999999999983}
2021-01-15 15:16:22,039 INFO  compute_filter_task -> sidtest3/varholmen_pen_id_186_2020-10-18_2020-11-02/sampling_filter.json


Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.349999999999996
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
2020-10-17 2020-10-23
['2020-10-17', '2020-10-18', '2020-10-19', '2020-10-20', '2020-10-21', '2020-10-22', '2020-10-23']
2020-10-17 2020-10-23
['2020-10-17', '2020-10-18', '2020-10-19', '2020-10-20', '2020-10-21', '2020-10-22', '2020-10-23']
2020-10-17 2020-10-23
['2020-10-17', '2020-10-18', '2020-10-19', '2020-10-20', '2020-10-21', '2020-10-22', '2020-10-23']
2020-10-17 2020-10-23
['2020-10-17', '2020-10-18', '2020-10-19', '2020-10-20', '2020-10-21', '2020-10-22', '2020-10-23']
2020-10-18 2020-10-24
['2020-10-18', '2020-10-19', '2020-10-20', '2020-10-21', '2020-10-22', '2020-10-23', '2020-10-24']
2020-10-18 2020-10-24
['2020-10-18', '2020-10-19', '2020-10-20', '2020-10-21', '2020-10-22', '2020-10-23', '2020-

2021-01-15 15:16:24,816 INFO  generate_report_task -> /tmp/batches/sidtest3/varholmen_pen_id_186_2020-10-18_2020-11-02/report.csv


2020-10-22 2020-10-28
['2020-10-22', '2020-10-23', '2020-10-24', '2020-10-25', '2020-10-26', '2020-10-27', '2020-10-28']
2020-10-22 2020-10-28
['2020-10-22', '2020-10-23', '2020-10-24', '2020-10-25', '2020-10-26', '2020-10-27', '2020-10-28']
2020-10-22 2020-10-28
['2020-10-22', '2020-10-23', '2020-10-24', '2020-10-25', '2020-10-26', '2020-10-27', '2020-10-28']
2020-10-22 2020-10-28
['2020-10-22', '2020-10-23', '2020-10-24', '2020-10-25', '2020-10-26', '2020-10-27', '2020-10-28']
2020-10-23 2020-10-29
['2020-10-23', '2020-10-24', '2020-10-25', '2020-10-26', '2020-10-27', '2020-10-28', '2020-10-29']
2020-10-23 2020-10-29
['2020-10-23', '2020-10-24', '2020-10-25', '2020-10-26', '2020-10-27', '2020-10-28', '2020-10-29']
2020-10-23 2020-10-29
['2020-10-23', '2020-10-24', '2020-10-25', '2020-10-26', '2020-10-27', '2020-10-28', '2020-10-29']
2020-10-23 2020-10-29
['2020-10-23', '2020-10-24', '2020-10-25', '2020-10-26', '2020-10-27', '2020-10-28', '2020-10-29']
2020-10-24 2020-10-30
['2020-10-

2021-01-15 15:16:26,115 INFO  Loading ground_truth_metadata_url: https://aquabyte-images-adhoc.s3-eu-west-1.amazonaws.com/alok/production_datasets/vikane_pen_id_60_2020-08-10_2020-08-30/ground_truth_metadata.json


Processing:vikane_pen_id_60_2020-08-10_2020-08-30


2021-01-15 15:16:26,804 INFO  Loading https://aquabyte-images-adhoc.s3-eu-west-1.amazonaws.com/alok/production_datasets/vikane_pen_id_60_2020-08-10_2020-08-30/annotation_dataset.csv
2021-01-15 15:16:27,896 INFO  estimate_weight_task data: 25274 rows
25274it [00:26, 959.89it/s] 
2021-01-15 15:16:59,459 INFO  estimate_weight_task -> sidtest3/vikane_pen_id_60_2020-08-10_2020-08-30/biomass_computations.csv


compute_filter_task 2020-08-12-2020-08-26 -> 22123 records
Performing coarse grid search...
Start hour: 0, End hour: 13, KF cutoff: 0.8500000000000001
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 0.9000000000000001
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 0.9500000000000002
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.0000000000000002
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.0500000000000003
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.1000000000000003
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.1500000000000004
Insufficient data to compute distribution consistency!
Start hour: 0, End hour: 13, KF cutoff: 1.2000000000000004
Insufficient data to compute distribution consisten

Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 14, KF cutoff: 1.3500000000000005
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 14, KF cutoff: 1.4000000000000006
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 14, KF cutoff: 1.4500000000000006
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 15, KF cutoff: 0.8500000000000001
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 15, KF cutoff: 0.9000000000000001
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 15, KF cutoff: 0.9500000000000002
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 15, KF cutoff: 1.0000000000000002
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 15, KF cutoff: 1.0500000000000003
Insufficient data to compute distribution consistency!
Start hour: 6, End hour: 15, KF c

Start hour: 7, End hour: 24, KF cutoff: 1.1000000000000003
Insufficient data to compute distribution consistency!
Start hour: 7, End hour: 24, KF cutoff: 1.1500000000000004
Insufficient data to compute distribution consistency!
Start hour: 7, End hour: 24, KF cutoff: 1.2000000000000004
Insufficient data to compute distribution consistency!
Start hour: 7, End hour: 24, KF cutoff: 1.2500000000000004
Insufficient data to compute distribution consistency!
Start hour: 7, End hour: 24, KF cutoff: 1.3000000000000005
Insufficient data to compute distribution consistency!
Start hour: 7, End hour: 24, KF cutoff: 1.3500000000000005
Insufficient data to compute distribution consistency!
Start hour: 7, End hour: 24, KF cutoff: 1.4000000000000006
Insufficient data to compute distribution consistency!
Start hour: 7, End hour: 24, KF cutoff: 1.4500000000000006
Insufficient data to compute distribution consistency!
Start hour: 8, End hour: 13, KF cutoff: 0.8500000000000001
Insufficient data to compute 

Start hour: 0.0, End hour: 23.0, KF cutoff: 1.109999999999999
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.1149999999999989
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.1199999999999988
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.1249999999999987
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.1299999999999986
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.1349999999999985
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.1399999999999983
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.1449999999999982
Insufficient data to compute distribution consistency!
Start hour: 0.0, End hour: 23.0, KF cutoff: 1.14999999999

2021-01-15 15:17:32,715 INFO  compute_filter_task -> {'best_start_hr': 0.0, 'best_end_hr': 23.0, 'best_kf_cutoff': 1.109999999999999}
2021-01-15 15:17:32,716 INFO  compute_filter_task -> sidtest3/vikane_pen_id_60_2020-08-10_2020-08-30/sampling_filter.json


Start hour: 0.0, End hour: 23.0, KF cutoff: 1.249999999999996
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
Insufficient data to compute distribution consistency!
2020-08-07 2020-08-13
['2020-08-07', '2020-08-08', '2020-08-09', '2020-08-10', '2020-08-11', '2020-08-12', '2020-08-13']
2020-08-07 2020-08-13
['2020-08-07', '2020-08-08', '2020-08-09', '2020-08-10', '2020-08-11', '2020-08-12', '2020-08-13']
2020-08-07 2020-08-13
['2020-08-07', '2020-08-08', '2020-08-09', '2020-08-10', '2020-08-11', '2020-08-12', '2020-08-13']
2020-08-07 2020-08-13
['2020-08-07', '2020-08-08', '2020-08-09', '2020-08-10', '2020-08-11', '2020-08-12', '2020-08-13']
2020-08-08 2020-08-14
['2020-08-08', '2020-08-09', '2020-08-10', '2020-08-11', '2020-08-12', '2020-08-13', '2020-08-14']
2020-08-08 2020-08-14
['2020-08-08', '2020-08-09', '2020-08-10', '2020-08-11', '2020-08-12', '2020-08-13', '2020-08-14']
2020-08-08 2020-08-14
['2020-08-08', '2020-08-0

2021-01-15 15:17:34,965 INFO  generate_report_task -> /tmp/batches/sidtest3/vikane_pen_id_60_2020-08-10_2020-08-30/report.csv


2020-08-22 2020-08-28
['2020-08-22', '2020-08-23', '2020-08-24', '2020-08-25', '2020-08-26', '2020-08-27', '2020-08-28']
2020-08-22 2020-08-28
['2020-08-22', '2020-08-23', '2020-08-24', '2020-08-25', '2020-08-26', '2020-08-27', '2020-08-28']
2020-08-23 2020-08-29
['2020-08-23', '2020-08-24', '2020-08-25', '2020-08-26', '2020-08-27', '2020-08-28', '2020-08-29']
2020-08-23 2020-08-29
['2020-08-23', '2020-08-24', '2020-08-25', '2020-08-26', '2020-08-27', '2020-08-28', '2020-08-29']
2020-08-23 2020-08-29
['2020-08-23', '2020-08-24', '2020-08-25', '2020-08-26', '2020-08-27', '2020-08-28', '2020-08-29']
2020-08-23 2020-08-29
['2020-08-23', '2020-08-24', '2020-08-25', '2020-08-26', '2020-08-27', '2020-08-28', '2020-08-29']
2020-08-24 2020-08-30
['2020-08-24', '2020-08-25', '2020-08-26', '2020-08-27', '2020-08-28', '2020-08-29', '2020-08-30']
2020-08-24 2020-08-30
['2020-08-24', '2020-08-25', '2020-08-26', '2020-08-27', '2020-08-28', '2020-08-29', '2020-08-30']
2020-08-24 2020-08-30
['2020-08-

In [4]:
import pandas as pd
from pprint import pprint
import numpy as np

# NOTE(review): this constant is not referenced anywhere in the visible code;
# backtest_accuracy_data falls back to a hard-coded 0.13 when the ground-truth
# metadata has no expected loss factor. Confirm which value is intended.
DEFAULT_LOSS_FACTOR = 0.17

def backtest_accuracy_data(cohort_names, other_loss_factors=list(np.arange(0.13, 0.19, 0.01))):
    """Build a per-cohort table comparing predicted and ground-truth gutted weight.

    For every cohort whose ``report.csv`` exists under
    ``/tmp/batches/{BATCH_NAME}/{cohort_name}/``, the cohort's
    ``ground_truth_metadata.json`` is loaded and one report row dated on or
    after ``last_feeding_date`` is selected. That row's
    ``smart_average_weight`` is scaled by ``1 - loss_factor`` and compared
    with ``gutted_average_weight`` from the metadata.

    Args:
        cohort_names: iterable of cohort directory names to evaluate.
        other_loss_factors: extra loss factors to evaluate for each cohort;
            each one adds a ``'loss_factor:<lf> error pct'`` column. Pass
            ``None`` to skip these columns. The default list is created once
            at definition time; it is only read here, never mutated.

    Returns:
        A ``pd.DataFrame`` with one row per cohort that had a report and at
        least one qualifying report row. Cohorts without either are silently
        skipped, so the result is empty if nothing qualifies.

    Note:
        Despite their names, the ``error pct`` columns hold a signed
        fraction ``(pred - gutted) / gutted``; they are not multiplied by 100.
    """
    out_rows = []
    for cohort_name in cohort_names:
        cohort_dir = f'/tmp/batches/{BATCH_NAME}/{cohort_name}'
        report_path = f'{cohort_dir}/report.csv'
        if not os.path.exists(report_path):
            continue

        report = pd.read_csv(report_path)
        # Use a context manager so the metadata file handle is closed promptly.
        with open(f'{cohort_dir}/ground_truth_metadata.json') as gt_file:
            gt_meta = json.load(gt_file)

        # NOTE(review): 'date' is read without date parsing, so this is
        # presumably a string comparison that relies on ISO-formatted dates
        # in both the report and the metadata - confirm.
        rows = report[report['date'] >= gt_meta['last_feeding_date']]

        # Must have data on or day after feeding date
        if len(rows) == 0:
            continue

        # Prefer the second qualifying row when there is one, otherwise fall
        # back to the only qualifying row.
        chosen_row = rows.iloc[1] if len(rows) > 1 else rows.iloc[0]
        smart_avg = chosen_row['smart_average_weight']
        gutted_weight = gt_meta['gutted_average_weight']

        expected_loss_factor = gt_meta['expected_loss_factor']
        if expected_loss_factor is None:
            # Hard-coded fallback when the metadata carries no loss factor.
            expected_loss_factor = 0.13
        if expected_loss_factor > 1:
            # Values above 1 are treated as percentages and rescaled to a fraction.
            expected_loss_factor /= 100

        pred = smart_avg * (1 - expected_loss_factor)

        out = {
            'cohort': cohort_name,
            'error pct': (pred - gutted_weight) / gutted_weight,
            'smart_pred': pred,
            'gutted_weight': gutted_weight,
            'expected_loss_factor': expected_loss_factor,
        }

        if other_loss_factors is not None:
            for loss_factor in other_loss_factors:
                # Round so float noise from np.arange does not leak into column names.
                lf = round(loss_factor, 2)
                alt_pred = smart_avg * (1 - lf)
                out[f'loss_factor:{lf} error pct'] = (alt_pred - gutted_weight) / gutted_weight

        for k in ['kpi', 'raw_sample_size', 'smart_sample_size']:
            out[k] = chosen_row[k]
        out_rows.append(out)

    return pd.DataFrame(out_rows)
        
# Evaluate every cohort in cohort_names (defined outside this cell) with the
# default loss-factor sweep; the notebook renders the returned DataFrame.
backtest_accuracy_data(cohort_names)

Unnamed: 0,cohort,error pct,smart_pred,gutted_weight,expected_loss_factor,loss_factor:0.13 error pct,loss_factor:0.14 error pct,loss_factor:0.15 error pct,loss_factor:0.16 error pct,loss_factor:0.17 error pct,loss_factor:0.18 error pct,kpi,raw_sample_size,smart_sample_size
0,aplavika_pen_id_95_2020-07-10_2020-07-26,-0.033881,4781.320789,4949,0.175,0.018816,0.007105,-0.004605,-0.016316,-0.028026,-0.039737,1.638737,1477,7181
1,bolaks_pen_id_88_2020-02-28_2020-03-10,-0.01667,4503.652832,4580,0.165,0.024548,0.012771,0.000995,-0.010781,-0.022558,-0.034334,1.523585,1090,7230
2,djubawik_pen_id_153_2020-11-10_2020-11-26,-0.091117,5171.545071,5690,0.16,-0.058657,-0.069477,-0.080297,-0.091117,-0.101937,-0.112757,-0.128455,43,563
3,eldviktaren_pen_id_164_2020-09-21_2020-10-08,0.050012,3533.291362,3365,0.16,0.087513,0.075013,0.062512,0.050012,0.037512,0.025012,1.423111,917,7690
4,habranden_pen_id_100_2020-08-10_2020-08-31,0.001059,4512.774678,4508,0.175,0.055662,0.043528,0.031394,0.01926,0.007126,-0.005008,1.71497,1775,11756
5,kjeppevikholmen_pen_id_5_2019-06-18_2019-07-02,-0.030473,3836.420142,3957,0.17,0.016252,0.004571,-0.00711,-0.018791,-0.030473,-0.042154,1.355377,934,3430
6,langoy_pen_id_108_2020-05-07_2020-05-17,0.025346,4737.100788,4620,0.165,0.068325,0.056045,0.043766,0.031486,0.019207,0.006927,1.079511,318,2232
7,leivsethamran_pen_id_165_2020-10-18_2020-11-13,-0.061562,5808.932655,6190,0.13,-0.061562,-0.072348,-0.083135,-0.093922,-0.104708,-0.115495,-9.360978,3,3246
8,movikodden_pen_id_114_2020-11-03_2020-11-25,-0.045623,3741.159216,3920,0.13,-0.045623,-0.056593,-0.067562,-0.078532,-0.089502,-0.100472,1.473303,1001,6036
9,movikodden_pen_id_167_2020-10-13_2020-10-30,-0.091387,3907.03711,4300,0.18,-0.035983,-0.047064,-0.058145,-0.069225,-0.080306,-0.091387,1.128928,361,2255


# GTSF Results

In [9]:
# Load the train/test weight dataset from a local CSV.
train_test_dataset = pd.read_csv('/root/data/sid/traintest_weight_dataset.csv')

In [10]:
# Run the weight / length / k-factor estimator over the whole dataset, then
# display the resulting frame as the cell output.
estimates = estimate_weight_task(train_test_dataset)
estimates

2021-01-15 15:38:08,838 INFO  estimate_weight_task data: 180746 rows
180746it [03:10, 949.76it/s] 


Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,annotation,fish_id,weight,kf,camera_metadata,akpd_score,train_mask,val_mask,test_mask,estimated_weight_g,estimated_length_mm,estimated_k_factor
0,0,0,"{'leftCrop': [{'keypointType': 'ADIPOSE_FIN', ...",190607010048_bolaks-mjanes,4976,1.327622,"{'baseline': 0.12693501988129197, 'focalLength...",0.991304,True,False,False,4945.976138,728.712165,1.278156
1,1,1,"{'leftCrop': [{'keypointType': 'ADIPOSE_FIN', ...",190607010048_bolaks-mjanes,4976,1.327622,"{'baseline': 0.12693501988129197, 'focalLength...",0.953481,True,False,False,5319.556594,741.299781,1.305852
2,2,2,"{'leftCrop': [{'keypointType': 'ADIPOSE_FIN', ...",190607010048_bolaks-mjanes,4976,1.327622,"{'baseline': 0.12693501988129197, 'focalLength...",0.986294,True,False,False,5154.142976,730.146641,1.324116
3,3,3,"{'leftCrop': [{'keypointType': 'ADIPOSE_FIN', ...",190607010048_bolaks-mjanes,4976,1.327622,"{'baseline': 0.12693501988129197, 'focalLength...",0.981766,True,False,False,4936.802387,722.975654,1.306396
4,4,4,"{'leftCrop': [{'keypointType': 'ADIPOSE_FIN', ...",190607010048_bolaks-mjanes,4976,1.327622,"{'baseline': 0.12693501988129197, 'focalLength...",0.987670,True,False,False,5293.039083,738.259934,1.315459
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
180741,180741,180741,"{'leftCrop': [{'keypointType': 'ADIPOSE_FIN', ...",200623-4d371b46-73d2-4bbe-bd3e-e11c39192cf3,855,1.249672,"{'baseline': 0.1273289997783822, 'focalLength'...",0.973298,True,False,False,888.889208,436.169312,1.071229
180742,180742,180742,"{'leftCrop': [{'keypointType': 'ADIPOSE_FIN', ...",200623-4d371b46-73d2-4bbe-bd3e-e11c39192cf3,855,1.249672,"{'baseline': 0.1273289997783822, 'focalLength'...",0.988657,True,False,False,779.825225,422.730826,1.032299
180743,180743,180743,"{'leftCrop': [{'keypointType': 'ADIPOSE_FIN', ...",200623-4d371b46-73d2-4bbe-bd3e-e11c39192cf3,855,1.249672,"{'baseline': 0.1273289997783822, 'focalLength'...",0.980260,True,False,False,942.713395,446.548096,1.058705
180744,180744,180744,"{'leftCrop': [{'keypointType': 'ADIPOSE_FIN', ...",200623-4d371b46-73d2-4bbe-bd3e-e11c39192cf3,855,1.249672,"{'baseline': 0.1273289997783822, 'focalLength'...",0.987468,True,False,False,862.712413,430.675665,1.079978


In [19]:
# Slice the estimates into train and validation subsets using their boolean mask columns.
train, val = (estimates[estimates[mask_col]] for mask_col in ('train_mask', 'val_mask'))

def accuracy(df):
    """Summarise how well estimated weights match the measured weights in df.

    Reads the 'estimated_weight_g' and 'weight' columns and returns a dict with:
      * 'mean_absolute_error_pct': 100 x mean of |estimate - weight| / weight
      * 'mean_error_pct': 100 x mean(estimate - weight) / mean(weight)
    """
    predicted = df['estimated_weight_g']
    actual = df['weight']
    residual = predicted - actual
    relative_abs_error = np.abs(residual / actual)
    return {
        'mean_absolute_error_pct': 100 * np.mean(relative_abs_error),
        'mean_error_pct': 100 * np.mean(residual) / np.mean(actual),
    }

# Score both splits and show them side by side: one column per split, one row per metric.
res = dict(train=accuracy(train), val=accuracy(val))
pd.DataFrame(res)

Unnamed: 0,train,val
mean_absolute_error_pct,6.336687,4.213553
mean_error_pct,-0.476093,-1.704208
