## 1. Setup

In [1]:
import sys
# Append '..' (the parent of the current working directory) to the module
# search path so that modules located one level up can be imported.
sys.path.append('..')

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import os
import skimage.io
import warnings

In [3]:
# Render matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline
# Enable the autoreload extension; mode 2 reloads all imported modules before
# each cell execution, so edits to source files are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation and numerical warnings -- confirm
# that is intended.
warnings.filterwarnings('ignore')

## 2. Dataset Statistics

In [4]:
# Summarise the dot annotations of each dataset: how many images the
# train(+val) and test splits contain, and the min / max / mean / total
# number of annotated objects per image across all splits.
for dataset_name in ['vgg_cells', 'carpk', 'shanghai_tech/part_b']:
    dataset_path = f'../datasets/{dataset_name}'

    print()
    print(f'-----{dataset_name}-----')
    print(dataset_path)

    # Per-split list of object counts, one entry per annotation image.
    counts = {}
    for split_name in ['train', 'val', 'test']:
        split_path = f'{dataset_path}/{split_name}'
        dots_path = f'{split_path}/gt_dots'

        # A split may be absent; treat a missing (or non-directory) path as
        # an empty split instead of failing in os.listdir.
        dot_img_names = []
        if os.path.isdir(dots_path):
            dot_img_names = sorted(os.listdir(dots_path))

        counts[split_name] = []
        for img_name in dot_img_names:
            # Every non-zero pixel of the dot map is counted as one object.
            # NOTE(review): assumes single-channel dot maps -- in a
            # multi-channel image each channel's non-zero pixels would be
            # counted separately; confirm against the dataset generator.
            dot_img = skimage.io.imread(f'{dots_path}/{img_name}') > 0
            count = dot_img.sum()
            counts[split_name].append(count)

    # The validation split is reported as part of the training set.
    print(f'Train size: {len(counts["train"]) + len(counts["val"])} images')
    print(f'Test size: {len(counts["test"])} images')

    counts_all = np.array(counts['train'] + counts['val'] + counts['test'])
    if counts_all.size == 0:
        # min()/max() raise ValueError on an empty array and mean() would be
        # nan, so report the situation and move on to the next dataset.
        print('->No dot annotations found; skipping object stats.')
        continue

    print('->Objects per image:')
    print(f'Min: {counts_all.min()}')
    print(f'Max: {counts_all.max()}')
    print(f'Avg: {counts_all.mean():.1f}')
    print(f'Total: {counts_all.sum()}')


-----vgg_cells-----
../datasets/vgg_cells
Train size: 100 images
Test size: 100 images
->Objects per image:
Min: 74
Max: 317
Avg: 176.0
Total: 35192

-----carpk-----
../datasets/carpk
Train size: 989 images
Test size: 459 images
->Objects per image:
Min: 1
Max: 188
Avg: 62.0
Total: 89772

-----shanghai_tech/part_b-----
../datasets/shanghai_tech/part_b
Train size: 400 images
Test size: 316 images
->Objects per image:
Min: 9
Max: 576
Avg: 123.2
Total: 88199
