In [1]:
# Explanation (see return_splits() method in dataset_generic.py): Without "dtype=slide_data['slide_id'].dtype" below, read_csv() will convert all-number columns to a numerical type. Even if we convert numerical columns back to objects later, we may lose zero-padding in the process; the columns must be correctly read in from the get-go. When we compare the individual train/val/test columns to slide_data['slide_id'] in the check_equality_in_number_of_slides() function below, we cannot compare objects (strings) to numbers or even to incorrectly zero-padded objects/strings.

import pandas as pd

def check_equality_in_number_of_slides(all_splits, slide_data):
    """Print, for each split, whether its slide count matches the lookup count.

    For each of the 'train', 'val' and 'test' columns of ``all_splits``, the
    missing entries are dropped and the remaining values are looked up in
    ``slide_data['slide_id']``. The number of ``slide_data`` rows that match
    is then compared with the number of remaining entries in the split, and
    the outcome (True/False) is printed. Nothing is returned.

    Args:
        all_splits: Table with 'train', 'val' and 'test' columns.
        slide_data: Table with a 'slide_id' column.
    """
    report = 'For the {} split, does the number of True values in the mask equal the number of slides in the split? {}'
    for name in ('train', 'val', 'test'):
        # Split columns may be padded with missing values; keep real entries only.
        members = all_splits[name].dropna().reset_index(drop=True)
        # Count the slide_data rows whose slide_id occurs in this split.
        matched = slide_data['slide_id'].isin(members.tolist()).sum(axis=0)
        print(report.format(name, matched == len(members)))

# Table whose 'slide_id' column the split columns are checked against.
slide_data = pd.read_csv('data_labels.csv')
splits_path = 'splits_0.csv'

# Case 1: let read_csv() infer the column dtypes.
# The check comes out False for the "val" split.
all_splits = pd.read_csv(splits_path)
check_equality_in_number_of_slides(all_splits, slide_data)

# Case 2: force every column to the dtype of slide_data['slide_id'].
# The check comes out True for all three splits.
slide_id_dtype = slide_data['slide_id'].dtype
all_splits = pd.read_csv(splits_path, dtype=slide_id_dtype)
check_equality_in_number_of_slides(all_splits, slide_data)

For the train split, does the number of True values in the mask equal the number of slides in the split? True
For the val split, does the number of True values in the mask equal the number of slides in the split? False
For the test split, does the number of True values in the mask equal the number of slides in the split? True
For the train split, does the number of True values in the mask equal the number of slides in the split? True
For the val split, does the number of True values in the mask equal the number of slides in the split? True
For the test split, does the number of True values in the mask equal the number of slides in the split? True
