Project DeepHealth, UC5 "Deep Image Annotation"

Franco Alberto Cardillo, ILC-CNR (UNITO) 

<francoalberto.cardillo@ilc.cnr.it>

<font color="yellow">Use this notebook to set up experiments for the EDDL pipelines</font>:
- select auto or MeSH terms
- apply a threshold on the minimum term frequency to balance the dataset (see the notebook "SCUMBLE_THRESHOLDS" in preproc)
- split in training-validation-test
- prepare yaml dataset for the ECVL dataloader

Notice: the same split is used for training both the convolutional and recurrent modules.


In [3]:
%load_ext autoreload
%autoreload 2

from bs4 import BeautifulSoup
from collections import defaultdict
import numpy as np
import os
import numpy as np
from numpy import count_nonzero as nnz
import pandas as pd
import pickle
from posixpath import join
from tqdm.notebook import tqdm
import yaml
from utils.vocabulary import Vocabulary
from sklearn.model_selection import train_test_split
from utils.text_collation import collate_fn_one_s, collate_fn_n_sents


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [4]:
# aux functions

def apply_threshold(image_ds, min_freq):
    """Keep only the labels whose total count is at least `min_freq`.

    Every column whose sum is below `min_freq` is removed and folded into a
    single "misc" column, which is 1 for the rows that had a non-zero value
    in at least one removed column and 0 otherwise.

    The input frame is never modified. The previous implementation added the
    "misc" column to the caller's DataFrame before re-binding the local name,
    so the caller's frame silently gained a column.

    Args:
        image_ds: DataFrame with one column per label; columns are summed, so
            the values are presumably 0/1 indicators (not checked here).
        min_freq: minimum column sum required to keep a label.

    Returns:
        A new DataFrame holding the kept labels plus the "misc" column.
    """
    counts = image_ds.sum(axis=0)
    keep_mask = (counts >= min_freq).to_numpy()
    print(f"{nnz(keep_mask)} labels have at least {min_freq} freq")

    drop_labels = image_ds.columns[~keep_mask]
    print(f"removing {nnz(~keep_mask)} labels")

    # Must be computed while the rare columns are still present.
    in_misc = (image_ds[drop_labels].sum(axis=1) > 0).to_numpy()

    # drop() returns a new frame: adding "misc" to it leaves the input untouched.
    thresholded = image_ds.drop(columns=drop_labels)
    thresholded["misc"] = in_misc.astype("int64")
    return thresholded

def save_label_indexes(df, out_fld):
    """Save the label <-> index mappings of the columns of `df` as YAML.

    The position i of each column name c is stored as c -> i in
    "label2idx.yaml" and as i -> c in "idx2label.yaml", both inside
    `out_fld`. Each pair is also printed as "i) c".
    """
    index_to_label = dict(enumerate(df.columns))
    label_to_index = {label: position for position, label in index_to_label.items()}
    for position, label in index_to_label.items():
        print(f"{position}) {label}")

    label2idx_path = join(out_fld, "label2idx.yaml")
    with open(label2idx_path, "w") as fout:
        yaml.safe_dump(label_to_index, fout)
    print(f"saved {label2idx_path}")
    print(f"lab2idx with {len(label_to_index)} labels")

    idx2label_path = join(out_fld, "idx2label.yaml")
    with open(idx2label_path, "w") as fout:
        yaml.safe_dump(index_to_label, fout)
    print(f"saved {idx2label_path}")


def encode_text(sentences, vocab):
    """Encode a text as a list of sentences, each a list of word indexes.

    The text is split on "." into sentences and every sentence on whitespace
    into tokens. Each token is looked up in `vocab.word2idx`; tokens that are
    missing map to `Vocabulary.OOV`. A piece with no tokens (for example the
    one following a final ".") yields an empty list.
    """
    return [
        [vocab.word2idx.get(token, Vocabulary.OOV) for token in piece.strip().split()]
        for piece in sentences.split(".")
    ]


def build_img_text_ds(ds, image_fld, vocab=None):
    """Build a per-image table pairing every image with its report text.

    Each row of `ds` is expanded into one output row per entry of its
    `image_filename` field; the report `id` and `text` are repeated for every
    image of the report.

    Changes with respect to the previous version:
      * removed `pd.columns = [...]`, which set an attribute on the pandas
        module and had no effect on the returned frame;
      * the vocabulary can now be passed explicitly instead of being read
        from a notebook-level global.

    Args:
        ds: DataFrame whose rows, after `reset_index()`, expose `id`,
            `image_filename` (an iterable of file names) and `text`.
        image_fld: folder joined in front of every image file name.
        vocab: vocabulary handed to `encode_text`. When None, the
            notebook-level `vocab` is used, as older callers expect.

    Returns:
        DataFrame with columns "id", "image_filename", "text" and "enc_text"
        (the output of `encode_text` for each text).

    Raises:
        NameError: if `vocab` is None and no global `vocab` exists.
    """
    if vocab is None:
        # Backward compatibility: earlier callers relied on a global `vocab`.
        try:
            vocab = globals()["vocab"]
        except KeyError:
            raise NameError("name 'vocab' is not defined") from None

    records = [
        (row.id, join(image_fld, fn), row.text)
        for row in ds.reset_index().itertuples()
        for fn in row.image_filename
    ]
    img_text_ds = pd.DataFrame(records, columns=["id", "image_filename", "text"])
    img_text_ds["enc_text"] = img_text_ds["text"].apply(encode_text, args=(vocab,))
    return img_text_ds

def to_ecvl_dataset(img_ds, filenames, image_fld, train_ids, valid_ids, test_ids, name="na", description="na"):
    """Assemble the dictionary describing a dataset for the ECVL dataloader.

    Args:
        img_ds: DataFrame indexed by file name, one column per class.
        filenames: file names to include; each is looked up with `img_ds.loc`.
        image_fld: folder joined in front of each file name.
        train_ids, valid_ids, test_ids: identifiers of the images in each
            split; every one is converted with `int()`.
        name, description: free-text metadata stored in the result.

    Returns:
        dict with keys "name", "description", "classes" (0 .. n_columns-1),
        "images" (one {"location", "label"} entry per file name, where
        "label" lists the positions of the columns equal to 1) and "split".
    """
    class_ids = list(range(img_ds.shape[1]))
    print("N CLASSES:", len(class_ids))

    split = {
        "training": [int(i) for i in train_ids],
        "validation": [int(i) for i in valid_ids],
        "test": [int(i) for i in test_ids],
    }

    def active_classes(filename):
        # positions of the columns whose value is 1 for this image
        row = img_ds.loc[filename]
        return [position for position, flag in enumerate(row) if flag == 1]

    images = [
        {"location": join(image_fld, filename), "label": active_classes(filename)}
        for filename in filenames
    ]

    return {
        "name": name,
        "description": description,
        "classes": class_ids,
        "images": images,
        "split": split,
    }
        
def to_ecvl_dataset2(img_ds, image_fld, out_fld, train_ids, valid_ids, test_ids):
    """Print, for every row of `img_ds`, the positions of the columns equal to 1.

    Only `img_ds` is used; the other parameters are accepted but ignored.
    Nothing is returned.
    """
    for row_label, row_values in img_ds.iterrows():
        active = [position for position, flag in enumerate(row_values) if flag == 1]
        print(f"{row_label}:", active)

def build_ecvl_dataset(img_ds, exp_fld, img_fld, n_bootstraps, train_p, test_p, seed=1):
    """Create `n_bootstraps` train/validation/test splits and save them.

    For run i a folder "<exp_fld>/run_<i>" is created containing:
      * train_ids.txt, valid_ids.txt, test_ids.txt: one file name per line;
      * ecvl_ds.yml: the dataset description built by `to_ecvl_dataset`.

    Train and test sets come from `train_test_split`, stratified on the
    "normal" column, with `random_state = seed + i`. Every image assigned to
    neither set goes to validation.

    Args:
        img_ds: DataFrame that exposes "filename" and "normal" columns after
            `reset_index()`.
        exp_fld: experiment folder; run folders are created inside it.
        img_fld: image folder forwarded to `to_ecvl_dataset`.
        n_bootstraps: number of splits (runs) to generate.
        train_p, test_p: fractions of images for training and test.
        seed: base random seed; run i uses `seed + i`.
    """
    n_images = img_ds.shape[0]
    img_ds2 = img_ds.reset_index()
    filenames = np.array(img_ds2.filename.tolist())
    normal_col = np.array(img_ds2.normal.tolist())
    valid_p = 1 - train_p - test_p
    print(f"expected |train| = {int(train_p * n_images)}")
    print(f"expected |valid| = {int(valid_p * n_images)}")
    print(f"expected |test| = {int(test_p * n_images)}")
    for i in range(n_bootstraps):
        out_fld = join(exp_fld, f"run_{i}")
        os.makedirs(out_fld, exist_ok=True)

        x_train, x_test = train_test_split(
            range(n_images), stratify=normal_col,
            train_size=train_p, test_size=test_p,
            random_state=seed + i, shuffle=True)
        # Set membership keeps this linear; testing against the two lists was
        # quadratic in the number of images.
        assigned = set(x_train) | set(x_test)
        x_valid = [idx for idx in range(n_images) if idx not in assigned]
        print(f"actual |train| = {len(x_train)}")
        print(f"actual |valid| = {len(x_valid)}")
        print(f"actual |test| = {len(x_test)}")
        train_ids = filenames[x_train]
        valid_ids = filenames[x_valid]
        test_ids = filenames[x_test]
        # The id lists are stored in the run folder, one file per split.
        for ids, fn in zip([train_ids, valid_ids, test_ids], ["train_ids.txt", "valid_ids.txt", "test_ids.txt"]):
            with open(join(out_fld, fn), "w") as fout:
                fout.write("\n".join(ids))
            print(f"saved {join(out_fld, fn)}: {len(ids)} images")

        # The split is passed as positional indexes, not as file names.
        ecvl_ds = to_ecvl_dataset(img_ds, filenames, img_fld, x_train, x_valid, x_test)
        with open(join(out_fld, "ecvl_ds.yml"), "w") as fout:
            yaml.safe_dump(ecvl_ds, fout, default_flow_style=None)
        print(f"saved {join(out_fld, 'ecvl_ds.yml')}")


In [None]:
# ----------------------------------------
# Experiment set-up: writes the thresholded label table, the vocabulary, the
# encoded report texts and the ECVL splits into exp_fld.

# MeSH terms, min frequency 70, balanced on normal
# NOTE(review): a later cell ("MESH EXP") uses the same exp_fld and the same
# settings; running both overwrites the same output files.

exp_fld = "/mnt/datasets/uc5/EXPS/eddl/mesh_70th"
os.makedirs(exp_fld, exist_ok=True)
print("** EXP FLD:", exp_fld)

ds_home_fld = "/mnt/datasets/uc5/std-dataset"
img_fld = join(ds_home_fld, "image")
meta_fld = "/mnt/datasets/uc5/meta/eddl/iuchest"

LABELS = "mesh"  # "mesh" or "auto"
min_freq = 70 if LABELS == "mesh" else 40
img_ds_fn = "img_dataset.pkl" if LABELS == "mesh" else "img_dataset_auto.pkl"
# NOTE(review): in_vocab_fn is never read; the vocabulary is loaded from the
# hard-coded "vocab_1000.pkl" below. Confirm which file is intended.
in_vocab_fn = "all_vocab_1000.pkl"

# ----------------------------------------
print(f"Using terms {LABELS}, min frequency: {min_freq}")

img_ds = pd.read_pickle(join(meta_fld, img_ds_fn))
print("image dataset read, shape:", img_ds.shape)

# labels below min_freq are collapsed into the "misc" column
img_ds = apply_threshold(img_ds, min_freq)
print("thresholded image dataset, shape:", img_ds.shape)
print("IMG_DS")
display(img_ds.head())

# save the thresholded img_ds and its label <-> index mappings
img_ds.to_pickle(join(exp_fld, "img_dataset.pkl"))
save_label_indexes(img_ds, exp_fld)

# ----------------------------------------
# process vocabulary
# `vocab` must stay a notebook-level name: build_img_text_ds reads it as a global
with open(join(meta_fld, "vocab_1000.pkl"), "rb") as fin:
    vocab = pickle.load(fin)
# placeholder for vocabulary processing: currently the vocabulary is copied as-is
with open(join(exp_fld, "vocab.pkl"), "wb") as fout:
    pickle.dump(vocab, fout)

# ----------------------------------------
# encode text

max_tokens = 12  # including bos and eos
# with EDDL we need to "collate" text here since the dataloader manages only images
# in PyTorch the collation is delegated to the dataloader

# first, encode words with their index in the vocabulary
ds = pd.read_pickle( join(meta_fld, "reports_raw2.pkl"))
img_text_ds = build_img_text_ds(ds, img_fld)
# presumably collate_fn_one_s turns the encoded sentences into one sequence of max_tokens items (TODO confirm)
img_text_ds["target_text"] = img_text_ds.enc_text.apply(lambda enc: collate_fn_one_s(enc, max_tokens=max_tokens))

# display(img_text_ds.head().T)

img_text_ds.to_pickle(join(exp_fld, "img_text_dataset.pkl"))

# now prepare the ECVL dataset using img_ds; img_text_ds is not used by the data loader
n_bootstraps = 3
train_p = 0.7
test_p = 0.1

# default seed (1) is used for the splits
build_ecvl_dataset(img_ds, exp_fld, img_fld, n_bootstraps, train_p, test_p)


print("all done for exp in:", exp_fld)

In [None]:

# Experiment set-up: writes the thresholded label table, the vocabulary, the
# encoded report texts and the ECVL splits into exp_fld.
# auto terms, min frequency 50, balanced on normal, split seed 23

exp_fld = "/mnt/datasets/uc5/EXPS/eddl/auto_50th_seed23"
os.makedirs(exp_fld, exist_ok=True)
print("** EXP FLD:", exp_fld)

ds_home_fld = "/mnt/datasets/uc5/std-dataset"
img_fld = join(ds_home_fld, "image")
meta_fld = "/mnt/datasets/uc5/meta/eddl/iuchest"

LABELS = "auto"  # "mesh" or "auto"
min_freq = 70 if LABELS == "mesh" else 50
img_ds_fn = "img_dataset.pkl" if LABELS == "mesh" else "img_dataset_auto.pkl"
# NOTE(review): in_vocab_fn is never read; the vocabulary is loaded from the
# hard-coded "vocab_1000.pkl" below. Confirm which file is intended.
in_vocab_fn = "all_vocab_1000.pkl"

# ----------------------------------------
print(f"Using terms {LABELS}, min frequency: {min_freq}")

img_ds = pd.read_pickle(join(meta_fld, img_ds_fn))
print("image dataset read, shape:", img_ds.shape)

# labels below min_freq are collapsed into the "misc" column
img_ds = apply_threshold(img_ds, min_freq)
print("thresholded image dataset, shape:", img_ds.shape)
print("IMG_DS")
display(img_ds.head())

# save the thresholded img_ds and its label <-> index mappings
img_ds.to_pickle(join(exp_fld, "img_dataset.pkl"))
save_label_indexes(img_ds, exp_fld)

# ----------------------------------------
# process vocabulary
# `vocab` must stay a notebook-level name: build_img_text_ds reads it as a global
with open(join(meta_fld, "vocab_1000.pkl"), "rb") as fin:
    vocab = pickle.load(fin)
# placeholder for vocabulary processing: currently the vocabulary is copied as-is
with open(join(exp_fld, "vocab.pkl"), "wb") as fout:
    pickle.dump(vocab, fout)

# ----------------------------------------
# encode text

max_tokens = 12  # including bos and eos
# with EDDL we need to "collate" text here since the dataloader manages only images
# in PyTorch the collation is delegated to the dataloader

# first, encode words with their index in the vocabulary
ds = pd.read_pickle( join(meta_fld, "reports_raw2.pkl"))
img_text_ds = build_img_text_ds(ds, img_fld) # image paths are prefixed with img_fld (absolute path names)
# presumably collate_fn_one_s turns the encoded sentences into one sequence of max_tokens items (TODO confirm)
img_text_ds["target_text"] = img_text_ds.enc_text.apply(lambda enc: collate_fn_one_s(enc, max_tokens=max_tokens))

# display(img_text_ds.head().T)

img_text_ds.to_pickle(join(exp_fld, "img_text_dataset.pkl"))

# now prepare the ECVL dataset using img_ds; img_text_ds is not used by the data loader
n_bootstraps = 3
train_p = 0.7
test_p = 0.1

# run i is split with random_state = 23 + i
build_ecvl_dataset(img_ds, exp_fld, img_fld, n_bootstraps, train_p, test_p, seed=23)


print("all done for exp in:", exp_fld)

In [None]:

# Experiment set-up: writes the thresholded label table, the vocabulary, the
# encoded report texts and the ECVL splits into exp_fld.
# auto terms, SEQLEN 18, min frequency 50, balanced on normal
# max_tokens is set first because it is part of the experiment folder name
max_tokens = 18  # including bos and eos

exp_fld = f"/mnt/datasets/uc5/EXPS/eddl/auto_50th_seqlen{max_tokens}"
os.makedirs(exp_fld, exist_ok=True)
print("** EXP FLD:", exp_fld)

ds_home_fld = "/mnt/datasets/uc5/std-dataset"
img_fld = join(ds_home_fld, "image")
meta_fld = "/mnt/datasets/uc5/meta/eddl/iuchest"

LABELS = "auto"  # "mesh" or "auto"
min_freq = 70 if LABELS == "mesh" else 50
img_ds_fn = "img_dataset.pkl" if LABELS == "mesh" else "img_dataset_auto.pkl"
# NOTE(review): in_vocab_fn is never read; the vocabulary is loaded from the
# hard-coded "vocab_1000.pkl" below. Confirm which file is intended.
in_vocab_fn = "all_vocab_1000.pkl"

# ----------------------------------------
print(f"Using terms {LABELS}, min frequency: {min_freq}")

img_ds = pd.read_pickle(join(meta_fld, img_ds_fn))
print("image dataset read, shape:", img_ds.shape)

# labels below min_freq are collapsed into the "misc" column
img_ds = apply_threshold(img_ds, min_freq)
print("thresholded image dataset, shape:", img_ds.shape)
print("IMG_DS")
display(img_ds.head())

# save the thresholded img_ds and its label <-> index mappings
img_ds.to_pickle(join(exp_fld, "img_dataset.pkl"))
save_label_indexes(img_ds, exp_fld)

# ----------------------------------------
# process vocabulary
# `vocab` must stay a notebook-level name: build_img_text_ds reads it as a global
with open(join(meta_fld, "vocab_1000.pkl"), "rb") as fin:
    vocab = pickle.load(fin)
# placeholder for vocabulary processing: currently the vocabulary is copied as-is
with open(join(exp_fld, "vocab.pkl"), "wb") as fout:
    pickle.dump(vocab, fout)

# ----------------------------------------
# encode text

# with EDDL we need to "collate" text here since the dataloader manages only images
# in PyTorch the collation is delegated to the dataloader

# first, encode words with their index in the vocabulary
ds = pd.read_pickle( join(meta_fld, "reports_raw2.pkl"))
img_text_ds = build_img_text_ds(ds, img_fld) # image paths are prefixed with img_fld (absolute path names)
# presumably collate_fn_one_s turns the encoded sentences into one sequence of max_tokens items (TODO confirm)
img_text_ds["target_text"] = img_text_ds.enc_text.apply(lambda enc: collate_fn_one_s(enc, max_tokens=max_tokens))

# display(img_text_ds.head().T)

img_text_ds.to_pickle(join(exp_fld, "img_text_dataset.pkl"))

# now prepare the ECVL dataset using img_ds; img_text_ds is not used by the data loader
n_bootstraps = 3
train_p = 0.7
test_p = 0.1

# default seed (1) is used for the splits
build_ecvl_dataset(img_ds, exp_fld, img_fld, n_bootstraps, train_p, test_p)


print("all done for exp in:", exp_fld)

In [None]:
# MESH EXP
# Experiment set-up: writes the thresholded label table, the vocabulary, the
# encoded report texts and the ECVL splits into exp_fld.
# MeSH terms, min frequency 70, balanced on normal
# NOTE(review): an earlier cell uses the same exp_fld ("mesh_70th") and the
# same settings; running both overwrites the same output files.

exp_fld = "/mnt/datasets/uc5/EXPS/eddl/mesh_70th"
os.makedirs(exp_fld, exist_ok=True)
print("** EXP FLD:", exp_fld)

ds_home_fld = "/mnt/datasets/uc5/std-dataset"
img_fld = join(ds_home_fld, "image")
meta_fld = "/mnt/datasets/uc5/meta/eddl/iuchest"

LABELS = "mesh"  # "mesh" or "auto"
min_freq = 70 if LABELS == "mesh" else 50
img_ds_fn = "img_dataset.pkl" if LABELS == "mesh" else "img_dataset_auto.pkl"
# NOTE(review): in_vocab_fn is never read; the vocabulary is loaded from the
# hard-coded "vocab_1000.pkl" below. Confirm which file is intended.
in_vocab_fn = "all_vocab_1000.pkl"

# ----------------------------------------
print(f"Using terms {LABELS}, min frequency: {min_freq}")

img_ds = pd.read_pickle(join(meta_fld, img_ds_fn))
print("image dataset read, shape:", img_ds.shape)

# labels below min_freq are collapsed into the "misc" column
img_ds = apply_threshold(img_ds, min_freq)
print("thresholded image dataset, shape:", img_ds.shape)
print("IMG_DS")
display(img_ds.head())

# save the thresholded img_ds and its label <-> index mappings
img_ds.to_pickle(join(exp_fld, "img_dataset.pkl"))
save_label_indexes(img_ds, exp_fld)

# ----------------------------------------
# process vocabulary
# `vocab` must stay a notebook-level name: build_img_text_ds reads it as a global
with open(join(meta_fld, "vocab_1000.pkl"), "rb") as fin:
    vocab = pickle.load(fin)
# placeholder for vocabulary processing: currently the vocabulary is copied as-is
with open(join(exp_fld, "vocab.pkl"), "wb") as fout:
    pickle.dump(vocab, fout)

# ----------------------------------------
# encode text

max_tokens = 12  # including bos and eos
# with EDDL we need to "collate" text here since the dataloader manages only images
# in PyTorch the collation is delegated to the dataloader

# first, encode words with their index in the vocabulary
ds = pd.read_pickle( join(meta_fld, "reports_raw2.pkl"))
img_text_ds = build_img_text_ds(ds, img_fld) # image paths are prefixed with img_fld (absolute path names)
# presumably collate_fn_one_s turns the encoded sentences into one sequence of max_tokens items (TODO confirm)
img_text_ds["target_text"] = img_text_ds.enc_text.apply(lambda enc: collate_fn_one_s(enc, max_tokens=max_tokens))

# display(img_text_ds.head().T)

img_text_ds.to_pickle(join(exp_fld, "img_text_dataset.pkl"))

# now prepare the ECVL dataset using img_ds; img_text_ds is not used by the data loader
n_bootstraps = 3
train_p = 0.7
test_p = 0.1

# default seed (1) is used for the splits
build_ecvl_dataset(img_ds, exp_fld, img_fld, n_bootstraps, train_p, test_p)


print("all done for exp in:", exp_fld)

In [None]:
# MESH EXP
# Experiment set-up: writes the thresholded label table, the vocabulary, the
# encoded report texts and the ECVL splits into exp_fld.
# MeSH terms, min frequency 130, balanced on normal

exp_fld = "/mnt/datasets/uc5/EXPS/eddl/mesh_130th"
os.makedirs(exp_fld, exist_ok=True)
print("** EXP FLD:", exp_fld)

ds_home_fld = "/mnt/datasets/uc5/std-dataset"
img_fld = join(ds_home_fld, "image")
meta_fld = "/mnt/datasets/uc5/meta/eddl/iuchest"

LABELS = "mesh"  # "mesh" or "auto"
min_freq = 130 if LABELS == "mesh" else 50
img_ds_fn = "img_dataset.pkl" if LABELS == "mesh" else "img_dataset_auto.pkl"
# NOTE(review): in_vocab_fn is never read; the vocabulary is loaded from the
# hard-coded "vocab_1000.pkl" below. Confirm which file is intended.
in_vocab_fn = "all_vocab_1000.pkl"

# ----------------------------------------
print(f"Using terms {LABELS}, min frequency: {min_freq}")

img_ds = pd.read_pickle(join(meta_fld, img_ds_fn))
print("image dataset read, shape:", img_ds.shape)

# labels below min_freq are collapsed into the "misc" column
img_ds = apply_threshold(img_ds, min_freq)
print("thresholded image dataset, shape:", img_ds.shape)
print("IMG_DS")
display(img_ds.head())

# save the thresholded img_ds and its label <-> index mappings
img_ds.to_pickle(join(exp_fld, "img_dataset.pkl"))
save_label_indexes(img_ds, exp_fld)

# ----------------------------------------
# process vocabulary
# `vocab` must stay a notebook-level name: build_img_text_ds reads it as a global
with open(join(meta_fld, "vocab_1000.pkl"), "rb") as fin:
    vocab = pickle.load(fin)
# placeholder for vocabulary processing: currently the vocabulary is copied as-is
with open(join(exp_fld, "vocab.pkl"), "wb") as fout:
    pickle.dump(vocab, fout)

# ----------------------------------------
# encode text

max_tokens = 12  # including bos and eos
# with EDDL we need to "collate" text here since the dataloader manages only images
# in PyTorch the collation is delegated to the dataloader

# first, encode words with their index in the vocabulary
ds = pd.read_pickle( join(meta_fld, "reports_raw2.pkl"))
img_text_ds = build_img_text_ds(ds, img_fld) # image paths are prefixed with img_fld (absolute path names)
# presumably collate_fn_one_s turns the encoded sentences into one sequence of max_tokens items (TODO confirm)
img_text_ds["target_text"] = img_text_ds.enc_text.apply(lambda enc: collate_fn_one_s(enc, max_tokens=max_tokens))

# display(img_text_ds.head().T)

img_text_ds.to_pickle(join(exp_fld, "img_text_dataset.pkl"))

# now prepare the ECVL dataset using img_ds; img_text_ds is not used by the data loader
n_bootstraps = 3
train_p = 0.7
test_p = 0.1

# default seed (1) is used for the splits
build_ecvl_dataset(img_ds, exp_fld, img_fld, n_bootstraps, train_p, test_p)


print("all done for exp in:", exp_fld)

In [5]:
# ***
# FINAL check before publishing the code
# test with lstm and gru nodes

# prefix in makefile for experiments with these settings: final_

# Experiment set-up: writes the thresholded label table, the vocabulary, the
# encoded report texts and the ECVL splits into exp_fld.

# auto terms, min frequency 50, balanced on normal, split seed 23

exp_fld = "/mnt/datasets/uc5/EXPS/eddl/final_exp"
os.makedirs(exp_fld, exist_ok=True)
print("** EXP FLD:", exp_fld)

ds_home_fld = "/mnt/datasets/uc5/std-dataset"
img_fld = join(ds_home_fld, "image")
meta_fld = "/mnt/datasets/uc5/meta/eddl/iuchest"

LABELS = "auto"  # "mesh" or "auto"
min_freq = 70 if LABELS == "mesh" else 50
img_ds_fn = "img_dataset.pkl" if LABELS == "mesh" else "img_dataset_auto.pkl"
# NOTE(review): in_vocab_fn is never read; the vocabulary is loaded from the
# hard-coded "vocab_1000.pkl" below. Confirm which file is intended.
in_vocab_fn = "all_vocab_1000.pkl"

# ----------------------------------------
print(f"Using terms {LABELS}, min frequency: {min_freq}")

img_ds = pd.read_pickle(join(meta_fld, img_ds_fn))
print("image dataset read, shape:", img_ds.shape)

# labels below min_freq are collapsed into the "misc" column
img_ds = apply_threshold(img_ds, min_freq)
print("thresholded image dataset, shape:", img_ds.shape)
print("IMG_DS")
display(img_ds.head())

# save the thresholded img_ds and its label <-> index mappings
img_ds.to_pickle(join(exp_fld, "img_dataset.pkl"))
save_label_indexes(img_ds, exp_fld)

# ----------------------------------------
# process vocabulary
# `vocab` must stay a notebook-level name: build_img_text_ds reads it as a global
with open(join(meta_fld, "vocab_1000.pkl"), "rb") as fin:
    vocab = pickle.load(fin)
# placeholder for vocabulary processing: currently the vocabulary is copied as-is
with open(join(exp_fld, "vocab.pkl"), "wb") as fout:
    pickle.dump(vocab, fout)

# ----------------------------------------
# encode text

max_tokens = 12  # including bos and eos
# with EDDL we need to "collate" text here since the dataloader manages only images
# in PyTorch the collation is delegated to the dataloader

# first, encode words with their index in the vocabulary
ds = pd.read_pickle( join(meta_fld, "reports_raw2.pkl"))
img_text_ds = build_img_text_ds(ds, img_fld) # image paths are prefixed with img_fld (absolute path names)
# presumably collate_fn_one_s turns the encoded sentences into one sequence of max_tokens items (TODO confirm)
img_text_ds["target_text"] = img_text_ds.enc_text.apply(lambda enc: collate_fn_one_s(enc, max_tokens=max_tokens))

# display(img_text_ds.head().T)

img_text_ds.to_pickle(join(exp_fld, "img_text_dataset.pkl"))

# now prepare the ECVL dataset using img_ds; img_text_ds is not used by the data loader
n_bootstraps = 3
train_p = 0.7
test_p = 0.1

# run i is split with random_state = 23 + i
build_ecvl_dataset(img_ds, exp_fld, img_fld, n_bootstraps, train_p, test_p, seed=23)


print("all done for exp in:", exp_fld)

** EXP FLD: /mnt/datasets/uc5/EXPS/eddl/final_exp
Using terms auto, min frequency: 50
image dataset read, shape: (7244, 523)
45 labels have at least 50 freq
removing 478 labels
thresholded image dataset, shape: (7244, 46)
IMG_DS


Unnamed: 0_level_0,normal,aorta,arthritic changes,atelectasis,bilateral pleural effusion,cabg,calcified granuloma,calcinosis,cardiomegaly,"catheterization, central venous",...,rib,rib fracture,scar,scarring,scoliosis,sternotomy,thoracic aorta,thoracic vertebrae,tortuous aorta,misc
filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
CXR1_1_IM-0001-3001.png,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
CXR1_1_IM-0001-4001.png,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
CXR10_IM-0002-1001.png,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
CXR10_IM-0002-2001.png,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
CXR100_IM-0002-1001.png,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


0) normal
1) aorta
2) arthritic changes
3) atelectasis
4) bilateral pleural effusion
5) cabg
6) calcified granuloma
7) calcinosis
8) cardiomegaly
9) catheterization, central venous
10) congestion
11) copd
12) deformity
13) degenerative change
14) diaphragm
15) edema
16) effusion
17) emphysema
18) eventration
19) fracture
20) granuloma
21) granulomatous disease
22) hiatal hernia
23) hyperinflation
24) infection
25) infiltrates
26) nodule
27) opacity
28) osteophyte
29) pleural effusion
30) pleural thickening
31) pneumonia
32) pulmonary atelectasis
33) pulmonary disease, chronic obstructive
34) pulmonary edema
35) pulmonary emphysema
36) rib
37) rib fracture
38) scar
39) scarring
40) scoliosis
41) sternotomy
42) thoracic aorta
43) thoracic vertebrae
44) tortuous aorta
45) misc
saved /mnt/datasets/uc5/EXPS/eddl/final_exp/label2idx.yaml
lab2idx with 46 labels
saved /mnt/datasets/uc5/EXPS/eddl/final_exp/idx2label.yaml
expected |train| = 5070
expected |valid| = 1448
expected |test| = 724
actu