In [None]:
from pathlib import Path
import shutil
# you need quilt3 package to download the data:
! pip install quilt3
import pandas as pd
import quilt3
from aicsimageio import AICSImage
from aicsimageio.writers import OmeTiffWriter
from random import random

In [None]:
# set parameters

# Which cell line to download. In the paper, four nuclear structures were tested:
# - fibrillarin (cline = "FBL")
# - nucleophosmin (cline = "NPM1")
# - lamin B1 (cline = "LMNB1")
# - histone H2B (cline = "HIST1H2BJ")
cline = "FBL"
# Number of FOVs to process: choose what you need, with a roughly 80/20
# training/validation split.
num_samples_per_cell_line = 50

# set up paths
# NOTE(review): the base directory already ends in "FBL" and `cline` is appended
# again, so everything lands in ".../data/FBL/<cline>" -- confirm this is intended.
parent_path = Path("/mnt/eternus/users/Yu/project/data_compression/data/FBL") / f"{cline}"
# parents=True also creates missing intermediate directories; without it the
# cell raises FileNotFoundError when the data root does not exist yet.
parent_path.mkdir(parents=True, exist_ok=True)

raw_path = parent_path / "download"
train_path = parent_path / "train"
holdout_path = parent_path / "holdout"
for folder in (raw_path, train_path, holdout_path):
    folder.mkdir(exist_ok=True)

In [None]:
# connect to quilt and load the metadata table
pkg = quilt3.Package.browse(
    "aics/hipsc_single_cell_image_dataset", registry="s3://allencell"
)
meta_csv_path = parent_path / "meta.csv"
meta_df_obj = pkg["metadata.csv"]
meta_df_obj.fetch(meta_csv_path)
meta_df = pd.read_csv(meta_csv_path)

# Keep only the rows of the selected cell line, collapse them to one row per
# FOVId and renumber the index from 0.
# The steps are chained so that each one returns a new frame: mutating the
# result of `query` with inplace=True can trigger pandas'
# SettingWithCopyWarning and makes the cell depend on how often it was run.
meta_df_line = (
    meta_df.query("structure_name==@cline")
    .drop_duplicates(subset="FOVId")
    .reset_index(drop=True)
)

In [None]:
# Download every selected FOV and cut it into paired 2D slices:
# bright-field (input, "IM") and structure channel (ground truth, "GT").
for row in meta_df_line.itertuples():
    # stop once the requested number of FOVs has been handled
    if not row.Index < num_samples_per_cell_line:
        break

    # the first two "/"-separated parts of fov_path are used as nested keys
    # into the quilt package
    fov_parts = row.fov_path.split("/")
    subdir_name, file_name = fov_parts[0], fov_parts[1]

    # fetch the raw (multi-channel) image
    local_fn = raw_path / f"{row.FOVId}_original.tiff"
    pkg[subdir_name][file_name].fetch(local_fn)

    # read the bright-field and the structure channel as ZYX stacks
    reader = AICSImage(local_fn)
    bf_img, str_img = (
        reader.get_image_data("ZYX", C=channel, S=0, T=0)
        for channel in (row.ChannelNumberBrightfield, row.ChannelNumberStruct)
    )

    # one random draw per FOV: below 0.2 -> holdout, otherwise -> train
    data_path = holdout_path if random() < 0.2 else train_path

    # 3D to 2D: each z-slice of both stacks becomes its own file
    for i, (bf_slice, str_slice) in enumerate(zip(bf_img, str_img)):
        im_fn = data_path / f"{row.FOVId}_{i}_IM.tiff"
        OmeTiffWriter.save(bf_slice, im_fn, dim_order="YX")
        gt_fn = data_path / f"{row.FOVId}_{i}_GT.tiff"
        OmeTiffWriter.save(str_slice, gt_fn, dim_order="YX")

In [None]:
# Optional clean-up: you may remove the download folder and the metadata
# table now.
from shutil import rmtree
import os

# Both removals are guarded so that running this cell a second time is a
# no-op instead of raising FileNotFoundError.
if raw_path.exists():
    rmtree(raw_path)
meta_csv = parent_path / "meta.csv"
if meta_csv.exists():
    os.remove(meta_csv)

In [2]:
# 1, copy every "*IM.tiff" file of the training folder into a separate
#    "train_im" folder:
dst_dir = Path('../../data/FBL/train_im')
dst_dir.mkdir(exist_ok=True)
data_dirs = Path('../../data/FBL/train').glob('*IM.tiff')
for data_dir in data_dirs:  # despite the name, each entry is one .tiff file
    shutil.copy(data_dir, dst_dir)

In [41]:
# 2, calculate the ssim:
# Paths of a single image pair.
# NOTE(review): neither variable is referenced by the cells visible in this
# part of the notebook -- confirm they are still needed.
# prd_path: presumably a reconstructed/predicted image written from the
# CompressAI folder -- TODO confirm.
prd_path = Path('/mnt/eternus/users/Yu/project/data_compression/CompressAI/hello.tiff')
# input_path: one "IM" slice from the labelfree_2d test folder.
input_path = Path('/mnt/eternus/users/Yu/project/data_compression/data/labelfree_2d/test/7636_0_IM.tiff')

In [8]:
import pandas as pd
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import numpy as np
from aicsimageio import AICSImage
from aicsimageio.writers import OmeTiffWriter
from pathlib import Path
import os
import sys
import numpy as np
from skimage.metrics import structural_similarity as ssim
from skimage.metrics import peak_signal_noise_ratio as psnr
import monai
import torch

def normalizeItensity(image):
    """Normalize image intensities with MONAI's ``NormalizeIntensity``.

    Args:
        image: numpy array holding the image.

    Returns:
        numpy array with the output of ``monai.transforms.NormalizeIntensity``
        (constructed with its default arguments) applied to the float32
        version of ``image``.
    """
    # numpy -> float32 -> torch tensor, which is what the transform is fed
    tensor = torch.from_numpy(image.astype(np.float32))
    normalized = monai.transforms.NormalizeIntensity()(tensor)
    # hand the result back as a numpy array
    return normalized.numpy()

def transform_img(image):
    """Rescale an image by the 16-bit maximum value.

    Args:
        image: numpy array of pixel values.

    Returns:
        float32 numpy array of the same shape with every value divided by
        65535, so that 16-bit data ends up in the range [0, 1].
    """
    # The former debug `print(type(img))` was removed: this function runs for
    # every image of an evaluation loop and flooded the cell output.
    return image.astype(np.float32) / 65535

def compare_images(path1, path2, gt=False):
    """Compute similarity metrics between two 2D images stored on disk.

    Args:
        path1: path of the first image; read as a single YX plane.
        path2: path of the second image; read as a single YX plane.
        gt: if True, both images are first rescaled with ``transform_img``
            (float32, divided by 65535) before the metrics are computed.

    Returns:
        Tuple ``(mse, ssim_value, psnr_value, corr)``: mean squared error,
        structural similarity, peak signal-to-noise ratio and the Pearson
        correlation coefficient of the flattened pixel values.
    """
    # Load the two images
    image1 = AICSImage(path1).get_image_data('YX')
    image2 = AICSImage(path2).get_image_data('YX')

    if gt:
        image1 = transform_img(image1)
        image2 = transform_img(image2)
        # The rescaled data is float and (for 16-bit input) spans [0, 1].
        # scikit-image cannot infer that range from a float dtype, so it is
        # passed explicitly.
        metric_kwargs = {"data_range": 1.0}
    else:
        # Unscaled images: keep scikit-image's dtype-based default range.
        metric_kwargs = {}

    # Do the arithmetic on float64 copies: subtracting unsigned-integer arrays
    # wraps around instead of producing negative differences.
    pixels1 = image1.astype(np.float64)
    pixels2 = image2.astype(np.float64)

    # Calculate metrics
    # Mean over the actual number of pixels (previously a hard-coded 924*624,
    # which is only right for images of exactly that size).
    mse = np.mean((pixels1 - pixels2) ** 2)
    ssim_value = ssim(image1, image2, **metric_kwargs)
    psnr_value = psnr(image1, image2, **metric_kwargs)
    corr = np.corrcoef(pixels1.ravel(), pixels2.ravel())[0, 1]
    return mse, ssim_value, psnr_value, corr

In [2]:
class AverageMeter(object):
    """Keeps the most recent value and the running weighted mean of a series."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Set all statistics back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as seen ``n`` times and refresh the running mean.

        Args:
            val: the new value; stored as ``self.val``.
            n: weight of ``val`` (how many samples it stands for).
        """
        weighted = val * n
        self.val = val
        self.sum += weighted
        self.count += n
        self.avg = self.sum / self.count

In [None]:
from compressai.zoo.image import cfgs
from compressai.zoo import image_models

holdout_path_2d = Path("/mnt/eternus/users/Yu/project/data_compression/data/labelfree/FBL/holdout")
for image in holdout_path_2d.glob('*IM*.tiff'): 
    model_name= 'bmshj2018-factorized'+'_'+'mse'+'_'+'8'
    path_encoded= image.parent/model_name/str(image.stem+'_encoded')
    path_encoded.parent.mkdir(exist_ok=True, parents = True)
    path_decoded= image.parent/model_name/str(image.stem+'_decoded.tiff')
    path_decoded.parent.mkdir(exist_ok=True, parents = True)
    if not path_decoded.is_file():
        !python3 codec.py encode {image} -o {path_encoded} --model bmshj2018-factorized -q 8 -m mse --cuda
        !python3 codec.py decode {path_encoded} -o {path_decoded} --cuda

In [10]:
# Build two aligned lists: decoded images and the originals they came from.
test_dir = Path("/mnt/eternus/users/Yu/project/data_compression/data/labelfree_2d/test/")
decoded_paths = sorted((test_dir / 'bmshj2018-factorized_mse_8').glob('*_decoded.tiff'))
# Each original is derived from the name of its decoded file
# ("<stem>_decoded.tiff" -> "<stem>.tiff", the convention used when the decoded
# files are written) instead of globbing the test folder a second time.
# Pairing two independently sorted listings by position silently misaligns as
# soon as the folder holds any other .tiff (e.g. "*_GT.tiff") or one decoded
# file is missing.
holdout_paths = [
    test_dir / decoded.name.replace('_decoded.tiff', '.tiff')
    for decoded in decoded_paths
]

In [None]:
# Average every metric over all (decoded, original) image pairs.
mse_value = AverageMeter()
ssim_value = AverageMeter()
psnr_value = AverageMeter()
corr_value = AverageMeter()

# zip() silently drops unmatched entries, so insist on a 1:1 pairing first.
if len(decoded_paths) != len(holdout_paths):
    raise ValueError(
        f"cannot pair {len(decoded_paths)} decoded images "
        f"with {len(holdout_paths)} original images"
    )

# The loop variable is `original_path`, not `holdout_path`: the latter is the
# holdout *directory* used by the download section and must not be overwritten
# with a file path here.
for decode_path, original_path in zip(decoded_paths, holdout_paths):
    tmp_mse, tmp_ssim, tmp_psnr, tmp_corr = compare_images(decode_path, original_path)
    mse_value.update(tmp_mse)
    ssim_value.update(tmp_ssim)
    psnr_value.update(tmp_psnr)
    corr_value.update(tmp_corr)
print(mse_value.avg, ssim_value.avg, psnr_value.avg, corr_value.avg)

# Fine-tuning the pretrained model

In [None]:
# Run train.py on the labelfree_2d data folder with the flags below:
# model bmshj2018-factorized, quality 8, metric mse, 100 epochs, batch size 4,
# learning rate 1e-3, lambda 0.18, with --pretrained True.
# NOTE(review): presumably this starts from the pretrained weights and writes
# the checkpoint to --save_path; --aux-learning-rate 0 presumably disables
# updates of the auxiliary optimizer -- confirm both against train.py.
!python3 train.py -d /mnt/eternus/users/Yu/project/data_compression/data/labelfree_2d/ --aux-learning-rate 0 --lambda 0.1800 --epochs 100 -lr 1e-3 --batch-size 4 --pretrained True --model bmshj2018-factorized --quality 8 --metric mse --cuda --save_path /mnt/eternus/users/Yu/project/data_compression/data/labelfree_2d/model/fine_tune_v4.pth.tar

- Encode and decode with the fine-tuned model:

In [None]:
holdout_path_2d = Path("/mnt/eternus/users/Yu/project/data_compression/data/labelfree_2d/toy_test/")
for image in holdout_path_2d.glob('*IM*.tiff'): 
    model_name= 'bmshj2018-factorized'+'_'+'mse'+'_'+'8_v5'
    path_encoded= image.parent/model_name/str(image.stem+'_encoded')
    path_encoded.parent.mkdir(exist_ok=True, parents = True)
    path_decoded= image.parent/model_name/str(image.stem+'_decoded.tiff')
    path_decoded.parent.mkdir(exist_ok=True, parents = True)
    if not path_decoded.is_file():
        !python3 codec.py encode {image} -o {path_encoded} --model bmshj2018-factorized -q 8 -m mse --cuda --checkpoint /mnt/eternus/users/Yu/project/data_compression/data/labelfree_2d/model/fine_tune_v4.pth.tar
        # !python3 codec.py encode {image} -o {path_encoded} --model bmshj2018-factorized -q 8 -m mse --cuda
        !python3 codec.py decode {path_encoded} -o {path_decoded} --cuda

# Label-free 2D experiment

In [1]:
!nvidia-smi
import os
os.environ['CUDA_VISIBLE_DEVICES'] = "0,1,2,3,4,5,6,7"
!export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7

Thu Jul 13 10:46:06 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.161.03   Driver Version: 470.161.03   CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA A100-SXM...  Off  | 00000000:0E:00.0 Off |                    0 |
| N/A   28C    P0    51W / 400W |      0MiB / 40536MiB |      0%      Default |
|                               |                      |             Disabled |
+-------------------------------+----------------------+----------------------+
|   1  NVIDIA A100-SXM...  Off  | 00000000:13:00.0 Off |                    0 |
| N/A   28C    P0    51W / 400W |      0MiB / 40536MiB |      0%      Default |
|       

In [None]:
# Launch the run_im2im command-line tool with the labelfree_2d_FCN_train.yaml config.
# NOTE(review): this config path is rooted at /mnt/data/ISAS.DE/yu.zhou/...,
# unlike the /mnt/eternus/users/Yu/... paths used by the other cells --
# confirm it resolves on the machine running the notebook.
!run_im2im --config_path /mnt/data/ISAS.DE/yu.zhou/Yu/project/data_compression/CompressAI/examples/configs/labelfree_2d_FCN_train.yaml