# Secret Pre-Landscape Notebook MERSCOPE

In [1]:
# Reload edited modules automatically so local code changes are picked up live.
%load_ext autoreload
%autoreload 2
# Set ANYWIDGET_HMR=1 in the kernel environment.
# NOTE(review): presumably enables anywidget hot module replacement for widget development — confirm.
%env ANYWIDGET_HMR=1

env: ANYWIDGET_HMR=1


In [1]:

import numpy as np
import pandas as pd
import tifffile
import os
import celldega as dega

# Technology label passed as the first argument to the dega.pre preprocessing calls in this notebook.
technology = 'MERSCOPE'

In [3]:
# Sample and region processed by this notebook
sample = '2024_merscope_breast_htma'
region = 0

# Root data folder.
# NOTE: this is a machine-specific absolute path; change it for your environment.
DATA_DIR = '/Users/whuan/dev/ist_benchmarking/data'

# Raw input data (downloaded from the bucket) for the sample and for this region
data_dir = f'{DATA_DIR}/{sample}'
data_dir_region = f'{DATA_DIR}/{sample}/region_{region}'

# Output folders that receive the generated landscape files
path_landscape_files = f'{DATA_DIR}/landscapes/{sample}'
path_landscape_files_region = f'{DATA_DIR}/landscapes/{sample}/region_{region}'

# Create any missing folder and print the ones that were created.
# os.makedirs is used instead of os.mkdir because intermediate folders
# (DATA_DIR itself, DATA_DIR/landscapes) are not in this list and os.mkdir
# raises FileNotFoundError when a parent folder does not exist.
for folder in [data_dir, data_dir_region, path_landscape_files, path_landscape_files_region]:
    if not os.path.exists(folder):
        os.makedirs(folder, exist_ok=True)
        print(folder)

# Copy data from Google bucket

In [4]:
raw_data_bucket = 'fc-b8e703d3-de2d-4532-94cc-efe864b4feea/SPARC/Revisions/202405032008_SPARCRevisionsHTMAJN3May24_VMSC11302'

for file in [
    'images/mosaic_DAPI_z1.tif',
    'images/mosaic_Cellbound1_z1.tif',
    'images/micron_to_mosaic_pixel_transform.csv',
    'cell_metadata.csv',
    'detected_transcripts.csv',
    'cell_boundaries.parquet',
    'cell_by_gene.csv'
    ]:

    if os.path.exists(f"{data_dir_region}/{file.split('/')[-1]}"):
        print (f'{file} is previously downloaded')
    else:
        cmd = f'gsutil cp gs://{raw_data_bucket}/region_{region}/{file} {data_dir_region}/'
        print (cmd)
        ! {cmd}

images/mosaic_DAPI_z1.tif is previously downloaded
images/mosaic_Cellbound1_z1.tif is previously downloaded
images/micron_to_mosaic_pixel_transform.csv is previously downloaded
cell_metadata.csv is previously downloaded
detected_transcripts.csv is previously downloaded
cell_boundaries.parquet is previously downloaded
cell_by_gene.csv is previously downloaded


## Make Pyramidal Image

In [5]:
# Scale factor passed to the image size reduction, cell metadata, transcript tile and cell boundary tile steps.
image_scale = 1

In [7]:

channel = 'dapi'

# Read the first series of the DAPI mosaic TIFF into memory
with tifffile.TiffFile(f'{data_dir_region}/mosaic_DAPI_z1.tif') as tif:
    series = tif.series[0]  # Assuming you are interested in the first series
    image_data = series.asarray()

# Reduce the data to a single 2D plane; for 3D data the first plane along axis 0 is used.
if image_data.ndim == 2:
    image_plane = image_data
elif image_data.ndim == 3:
    image_plane = image_data[0, :, :]
else:
    # Without this branch image_data_scaled would be undefined and the
    # cell would fail later with a confusing NameError.
    raise ValueError(
        f'Expected a 2D or 3D image, got {image_data.ndim} dimensions (shape {image_data.shape})'
    )

# Double the intensities. NumPy integer arrays wrap around silently on
# overflow, which would turn the brightest pixels dark, so values that
# would exceed the dtype maximum are saturated at that maximum instead.
image_data_scaled = image_plane * 2
if np.issubdtype(image_plane.dtype, np.integer):
    dtype_max = np.iinfo(image_plane.dtype).max
    image_data_scaled[image_plane > dtype_max // 2] = dtype_max

# Save the image data to a regular TIFF file without compression
tifffile.imwrite(f'{path_landscape_files_region}/output_regular.tif', image_data_scaled, compression=None)

# Resize the image, convert it to PNG and build the Deep Zoom pyramid for this channel
image_ds = dega.pre.reduce_image_size(f'{path_landscape_files_region}/output_regular.tif', image_scale, path_landscape_files_region)
image_png = dega.pre.convert_to_png(image_ds)
dega.pre.make_deepzoom_pyramid(image_png, f"{path_landscape_files_region}/pyramid_images", channel, suffix=".webp[Q=100]")

## Cell Metadata

In [6]:
# Inputs from the raw region folder: the micron-to-mosaic-pixel transform and the cell metadata CSV
path_transformation_matrix = f'{data_dir_region}/micron_to_mosaic_pixel_transform.csv'
path_meta_cell_micron = f'{data_dir_region}/cell_metadata.csv'
# Output Parquet path in the landscape files folder for this region
path_meta_cell_image = f'{path_landscape_files_region}/cell_metadata.parquet'

In [7]:
# Build the cell metadata file at path_meta_cell_image from the cell metadata CSV
# and the transformation matrix.
# NOTE(review): presumably converts cell positions from microns to image coordinates — confirm.
# The shared `technology` variable is used, as in every other dega.pre call in this notebook.
dega.pre.make_meta_cell_image_coord(
    technology,
    path_transformation_matrix,
    path_meta_cell_micron,
    path_meta_cell_image,
    image_scale=image_scale
)

## Transcripts

In [11]:
%%time
# Tile size shared by the transcript tiles, the cell boundary tiles and the landscape parameters.
# NOTE(review): units are not visible here — confirm against the celldega documentation.
tile_size = 250
# Input transcripts CSV and output folder for the transcript tiles
path_trx = f'{data_dir_region}/detected_transcripts.csv'
path_trx_tiles = f'{path_landscape_files_region}/transcript_tiles'

# Keep the returned tile bounds: they are passed on to make_cell_boundary_tiles
# in the Cell Boundaries section.
tile_bounds = dega.pre.make_trx_tiles(
    technology, 
    path_trx, 
    path_transformation_matrix, 
    path_trx_tiles,
    tile_size=tile_size,
    image_scale=image_scale
)

CPU times: user 10min 31s, sys: 20.8 s, total: 10min 52s
Wall time: 10min 59s


## Cell Boundaries

In [13]:
%%time

# Inputs from the raw region folder. The cell metadata and transformation matrix
# paths repeat the values set in the Cell Metadata section.
path_cell_boundaries = f'{data_dir_region}/cell_boundaries.parquet'
path_meta_cell_micron = f'{data_dir_region}/cell_metadata.csv'
path_transformation_matrix = f'{data_dir_region}/micron_to_mosaic_pixel_transform.csv'
# Output folder for the cell boundary tiles
path_output = f'{path_landscape_files_region}/cell_segmentation'

# Reuses tile_size and the tile_bounds returned by make_trx_tiles in the Transcripts section.
dega.pre.make_cell_boundary_tiles(
    technology,
    path_cell_boundaries, 
    path_meta_cell_micron, 
    path_transformation_matrix, 
    path_output,
    tile_size=tile_size,
    tile_bounds=tile_bounds,
    image_scale=image_scale
)

row 0
row 2
row 4
row 6
row 8
row 10
row 12
row 14
row 16
row 18
row 20
row 22
row 24
row 26
row 28
row 30
row 32
row 34
row 36
row 38
row 40
row 42
row 44
row 46
row 48
row 50
row 52
row 54
row 56
row 58
row 60
row 62
row 64
row 66
row 68
row 70
row 72
row 74
row 76
row 78
row 80
row 82
row 84
row 86
row 88
row 90
row 92
row 94
row 96
row 98
row 100
row 102
row 104
row 106
row 108
row 110
row 112
row 114
row 116
row 118
row 120
row 122
row 124
row 126
row 128
row 130
row 132
row 134
row 136
row 138
row 140
row 142
row 144
row 146
row 148
row 150
row 152
row 154
row 156
row 158
row 160
row 162
row 164
row 166
row 168
row 170
row 172
row 174
row 176
row 178
row 180
row 182
row 184
row 186
row 188
row 190
row 192
row 194
row 196
row 198
row 200
row 202
row 204
row 206
row 208
row 210
row 212
row 214
CPU times: user 25min 44s, sys: 33 s, total: 26min 17s
Wall time: 26min 35s


## Gene Metadata

In [14]:
# Input cell-by-gene CSV from the raw region folder
path_cbg = f'{data_dir_region}/cell_by_gene.csv'
# Output Parquet path; note that this reassigns path_output, which previously held the cell_segmentation folder
path_output = f'{path_landscape_files_region}/meta_gene.parquet'
# Per the captured output, this computes mean, variance, maximum and proportion of non-zero expression
dega.pre.make_meta_gene(technology, path_cbg, path_output)


cbg is a dense DataFrame. Proceeding with dense operations.
Calculating mean expression
Calculating variance
Calculating maximum expression
Calculating proportion of non-zero expression


## Get Max Zoom
Save the maximum zoom level to a file in the landscape files so that the front-end can read it and use it in the code.

In [15]:
# Example usage:
# Folder of the DAPI image pyramid written by make_deepzoom_pyramid in the Make Pyramidal Image section
path_image_pyramid = f"{path_landscape_files_region}/pyramid_images/dapi_files"
# Look up the maximum zoom level of that pyramid
max_pyramid_zoom = dega.pre.get_max_zoom_level(path_image_pyramid)

print(max_pyramid_zoom)

17


## Save Landscape Parameters JSON

In [16]:
# Per-channel image description passed to save_landscape_parameters as image_info:
# channel name, button label and a three-value color (presumably RGB — TODO confirm).
image_info = [
    dict(name="dapi", button_name="DAPI", color=[0, 0, 255]),
]

In [17]:


# Write the landscape parameters for this region.
# 'dapi_files' matches the pyramid folder name used under pyramid_images in the Get Max Zoom section;
# '.webp' matches the suffix used when the pyramid was built.
dega.pre.save_landscape_parameters(
    technology, 
    path_landscape_files_region,
    'dapi_files',
    tile_size=tile_size,
    image_info=image_info,
    image_format='.webp'
)

/Users/whuan/dev/ist_benchmarking/data/landscapes/2024_merscope_breast_htma/region_0/pyramid_images/dapi_files


In [16]:
# Destination bucket path (without the gs:// scheme) for the landscape files
des_bucket_name = "fc-secure-cbb15268-8969-436a-818b-ae40f52e3b41/ist_data/landscape_files/"

# Assemble the recursive upload command for the whole sample folder.
# The command is only printed here, not executed.
cmd = " ".join(["gsutil", "-m", "cp", "-r", path_landscape_files, f"gs://{des_bucket_name}"])
print(cmd)

gsutil -m cp -r /Users/whuan/dev/ist_benchmarking/data/landscapes/2024_merscope_breast_htma gs://fc-secure-cbb15268-8969-436a-818b-ae40f52e3b41/ist_data/landscape_files/
