# Example of CellProfiler project for idrstream

## Import Libraries

In [1]:
import pathlib
import pandas as pd
import shutil

from cellpose import core

import sys
sys.path.append("../")
from idrstream.CP_idr import CellProfilerRun

## Initialize idrstream

In [2]:
# Paths and identifiers used to configure the idrstream CellProfiler run
pipeline_path = pathlib.Path("example_files/CP_files/mitocheck_idr_cp.cppipe")
# NOTE: absolute, machine-specific path -- change this to the CellProfiler plugins directory on your machine
plugins_directory = pathlib.Path("/home/jenna/Desktop/Github/CellProfiler/cellprofiler/modules/plugins")
idr_id = "idr0013"
tmp_dir = pathlib.Path("../tmp/")
final_data_dir = pathlib.Path("../mitocheck_control_features/CP_features")

# Start from a clean temporary directory. Only a missing directory is treated as
# "nothing to remove"; any other failure (e.g. a permissions error) or a keyboard
# interrupt is allowed to propagate instead of being silently reported as success.
try:
    shutil.rmtree(tmp_dir)
    # uncomment the line below if you would like to remove the final data directory (e.g. all .csv.gz files)
    # shutil.rmtree(final_data_dir)
except FileNotFoundError:
    print("No files to remove")

stream = CellProfilerRun(pipeline_path, plugins_directory, idr_id, tmp_dir, final_data_dir, log='cp_idrstream.log')

## Initialize CellProfiler metadata compiler

In [3]:
# TSV describing the data to process, and the CSV path handed to the converter
# (presumably where the converted metadata is saved -- confirm in idrstream)
data_to_process_tsv = pathlib.Path("example_files/data_to_process.tsv")
metadata_save_path = pathlib.Path("example_files/data_to_process.csv")

data_to_process = stream.convert_tsv_to_csv(
    data_to_process_tsv,
    metadata_save_path,
)

## Load in metadata

In [4]:
# Load the tab-separated table of data to process. The path is taken from
# `data_to_process_tsv` (defined in the metadata compiler cell) so the file
# location is specified in only one place.
data_to_process = pd.read_csv(data_to_process_tsv, sep="\t")
data_to_process

Unnamed: 0.1,Unnamed: 0,Plate,Well,Frames,Well Number,Control Type,Original Gene Target,Plate_Map_Name,Gene_Replicate,Site,DNA
0,0,LT0001_02,A4,50,4,positive control,ENSG00000149503,LT0001_02_4,1,1,LT0001_02/LT0001_02_4_50.tif
1,1,LT0001_02,A15,50,15,negative control,negative control,LT0001_02_15,1,1,LT0001_02/LT0001_02_15_50.tif
2,2,LT0001_02,B2,50,26,negative control,negative control,LT0001_02_26,1,1,LT0001_02/LT0001_02_26_50.tif
3,3,LT0001_02,C1,50,49,positive control,ENSG00000149503,LT0001_02_49,1,1,LT0001_02/LT0001_02_49_50.tif
4,4,LT0001_02,C4,50,52,positive control,ENSG00000149503,LT0001_02_52,1,1,LT0001_02/LT0001_02_52_50.tif
...,...,...,...,...,...,...,...,...,...,...,...
6738,6738,LT0603_06,O13,50,349,positive control,KIF11,LT0603_06_349,1,1,LT0603_06/LT0603_06_349_50.tif
6739,6739,LT0603_06,O16,50,352,negative control,negative control,LT0603_06_352,1,1,LT0603_06/LT0603_06_352_50.tif
6740,6740,LT0603_06,P3,50,363,negative control,negative control,LT0603_06_363,1,1,LT0603_06/LT0603_06_363_50.tif
6741,6741,LT0603_06,P21,50,381,positive control,COPB,LT0603_06_381,1,1,LT0603_06/LT0603_06_381_50.tif


## Initialize Aspera downloader

In [5]:
# Paths handed to the Aspera downloader.
# The ascp location is machine-specific; find yours in a terminal using `ascli config ascp show`
aspera_path = pathlib.Path("/home/jenna/.aspera/ascli/sdk/ascp")
aspera_key_path = pathlib.Path("example_files/asperaweb_id_dsa.openssh")
screens_path = pathlib.Path("example_files/idr0013-screenA-plates.tsv")

stream.init_downloader(
    aspera_path,
    aspera_key_path,
    screens_path,
)

## Initialize Fiji preprocessor

In [6]:
# Location of the Fiji installation passed to the preprocessor.
# NOTE(review): absolute, machine-specific path -- adjust it for your machine.
fiji_path = pathlib.Path("/home/jenna/Desktop/test/Fiji.app")
stream.init_preprocessor(fiji_path)

## Copy and create CellProfiler files/folders

In [7]:
# Reuse the CSV path defined in the metadata compiler cell (`metadata_save_path`)
# rather than repeating the literal, so both steps always point at the same file.
metadata_path = metadata_save_path
stream.copy_CP_files(metadata_path)

## Confirm that GPU is activated for Cellpose to run

In [8]:
# Query Cellpose for GPU availability and print the result through an integer
# format (a True result prints as 1)
gpu_report_template = ">>> GPU activated? %d"
use_GPU = core.use_gpu()
print(gpu_report_template % use_GPU)
# logger_setup()

>>> GPU activated? 1


## Run idrstream batches

In [9]:
# Run the stream over the loaded metadata table with the batch settings below
stream.run_cp_stream(
    data_to_process,
    batch_size=3,
    start_batch=0,
    batch_nums=[0],
)

Completed: 75739K bytes transferred in 3 seconds
 (187601K bits/sec), in 1 file.
CellH5Reader initializing /home/jenna/Desktop/Github/IDR_stream/tmp/downloads/LT0001_02/00004_01.ch5
Plate :/sample/0/plate/
Well :/sample/0/plate/LT0001_02--ex2005_11_16--sp2005_02_17--tt17--c3/experiment/
Site :/sample/0/plate/LT0001_02--ex2005_11_16--sp2005_02_17--tt17--c3/experiment/00004/position/
Parse segmentation ROIs for cell object primary__test : 0
Completed: 87703K bytes transferred in 6 seconds
 (115840K bits/sec), in 1 file.
CellH5Reader initializing /home/jenna/Desktop/Github/IDR_stream/tmp/downloads/LT0001_02/00015_01.ch5
Plate :/sample/0/plate/
Well :/sample/0/plate/LT0001_02--ex2005_11_16--sp2005_02_17--tt17--c3/experiment/
Site :/sample/0/plate/LT0001_02--ex2005_11_16--sp2005_02_17--tt17--c3/experiment/00015/position/
Parse segmentation ROIs for cell object primary__test : 0
Completed: 74464K bytes transferred in 3 seconds
 (196500K bits/sec), in 1 file.
CellH5Reader initializing /home/j

Times reported are CPU and Wall-clock times for each module
Fri Dec 16 13:16:20 2022: Image # 1, module Images # 1: CPU_time = 0.00 secs, Wall_time = 0.00 secs
Fri Dec 16 13:16:20 2022: Image # 1, module Metadata # 2: CPU_time = 0.00 secs, Wall_time = 0.00 secs
Fri Dec 16 13:16:20 2022: Image # 1, module NamesAndTypes # 3: CPU_time = 0.47 secs, Wall_time = 0.12 secs
Fri Dec 16 13:16:20 2022: Image # 1, module Groups # 4: CPU_time = 0.00 secs, Wall_time = 0.00 secs
** TORCH CUDA version installed and working. **
>>>> using GPU
>> cyto << model set to be used
>>>> model diam_mean =  30.000 (ROIs rescaled to this size during training)
~~~ ESTIMATING CELL DIAMETER(S) ~~~
estimated cell diameter(s) in 3.87 sec
>>> diameter(s) = 
[ 27.64 ]
~~~ FINDING MASKS ~~~
>>>> TOTAL TIME 4.61 sec
Fri Dec 16 13:16:20 2022: Image # 1, module RunCellpose # 5: CPU_time = 5.71 secs, Wall_time = 5.58 secs
  back_pixels = skimage.morphology.erosion(back_pixels_mask, selem=selem)
  back_pixels = skimage.morpho