Skip to content

Commit

Permalink
Remove GoogleDrivePath Dead Code (#528)
Browse files Browse the repository at this point in the history
* removed googledrive in requirements.txt

* Google Drive Code marked as `Deprecated`

* pycodestyle changes

* io_utils: fixed `folders` referenced-before-assignment error

* fixing io.imsave(pathlib.Path) object issue

* Path(<path>).as_posix(); future versions of scikit-image allow Path objects as file paths

* using os.path.join instead of pathlib.Path

* missed one

* missed another

* fixed one issue

* 1 issue left, Segment_Image_Data: list index out of range

* added notebook_update_test.py back

* pycodestyle

* removed Deprecated Code

* formatting

* formatting, removed generalized getmtime function

* formatting docstring fix

* formatting fix

* getmtime fix

* buildthedocs fix?

* pycodestyle

* removed .toks folder, and references in the Dockerfile / start_docker.sh

* cleaned segment_image_data.ipynb

* TRAVIS BUILD TEST

* tag=branch_name

* removed google_drive_usage.md

* reset .travis.yml
  • Loading branch information
srivarra authored May 13, 2022
1 parent 1e60432 commit a84cf4d
Show file tree
Hide file tree
Showing 18 changed files with 129 additions and 1,522 deletions.
1 change: 0 additions & 1 deletion .toks/.creds.enc

This file was deleted.

1 change: 0 additions & 1 deletion .toks/.s.txt

This file was deleted.

1 change: 0 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ WORKDIR /scripts

# copy over the requirements.txt, install dependencies, and README
COPY setup.py requirements.txt README.md /opt/ark-analysis/
COPY .toks /home/.toks
RUN pip install -r /opt/ark-analysis/requirements.txt

# copy the scripts over
Expand Down
29 changes: 10 additions & 19 deletions ark/utils/data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from ark import settings
from ark.utils import load_utils
from ark.utils.misc_utils import verify_in_list
from ark.utils.google_drive_utils import GoogleDrivePath, drive_write_out, path_join


def save_fov_images(fovs, data_dir, img_xr, name_suffix=''):
Expand Down Expand Up @@ -48,7 +47,7 @@ def save_fov_images(fovs, data_dir, img_xr, name_suffix=''):
def label_cells_by_cluster(fovs, all_data, label_maps, fov_col=settings.FOV_ID,
cell_label_column=settings.CELL_LABEL,
cluster_column=settings.KMEANS_CLUSTER):
""" Translates cell-ID labeled images according to the clustering assignment.
"""Translates cell-ID labeled images according to the clustering assignment.
Takes a list of fovs, and relabels each image (array) according to the assignment
of cell IDs to cluster label.
Expand Down Expand Up @@ -120,8 +119,7 @@ def generate_cell_cluster_mask(fovs, base_dir, seg_dir, cell_consensus_name,

if not os.path.exists(os.path.join(base_dir, cell_consensus_name)):
raise FileNotFoundError(
"consensus_dir %s does not exist in base_dir %s" % (cell_consensus_name, base_dir)
)
"consensus_dir %s does not exist in base_dir %s" % (cell_consensus_name, base_dir))

# verify the cluster_col provided is valid
verify_in_list(
Expand Down Expand Up @@ -341,11 +339,8 @@ def generate_deepcell_input(data_dir, tiff_dir, nuc_channels, mem_channels, fovs
if mem_channels:
out[1] = np.sum(data_xr.loc[fov, :, :, mem_channels].values, axis=2)

save_path = path_join(data_dir, f'{fov}.tif')
drive_write_out(
save_path,
lambda x: io.imsave(x, out, plugin='tifffile', check_contrast=False)
)
save_path = os.path.join(data_dir, f"{fov}.tif")
io.imsave(save_path, out, plugin='tifffile', check_contrast=False)


def stitch_images(data_xr, num_cols):
Expand Down Expand Up @@ -410,19 +405,15 @@ def split_img_stack(stack_dir, output_dir, stack_list, indices, names, channels_
"""

for stack_name in stack_list:
img_stack = io.imread(path_join(stack_dir, stack_name))
img_dir = path_join(output_dir, os.path.splitext(stack_name)[0])
if type(img_dir) is GoogleDrivePath:
img_dir.mkdir()
else:
os.makedirs(img_dir)
img_stack = io.imread(os.path.join(stack_dir, stack_name))
img_dir = os.path.join(output_dir, os.path.splitext(stack_name)[0])
os.makedirs(img_dir)

for i in range(len(indices)):
if channels_first:
channel = img_stack[indices[i], ...]
else:
channel = img_stack[..., indices[i]]
drive_write_out(
path_join(img_dir, names[i]),
lambda x: io.imsave(x, channel, plugin='tifffile', check_contrast=False)
)

save_path = os.path.join(img_dir, names[i])
io.imsave(save_path, channel, plugin='tifffile', check_contrast=False)
172 changes: 79 additions & 93 deletions ark/utils/deepcell_service_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,64 +10,58 @@
from concurrent.futures import ThreadPoolExecutor

from ark.utils import misc_utils
from ark.utils.google_drive_utils import GoogleDrivePath, drive_write_out, path_join, DriveOpen


def create_deepcell_output(deepcell_input_dir, deepcell_output_dir, fovs=None,
suffix='_feature_0', host='https://deepcell.org', job_type='mesmer',
scale=1.0, timeout=3600, zip_size=100, parallel=False):
""" Handles all of the necessary data manipulation for running deepcell tasks.
Creates .zip files (to be used as input for DeepCell),
calls run_deepcell_task method,
and extracts zipped output files to the specified output location
Args:
deepcell_input_dir (str):
Location of preprocessed files (assume deepcell_input_dir contains <fov>.tif
for each fov in fovs list). This should not be a GoogleDrivePath.
deepcell_output_dir (str):
Location to save DeepCell output (as .tif)
fovs (list):
List of fovs in preprocessing pipeline. if None, all .tif files
in deepcell_input_dir will be considered as input fovs. Default: None
suffix (str):
Suffix for DeepCell output filename. e.g. for fovX, DeepCell output
should be <fovX>+suffix.tif. Default: '_feature_0'
host (str):
Hostname and port for the kiosk-frontend API server
Default: 'https://deepcell.org'
job_type (str):
Name of job workflow (multiplex, segmentation, tracking)
Default: 'multiplex'
scale (float):
Value to rescale data by
Default: 1.0
timeout (int):
Approximate seconds until timeout.
Default: 1 hour (3600)
zip_size (int):
Maximum number of files to include in zip.
Default: 100
parallel (bool):
Tries to zip, upload, and extract zip files in parallel
Default: False
Raises:
ValueError:
Raised if there is some fov X (from fovs list) s.t.
the file <deepcell_input_dir>/fovX.tif does not exist
"""Handles all of the necessary data manipulation for running deepcell tasks.
Creates .zip files (to be used as input for DeepCell),
calls run_deepcell_task method,
and extracts zipped output files to the specified output location
Args:
deepcell_input_dir (str):
Location of preprocessed files (assume deepcell_input_dir contains <fov>.tif
for each fov in fovs list). This should not be a GoogleDrivePath.
deepcell_output_dir (str):
Location to save DeepCell output (as .tif)
fovs (list):
List of fovs in preprocessing pipeline. if None, all .tif files
in deepcell_input_dir will be considered as input fovs. Default: None
suffix (str):
Suffix for DeepCell output filename. e.g. for fovX, DeepCell output
should be <fovX>+suffix.tif. Default: '_feature_0'
host (str):
Hostname and port for the kiosk-frontend API server
Default: 'https://deepcell.org'
job_type (str):
Name of job workflow (multiplex, segmentation, tracking)
Default: 'multiplex'
scale (float):
Value to rescale data by
Default: 1.0
timeout (int):
Approximate seconds until timeout.
Default: 1 hour (3600)
zip_size (int):
Maximum number of files to include in zip.
Default: 100
parallel (bool):
Tries to zip, upload, and extract zip files in parallel
Default: False
Raises:
ValueError:
Raised if there is some fov X (from fovs list) s.t.
the file <deepcell_input_dir>/fovX.tif does not exist
"""

# check that scale arg can be converted to a float
try:
scale = float(scale)
except ValueError:
raise ValueError("Scale argument must be a number")

is_drive_path = False
if type(deepcell_input_dir) is GoogleDrivePath:
warnings.warn("Consider saving preprocessed deepcell input tifs locally...", UserWarning)
is_drive_path = True

# extract all the files from deepcell_input_dir
input_files = io_utils.list_files(deepcell_input_dir, substrs=['.tif'])

Expand Down Expand Up @@ -96,8 +90,9 @@ def create_deepcell_output(deepcell_input_dir, deepcell_output_dir, fovs=None,
# i.e easier to map fov_groups
def _zip_run_extract(fov_group, group_index):
# define the location of the zip file for our fovs
zip_path = path_join(deepcell_input_dir, f'fovs_batch_{group_index + 1}.zip')
if not is_drive_path and os.path.isfile(zip_path):
zip_path = os.path.join(deepcell_input_dir, f'fovs_batch_{group_index + 1}.zip')

if os.path.isfile(zip_path):
warnings.warn(f'{zip_path} will be overwritten')

# write all files to the zip file
Expand All @@ -110,36 +105,31 @@ def zip_write(zip_path):
basename = fov + '.tif'
if basename not in input_files:
basename = basename + 'f'
filename = path_join(deepcell_input_dir, basename)
if is_drive_path:
with filename.read() as f:
zipObj.writestr(basename, f.getvalue())
else:
zipObj.write(filename, basename)

drive_write_out(zip_path, zip_write)
filename = os.path.join(deepcell_input_dir, basename)
zipObj.write(filename, basename)

zip_write(zip_path)

# pass the zip file to deepcell.org
print('Uploading files to DeepCell server.')
run_deepcell_direct(zip_path, deepcell_output_dir, host, job_type, scale, timeout)
run_deepcell_direct(
zip_path, deepcell_output_dir, host, job_type, scale, timeout
)

# extract the .tif output
print('Extracting tif files from DeepCell response.')
zip_names = io_utils.list_files(deepcell_output_dir, substrs=['.zip'])
zip_files = [path_join(deepcell_output_dir, name) for name in zip_names]
print("Extracting tif files from DeepCell response.")
zip_names = io_utils.list_files(deepcell_output_dir, substrs=[".zip"])

# sort by newest added
zip_files.sort(key=io_utils.getmtime)
zip_files = [os.path.join(deepcell_output_dir, name) for name in zip_names]

# generalize for str/filehandle input to ZipFile call
if type(deepcell_output_dir) is GoogleDrivePath:
zip_files = [zf.read() for zf in zip_files]
# sort by newest added
zip_files.sort(key=os.path.getmtime)

with ZipFile(zip_files[-1], 'r') as zipObj:
with ZipFile(zip_files[-1], "r") as zipObj:
for name in zipObj.namelist():
with DriveOpen(path_join(deepcell_output_dir, name), mode='wb') as f:
with open(os.path.join(deepcell_output_dir, name), mode='wb') as f:
f.write(zipObj.read(name))
# zipObj.extractall(deepcell_output_dir)
for fov in fov_group:
if fov + suffix + '.tif' not in zipObj.namelist():
warnings.warn(f'Deep Cell output file was not found for {fov}.')
Expand All @@ -157,36 +147,31 @@ def run_deepcell_direct(input_dir, output_dir, host='https://deepcell.org',
job_type='mesmer', scale=1.0, timeout=3600):
"""Uses direct calls to DeepCell API and saves output to output_dir.
Args:
input_dir (str):
location of .zip files
output_dir (str):
location to save deepcell output (as .zip)
host (str):
Hostname and port for the kiosk-frontend API server.
Default: 'https://deepcell.org'
job_type (str):
Name of job workflow (mesmer, segmentation, tracking).
scale (float):
Value to rescale data by
Default: 1.0
timeout (int):
Approximate seconds until timeout.
Default: 1 hour (3600)
Args:
input_dir (str):
location of .zip files
output_dir (str):
location to save deepcell output (as .zip)
host (str):
Hostname and port for the kiosk-frontend API server.
Default: 'https://deepcell.org'
job_type (str):
Name of job workflow (mesmer, segmentation, tracking).
scale (float):
Value to rescale data by
Default: 1.0
timeout (int):
Approximate seconds until timeout.
Default: 1 hour (3600)
"""

# upload zip file
upload_url = host + '/api/upload'
upload_url = host + "/api/upload"
filename = Path(input_dir).name

is_drive_path = type(input_dir) is GoogleDrivePath
filename = input_dir.filename() if is_drive_path else Path(input_dir).name

with DriveOpen(input_dir, mode='rb') as f:
with open(input_dir, mode='rb') as f:
upload_fields = {
'file': (
filename,
f.read(),
'application/zip'),
'file': (filename, f.read(), 'application/zip'),
}
f.seek(0)

Expand Down Expand Up @@ -253,7 +238,8 @@ def run_deepcell_direct(input_dir, output_dir, host='https://deepcell.org',
print(f"Encountered Failure(s): {unquote_plus(redis_response['value'][4])}")

deepcell_output = requests.get(redis_response['value'][2], allow_redirects=True)
with DriveOpen(path_join(output_dir, 'deepcell_response.zip'), mode='wb') as f:

with open(os.path.join(output_dir, "deepcell_response.zip"), mode="wb") as f:
f.write(deepcell_output.content)

# being kind and sending an expire signal to deepcell
Expand Down
Loading

0 comments on commit a84cf4d

Please sign in to comment.