Skip to content

Commit

Permalink
edits from review
Browse files Browse the repository at this point in the history
  • Loading branch information
jenna-tomkinson committed Dec 9, 2022
1 parent 0d0b2cb commit a073b94
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 48 deletions.
12 changes: 6 additions & 6 deletions 0_download_data/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ Number of images:
- 384 images (New dataset)
Total memory:
Total storage size:
- 212.9 MB (Pilot dataset)
Expand Down Expand Up @@ -61,7 +61,7 @@ Both the Actin and GFP channel images are all `.tif` files.

## Solution

This led to the need for a solution to correct the second plate dataset to reflect the standard from the pilot dataset.
This led to the need to correct the second plate dataset to reflect standards from the pilot dataset.
The corrections include using CellProfiler and Python.

In CellProfiler, we split each RGB image into three greyscale images (called red, green, and blue), keep the one greyscale image that corresponds to the image's channel (e.g. DAPI is the blue channel, GFP is green, RFP is red), and crop it to remove the scale.
Expand All @@ -78,20 +78,20 @@ Lastly, using Python, we created a function to reorder the file names and add me
### Step 1: Create conda environment

```sh
# Run this command to create the conda environment for NF1 segmentation
# Run this command to create the conda environment
conda env create -f 0.download_NF1_data.yml
```

### Step 2: Activate conda environment

```sh
# Run this command to create the conda environment for NF1 segmentation
# Run this command to activate the conda environment
conda activate download-NF1-data
```

### Step 3: Execute preprocessing NF1 data

```bash
# Run this script in terminal to segment NF1 data
```sh
# Run this script in the terminal to preprocess the NF1 data
bash 1.preprocessing_data.sh
```
3 changes: 1 addition & 2 deletions 0_download_data/correct_images.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,7 @@
"outputs": [],
"source": [
"import pathlib\n",
"import importlib\n",
"correct = importlib.import_module(\"correctionutils\")\n"
"import correctionutils as correct\n"
]
},
{
Expand Down
84 changes: 46 additions & 38 deletions 0_download_data/correctionutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def correct_images(path_to_pipeline: str, path_to_output: str, path_to_images: s
command = f"cellprofiler -c -r -p {path_to_pipeline} -o {path_to_output} -i {path_to_images}"
os.system(command)
else:
print("This plate has already been ran through CellProfiler!")
print("This plate has already been processed by CellProfiler!")


def rename_images(path_to_images: pathlib.Path, output_folder_name: str):
Expand All @@ -34,13 +34,15 @@ def rename_images(path_to_images: pathlib.Path, output_folder_name: str):
path_to_images : pathlib.Path
path to where the corrected images from CellProfiler are located
output_folder_name:
name of the folder with the outputed
name of the folder with the outputted images
"""
original_prefix = ("DAPI", "GFP", "Actin")

# if any of the image names start with the channel name, then that means it needs to be reordered
if any(
images.name.startswith(original_prefix) for images in path_to_images.iterdir()
):
# this splits the image names into individual metadata and reorders them to fit standard (as well as adding metadata)
for images in path_to_images.iterdir():
image_names = images.name.split("_")
well = image_names[1]
Expand All @@ -52,14 +54,16 @@ def rename_images(path_to_images: pathlib.Path, output_folder_name: str):
)
Path(images).rename(Path(new_image_name))
print("The image names are reordered")
# if the images do not have the prefix, then likely the image has already been processed so it should not be processed again
# to avoid issues (e.g. wrongful reordering, deleting images, etc.)
else:
print("This plate has already been reordered!")

dapi_id = "DAPI"
gfp_id = "GFP"
actin_id = "Actin"

keywords = (
channel_number_added = (
"01_1_1",
"01_1_2",
"01_1_3",
Expand All @@ -74,41 +78,45 @@ def rename_images(path_to_images: pathlib.Path, output_folder_name: str):
"01_3_4",
)

# if any of the images contain the string (e.g. contains the channel number), then the images should be left alone
for images in path_to_images.iterdir():
if any(x in str(images) for x in keywords):
if any(x in str(images) for x in channel_number_added):
print("The metadata for this plate has already been corrected!")
break
else:
if dapi_id in str(images):
DAPI_name = images.name.split("_")
well = DAPI_name[0]
site = DAPI_name[2]
channel_name = DAPI_name[3]
plate = DAPI_name[4].split(".")[0]

new_DAPI_name = f"{output_folder_name}/{well}_01_1_{site}_{channel_name}_{plate}.tif"
Path(images).rename(Path(new_DAPI_name))
print(f"DAPI {well}_{site} image has been renamed")

if gfp_id in str(images):
GFP_name = images.name.split("_")
well = GFP_name[0]
site = GFP_name[2]
channel_name = GFP_name[3]
plate = GFP_name[4].split(".")[0]

new_GFP_name = f"{output_folder_name}/{well}_01_2_{site}_{channel_name}_{plate}.tif"
Path(images).rename(Path(new_GFP_name))
print(f"GFP {well}_{site} image has been renamed")

if actin_id in str(images):
Actin_name = images.name.split("_")
well = Actin_name[0]
site = Actin_name[2]
# replace Actin with RFP to keep metadata consistent between datasets
channel_name = "RFP"
plate = Actin_name[4].split(".")[0]

new_Actin_name = f"{output_folder_name}/{well}_01_3_{site}_{channel_name}_{plate}.tif"
Path(images).rename(Path(new_Actin_name))
print(f"Actin (now RFP) {well}_{site} image has been renamed")

# this will correct the images with DAPI in the name and give the correct channel number
if dapi_id in str(images):
DAPI_name = images.name.split("_")
well = DAPI_name[0]
site = DAPI_name[2]
channel_name = DAPI_name[3]
plate = DAPI_name[4].split(".")[0]

new_DAPI_name = f"{output_folder_name}/{well}_01_1_{site}_{channel_name}_{plate}.tif"
Path(images).rename(Path(new_DAPI_name))
print(f"DAPI {well}_{site} image has been renamed")

# this will correct the images with GFP in the name and give the correct channel number
if gfp_id in str(images):
GFP_name = images.name.split("_")
well = GFP_name[0]
site = GFP_name[2]
channel_name = GFP_name[3]
plate = GFP_name[4].split(".")[0]

new_GFP_name = f"{output_folder_name}/{well}_01_2_{site}_{channel_name}_{plate}.tif"
Path(images).rename(Path(new_GFP_name))
print(f"GFP {well}_{site} image has been renamed")

# this will correct the images with Actin in the name and give the correct channel number as well
if actin_id in str(images):
Actin_name = images.name.split("_")
well = Actin_name[0]
site = Actin_name[2]
# replace Actin with RFP to keep metadata consistent between datasets
channel_name = "RFP"
plate = Actin_name[4].split(".")[0]

new_Actin_name = f"{output_folder_name}/{well}_01_3_{site}_{channel_name}_{plate}.tif"
Path(images).rename(Path(new_Actin_name))
print(f"Actin (now RFP) {well}_{site} image has been renamed")
4 changes: 2 additions & 2 deletions 2_segmenting_data/Segmentation_Pipeline.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.8.13 ('download-NF1-data')",
"display_name": "Python 3.8.13 ('2.segment-NF1-data')",
"language": "python",
"name": "python3"
},
Expand All @@ -197,7 +197,7 @@
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "6f614b090b7dcc5417cc8c5d447b53061ddddd5f43fe38156693013cfafb65c9"
"hash": "e2880e5773ed34dc765efa6cd049c8e5f7cdd6d2179ba47ba1df48378009cd11"
}
}
},
Expand Down

0 comments on commit a073b94

Please sign in to comment.