Skip to content

Commit

Permalink
updating code
Browse files Browse the repository at this point in the history
  • Loading branch information
jenna-tomkinson committed Dec 12, 2022
1 parent ec3321c commit c6bf525
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 75 deletions.
2 changes: 1 addition & 1 deletion 0_download_data/0.download_NF1_data.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ channels:
dependencies:
- conda-forge::python=3.8
- conda-forge::scikit-image=0.19.2
- conda-forge::pandas=1.4.4
- conda-forge::pandas
- conda-forge::matplotlib=3.5.2
- conda-forge::jupyter=1.0.0
- conda-forge::jupyterlab
Expand Down
4 changes: 3 additions & 1 deletion 0_download_data/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,9 @@ Total storage size:
- 204.2 MB (New dataset - post conversion and cropping)
```

## File Name Structure
## Standard Metadata Name Structure

The standard metadata structure for this project is based on the pilot dataset.

![NF1 Pilot Data Metadata](example_images/NF1_Pilot_Data_Metadata.png "NF1 Pilot Data Metadata")

Expand Down
22 changes: 12 additions & 10 deletions 0_download_data/correct_images.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
"outputs": [],
"source": [
"import pathlib\n",
"import correctionutils as correct\n"
"import correctionutils as correct"
]
},
{
Expand All @@ -37,9 +37,9 @@
"metadata": {},
"outputs": [],
"source": [
"path_to_pipeline = \"/home/jenna/NF1_SchwannCell_data/0_download_data/convert_crop_NF1_images.cppipe\"\n",
"path_to_output = \"/home/jenna/NF1_SchwannCell_data/0_download_data/NF1_Second_Plate_Corrected\"\n",
"path_to_images = \"/home/jenna/NF1_SchwannCell_data/0_download_data/NF1_Second_Plate\""
"path_to_pipeline = \"convert_crop_NF1_images.cppipe\"\n",
"path_to_output = \"NF1_test\"\n",
"path_to_images = \"NF1_Second_Plate\""
]
},
{
Expand All @@ -55,10 +55,10 @@
"metadata": {},
"outputs": [
{
"name": "stdout",
"name": "stderr",
"output_type": "stream",
"text": [
"This plate has already been ran through CellProfiler!\n"
"WARNING:root:This plate has already been processed by CellProfiler!\n"
]
}
],
Expand All @@ -72,17 +72,19 @@
"metadata": {},
"outputs": [
{
"name": "stdout",
"name": "stderr",
"output_type": "stream",
"text": [
"This plate has already been reordered!\n",
"The metadata for this plate has already been corrected!\n"
"WARNING:root:This plate has already been reordered!\n",
"ERROR:root:The length is 6. Please review your images\n",
"INFO:root:Images were renamed\n"
]
}
],
"source": [
"images_path = pathlib.Path(path_to_output)\n",
"output_folder_name = 'NF1_Second_Plate_Corrected'\n",
"# output_folder_name = 'NF1_Second_Plate_Corrected'\n",
"output_folder_name = \"NF1_test\"\n",
"\n",
"correct.rename_images(images_path, output_folder_name)"
]
Expand Down
98 changes: 35 additions & 63 deletions 0_download_data/correctionutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
import os
import pathlib
from pathlib import Path
import logging

logging.basicConfig(level=logging.INFO)


def correct_images(path_to_pipeline: str, path_to_output: str, path_to_images: str):
Expand All @@ -22,7 +25,7 @@ def correct_images(path_to_pipeline: str, path_to_output: str, path_to_images: s
command = f"cellprofiler -c -r -p {path_to_pipeline} -o {path_to_output} -i {path_to_images}"
os.system(command)
else:
print("This plate has already been processed by CellProfiler!")
logging.warn("This plate has already been processed by CellProfiler!")


def rename_images(path_to_images: pathlib.Path, output_folder_name: str):
Expand Down Expand Up @@ -53,70 +56,39 @@ def rename_images(path_to_images: pathlib.Path, output_folder_name: str):
f"{output_folder_name}/{well}_01_{site}_{channel_name}_{plate}.tif"
)
Path(images).rename(Path(new_image_name))
print("The image names are reordered")

logging.info("The image names are reordered")

# if the images do not have the prefix, they have most likely already been processed, so they should not be
# processed again to avoid issues (e.g. wrongful reordering, deleting images, etc.)
else:
print("This plate has already been reordered!")

dapi_id = "DAPI"
gfp_id = "GFP"
actin_id = "Actin"

channel_number_added = (
"01_1_1",
"01_1_2",
"01_1_3",
"01_1_4",
"01_2_1",
"01_2_2",
"01_2_3",
"01_2_4",
"01_3_1",
"01_3_2",
"01_3_3",
"01_3_4",
)

# if any of the images contain the string (e.g. contains the channel number), then the images should be left alone
for images in path_to_images.iterdir():
if any(x in str(images) for x in channel_number_added):
print("The metadata for this plate has already been corrected!")
logging.warn("This plate has already been reordered!")

channel_id_dict = {
"DAPI": {"id": "01_1"},
"GFP": {"id": "01_2"},
"Actin": {"id": "01_3", "channel_name": "RFP"},
}

for image in path_to_images.iterdir():
# check that the image name splits into the expected number of "_"-separated parts; if it does not, stop renaming
if len(image.name.split("_")) != 4:
logging.error(f"The length is {len(image.name.split('_'))}")
# TODO: raise an exception here instead of only logging the error and breaking out of the loop
break

# this will correct the images with DAPI in the name and give the correct channel number
if dapi_id in str(images):
DAPI_name = images.name.split("_")
well = DAPI_name[0]
site = DAPI_name[2]
channel_name = DAPI_name[3]
plate = DAPI_name[4].split(".")[0]

new_DAPI_name = f"{output_folder_name}/{well}_01_1_{site}_{channel_name}_{plate}.tif"
Path(images).rename(Path(new_DAPI_name))
print(f"DAPI {well}_{site} image has been renamed")

# this will correct the images with GFP in the name and give the correct channel number
if gfp_id in str(images):
GFP_name = images.name.split("_")
well = GFP_name[0]
site = GFP_name[2]
channel_name = GFP_name[3]
plate = GFP_name[4].split(".")[0]

new_GFP_name = f"{output_folder_name}/{well}_01_2_{site}_{channel_name}_{plate}.tif"
Path(images).rename(Path(new_GFP_name))
print(f"GFP {well}_{site} image has been renamed")

# this will correct the images with Actin in the name and give the correct channel number as well
if actin_id in str(images):
Actin_name = images.name.split("_")
well = Actin_name[0]
site = Actin_name[2]
# replace Actin with RFP to keep metadata consistent between datasets
channel_name = "RFP"
plate = Actin_name[4].split(".")[0]

new_Actin_name = f"{output_folder_name}/{well}_01_3_{site}_{channel_name}_{plate}.tif"
Path(images).rename(Path(new_Actin_name))
print(f"Actin (now RFP) {well}_{site} image has been renamed")
# goes through keys within dictionary
for channel in channel_id_dict.keys():
channel_id = channel_id_dict[channel]["id"]
if "channel_name" in channel_id_dict[channel].keys():
channel_name = channel_id_dict[channel]["channel_name"]
else:
channel_name = channel

if channel in str(image):
well, _, site, _, plate = image.name.split("_")
plate = plate.split(".")[0]

new_channel_name = f"{output_folder_name}/{well}_{channel_id}_{site}_{channel_name}_{plate}.tif"
Path(image).rename(Path(new_channel_name))

0 comments on commit c6bf525

Please sign in to comment.