edits from review

d33bs · Dec 9, 2022 · a073b94 · a073b94
1 parent 0d0b2cb
commit a073b94
Show file tree

Hide file tree

Showing 4 changed files with 55 additions and 48 deletions.
diff --git a/0_download_data/README.md b/0_download_data/README.md
@@ -27,7 +27,7 @@ Number of images:
 
 - 384 images (New dataset)
 
-Total memory: 
+Total storage size: 
 
 - 212.9 MB (Pilot dataset)
 
@@ -61,7 +61,7 @@ Both the Actin and GFP channel images are all `.tif` files.
 
 ## Solution
 
-This led to the need for a solution to correct the second plate dataset to reflect the standard from the pilot dataset. 
+This led to the need to correct the second plate dataset to reflect standards from the pilot dataset. 
 The corrections include using CellProfiler and Python.
 
 In CellProfiler, we split the RGB images into three greyscale images (called red, green, and blue), keep the one of the three that corresponds to the image's channel (e.g. DAPI is the blue channel, GFP is green, RFP is red), and crop the images to remove the scale.
@@ -78,20 +78,20 @@ Lastly, using Python, we created a function to reorder the file names and add me
 ### Step 1: Create conda environment
 
 ```sh
-# Run this command to create the conda environment for NF1 segmentation
+# Run this command to create the conda environment 
 conda env create -f 0.download_NF1_data.yml
 ```
 
 ### Step 2: Activate conda environment
 
 ```sh
-# Run this command to create the conda environment for NF1 segmentation
+# Run this command to activate the conda environment
 conda activate download-NF1-data
 ```
 
 ### Step 3: Execute preprocessing NF1 data
 
-```bash
-# Run this script in terminal to segment NF1 data
+```sh
+# Run this script in terminal
 bash 1.preprocessing_data.sh
 ```
diff --git a/0_download_data/correct_images.ipynb b/0_download_data/correct_images.ipynb
@@ -14,8 +14,7 @@
    "outputs": [],
    "source": [
     "import pathlib\n",
-    "import importlib\n",
-    "correct = importlib.import_module(\"correctionutils\")\n"
+    "import correctionutils as correct\n"
    ]
   },
   {

diff --git a/0_download_data/correctionutils.py b/0_download_data/correctionutils.py
@@ -22,7 +22,7 @@ def correct_images(path_to_pipeline: str, path_to_output: str, path_to_images: s
         command = f"cellprofiler -c -r -p {path_to_pipeline} -o {path_to_output} -i {path_to_images}"
         os.system(command)
     else:
-        print("This plate has already been ran through CellProfiler!")
+        print("This plate has already been processed by CellProfiler!")
 
 
 def rename_images(path_to_images: pathlib.Path, output_folder_name: str):
@@ -34,13 +34,15 @@ def rename_images(path_to_images: pathlib.Path, output_folder_name: str):
     path_to_images : pathlib.Path
         path to where the corrected images from CellProfiler are located
     output_folder_name:
-        name of the folder with the outputed
+        name of the folder with the outputted images
     """
     original_prefix = ("DAPI", "GFP", "Actin")
 
+    # if any of the image names start with the channel name, then that means it needs to be reordered
     if any(
         images.name.startswith(original_prefix) for images in path_to_images.iterdir()
     ):
+        # this splits the image names into individual metadata and reorders them to fit standard (as well as adding metadata)
         for images in path_to_images.iterdir():
             image_names = images.name.split("_")
             well = image_names[1]
@@ -52,14 +54,16 @@ def rename_images(path_to_images: pathlib.Path, output_folder_name: str):
             )
             Path(images).rename(Path(new_image_name))
             print("The image names are reordered")
+    # if the images do not have the prefix, then likely the image has already been processed so it should not be processed again
+    # to avoid issues (e.g. wrongful reordering, deleting images, etc.)
     else:
         print("This plate has already been reordered!")
 
     dapi_id = "DAPI"
     gfp_id = "GFP"
     actin_id = "Actin"
 
-    keywords = (
+    channel_number_added = (
         "01_1_1",
         "01_1_2",
         "01_1_3",
@@ -74,41 +78,45 @@ def rename_images(path_to_images: pathlib.Path, output_folder_name: str):
         "01_3_4",
     )
 
+    # if any of the images contain the string (e.g. contains the channel number), then the images should be left alone
     for images in path_to_images.iterdir():
-        if any(x in str(images) for x in keywords):
+        if any(x in str(images) for x in channel_number_added):
             print("The metadata for this plate has already been corrected!")
             break
-        else:
-            if dapi_id in str(images):
-                DAPI_name = images.name.split("_")
-                well = DAPI_name[0]
-                site = DAPI_name[2]
-                channel_name = DAPI_name[3]
-                plate = DAPI_name[4].split(".")[0]
-
-                new_DAPI_name = f"{output_folder_name}/{well}_01_1_{site}_{channel_name}_{plate}.tif"
-                Path(images).rename(Path(new_DAPI_name))
-                print(f"DAPI {well}_{site} image has been renamed")
-
-            if gfp_id in str(images):
-                GFP_name = images.name.split("_")
-                well = GFP_name[0]
-                site = GFP_name[2]
-                channel_name = GFP_name[3]
-                plate = GFP_name[4].split(".")[0]
-
-                new_GFP_name = f"{output_folder_name}/{well}_01_2_{site}_{channel_name}_{plate}.tif"
-                Path(images).rename(Path(new_GFP_name))
-                print(f"GFP {well}_{site} image has been renamed")
-
-            if actin_id in str(images):
-                Actin_name = images.name.split("_")
-                well = Actin_name[0]
-                site = Actin_name[2]
-                # replace Actin with RFP to keep metadata consistent between datasets
-                channel_name = "RFP"
-                plate = Actin_name[4].split(".")[0]
-
-                new_Actin_name = f"{output_folder_name}/{well}_01_3_{site}_{channel_name}_{plate}.tif"
-                Path(images).rename(Path(new_Actin_name))
-                print(f"Actin (now RFP) {well}_{site} image has been renamed")
+
+        # this will correct the images with DAPI in the name and give the correct channel number
+        if dapi_id in str(images):
+            DAPI_name = images.name.split("_")
+            well = DAPI_name[0]
+            site = DAPI_name[2]
+            channel_name = DAPI_name[3]
+            plate = DAPI_name[4].split(".")[0]
+
+            new_DAPI_name = f"{output_folder_name}/{well}_01_1_{site}_{channel_name}_{plate}.tif"
+            Path(images).rename(Path(new_DAPI_name))
+            print(f"DAPI {well}_{site} image has been renamed")
+
+        # this will correct the images with GFP in the name and give the correct channel number
+        if gfp_id in str(images):
+            GFP_name = images.name.split("_")
+            well = GFP_name[0]
+            site = GFP_name[2]
+            channel_name = GFP_name[3]
+            plate = GFP_name[4].split(".")[0]
+
+            new_GFP_name = f"{output_folder_name}/{well}_01_2_{site}_{channel_name}_{plate}.tif"
+            Path(images).rename(Path(new_GFP_name))
+            print(f"GFP {well}_{site} image has been renamed")
+
+        # this will correct the images with Actin in the name and give the correct channel number as well
+        if actin_id in str(images):
+            Actin_name = images.name.split("_")
+            well = Actin_name[0]
+            site = Actin_name[2]
+            # replace Actin with RFP to keep metadata consistent between datasets
+            channel_name = "RFP"
+            plate = Actin_name[4].split(".")[0]
+
+            new_Actin_name = f"{output_folder_name}/{well}_01_3_{site}_{channel_name}_{plate}.tif"
+            Path(images).rename(Path(new_Actin_name))
+            print(f"Actin (now RFP) {well}_{site} image has been renamed")
diff --git a/2_segmenting_data/Segmentation_Pipeline.ipynb b/2_segmenting_data/Segmentation_Pipeline.ipynb
@@ -178,7 +178,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3.8.13 ('download-NF1-data')",
+   "display_name": "Python 3.8.13 ('2.segment-NF1-data')",
    "language": "python",
    "name": "python3"
   },
@@ -197,7 +197,7 @@
   "orig_nbformat": 4,
   "vscode": {
    "interpreter": {
-    "hash": "6f614b090b7dcc5417cc8c5d447b53061ddddd5f43fe38156693013cfafb65c9"
+    "hash": "e2880e5773ed34dc765efa6cd049c8e5f7cdd6d2179ba47ba1df48378009cd11"
    }
   }
  },