updating code

d33bs · Dec 12, 2022 · c6bf525 · c6bf525
1 parent ec3321c
commit c6bf525
Show file tree

Hide file tree

Showing 4 changed files with 51 additions and 75 deletions.
diff --git a/0_download_data/0.download_NF1_data.yml b/0_download_data/0.download_NF1_data.yml
@@ -4,7 +4,7 @@ channels:
 dependencies:
   - conda-forge::python=3.8
   - conda-forge::scikit-image=0.19.2
-  - conda-forge::pandas=1.4.4
+  - conda-forge::pandas
   - conda-forge::matplotlib=3.5.2
   - conda-forge::jupyter=1.0.0
   - conda-forge::jupyterlab

diff --git a/0_download_data/README.md b/0_download_data/README.md
@@ -34,7 +34,9 @@ Total storage size:
 - 204.2 MB (New dataset - post conversion and cropping)
 ```
 
-## File Name Structure
+## Standard Metadata Name Structure
+
+The standard metadata structure for this project is based on the pilot dataset.
 
 ![NF1 Pilot Data Metadata](example_images/NF1_Pilot_Data_Metadata.png "NF1 Pilot Data Metadata")
 

diff --git a/0_download_data/correct_images.ipynb b/0_download_data/correct_images.ipynb
@@ -14,7 +14,7 @@
    "outputs": [],
    "source": [
     "import pathlib\n",
-    "import correctionutils as correct\n"
+    "import correctionutils as correct"
    ]
   },
   {
@@ -37,9 +37,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "path_to_pipeline = \"/home/jenna/NF1_SchwannCell_data/0_download_data/convert_crop_NF1_images.cppipe\"\n",
-    "path_to_output = \"/home/jenna/NF1_SchwannCell_data/0_download_data/NF1_Second_Plate_Corrected\"\n",
-    "path_to_images = \"/home/jenna/NF1_SchwannCell_data/0_download_data/NF1_Second_Plate\""
+    "path_to_pipeline = \"convert_crop_NF1_images.cppipe\"\n",
+    "path_to_output = \"NF1_test\"\n",
+    "path_to_images = \"NF1_Second_Plate\""
    ]
   },
   {
@@ -55,10 +55,10 @@
    "metadata": {},
    "outputs": [
     {
-     "name": "stdout",
+     "name": "stderr",
      "output_type": "stream",
      "text": [
-      "This plate has already been ran through CellProfiler!\n"
+      "WARNING:root:This plate has already been processed by CellProfiler!\n"
      ]
     }
    ],
@@ -72,17 +72,19 @@
    "metadata": {},
    "outputs": [
     {
-     "name": "stdout",
+     "name": "stderr",
      "output_type": "stream",
      "text": [
-      "This plate has already been reordered!\n",
-      "The metadata for this plate has already been corrected!\n"
+      "WARNING:root:This plate has already been reordered!\n",
+      "ERROR:root:The length is 6. Please review your images\n",
+      "INFO:root:Images were renamed\n"
      ]
     }
    ],
    "source": [
     "images_path = pathlib.Path(path_to_output)\n",
-    "output_folder_name = 'NF1_Second_Plate_Corrected'\n",
+    "# output_folder_name = 'NF1_Second_Plate_Corrected'\n",
+    "output_folder_name = \"NF1_test\"\n",
     "\n",
     "correct.rename_images(images_path, output_folder_name)"
    ]

diff --git a/0_download_data/correctionutils.py b/0_download_data/correctionutils.py
@@ -5,6 +5,9 @@
 import os
 import pathlib
 from pathlib import Path
+import logging
+
+logging.basicConfig(level=logging.INFO)
 
 
 def correct_images(path_to_pipeline: str, path_to_output: str, path_to_images: str):
@@ -22,7 +25,7 @@ def correct_images(path_to_pipeline: str, path_to_output: str, path_to_images: s
         command = f"cellprofiler -c -r -p {path_to_pipeline} -o {path_to_output} -i {path_to_images}"
         os.system(command)
     else:
-        print("This plate has already been processed by CellProfiler!")
+        logging.warn("This plate has already been processed by CellProfiler!")
 
 
 def rename_images(path_to_images: pathlib.Path, output_folder_name: str):
@@ -53,70 +56,39 @@ def rename_images(path_to_images: pathlib.Path, output_folder_name: str):
                 f"{output_folder_name}/{well}_01_{site}_{channel_name}_{plate}.tif"
             )
             Path(images).rename(Path(new_image_name))
-            print("The image names are reordered")
+
+        logging.info("The image names are reordered")
+
     # if the images do not have the prefix, the plate has most likely already been processed, so it is not processed again
     # to avoid issues (e.g. incorrect reordering, deleted images, etc.)
     else:
-        print("This plate has already been reordered!")
-
-    dapi_id = "DAPI"
-    gfp_id = "GFP"
-    actin_id = "Actin"
-
-    channel_number_added = (
-        "01_1_1",
-        "01_1_2",
-        "01_1_3",
-        "01_1_4",
-        "01_2_1",
-        "01_2_2",
-        "01_2_3",
-        "01_2_4",
-        "01_3_1",
-        "01_3_2",
-        "01_3_3",
-        "01_3_4",
-    )
-
-    # if any of the images contain the string (e.g. contains the channel number), then the images should be left alone
-    for images in path_to_images.iterdir():
-        if any(x in str(images) for x in channel_number_added):
-            print("The metadata for this plate has already been corrected!")
+        logging.warn("This plate has already been reordered!")
+
+    channel_id_dict = {
+        "DAPI": {"id": "01_1"},
+        "GFP": {"id": "01_2"},
+        "Actin": {"id": "01_3", "channel_name": "RFP"},
+    }
+
+    for image in path_to_images.iterdir():
+        # check that the image name splits into the expected number of "_"-separated parts; if not, stop renaming
+        if len(image.name.split("_")) != 4:
+            logging.error(f"The length is {len(image.name.split('_'))}")
+            # TODO: raise an exception here instead of only logging the error and breaking out of the loop
             break
 
-        # this will correct the images with DAPI in the name and give the correct channel number
-        if dapi_id in str(images):
-            DAPI_name = images.name.split("_")
-            well = DAPI_name[0]
-            site = DAPI_name[2]
-            channel_name = DAPI_name[3]
-            plate = DAPI_name[4].split(".")[0]
-
-            new_DAPI_name = f"{output_folder_name}/{well}_01_1_{site}_{channel_name}_{plate}.tif"
-            Path(images).rename(Path(new_DAPI_name))
-            print(f"DAPI {well}_{site} image has been renamed")
-
-        # this will correct the images with GFP in the name and give the correct channel number
-        if gfp_id in str(images):
-            GFP_name = images.name.split("_")
-            well = GFP_name[0]
-            site = GFP_name[2]
-            channel_name = GFP_name[3]
-            plate = GFP_name[4].split(".")[0]
-
-            new_GFP_name = f"{output_folder_name}/{well}_01_2_{site}_{channel_name}_{plate}.tif"
-            Path(images).rename(Path(new_GFP_name))
-            print(f"GFP {well}_{site} image has been renamed")
-
-        # this will correct the images with Actin in the name and give the correct channel number as well
-        if actin_id in str(images):
-            Actin_name = images.name.split("_")
-            well = Actin_name[0]
-            site = Actin_name[2]
-            # replace Actin with RFP to keep metadata consistent between datasets
-            channel_name = "RFP"
-            plate = Actin_name[4].split(".")[0]
-
-            new_Actin_name = f"{output_folder_name}/{well}_01_3_{site}_{channel_name}_{plate}.tif"
-            Path(images).rename(Path(new_Actin_name))
-            print(f"Actin (now RFP) {well}_{site} image has been renamed")
+        # try each known channel and rename the image if its name contains that channel
+        for channel in channel_id_dict.keys():
+            channel_id = channel_id_dict[channel]["id"]
+            if "channel_name" in channel_id_dict[channel].keys():
+                channel_name = channel_id_dict[channel]["channel_name"]
+            else:
+                channel_name = channel
+
+            if channel in str(image):
+                well, _, site, _, plate = image.name.split("_")
+                plate = plate.split(".")[0]
+
+                new_channel_name = f"{output_folder_name}/{well}_{channel_id}_{site}_{channel_name}_{plate}.tif"
+                Path(image).rename(Path(new_channel_name))
+