<a href="https://colab.research.google.com/github/dnaae/minor-internship/blob/FISH-QUANT-pipeline/Coordinates_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install big-fish

In [None]:
import os
import pandas as pd
import numpy as np
import bigfish
import bigfish.stack as stack
import bigfish.classification as classification
import bigfish.plot as plot
from google.colab import drive
import re
print("Big-FISH version: {0}".format(bigfish.__version__))

In [None]:
# Function to mount Google Drive and select a folder
def choose_folder_colab():
    """Mount Google Drive and let the user pick a top-level entry by number.

    The entries of ``/content/drive/MyDrive/`` are printed with 1-based
    numbers, then the user is prompted until a valid number or ``q`` is
    entered.

    Returns:
        The full path of the selected entry, or ``None`` if the user
        entered ``q``.
    """
    drive.mount('/content/drive')
    drive_folder = "/content/drive/MyDrive/"
    # os.listdir returns entries in arbitrary order; sort them so the
    # numbering shown to the user is stable from one run to the next.
    contents = sorted(os.listdir(drive_folder))

    print("Contents of your Google Drive:")
    for number, item in enumerate(contents, start=1):
        print(f"{number}: {item}")

    while True:
        choice = input("Enter the number of the folder you want to select (q to quit): ").strip()
        if choice.lower() == 'q':
            return None
        try:
            index = int(choice)
        except ValueError:
            # Only a non-numeric entry is "invalid input"; an interrupt is
            # deliberately not caught here so the user can still abort.
            print("Invalid input. Please try again or enter 'q' to quit.")
            continue
        if 1 <= index <= len(contents):
            return os.path.join(drive_folder, contents[index - 1])
        print("Invalid choice. Please try again or enter 'q' to quit.")

In [None]:
def distance_feature_names():
    """Return big-fish's feature names with only the distance flag enabled.

    All other ``names_features_*`` flags are left at the library defaults.
    """
    return classification.get_features_name(names_features_distance=True)

In [None]:
def intranuclear_feature_names():
    """Return big-fish's feature names with only the intranuclear flag enabled.

    All other ``names_features_*`` flags are left at the library defaults.
    """
    return classification.get_features_name(names_features_intranuclear=True)

In [None]:
def protrusion_feature_names():
    """Return big-fish's feature names with only the protrusion flag enabled.

    All other ``names_features_*`` flags are left at the library defaults.
    """
    return classification.get_features_name(names_features_protrusion=True)

In [None]:
def dispersion_feature_names():
    """Return big-fish's feature names with only the dispersion flag enabled.

    All other ``names_features_*`` flags are left at the library defaults.
    """
    return classification.get_features_name(names_features_dispersion=True)

In [None]:
def topographic_feature_names():
    """Return big-fish's feature names with only the topography flag enabled.

    All other ``names_features_*`` flags are left at the library defaults.
    """
    return classification.get_features_name(names_features_topography=True)

In [None]:
def foci_feature_names():
    """Return big-fish's feature names with only the foci flag enabled.

    All other ``names_features_*`` flags are left at the library defaults.
    """
    return classification.get_features_name(names_features_foci=True)

In [None]:
def area_feature_names():
    """Return big-fish's feature names with only the area flag enabled.

    All other ``names_features_*`` flags are left at the library defaults.
    """
    return classification.get_features_name(names_features_area=True)

In [None]:
def centrosomal_feature_names():
    """Return big-fish's feature names with only the centrosome flag enabled.

    All other ``names_features_*`` flags are left at the library defaults.
    """
    return classification.get_features_name(names_features_centrosome=True)

In [None]:
# compute features
def compute_features(cell_mask, nuc_mask, rna_coord, smfish, foci_coord,
                     voxel_size_yx=103):
    """Compute the big-fish features of one cell, print them and return them.

    The distance, intranuclear, protrusion, dispersion, topography, foci and
    area feature groups are requested with ``ndim=3``; no centrosome
    coordinates are supplied.

    Args:
        cell_mask: Cell mask forwarded to ``classification.compute_features``.
        nuc_mask: Nucleus mask forwarded to big-fish.
        rna_coord: RNA spot coordinates forwarded to big-fish.
        smfish: smFISH image forwarded to big-fish.
        foci_coord: Foci coordinates forwarded to big-fish.
        voxel_size_yx: Voxel size in the yx plane forwarded to big-fish.
            Defaults to 103, the value this notebook previously hard-coded
            (presumably nanometers, per the big-fish docs - confirm).

    Returns:
        A ``(features, features_names)`` tuple exactly as returned by
        ``classification.compute_features(..., return_names=True)``.
    """
    features, features_names = classification.compute_features(
        cell_mask, nuc_mask, ndim=3, rna_coord=rna_coord,
        smfish=smfish, voxel_size_yx=voxel_size_yx,
        foci_coord=foci_coord,
        centrosome_coord=None,
        compute_distance=True,
        compute_intranuclear=True,
        compute_protrusion=True,
        compute_dispersion=True,
        compute_topography=True,
        compute_foci=True,
        compute_area=True,
        return_names=True)
    for feature, feature_name in zip(features, features_names):
        print("{0:40} {1:0.2f}".format(feature_name + ":", feature))
    # Hand the values back so callers can use them instead of only seeing
    # the printed report.
    return features, features_names


In [None]:
def extract_number_from_filename(filename):
    """Return the first run of three consecutive digits in *filename* as an int.

    Raises:
        ValueError: If *filename* contains no three consecutive digits.
    """
    found = re.search(r'\d{3}', filename)
    if found is None:
        raise ValueError(f"No 3-digit number found in the filename: {filename}")
    return int(found.group())


In [None]:
def _cell_features_dataframe(npz_path, cell_index, voxel_size_yx):
    """Load one extracted cell and return its features as a one-row DataFrame."""
    data = stack.read_cell_extracted(npz_path)
    features, features_names = classification.compute_features(
        data["cell_mask"], data["nuc_mask"], ndim=3,
        rna_coord=data["rna_coord"],
        smfish=data["smfish"], voxel_size_yx=voxel_size_yx,
        foci_coord=data["foci"],
        centrosome_coord=None,
        compute_distance=True,
        compute_intranuclear=True,
        compute_protrusion=True,
        compute_dispersion=True,
        compute_topography=True,
        compute_foci=True,
        compute_area=True,
        return_names=True
    )

    # One row per cell, one column per feature, plus the cell index.
    df_cell = pd.DataFrame(data=features.reshape((1, -1)), columns=features_names)
    df_cell['cell_index'] = cell_index
    return df_cell


def process_images_interactively(path_output, voxel_size_yx=103):
    """Compute features for every extracted cell of a treatment and save a CSV.

    Every ``.npz`` file in
    ``<path_output>/output/extraction_results_npz_notebook6`` is loaded with
    ``stack.read_cell_extracted``, its features are computed with big-fish,
    and the per-cell rows are concatenated into one DataFrame indexed by the
    cell index parsed from the file name. The result is written to
    ``<path_output>/output_dataframe_<folder name>.csv``, where
    ``<folder name>`` is the last component of ``path_output``.

    Files that fail to process are reported and skipped. As a side effect
    the working directory is changed to the extraction folder.

    Args:
        path_output: Treatment folder that contains the ``output`` subfolder.
        voxel_size_yx: Voxel size in the yx plane forwarded to big-fish.
            Defaults to 103, the value previously hard-coded here.

    Raises:
        OSError: If the extraction folder does not exist (from ``os.chdir``).
    """
    # Resolve to an absolute path before changing directory, otherwise a
    # relative path_output would no longer point at the same place.
    path_output = os.path.abspath(path_output)
    output_folder = os.path.join(path_output, 'output', 'extraction_results_npz_notebook6')
    # Kept for backward compatibility: code run afterwards may rely on the
    # working directory being the extraction folder.
    os.chdir(output_folder)

    print(f"Selected folder: {output_folder}")

    # Sorted so cells are processed (and reported) in a reproducible order.
    npz_files = sorted(f for f in os.listdir(output_folder) if f.lower().endswith('.npz'))
    print(f"NPZ files found: {npz_files}")

    if not npz_files:
        print("No NPZ files found in the current directory. Make sure you are in the correct directory.")
        return

    dataframes = []

    for npz_file in npz_files:
        try:
            cell_index = extract_number_from_filename(npz_file)
            npz_path = os.path.join(output_folder, npz_file)
            dataframes.append(
                _cell_features_dataframe(npz_path, cell_index, voxel_size_yx))
        except Exception as e:
            # Best effort: one unreadable or malformed cell must not abort
            # the whole batch, so report it and move on.
            print(f"Error processing {npz_file}: {e}")

    if not dataframes:
        print("No cell could be processed; no CSV file was written.")
        return

    # Name the CSV after the treatment folder itself rather than relying on
    # a global variable set elsewhere.
    treatment_name = os.path.basename(os.path.normpath(path_output))

    df = pd.concat(dataframes, ignore_index=True)
    df.set_index('cell_index', inplace=True)

    csv_filename = os.path.join(path_output, f'output_dataframe_{treatment_name}.csv')
    df.to_csv(csv_filename, index=True)  # index=True keeps the cell_index column
    print(f"Final DataFrame saved to: {csv_filename}")

if __name__ == "__main__":
    # Allow the user to choose a folder interactively
    path_input = choose_folder_colab()

    # choose_folder_colab returns None when the user quits with 'q'; only
    # touch the path once we know a folder was actually selected.
    if path_input:
        # Name of the selected treatment folder, kept as a module-level
        # variable because other cells may read it.
        top_level_folder = os.path.basename(os.path.normpath(path_input))

        # The user selected the treatment folder; processing looks inside
        # its "output" subfolder.
        process_images_interactively(path_input)