# Detection of Axonal Fragments
---

As part of the Mayoral lab's investigation of the influence of oligodendrocyte precursor cells on neurodegeneration, we have fluorescent images with the following channels:

- GFP (sparse axonal labeling)
- DegenoTag
- Myelin staining
- DAPI

The goal of this program is to develop an automated pipeline to detect and measure axonal fragments.


In [63]:
#%% Import block

# System
import os
import os.path
from datetime import datetime


# Plotting
import matplotlib.pyplot as plt 


# Image handling
import tifffile as tf 
import imageio as img # tiff writing
import czifile
from tifffile import imsave, imread, imwrite
from enum import Enum


# Numerical and statistics
import seaborn as sb
import pandas as pd 
import numpy as np
from PIL import Image
import math
import csv



from scipy.signal import find_peaks, peak_prominences
from scipy.ndimage import gaussian_filter

# Image analysis
import cv2 as cv
import skimage as sk
from skimage import io, morphology
from skimage.color import gray2rgb
from skimage import data
from skimage import img_as_float
from skimage.morphology import reconstruction

In [30]:
'''
This cell selects where the matplotlib figures show up.
Edit the literal in the `if` below: the value 1 shows plots in a separate window (Qt backend),
whereas the value 0 shows plots inline in the Jupyter notebook.
'''
if 1:
    # IPython magic (only valid inside IPython/Jupyter): use the Qt backend.
    %matplotlib qt
    plt.ion()
else:
    # IPython magic (only valid inside IPython/Jupyter): use the inline backend.
    %matplotlib inline
    plt.ion()


# Pre-processing stage
### The cells from here until the next markdown cell prepare the directories and files that are fed into the automated pipeline.
 This section will do the following:
 - Convert CZI files to TIFF
 - Read the images into respective image arrays
 - Crop the image arrays to the respective region of interest


In [31]:
''' 
This cell converts every .czi file found in the input directory into a .tiff file in the output directory.
You can change the main directory to a specific directory which contains your data. If you do not have a directory within 
your working directory please create the following directories with the following file format. You do not have to use the following
naming conventions but would require refactoring the code:

->current working directory
    -> data
        -> channels_directory
        -> cropped_images_directory
        -> czi_images_directory
        -> tiff_images_directory

Upload your .czi files into the czi_images_directory and begin running this cell.
Each output file keeps the full input name and appends '.tiff' (e.g. CTRL_EN_1.czi -> CTRL_EN_1.czi.tiff).
'''

main_directory = "data"
input_directory = os.path.join(main_directory, "czi_images_directory")
output_directory = os.path.join(main_directory, "tiff_images_directory")

# Check if input directory exists, create if not
if not os.path.exists(input_directory):
    print("Input Directory not found, creating one \n")
    os.makedirs(input_directory)
    print("Input Directory successfully created \n")

# Check if output directory exists, create if not
if not os.path.exists(output_directory):
    print("Output Directory not found, creating one \n")
    os.makedirs(output_directory)
    print("Output Directory successfully created \n")


# Loop through files in the input directory
for filename in os.listdir(input_directory):
    full_file_path = os.path.join(input_directory, filename)

    # Skip sub-directories and anything that is not a .czi file
    if not (os.path.isfile(full_file_path) and filename.lower().endswith('.czi')):
        continue

    with czifile.CziFile(full_file_path) as czi:
        image_array = czi.asarray()

    # Save the image as TIFF in the output directory.
    # tf.imwrite replaces the deprecated tf.imsave (same arguments).
    output_file_path = os.path.join(output_directory, filename + '.tiff')
    tf.imwrite(output_file_path, image_array, imagej=True)
    print(f"Successfully converted: {filename} to {output_directory} \n")

  tf.imsave(output_file_path, image_array, imagej=True)


Successfully converted: CTRL_EN_1.czi to data/tiff_images_directory 

Successfully converted: CTRL_INT_4.czi to data/tiff_images_directory 



In [32]:
def crop_image(channel_array, x_min=2200, x_max=3200, y_min=1100, y_max=3000):
    '''
    This function takes in:
        A 2D array (channel_array, ideally one channel of an image);
        optional crop limits in pixels (x_min, x_max, y_min, y_max). The defaults
        are the region of interest used so far; pass other values to crop a
        different region without editing this function.

    This function:
        Crops the image to rows y_min..y_max-1 and columns x_min..x_max-1.
        Limits beyond the image size are clamped by normal slicing rules, so a
        region completely outside the image yields an empty array.

    This function returns:
        a cropped 2D array (a view of channel_array when it is a NumPy array)
    '''
    # Rows are indexed by y and columns by x.
    return channel_array[y_min:y_max, x_min:x_max]
    

In [33]:
'''
This list collects per-channel pixel data for statistical analysis.
split_channels() appends one entry per channel it reads: the flattened, sorted
pixel values returned by save_array_before_crop().
'''
# Module-level accumulator: it grows every time split_channels() is called and
# is never cleared by the code in this notebook.
channel_data_array = []

In [34]:
def save_array_before_crop(image_array):
    '''
    This function takes in:
        A 2D array (image_array)

    This function:
        Copies the array into one dimension and sorts the copy in ascending order.
        The input array is left unchanged.

    This function returns:
        The flattened and sorted 1D array
    '''
    # flatten() always returns a copy, so sorting in place cannot touch the input.
    flat_copy = image_array.flatten()
    flat_copy.sort()
    return flat_copy
    

In [35]:
def split_channels(tiff_image):
    '''
    This function takes in:
        A path to a tiff image (tiff_image)

    This function:
        Reads the image and separates it along its first axis, treating each
        entry of that axis as one channel. For every channel it also appends the
        flattened, sorted pixel values to the module-level channel_data_array.

    This function returns:
        A list with one array per channel
    '''
    stack = tf.imread(tiff_image)
    channels = []
    # NOTE(review): assumes the first axis of the stored image is the channel axis - confirm for your files.
    for index in range(stack.shape[0]):
        plane = stack[index]
        channel_data_array.append(save_array_before_crop(plane))
        channels.append(plane)
    return channels
    

    

## Splitting and cropping images
The cell directly below splits each generated TIFF image in the specified directory into its channels and saves every channel into the channels_directory, so that each channel can be viewed and processed separately if needed. We are particularly focused on the first channel. The same cell also crops each channel image and saves the result to use as training data for the algorithm. These cropped images let us focus on a particular section of the image to better evaluate and refine the algorithm.


In [36]:
'''
This section loops over every .tiff image in the tiff_images_directory, splits it into its channels,
saves each channel into the channels_directory and then saves a cropped copy of each channel into the
cropped images directory. The crop step may be removed or commented out in order to analyze the full image.
Select the code and (CMD + /) to comment out the code segment.
'''
tiff_images_directory = 'data/tiff_images_directory'

channels_directory = "data/channels_directory"
cropped_images_directory = "data/cropped_images"

# Create the two output directories on first use.
for needed_directory in (channels_directory, cropped_images_directory):
    if not os.path.exists(needed_directory):
        os.makedirs(needed_directory)
        print(f"Created directory: {needed_directory}")


for filename in os.listdir(tiff_images_directory):
    if not filename.endswith(".tiff"):
        continue

    # File name without its final extension, shared by all output names.
    stem = os.path.splitext(filename)[0]
    channels = split_channels(os.path.join(tiff_images_directory, filename))

    # Save each full-size channel
    for number, channel in enumerate(channels, start=1):
        channel_filepath = os.path.join(channels_directory, f'{stem}_channel_{number}.tiff')
        tf.imwrite(channel_filepath, channel)
        ## Feel free to remove these print lines if necessary
        print(f"Saved channel {number} for {filename} to {channel_filepath} \n")

    # Crop every channel first, then save the crops
    cropped_channels = [crop_image(channel) for channel in channels]
    for number, cropped_channel in enumerate(cropped_channels, start=1):
        cropped_channel_filepath = os.path.join(cropped_images_directory, f'{stem}_cropped_channel_{number}.tiff')
        tf.imwrite(cropped_channel_filepath, cropped_channel, imagej=True)
        ## Feel free to remove these print lines if necessary
        print(f"Successfully saved cropped channel {number} to {cropped_channel_filepath} \n")

Saved channel 1 for CTRL_EN_1.czi.tiff to data/channels_directory/CTRL_EN_1.czi_channel_1.tiff
Saved channel 2 for CTRL_EN_1.czi.tiff to data/channels_directory/CTRL_EN_1.czi_channel_2.tiff
Saved channel 3 for CTRL_EN_1.czi.tiff to data/channels_directory/CTRL_EN_1.czi_channel_3.tiff
Saved channel 4 for CTRL_EN_1.czi.tiff to data/channels_directory/CTRL_EN_1.czi_channel_4.tiff
Successfully saved cropped channel 1 to data/cropped_images/CTRL_EN_1.czi_cropped_channel_1.tiff 

Successfully saved cropped channel 2 to data/cropped_images/CTRL_EN_1.czi_cropped_channel_2.tiff 

Successfully saved cropped channel 3 to data/cropped_images/CTRL_EN_1.czi_cropped_channel_3.tiff 

Successfully saved cropped channel 4 to data/cropped_images/CTRL_EN_1.czi_cropped_channel_4.tiff 

Saved channel 1 for CTRL_INT_4.czi.tiff to data/channels_directory/CTRL_INT_4.czi_channel_1.tiff
Saved channel 2 for CTRL_INT_4.czi.tiff to data/channels_directory/CTRL_INT_4.czi_channel_2.tiff
Saved channel 3 for CTRL_INT_4

## This section below is the start of the automated pipeline. The pipeline may or may not require fine-tuning to improve the algorithm. This is the current minimum-viable product.



In [37]:
def find_local_maximums(image_array):
    '''
    find_peaks() may require further tuning to apply across all images
    This function takes in:
        A 2D array (image_array)
    This function:
        Runs scipy.signal.find_peaks() on every row of the image (including the
        last row) and keeps only the pixels that find_peaks() reports as peaks.
    This function returns:
        A 2D uint16 array with the same shape as the input, holding the original
        pixel value at every peak location and 0 everywhere else
    This function raises:
        ValueError if image_array is not two-dimensional
    '''
    image = np.asarray(image_array)
    if image.ndim != 2:
        raise ValueError(f"find_local_maximums expects a 2D array, got {image.ndim} dimension(s)")

    new_image_array = np.zeros(image.shape, dtype=np.uint16)
    for row_index, row in enumerate(image):
        # Peak criteria are unchanged from the tested configuration.
        peaks, _ = find_peaks(row, height=(1500, 23000), threshold=None, distance=3,
                              prominence=3000, width=50, wlen=None, rel_height=100,
                              plateau_size=None)
        # Copy all peak pixels of this row at once instead of testing every
        # column for membership in the peak list.
        new_image_array[row_index, peaks] = row[peaks]
    return new_image_array
    

## The current algorithm works by looking into the cropped_images directory.
----


##### The multiple variable:
- multiple = False : This will only read a single image specified by changing the single_image_path string value.
- multiple = True : This will read all your .tiff files in the cropped_images_directory 
##### The image_dir variable:
- image_dir = "data/some_directory" : This will work on files in this specific directory, for example if you would like the algorithm to work on a cropped image or the full image.
##### The all_channels variable: 
- all_channels = False: This will only look at channel_1 files (you may change this by changing the variable string value to channel_n.tiff where n is the channel number)
- all_channels = True: This will run the algorithm on every channel.
##### The print_image variable: 
- print_image = False: This will not print the images.
- print_image = True: This will print the images; it may increase the run time of the algorithm.

In [38]:
# True: read every matching .tiff file in image_dir; False: read only single_image_path.
multiple = True 
# Directory the pipeline reads its input images from.
image_dir = "data/channels_directory"
# False: only files ending in "channel_1.tiff" are used; True: every ".tiff" file is used.
all_channels = False
# True: display a figure for each processed image.
print_image = True #Default/tested is False

In [54]:
'''
This section reads the image(s).
With multiple = True every file in image_dir whose name ends with `ending` is loaded into image_arrays.
With multiple = False only single_image_path is loaded (into `im`) and displayed.
'''

# The files written by this notebook all end in ".tiff", so the single-image path needs the extension too.
single_image_path = os.path.join(image_dir, "CTRL_INT_4.czi_cropped_channel_1.tiff")
image_arrays = []
filenames = sorted(os.listdir(image_dir))

# Restrict to channel 1 unless every channel was requested.
ending = ".tiff" if all_channels else "channel_1.tiff"

# Select the matching files once and reuse the selection below.
matching_filenames = [name for name in filenames if name.endswith(ending)]

for file in matching_filenames:
    print(f"Inspecting: {file}")

if multiple:
    for filename in matching_filenames:
        # Construct the full path to the image file
        filepath = os.path.join(image_dir, filename)
        # Read the image and add it to the list
        image = plt.imread(filepath)
        print(f"Added {filename} to list")
        image_arrays.append(image)
else:
    im = plt.imread(single_image_path)
    plt.figure()
    plt.imshow(im)



Inspecting: CTRL_EN_1.czi_channel_1.tiff
Inspecting: CTRL_INT_4.czi_channel_1.tiff




Added CTRL_EN_1.czi_channel_1.tiff to list
Added CTRL_INT_4.czi_channel_1.tiff to list


In [60]:
directory = 'data/channels_directory'

# Directory listing in name order.
# To order by modification date instead, sort with:
# key=lambda x: os.path.getmtime(os.path.join(directory, x))
files = sorted(os.listdir(directory))

print("Files sorted by name:")
for file in files:
    if not file.endswith(".tiff"):
        continue
    print(file)

Files sorted by name:
CTRL_EN_1.czi_channel_1.tiff
CTRL_EN_1.czi_channel_2.tiff
CTRL_EN_1.czi_channel_3.tiff
CTRL_EN_1.czi_channel_4.tiff
CTRL_INT_4.czi_channel_1.tiff
CTRL_INT_4.czi_channel_2.tiff
CTRL_INT_4.czi_channel_3.tiff
CTRL_INT_4.czi_channel_4.tiff


In [55]:
'''
This section computes the local maximums of the loaded image(s) and displays them.
The results are collected in maximums_list (one array per image).
'''
maximums_list = []

if not multiple:
    # Single-image mode: one result, always displayed.
    maximums_list.append(find_local_maximums(im))
    plt.figure()
    plt.imshow(maximums_list[0])
else:
    for image in image_arrays:
        local_maximums = find_local_maximums(image)
        maximums_list.append(local_maximums)

        if not print_image:
            continue
        plt.figure()
        plt.imshow(local_maximums)

In [56]:
def find_non_zero_neighbors(image_array, start_y, start_x, global_visited):
    '''
    This function takes in:
        A 2D array (image_array); the y coordinate of the start pixel (start_y); the x coordinate of the
        start pixel (start_x); and a set of (y, x) tuples holding every pixel that has already been
        visited (global_visited). The set is updated in place.
    This function:
        Starts at the given pixel and repeatedly follows non-zero neighbouring pixels until no unvisited
        neighbour is left. Only the sideways and downward neighbours are followed, as shown below with P
        being the pixel whose neighbours are checked.

                    ← P →
                    ↙ ↓ ↘

            P: Original pixel value
            →: Right
            ←: Left
            ↙: Bottom-left
            ↓: Bottom
            ↘: Bottom-right

        The reported end point is the most recently discovered pixel that had not been visited before.
        Already-visited neighbours (for example the pixel the walk just came from) are ignored, so they
        can no longer overwrite the end point.

    This function returns:
       4 integer values which are start and end coordinate pairs of the line (start_y, start_x, end_y, end_x)
    '''
    # (dy, dx) offsets, scanned in this order: left, bottom-left, bottom, bottom-right, right.
    neighbor_offsets = ((0, -1), (1, -1), (1, 0), (1, 1), (0, 1))

    n_rows = len(image_array)
    n_cols = len(image_array[0]) if n_rows else 0

    stack = [(start_y, start_x)]
    end_y, end_x = start_y, start_x

    while stack:
        y, x = stack.pop()
        # A pixel can be pushed more than once before it is processed.
        if (y, x) in global_visited:
            continue
        global_visited.add((y, x))

        for dy, dx in neighbor_offsets:
            ny, nx = y + dy, x + dx
            if not (0 <= ny < n_rows and 0 <= nx < n_cols):
                continue
            if image_array[ny][nx] == 0 or (ny, nx) in global_visited:
                continue
            stack.append((ny, nx))
            end_y, end_x = ny, nx

    return start_y, start_x, end_y, end_x


In [61]:
def find_lines(test_array):
    '''
    This function takes in:
        A 2D array (test_array)
    This function:
        Scans the image row by row. Every non-zero pixel that has not been visited yet is handed to
        find_non_zero_neighbors(), which follows its non-zero neighbours and reports a start and an end
        coordinate. The Euclidean distance between the two is computed, and lines longer than 5 pixels
        are kept. The kept lengths are printed and written to a timestamped CSV file in the current
        working directory (one length per row).
    This function returns:
       A 2D list (line_endpoints, rows of [start_y, start_x, end_y, end_x]); a 1D list (line_lengths)
    '''
    global_visited = set()
    line_endpoints, line_lengths = [], []

    for y, row in enumerate(test_array):
        for x, pixel in enumerate(row):
            if pixel == 0 or (y, x) in global_visited:
                continue
            start_y, start_x, end_y, end_x = find_non_zero_neighbors(test_array, y, x, global_visited)
            distance = math.sqrt((end_y - start_y) ** 2 + (end_x - start_x) ** 2)
            if distance > 5:  ## Can change the size
                line_endpoints.append([start_y, start_x, end_y, end_x])
                line_lengths.append(distance)

    print("Line endpoints (start_y, start_x, end_y, end_x):", line_endpoints)
    print("Line lengths:", line_lengths)

    # Output file name carries the time of the run (second resolution).
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f'file_at_time:{timestamp}.csv'

    # One line length per CSV row
    with open(filename, 'w', newline='') as csv_handle:
        csv.writer(csv_handle).writerows([length] for length in line_lengths)

    print(f"The 1D array has been written to {filename}")

    return line_endpoints, line_lengths

   

In [64]:
'''
This section finds the lines in every local-maximum image and overlays them onto that image.
In single-image mode (multiple = False) maximums_list holds exactly one entry, so the same loop
covers both modes and the lines are always searched in the local-maximum image.
'''

for local_maximums in maximums_list:
    line_endpoints, line_lengths = find_lines(local_maximums)

    # Batch mode only displays when print_image is set; single-image mode always displays.
    if multiple and not print_image:
        continue

    ## Converts to a 8bit image for display. May lose information.
    # Min-max scaling into 0..255 avoids the integer overflow of multiplying 16-bit data by 255.
    gray_image = cv.normalize(local_maximums, None, 0, 255, cv.NORM_MINMAX).astype(np.uint8)
    # matplotlib displays arrays as RGB, so build an RGB image and draw with an RGB colour.
    color_image = cv.cvtColor(gray_image, cv.COLOR_GRAY2RGB)

    for (start_y, start_x, end_y, end_x) in line_endpoints:
        # OpenCV points are (x, y). Red in RGB, thickness of 2.
        cv.line(color_image, (start_x, start_y), (end_x, end_y), (255, 0, 0), 2)

    plt.figure()
    plt.imshow(color_image)
    plt.show()




Line endpoints (start_y, start_x, end_y, end_x): [[1834, 2382, 1841, 2380], [1879, 2416, 1889, 2417], [1942, 2855, 1947, 2857], [1945, 2343, 1953, 2348], [1953, 2451, 1961, 2454], [1962, 2461, 1977, 2470], [2012, 2502, 2018, 2504], [2130, 2749, 2145, 2747], [2146, 2745, 2153, 2747], [2218, 3000, 2227, 2999], [2228, 3001, 2240, 3004], [2241, 3002, 2249, 3001], [2250, 3004, 2255, 3005], [2256, 3008, 2293, 3008], [2294, 3011, 2303, 3014], [2304, 3016, 2321, 3021], [2354, 2734, 2359, 2733], [2418, 3087, 2423, 3089], [2432, 2681, 2437, 2683], [2439, 2685, 2445, 2686], [2474, 1606, 2479, 1608], [3046, 2829, 3056, 2830], [3070, 2839, 3076, 2839], [3081, 2845, 3092, 2848], [3093, 2850, 3105, 2853], [3106, 2850, 3121, 2852], [3122, 2850, 3201, 2869], [3206, 2876, 3228, 2883], [3229, 2886, 3239, 2886], [3242, 2889, 3264, 2896], [3245, 3236, 3252, 3241], [3253, 3244, 3285, 3239], [3265, 2544, 3271, 2546], [3273, 2543, 3282, 2544], [3284, 2899, 3293, 2902], [3295, 2902, 3328, 2908], [3319, 3237, 3

# END OF CURRENT PIPELINE (August 2024)
#### Future work could be taking these endpoints and line lengths to validate and refine the algorithm.
***


## The cells below are experimental and may be used in order to further refine the current algorithm. These functions were written as previous attempts to develop a complex algorithm.

### The cells below, until the next markdown cell, were/are mainly for exploring the impact of thresholding and morphological functions on image quality.

In this code segment we will be doing the following:
- Defining the threshold for the image
- Reconstructing the image
- Removing noise and skeletonizing the image

In [None]:
def reconstruction_of_image(image_array):
    '''
    This function takes in:
        A sequence of arrays (image_array); only its first element, image_array[0], is processed
    This function:
        Converts image_array[0] to floating point, performs a morphological reconstruction by
        dilation (seeded with the image border, masked by the image itself) and subtracts the
        reconstruction from the image
    This function returns:
        The image minus its reconstruction (enhanced_image)
    '''
    # NOTE(review): the border slicing below needs image_array[0] to be 2D, i.e. the argument is
    # presumably a list of channels rather than a single 2D image - confirm against callers.
    image = img_as_float(image_array[0])

    # NOTE(review): the blurred image is computed but never used afterwards.
    image_blurred = gaussian_filter(image, 1)

    # Seed: the image's own values on the border, the global minimum everywhere inside.
    seed = np.copy(image)
    seed[1:-1, 1:-1] = image.min()

    enhanced_image = image - reconstruction(seed, image, method='dilation')
    return enhanced_image

In [None]:
def define_threshold(channel_data_array, percentile):
    '''
    This function takes in:
        A sequence of arrays (channel_data_array), of which only the first entry is used;
        a percentage value (percentile)
    This function:
        Finds the pixel value that separates the top `percentile` percent of
        channel_data_array[0] from the rest, i.e. its (100 - percentile)th percentile.
    This function returns:
        A threshold value
    '''
    return np.percentile(channel_data_array[0], 100 - percentile)

In [None]:
def remove_noise(image_array):
    '''
    This function takes in:
        A 2D array (image_array)
    This function:
        Performs a variety of morphological operations, specifically
        closing, dilation, medial axis, skeletonization, dilation, and skeletonization.
        Only the result of the first operation (the closing) is returned; the
        other results are computed and then discarded.
    This function returns:
        The closed 2D array (final_image)
    '''
    
    # NOTE(review): despite its name, opened_image holds the result of a closing with a 3x3 square.
    opened_image = morphology.closing(image_array, morphology.square(3))
    # plt.imshow(opened_image)
    # Dilation with a 50-row by 1-column rectangle.
    dilated = morphology.dilation(opened_image, morphology.rectangle(50,1))
    # plt.imshow(~dilated)
    # NOTE(review): skel is never used afterwards.
    skel = morphology.medial_axis(opened_image, return_distance= False)
    # plt.imshow(skel)
    skeltized = morphology.skeletonize(dilated)
    # plt.imshow(skeltized)

    # Second dilation/skeletonization pass; skeltized2 is not used afterwards either.
    dilated2 = morphology.dilation(skeltized,morphology.rectangle(50,1))
    skeltized2 = morphology.skeletonize(dilated2)
    
    # cleaned = remove_small_objects(skeltized2, min_size = 2)

    # NOTE(review): the closing is returned, not the skeleton - presumably a deliberate
    # choice made while experimenting; confirm before relying on this function.
    final_image = opened_image
    return final_image #Returns the closed image, not the skeletonized one

### The code below attempts to develop a unique algorithm that finds continuous lines. It is not fully refined and may contain bugs

In [None]:
def percentage_change(final, original_value, percentage_threshold=50):
    '''
    This function takes in:
        A number (final); a number (original_value); an optional threshold in percent
        (percentage_threshold, default 50)
    This function:
        Calculates the percentage change from the original value to the final value.
    This function returns:
        A boolean True if the calculated percentage is strictly above percentage_threshold
        and is non negative, otherwise False.
        When original_value is 0 the percentage is undefined; the function then returns
        True only if final is positive (an increase from nothing) instead of dividing by zero.
    '''
    if original_value == 0:
        # Avoids ZeroDivisionError for plain Python numbers; matches the inf/nan
        # outcome that NumPy scalars produced here.
        return bool(final > 0)

    calculated_percent = ((final - original_value) / original_value) * 100.0
    return bool(calculated_percent > percentage_threshold and calculated_percent >= 0)

In [None]:
def split_array(array, pixel_location, max_distance_from_pixel_loc):
    '''
    This function takes in:
        A 1D sequence such as one image row (array); a pixel location (pixel_location), and a
        maximum distance from the pixel location (max_distance_from_pixel_loc)
    This function:
        Splits the array around the pixel location into a left and a right window. Each window
        holds up to max_distance_from_pixel_loc values directly next to the pixel; the pixel
        itself belongs to neither window. Near an edge the affected window is simply shorter
        (and empty at the edge itself), so all positions are treated the same way.
    This function returns:
        A list [left_array, right_array]
    '''
    # Clamp the left boundary at 0: a negative slice start would count from the end.
    left_start = max(0, pixel_location - max_distance_from_pixel_loc)
    left_array = array[left_start:pixel_location]

    # Slicing past the end is safe; it just yields a shorter (or empty) window.
    right_start = pixel_location + 1
    right_array = array[right_start:right_start + max_distance_from_pixel_loc]

    return [left_array, right_array]
        
        

In [None]:
def test_algorithm(new_image_array, b, pixel_tolerance ):
    '''
    This function takes in:
        A 2D array (new_image_array); a background value (b), and a pixel tolerance (pixel_tolerance)
    This function:
        Loops through the image row by row. For every pixel of a row whose mean is above the
        background value (except the first row of each run of such rows), it compares the pixel with
        the mean of the left and of the right window of the row above. If the pixel exceeds both
        window means by the percentage threshold it is replaced by the sum of the two means; if it
        exceeds only one of them it is set to 0; otherwise it is left unchanged. The aim is to make
        continuous axon fragment lines.
        The input array is modified in place.
    This function returns:
        The same array object that was passed in (new_image_array), after processing.
    '''
   
    n = pixel_tolerance #pixels away no longer count maximum values
    mean_of_background = b
    non_empty_row = 0 # number of consecutive rows (so far) whose mean is above the background

    # NOTE(review): "- 1" means the last row of the image is never processed.
    for i in range(len(new_image_array) - 1): ## Loops through the rows
        # print(f"row mean: {np.mean(new_image_array[i])}")
        if np.mean(new_image_array[i]) > mean_of_background: ## This is how I determine if a row is important (contains potential axons)
            # print("passed mean test")
            non_empty_row +=1 ## Increments the row count

            # Because the first row of every run is skipped, i - 1 below always refers to a
            # row that itself passed the mean test.
            if non_empty_row != 1 : ## want to leave first row untouched, just want to load it                
                # NOTE(review): the "- 1" is inside len(), so it is applied to the row's values
                # and not to the count - every column is visited (this requires the row to
                # support subtraction, e.g. a NumPy array).
                for j in range(len(new_image_array[i] - 1)): ## Loops through the columns
                    splitted_array = split_array(new_image_array[i-1], j, n) ## Checks the row above and splits the array into the left and right
                    combined_val = 0 # Value to calculate the combined value
                    left_side_boolean = False
                    right_side_boolean = False
                    for k in range(len(splitted_array)): # Loops through the array (size of 2)
                        if percentage_change(new_image_array[i][j], np.mean(splitted_array[k])) == True: ## If the pixel value at the specific pixel location meets the minimum threshold change 
                            combined_val += np.mean(splitted_array[k]) # Updates combined value
                            if k == 0:
                                left_side_boolean = True
                            if k == 1:
                                right_side_boolean = True
                        # After the right window (k == 1) both sides are known: decide the pixel's new value.
                        if k == 1:

                            if left_side_boolean == True and right_side_boolean == True :
                                new_image_array[i][j] = combined_val # Sets the original array value
                                combined_val = 0
                            elif((left_side_boolean == False and right_side_boolean == True) or
                                         (left_side_boolean == True and right_side_boolean == False)):
                                new_image_array[i][j] = 0 # Sets the original array value
                                combined_val = 0
                            # If neither side passed, the pixel keeps its value.
        else:
            non_empty_row = 0 ## Resets the count of consecutive non-empty rows
    return new_image_array

In [None]:
'''
This section runs test_algorithm to produce a final image
'''
plt.figure()
# NOTE(review): test_array is not assigned at the top level of any cell shown in this
# notebook; it must be defined (a 2D image array) before this cell is run.
output_array = test_algorithm(test_array, 500, 10 )
plt.imshow(output_array)
# test_algorithm modifies its argument in place and returns that same object, so
# output_array and test_array are the same array here and the product squares each pixel.
final = test_algorithm(remove_noise(output_array * test_array), 9.9**7, 10 )




In [None]:

def merge_lines(lines, distance_threshold):
    '''
    This function takes in:
        A collection of lines (lines), each given as four numbers [x1, y1, x2, y2]. Accepted shapes
        are (N, 4) and the (N, 1, 4) layout returned by cv.HoughLinesP; None (no lines detected) and
        empty input are accepted as well. A maximum distance (distance_threshold) that the end of one
        line may be away from the start of another line for the two to be merged.
    This function:
        Repeatedly looks for two lines where the start point of one lies within distance_threshold
        of the end point of the other, and replaces the pair by a single line running from the start
        of the earlier segment to the end of the later one. This continues until no such pair is left.
    This function returns:
        A new list of [x1, y1, x2, y2] lists. The input is not modified. Merged lines are appended
        at the end of the list, so the order of the result differs from the input order.
    '''
    if lines is None:
        return []
    # reshape(-1, 4) handles (N, 4), (N, 1, 4) and also a single line, which
    # np.squeeze would have collapsed into a flat list of four numbers.
    lines = np.asarray(lines).reshape(-1, 4).tolist()

    def is_within_distance(p1, p2, threshold):
        '''Return True if the Euclidean distance between points p1 and p2 is at most threshold.'''
        distance = np.linalg.norm(np.array(p1) - np.array(p2))
        return distance <= threshold

    def merge_two_lines(line1, line2):
        '''Return the merged [x1, y1, x2, y2] line, or None if line1 and line2 do not connect.'''
        if is_within_distance((line1[0], line1[1]), (line2[2], line2[3]), distance_threshold):
            # line2 ends where line1 starts: line2 comes first.
            return [line2[0], line2[1], line1[2], line1[3]]
        if is_within_distance((line1[2], line1[3]), (line2[0], line2[1]), distance_threshold):
            # line1 ends where line2 starts: line1 comes first.
            return [line1[0], line1[1], line2[2], line2[3]]
        return None

    def first_merge():
        '''Return (i, j, merged_line) for the first mergeable pair, or None if there is none.'''
        for i, first in enumerate(lines):
            for j, second in enumerate(lines):
                if i == j:
                    continue
                merged_line = merge_two_lines(first, second)
                if merged_line is not None:
                    return i, j, merged_line
        return None

    # Every merge shortens the list by one, so this loop always terminates.
    while True:
        found = first_merge()
        if found is None:
            break
        i, j, merged_line = found
        # Remove the larger index first so the smaller index stays valid.
        for index in sorted((i, j), reverse=True):
            lines.pop(index)
        lines.append(merged_line)
    return lines

   

In [None]:

def detect_lines(image_array):
    '''
    This function is still experimental and has not been fully fleshed out.
    This function takes in:
        An image array (image_array)
    This function:
        Scales the image into an 8bit image, runs a Canny Edge Detection on it and then a
        Probabilistic Hough Line Transform on the edges
    This function returns:
        Returns None, and displays the detected lines overlayed onto the edge image
    '''
    max_16bit = (2**16) - 1

    # Canny thresholds expressed on the 16-bit scale; both are currently the same value.
    lower_16bit = 1.2 * (10**4)
    upper_16bit = 1.2 * (10**4)
    # Rescale the thresholds to the 8-bit range used by the normalized image.
    lower_8bit = (lower_16bit / max_16bit) * 255
    upper_8bit = (upper_16bit / max_16bit) * 255

    normalized_image = cv.normalize(image_array, None, 0, 255, cv.NORM_MINMAX).astype(np.uint8)
    edges = cv.Canny(normalized_image, lower_8bit, upper_8bit)

    # Colour copy of the edge image to draw the detected segments on.
    cdst = cv.cvtColor(edges, cv.COLOR_GRAY2BGR)
    cdstP = np.copy(cdst)

    linesP = cv.HoughLinesP(edges, 1, np.pi / 180, 50, None, 25, 5)
    if linesP is not None:
        for segment in linesP:
            # Each entry wraps one [x1, y1, x2, y2] segment.
            l = segment[0]
            cv.line(cdstP, (l[0], l[1]), (l[2], l[3]), (0,0,255), 3, cv.LINE_AA)

    plt.figure()
    plt.imshow(cdstP)


    # merged_lines = merge_lines(linesP, 15)  # Merge lines

    # def calculate_line_lengths(lines):
    #     lengths = np.sqrt((lines[:, 2] - lines[:, 0])**2 + (lines[:, 3] - lines[:, 1])**2)
    #     return lengths
    
    # def analyze_line_lengths(lengths):
    #     mean_length = np.mean(lengths)
    #     median_length = np.median(lengths)
    #     std_dev = np.std(lengths)
    #     quantiles = np.percentile(lengths, [25, 50, 75])
    
    #     stats = {
    #         'mean': mean_length,
    #         'median': median_length,
    #         'std_dev': std_dev,
    #         'quantiles': quantiles
    #     }
    #     return stats
    
    # line_lengths = calculate_line_lengths(merged_lines)
    # print(f"Line lengths: {line_lengths}")

    # Analyze lengths
    # stats = analyze_line_lengths(line_lengths)
    # print(f"Mean length: {stats['mean']}")
    # print(f"Median length: {stats['median']}")
    # print(f"Standard deviation: {stats['std_dev']}")
    # print(f"Quantiles (25th, 50th, 75th): {stats['quantiles']}")

    # # Count the number of lines
    # num_lines = len(merged_lines)
    # print(f"Number of lines: {num_lines}")
    
    # cv.imshow("Source", image_array)
    # cv.imshow("Detected Lines (in red) - Standard Hough Line Transform", cdst)
    # cv.imshow("Detected Lines (in red) - Probabilistic Line Transform", cdstP)
    # print(f"Number of lines detected: {line_count}")
    # print(f"Lengths of detected lines: {line_lengths}")

    # plt.figure()
    # plt.imshow( image_array)
    # plt.figure()
    # plt.imshow( cdst)
    # plt.figure()
    # plt.imshow(cdstP)


In [None]:
# Display the result of the experimental pipeline and run the Hough-based line detection on it.
plt.figure()
# NOTE(review): the uint8 cast is for display only; values above 255 presumably wrap around - confirm.
plt.imshow(final.astype(np.uint8))
detect_lines(final)