# Image analysis for a complete area captured by Clemex

Aim: 
- Read in the full image from the google drive 
- Segment the each sample from the background 
- Segment the pores from the base material 
- Get the ratio of pores to solid material


In [2]:
# Set up the module for the porosity_analysis packages  

import os 
import sys 

Pyro_module  = '../20250312_PorosityAnalysis/porosity_analysis'
# Add the parent directory of Pyro_DataAnalysis to the system path
if os.path.exists(Pyro_module) == True:
    sys.path.insert(0, os.path.abspath(Pyro_module))
else :
    print('The file does not exist')

In [None]:
# import statements 

#import statements 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns 

import numpy as np

from skimage.filters import difference_of_gaussians,gaussian
from skimage.filters import threshold_otsu
from skimage.segmentation import clear_border
from skimage.measure import label, regionprops,regionprops_table ,find_contours
from skimage.color import label2rgb
import matplotlib.patches as mpatches
import imageio.v3 as iio
from pathlib import Path
import config 
import custom_funcs 
import image_analysis as ia


import cv2

In [4]:
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

import PIL.Image
PIL.Image.MAX_IMAGE_PIXELS = 3019898880


In [5]:
# Google drive location 
experimental_dir = config.technical_dir_path

### Reference image for thresholding 

In [6]:
threshold_csv = os.path.join(experimental_dir,'data/processed/global_threshold.csv')
df_threshold = pd.read_csv(threshold_csv)
df_threshold

Unnamed: 0,ref_im,date,Material,Designation,Otsu
0,20250219_Nickel_04_thresh.png,20250219,Nickel,4,0.420488
1,20250310_Nickel_02_00_thresh.png,20250310,Nickel,2,0.534619
2,20250310_Nickel_01_thresh.png,20250310,Nickel,1,0.537771
3,20250310_Nickel_05_thresh.png,20250310,Nickel,5,0.528401
4,20250310_Nickel_04_thresh.png,20250310,Nickel,4,0.534359
5,20250310_Nickel_03_thresh.png,20250310,Nickel,3,0.535766
6,20250219_Nickel_03_thresh.png,20250219,Nickel,3,0.420022
7,20250312_Nickel_02_thresh.png,20250312,Nickel,2,0.528459
8,20250312_Nickel_01_thresh.png,20250312,Nickel,1,0.401673


In [7]:
df_threshold['Designation'] = '0'+ df_threshold['Designation'].astype(str)

In [8]:
df_threshold['Material_Designation'] = df_threshold['date'].astype(str) + '_' +  df_threshold['Material'] + '_' +  df_threshold['Designation']

In [9]:
df_threshold

Unnamed: 0,ref_im,date,Material,Designation,Otsu,Material_Designation
0,20250219_Nickel_04_thresh.png,20250219,Nickel,4,0.420488,20250219_Nickel_04
1,20250310_Nickel_02_00_thresh.png,20250310,Nickel,2,0.534619,20250310_Nickel_02
2,20250310_Nickel_01_thresh.png,20250310,Nickel,1,0.537771,20250310_Nickel_01
3,20250310_Nickel_05_thresh.png,20250310,Nickel,5,0.528401,20250310_Nickel_05
4,20250310_Nickel_04_thresh.png,20250310,Nickel,4,0.534359,20250310_Nickel_04
5,20250310_Nickel_03_thresh.png,20250310,Nickel,3,0.535766,20250310_Nickel_03
6,20250219_Nickel_03_thresh.png,20250219,Nickel,3,0.420022,20250219_Nickel_03
7,20250312_Nickel_02_thresh.png,20250312,Nickel,2,0.528459,20250312_Nickel_02
8,20250312_Nickel_01_thresh.png,20250312,Nickel,1,0.401673,20250312_Nickel_01


# All towers

### Image preparation:

- All the images shoud be cropped using Fiji/Image-J to show only relevant part 
- All images should be saved as .png's or .tifs    
More details on the sample labelling is on the explainer.

In [10]:
# Image directory ---->
processed_ImagDir = Path("data/processed")

In [11]:
# Google drive location for the all cropped images
Processed_ImagDir_loc = experimental_dir / processed_ImagDir
print(f'This is the list of all the images in the processed data directory: {os.listdir(Processed_ImagDir_loc)}')

This is the list of all the images in the processed data directory: ['thresholding', '20250219_Nickel_03_00.png', '20250219_Nickel_03_10.png', '20250219_Nickel_04_00.png', '20250219_Nickel_04_10.png', '20250310_Nickel_02_00.png', '20250310_Nickel_02_01.png', '20250310_Nickel_01_01.png', '20250310_Nickel_01_00.png', '20250310_Nickel_01_10.png', '20250310_Nickel_01_11.png', '20250310_Nickel_05_00.png', '20250310_Nickel_05_01.png', '20250310_Nickel_04_01.png', '20250310_Nickel_04_00.png', '20250310_Nickel_04_10.png', '20250310_Nickel_04_11.png', '20250310_Nickel_03_00.png', '20250310_Nickel_03_10.png', '20250312_Nickel_01_00.png', '20250312_Nickel_01_01.png', '20250312_Nickel_02_00.png', '20250312_Nickel_02_01.png', '20250312_Nickel_02_10.png', '20250312_Nickel_02_11.png', 'global_threshold.csv', 'npy_cache']


### Change all files from .png or .tifs to .npy make process quicker

In [12]:
save_fig_Dir = Path("figures/density_images")

save_fig_Dir.mkdir(parents=True, exist_ok=True)  # Ensure the folder exists


In [13]:
npy_saveFolder  = ia.png_toNumpy(Processed_ImagDir_loc)
npy_saveFolder

Processing: G:\Shared drives\Instruct3d\Technical\Experiment and case study campaign Q1 2025\Exp 2 - Thermal & Sensor Fusion\data\processed\20250219_Nickel_03_00.png
Skipped (already exists): G:\Shared drives\Instruct3d\Technical\Experiment and case study campaign Q1 2025\Exp 2 - Thermal & Sensor Fusion\data\processed\npy_cache\20250219_Nickel_03_00.npy
Processing: G:\Shared drives\Instruct3d\Technical\Experiment and case study campaign Q1 2025\Exp 2 - Thermal & Sensor Fusion\data\processed\20250219_Nickel_03_10.png
Skipped (already exists): G:\Shared drives\Instruct3d\Technical\Experiment and case study campaign Q1 2025\Exp 2 - Thermal & Sensor Fusion\data\processed\npy_cache\20250219_Nickel_03_10.npy
Processing: G:\Shared drives\Instruct3d\Technical\Experiment and case study campaign Q1 2025\Exp 2 - Thermal & Sensor Fusion\data\processed\20250219_Nickel_04_00.png
Skipped (already exists): G:\Shared drives\Instruct3d\Technical\Experiment and case study campaign Q1 2025\Exp 2 - Thermal

'G:\\Shared drives\\Instruct3d\\Technical\\Experiment and case study campaign Q1 2025\\Exp 2 - Thermal & Sensor Fusion\\data\\processed\\npy_cache'

In [14]:
npy_saveFolder

'G:\\Shared drives\\Instruct3d\\Technical\\Experiment and case study campaign Q1 2025\\Exp 2 - Thermal & Sensor Fusion\\data\\processed\\npy_cache'

In [15]:
### Check which files have been done already 

# Save location
df_save_loc = '../data/processed/Dataframe_Full.csv'
df_done = pd.read_csv(df_save_loc)
df_done
File_done = list(df_done['File'])
File_done

['20250219_Nickel_03_00',
 '20250219_Nickel_03_10',
 '20250219_Nickel_04_00',
 '20250219_Nickel_04_10',
 '20250310_Nickel_02_00',
 '20250310_Nickel_02_01',
 '20250310_Nickel_01_01',
 '20250310_Nickel_01_00',
 '20250310_Nickel_01_10',
 '20250310_Nickel_01_11',
 '20250310_Nickel_05_00',
 '20250310_Nickel_05_01',
 '20250310_Nickel_04_01',
 '20250310_Nickel_04_00',
 '20250310_Nickel_04_10',
 '20250310_Nickel_04_11',
 '20250310_Nickel_03_00',
 '20250310_Nickel_03_10',
 '20250312_Nickel_01_00',
 '20250312_Nickel_02_00',
 '20250312_Nickel_02_01',
 '20250312_Nickel_02_10',
 '20250312_Nickel_02_11',
 '20250312_Nickel_01_01']

In [16]:
# Dictionary to store loaded .npy files
npy_data = {}

for file_npy in os.listdir(npy_saveFolder):
    if file_npy.endswith(".npy"):  # Ensure it's a .npy file
        save_file = os.path.join(npy_saveFolder, file_npy)
        
        with open(save_file, 'rb') as f:
            dict_fname = file_npy[:-4]
            npy_data[dict_fname] = np.load(f)  # Store in dictionary with filename as key
npy_data.keys()

dict_keys(['20250219_Nickel_03_00', '20250219_Nickel_03_10', '20250219_Nickel_04_00', '20250219_Nickel_04_10', '20250310_Nickel_02_00', '20250310_Nickel_02_01', '20250310_Nickel_01_01', '20250310_Nickel_01_00', '20250310_Nickel_01_10', '20250310_Nickel_01_11', '20250310_Nickel_05_00', '20250310_Nickel_05_01', '20250310_Nickel_04_01', '20250310_Nickel_04_00', '20250310_Nickel_04_10', '20250310_Nickel_04_11', '20250310_Nickel_03_00', '20250310_Nickel_03_10', '20250312_Nickel_01_00', '20250312_Nickel_02_00', '20250312_Nickel_02_01', '20250312_Nickel_02_10', '20250312_Nickel_02_11', '20250312_Nickel_01_01'])

In [17]:
df = pd.DataFrame(columns= ['File','Date','Material','Designation','Sample_label','Otsu','denisty/%'])

In [18]:
for key in npy_data:
    parts = key.split("_")

    
    Date = parts[0]
    Material = parts[1]
    Designation = parts[2]
    Sample_label = parts[3]

    # Append to DataFrame
    df = pd.concat([df, pd.DataFrame([{'File': key,'Date':Date,'Material': Material, 'Designation': Designation, 'Sample_label': Sample_label}])], ignore_index=True)

df

Unnamed: 0,File,Date,Material,Designation,Sample_label,Otsu,denisty/%
0,20250219_Nickel_03_00,20250219,Nickel,3,0,,
1,20250219_Nickel_03_10,20250219,Nickel,3,10,,
2,20250219_Nickel_04_00,20250219,Nickel,4,0,,
3,20250219_Nickel_04_10,20250219,Nickel,4,10,,
4,20250310_Nickel_02_00,20250310,Nickel,2,0,,
5,20250310_Nickel_02_01,20250310,Nickel,2,1,,
6,20250310_Nickel_01_01,20250310,Nickel,1,1,,
7,20250310_Nickel_01_00,20250310,Nickel,1,0,,
8,20250310_Nickel_01_10,20250310,Nickel,1,10,,
9,20250310_Nickel_01_11,20250310,Nickel,1,11,,


In [19]:
#### Look for unique Date_Material_Designation combos: 

df['Material_Designation'] = df['Date'].astype(str) + '_' +  df['Material'] + '_' +  df['Designation']
df

Unnamed: 0,File,Date,Material,Designation,Sample_label,Otsu,denisty/%,Material_Designation
0,20250219_Nickel_03_00,20250219,Nickel,3,0,,,20250219_Nickel_03
1,20250219_Nickel_03_10,20250219,Nickel,3,10,,,20250219_Nickel_03
2,20250219_Nickel_04_00,20250219,Nickel,4,0,,,20250219_Nickel_04
3,20250219_Nickel_04_10,20250219,Nickel,4,10,,,20250219_Nickel_04
4,20250310_Nickel_02_00,20250310,Nickel,2,0,,,20250310_Nickel_02
5,20250310_Nickel_02_01,20250310,Nickel,2,1,,,20250310_Nickel_02
6,20250310_Nickel_01_01,20250310,Nickel,1,1,,,20250310_Nickel_01
7,20250310_Nickel_01_00,20250310,Nickel,1,0,,,20250310_Nickel_01
8,20250310_Nickel_01_10,20250310,Nickel,1,10,,,20250310_Nickel_01
9,20250310_Nickel_01_11,20250310,Nickel,1,11,,,20250310_Nickel_01


In [20]:
df_threshold

Unnamed: 0,ref_im,date,Material,Designation,Otsu,Material_Designation
0,20250219_Nickel_04_thresh.png,20250219,Nickel,4,0.420488,20250219_Nickel_04
1,20250310_Nickel_02_00_thresh.png,20250310,Nickel,2,0.534619,20250310_Nickel_02
2,20250310_Nickel_01_thresh.png,20250310,Nickel,1,0.537771,20250310_Nickel_01
3,20250310_Nickel_05_thresh.png,20250310,Nickel,5,0.528401,20250310_Nickel_05
4,20250310_Nickel_04_thresh.png,20250310,Nickel,4,0.534359,20250310_Nickel_04
5,20250310_Nickel_03_thresh.png,20250310,Nickel,3,0.535766,20250310_Nickel_03
6,20250219_Nickel_03_thresh.png,20250219,Nickel,3,0.420022,20250219_Nickel_03
7,20250312_Nickel_02_thresh.png,20250312,Nickel,2,0.528459,20250312_Nickel_02
8,20250312_Nickel_01_thresh.png,20250312,Nickel,1,0.401673,20250312_Nickel_01


In [21]:
## check if all images have Mat_des in threshold and other compile df 

mask = ~df['Material_Designation'].isin(df_threshold['Material_Designation'])
#The ~ inverts the boolean values, so that it returns the values that are NOT in the other series.
result = df_threshold[mask]
print(result)

Empty DataFrame
Columns: [ref_im, date, Material, Designation, Otsu, Material_Designation]
Index: []


  result = df_threshold[mask]


In [22]:
df = pd.merge(df, df_threshold[['Material_Designation', 'Otsu']], on='Material_Designation', how='left').drop(columns='Otsu_x')
df


Unnamed: 0,File,Date,Material,Designation,Sample_label,denisty/%,Material_Designation,Otsu_y
0,20250219_Nickel_03_00,20250219,Nickel,3,0,,20250219_Nickel_03,0.420022
1,20250219_Nickel_03_10,20250219,Nickel,3,10,,20250219_Nickel_03,0.420022
2,20250219_Nickel_04_00,20250219,Nickel,4,0,,20250219_Nickel_04,0.420488
3,20250219_Nickel_04_10,20250219,Nickel,4,10,,20250219_Nickel_04,0.420488
4,20250310_Nickel_02_00,20250310,Nickel,2,0,,20250310_Nickel_02,0.534619
5,20250310_Nickel_02_01,20250310,Nickel,2,1,,20250310_Nickel_02,0.534619
6,20250310_Nickel_01_01,20250310,Nickel,1,1,,20250310_Nickel_01,0.537771
7,20250310_Nickel_01_00,20250310,Nickel,1,0,,20250310_Nickel_01,0.537771
8,20250310_Nickel_01_10,20250310,Nickel,1,10,,20250310_Nickel_01,0.537771
9,20250310_Nickel_01_11,20250310,Nickel,1,11,,20250310_Nickel_01,0.537771


In [23]:
df.rename(columns={"Otsu_y": "Otsu"}, inplace= True)

### Check for all the images 

In [27]:
for key in npy_data:

    # Get the Otsu threshold from the threshold df 
    temp_key = key[:-3]
    Otsu = df.loc[df.Material_Designation == temp_key,'Otsu'].values[0]

    img_np_array = npy_data[key]
    
    image_bw  = np.round(img_np_array).astype(np.uint8)[:,:,0]
    image_blur = gaussian(image_bw,2)
    image_threshold = image_blur < Otsu
    


    # morphological tidying
    image_eroded_dilated = ia.erosion_dilation_loop(image_threshold, 1, 1, False)
    # remove artifacts connected to image border
    cleared = clear_border(image_eroded_dilated)
    image_small_objects = ia.clean_small_objects(cleared,1, False) # 100
    image_small_holes = ia.clean_small_holes(image_small_objects,1, False) # 100

    #print(image_small_holes.shape)

    # label image regions
    label_image = label(image_small_holes)
    #image_label_overlay = label2rgb(label_image, image=image_bw, bg_label=0)
    #fig, ax = plt.subplots(figsize=(10, 6))
    #plt.imshow(image_label_overlay)
    

    # save plot file name 
    plt_name = os.path.join(save_fig_Dir,key + f"{key}.png")

    
    fig, ax = plt.subplots(figsize=(10, 6))

    regions = regionprops(label_image)

    for props in regions:

        minr, minc, maxr, maxc = props.bbox
        bx = (minc, maxc, maxc, minc, minc)
        by = (minr, minr, maxr, maxr, minr)
        ax.plot(bx, by, '-b', linewidth=2.5)

    ax.set_axis_off()
    plt.tight_layout()
    plt.savefig(plt_name)
    #plt.show() # added plt.show to display the image.
    plt.close()

    #! region props table

    props = regionprops_table(label_image, properties=['label', 'area'])
    data = pd.DataFrame(props)  
    sum_pores_px = data.area.sum()
            

    #Pore ratio calculation 
    w , h = image_bw.shape # Get the width ad the height of the images 

    pore_area_ratio = 100*(1 - (np.count_nonzero(image_small_holes, axis=None))/image_small_holes.size)

    #assign the value correctly.
    df.loc[df.File == key,'denisty/%'] = pore_area_ratio
    print(f"Updated density for {key} is {pore_area_ratio}")

    #else:
        #print(f"No matching Material_Designation found for {temp_key}")'


Updated density for 20250219_Nickel_03_00 is 99.9913514527847
Updated density for 20250219_Nickel_03_10 is 99.99772486172397
Updated density for 20250219_Nickel_04_00 is 99.97994680281
Updated density for 20250219_Nickel_04_10 is 99.9734336371753
Updated density for 20250310_Nickel_02_00 is 99.97584615870426
Updated density for 20250310_Nickel_02_01 is 99.99548095950615
Updated density for 20250310_Nickel_01_01 is 99.97894825481033
Updated density for 20250310_Nickel_01_00 is 99.94996668156483
Updated density for 20250310_Nickel_01_10 is 99.96125669202593
Updated density for 20250310_Nickel_01_11 is 99.97850798994699
Updated density for 20250310_Nickel_05_00 is 99.99839581653242
Updated density for 20250310_Nickel_05_01 is 99.99653860728131
Updated density for 20250310_Nickel_04_01 is 99.92587862697279
Updated density for 20250310_Nickel_04_00 is 99.9957997719934
Updated density for 20250310_Nickel_04_10 is 99.9944739168877
Updated density for 20250310_Nickel_04_11 is 99.9951975706286


In [29]:
df.loc[df.File == '20250312_Nickel_02_00','denisty/%'] 

19    99.200069
Name: denisty/%, dtype: object

In [28]:

df.to_csv(df_save_loc, index=False)