# Image analysis for a complete area captured by Clemex

Aim: 
- Read in the full image from the google drive 
- Segment the each sample from the background 
- Segment the pores from the base material 
- Get the ratio of pores to solid material


In [2]:
# Set up the module for the porosity_analysis packages  

import os 
import sys 

Pyro_module  = '../../20250312_PorosityAnalysis/porosity_analysis'
# Add the parent directory of Pyro_DataAnalysis to the system path
if os.path.exists(Pyro_module) == True:
    sys.path.insert(0, os.path.abspath(Pyro_module))
else :
    print('The file does not exist')

In [3]:
# import statements 

#import statements 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns 

import numpy as np

from skimage.filters import difference_of_gaussians,gaussian
from skimage.filters import threshold_otsu
from skimage.segmentation import clear_border
from skimage.measure import label, regionprops,regionprops_table ,find_contours
from skimage.color import label2rgb
import matplotlib.patches as mpatches
import imageio.v3 as iio
from pathlib import Path
import config 
import custom_funcs 
import image_analysis as ia


import cv2

In [4]:
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

import PIL.Image
PIL.Image.MAX_IMAGE_PIXELS = 3019898880


In [5]:
# Google drive location 
experimental_dir = config.technical_dir_path

### Reference image for thresholding 

In [6]:
threshold_csv = os.path.join(experimental_dir,'data/processed/global_threshold.csv')
df_threshold = pd.read_csv(threshold_csv)
df_threshold

FileNotFoundError: [Errno 2] No such file or directory: 'G:\\My Drive\\202503Fran\\data/processed/global_threshold.csv'

In [7]:
df_threshold['Designation'] = '0'+ df_threshold['Designation'].astype(str)

In [8]:
df_threshold['Material_Designation'] = df_threshold['date'].astype(str) + '_' +  df_threshold['Material'] + '_' +  df_threshold['Designation']

In [7]:
df_threshold

NameError: name 'df_threshold' is not defined

# All towers

### Image preparation:

- All the images shoud be cropped using Fiji/Image-J to show only relevant part 
- All images should be saved as .png's or .tifs    
More details on the sample labelling is on the explainer.

In [10]:
# Image directory ---->
processed_ImagDir = Path("data/processed")

In [8]:
# Google drive location for the all cropped images
Processed_ImagDir_loc = experimental_dir / processed_ImagDir
print(f'This is the list of all the images in the processed data directory: {os.listdir(Processed_ImagDir_loc)}')

NameError: name 'processed_ImagDir' is not defined

### Change all files from .png or .tifs to .npy make process quicker

In [9]:
save_fig_Dir = Path("figures/density_images")

save_fig_Dir.mkdir(parents=True, exist_ok=True)  # Ensure the folder exists


In [10]:
npy_saveFolder  = ia.png_toNumpy(Processed_ImagDir_loc)
npy_saveFolder

NameError: name 'Processed_ImagDir_loc' is not defined

In [11]:
npy_saveFolder

NameError: name 'npy_saveFolder' is not defined

In [12]:
### Check which files have been done already 

# Save location
df_save_loc = '../../data/processed/Dataframe_Full.csv'
df_done = pd.read_csv(df_save_loc)
df_done
File_done = list(df_done['File'])
File_done

FileNotFoundError: [Errno 2] No such file or directory: '../../data/processed/Dataframe_Full.csv'

In [13]:
# Dictionary to store loaded .npy files
npy_data = {}

for file_npy in os.listdir(npy_saveFolder):
    if file_npy.endswith(".npy"):  # Ensure it's a .npy file
        save_file = os.path.join(npy_saveFolder, file_npy)
        
        with open(save_file, 'rb') as f:
            dict_fname = file_npy[:-4]
            npy_data[dict_fname] = np.load(f)  # Store in dictionary with filename as key
npy_data.keys()

NameError: name 'npy_saveFolder' is not defined

In [14]:
df = pd.DataFrame(columns= ['File','Date','Material','Designation','Sample_label','Otsu','denisty/%'])

In [15]:
for key in npy_data:
    parts = key.split("_")

    
    Date = parts[0]
    Material = parts[1]
    Designation = parts[2]
    Sample_label = parts[3]

    # Append to DataFrame
    df = pd.concat([df, pd.DataFrame([{'File': key,'Date':Date,'Material': Material, 'Designation': Designation, 'Sample_label': Sample_label}])], ignore_index=True)

df

Unnamed: 0,File,Date,Material,Designation,Sample_label,Otsu,denisty/%


In [16]:
#### Look for unique Date_Material_Designation combos: 

df['Material_Designation'] = df['Date'].astype(str) + '_' +  df['Material'] + '_' +  df['Designation']
df

Unnamed: 0,File,Date,Material,Designation,Sample_label,Otsu,denisty/%,Material_Designation


In [17]:
## check if all images have Mat_des in threshold and other compile df 

mask = ~df['Material_Designation'].isin(df_threshold['Material_Designation'])
#The ~ inverts the boolean values, so that it returns the values that are NOT in the other series.
result = df_threshold[mask]
print(result)

NameError: name 'df_threshold' is not defined

In [18]:
df = pd.merge(df, df_threshold[['Material_Designation', 'Otsu']], on='Material_Designation', how='left').drop(columns='Otsu_x')
df


NameError: name 'df_threshold' is not defined

In [19]:
df.rename(columns={"Otsu_y": "Otsu"}, inplace= True)

### Check for all the images 

In [20]:
for key in npy_data:

    # Get the Otsu threshold from the threshold df 
    temp_key = key[:-3]
    Otsu = df.loc[df.Material_Designation == temp_key,'Otsu'].values[0]

    img_np_array = npy_data[key]
    
    image_bw  = np.round(img_np_array).astype(np.uint8)[:,:,0]
    image_blur = gaussian(image_bw,2)
    image_threshold = image_blur < Otsu
    


    # morphological tidying
    image_eroded_dilated = ia.erosion_dilation_loop(image_threshold, 1, 1, False)
    # remove artifacts connected to image border
    cleared = clear_border(image_eroded_dilated)
    image_small_objects = ia.clean_small_objects(cleared,1, False) # 100
    image_small_holes = ia.clean_small_holes(image_small_objects,1, False) # 100

    #print(image_small_holes.shape)

    # label image regions
    label_image = label(image_small_holes)
    #image_label_overlay = label2rgb(label_image, image=image_bw, bg_label=0)
    #fig, ax = plt.subplots(figsize=(10, 6))
    #plt.imshow(image_label_overlay)
    

    # save plot file name 
    plt_name = os.path.join(save_fig_Dir,key + f"{key}.png")

    
    fig, ax = plt.subplots(figsize=(10, 6))

    regions = regionprops(label_image)

    for props in regions:

        minr, minc, maxr, maxc = props.bbox
        bx = (minc, maxc, maxc, minc, minc)
        by = (minr, minr, maxr, maxr, minr)
        ax.plot(bx, by, '-b', linewidth=2.5)

    ax.set_axis_off()
    plt.tight_layout()
    plt.savefig(plt_name)
    #plt.show() # added plt.show to display the image.
    plt.close()

    #! region props table

    props = regionprops_table(label_image, properties=['label', 'area'])
    data = pd.DataFrame(props)  
    sum_pores_px = data.area.sum()
            

    #Pore ratio calculation 
    w , h = image_bw.shape # Get the width ad the height of the images 

    pore_area_ratio = 100*(1 - (np.count_nonzero(image_small_holes, axis=None))/image_small_holes.size)

    #assign the value correctly.
    df.loc[df.File == key,'denisty/%'] = pore_area_ratio
    print(f"Updated density for {key} is {pore_area_ratio}")

    #else:
        #print(f"No matching Material_Designation found for {temp_key}")'


In [21]:
df.loc[df.File == '20250312_Nickel_02_00','denisty/%'] 

Series([], Name: denisty/%, dtype: object)

In [22]:

df.to_csv(df_save_loc, index=False)