This Python script is used to extract and save metadata from all TIFF image files in a specified Google Drive folder into a structured JSON file. It is designed to run in Google Colab.

In [None]:
from google.colab import drive
import tifffile
import os
import json
import numpy as np

# Mount Google Drive to access files
drive.mount('/content/drive')

# Define input and output folders
folder_path = '/content/drive/My Drive/EOD_2'
output_folder = '/content/drive/My Drive/EOD_2/'
os.makedirs(output_folder, exist_ok=True)  # Ensure output folder exists


def _to_jsonable(value):
    """
    Recursively convert a TIFF tag value into JSON-serializable types.

    NumPy arrays become lists, NumPy scalars become Python scalars, bytes are
    decoded as latin-1 (which never fails), and containers are converted
    element by element. Anything else is returned unchanged.
    """
    if isinstance(value, np.ndarray):
        return value.tolist()
    if isinstance(value, np.generic):
        return value.item()
    if isinstance(value, bytes):
        return value.decode('latin-1')
    if isinstance(value, dict):
        return {str(key): _to_jsonable(item) for key, item in value.items()}
    if isinstance(value, (list, tuple)):
        return [_to_jsonable(item) for item in value]
    return value


# Dictionary to store metadata for all images, keyed by filename
all_metadata = {}

# Iterate through each file in the folder (sorted for a reproducible order)
for filename in sorted(os.listdir(folder_path)):
    # Process only TIFF files; compare case-insensitively so '.TIF' is included
    if not filename.lower().endswith(('.tif', '.tiff')):
        continue

    image_path = os.path.join(folder_path, filename)
    metadata = {}

    # Open the TIFF image and collect every tag of every page.
    # NOTE: tags from later pages overwrite same-named tags of earlier pages.
    with tifffile.TiffFile(image_path) as sem_image:
        for page in sem_image.pages:
            for tag in page.tags.values():
                metadata[tag.name] = _to_jsonable(tag.value)

    # Store metadata using filename as the key
    all_metadata[filename] = metadata
    print(f"Metadata for {filename} added to the collection")

# Save all metadata as a JSON file
output_filename = 'all_metadata10.json'
output_path = os.path.join(output_folder, output_filename)
with open(output_path, 'w') as f:
    # default=str is a deliberate last-resort fallback so that one exotic tag
    # value (e.g. an enum or datetime) cannot abort the whole dump.
    json.dump(all_metadata, f, indent=4, default=str)

print(f"All metadata saved to {output_path}")

The script below extracts specific SEM image metadata from a JSON file and writes it into a new filtered JSON file.

In [None]:
from google.colab import drive
import json

# Mount Google Drive to access files
drive.mount('/content/drive')

# Path to the input metadata JSON file
file_path = '/content/drive/My Drive/EOD_2_SN/SN/all_metadata.json'

# Load the JSON data from the file
with open(file_path, 'r') as f:
    data = json.load(f)

# Keys to copy out of each image's CZ_SEM block.
# "image_name" is intentionally NOT listed here: it comes from the top-level
# key of the input JSON, and looking it up inside CZ_SEM would overwrite the
# real name with whatever (usually None) the CZ_SEM block returns.
keys_to_extract = [
    "dp_detector_type", "ap_actualkv",
    "ap_wd", "ap_aperturesize"
]

# List to hold the filtered metadata, one record per image
extracted_data = []

# Iterate through each image in the dataset
for image_name, image_data in data.items():
    # Access the nested "CZ_SEM" dictionary
    cz_sem_data = image_data.get("CZ_SEM", {})

    # Skip images without a usable CZ_SEM block
    if not cz_sem_data or not isinstance(cz_sem_data, dict):
        continue

    # Start the record with the image name, then add the requested fields
    # (missing fields are stored as None)
    extracted_item = {'image_name': image_name}
    for key in keys_to_extract:
        extracted_item[key] = cz_sem_data.get(key)

    extracted_data.append(extracted_item)

# Define output path for the filtered JSON data
output_file_path = '/content/drive/My Drive/EOD_2_SN/SN/filtered_data_all2.json'

# Save the extracted metadata to the output file in JSON format
with open(output_file_path, 'w') as outfile:
    json.dump(extracted_data, outfile, indent=4)

print(f"Extracted data written to: {output_file_path}")

The script below is designed to generate and save a full-factorial experimental design table for Scanning Electron Microscopy (SEM) parameter testing.

In [None]:
from google.colab import drive
from itertools import product
import pandas as pd

# Make Google Drive available under /content/drive
drive.mount('/content/drive')

# Factor levels for the three SEM parameters under study
working_distance = [3, 5, 7, 10]     # millimeters (mm)
EHT = [1.5, 3, 5, 10]                # accelerating voltage, kilovolts (kV)
aperture_size = [10, 20, 30, 60]     # micrometers (µm)

# Full factorial design: the Cartesian product of all factor levels
factorial_design = list(product(working_distance, EHT, aperture_size))

# One row per run, one column per factor
column_names = ['Working Distance (mm)', 'EHT (kV)', 'Aperture Size (µm)']
experiment_table = pd.DataFrame(factorial_design, columns=column_names)

# Number the runs 1..N and place that counter in the leading column
run_numbers = range(1, len(experiment_table) + 1)
experiment_table.insert(0, 'Run #', run_numbers)

# Show the table in the notebook output
print(experiment_table)

# Persist the design as CSV on Google Drive (without the DataFrame index)
design_csv_path = '/content/drive/My Drive/EOD_2/full_factorial_design_10.csv'
experiment_table.to_csv(design_csv_path, index=False)


This script automatically renames and organizes SEM image files based on metadata embedded in their TIFF headers.

In [None]:
from google.colab import drive
import os
import re
import tifffile
from datetime import datetime
from shutil import copyfile

# Mount Google Drive so the folders below are reachable from the Colab VM
drive.mount('/content/drive')

# input_folder holds the original SEM TIFFs; output_folder receives the
# renamed copies (the originals are copied, not moved)
input_folder = '/content/drive/My Drive/EOD_2_SN/EOD-2/'
output_folder = '/content/drive/My Drive/EOD_2_SN/SN/'

# Create output folder if it doesn't exist (no error if it already does)
os.makedirs(output_folder, exist_ok=True)


def clean(text):
    """
    Sanitize a value for use in a filename.

    The value is converted to ``str`` and surrounding whitespace is trimmed.
    Every remaining character that is not a word character (letter, digit,
    underscore) or a hyphen is then replaced with an underscore.

    Args:
        text: Any object; it is passed through ``str()`` first.

    Returns:
        str: The sanitized string (may be empty).
    """
    # Trim BEFORE substituting: once whitespace has been turned into
    # underscores there is nothing left for strip() to remove.
    return re.sub(r'[^\w\-]', '_', str(text).strip())


# Index for generating sequential filenames (advanced only on a successful copy)
index = 1


def _cz_value(metadata, key, fallback):
    """
    Return the value slot (position 1) of a CZ_SEM entry, or ``fallback``.

    Entries are expected to be sequences whose second element is the value;
    a missing or malformed entry yields ``fallback``.
    """
    entry = metadata.get(key)
    if isinstance(entry, (list, tuple)) and len(entry) > 1:
        return entry[1]
    return fallback


# Loop through sorted list of files in the input folder
for filename in sorted(os.listdir(input_folder)):
    # Skip non-TIFF files (accept both .tif and .tiff, any letter case)
    if not filename.lower().endswith(('.tif', '.tiff')):
        continue

    # Full path to the original image
    original_path = os.path.join(input_folder, filename)

    try:
        # Open the TIFF file and read the CZ_SEM tag of the first page
        with tifffile.TiffFile(original_path) as tif:
            cz_sem = tif.pages[0].tags.get('CZ_SEM')
            metadata = cz_sem.value if cz_sem is not None else None

        # Skip if no CZ_SEM metadata is found
        if cz_sem is None:
            print(f"⚠️ No CZ_SEM metadata in {filename}")
            continue

        # Skip if the tag exists but was not parsed into a dictionary
        if not isinstance(metadata, dict):
            print(f"⚠️ Unparsed CZ_SEM metadata in {filename}")
            continue

        # Detector type, with a fallback when the field is absent
        dp_detector_type = clean(_cz_value(metadata, 'dp_detector_type', 'UNKNOWN'))

        # Acquisition date, reformatted from e.g. "05 Mar 2024" to "20240305"
        ap_date_raw = _cz_value(metadata, 'ap_date', 'UNKNOWN')
        try:
            ap_date = datetime.strptime(
                ap_date_raw, "%d %b %Y"
            ).strftime("%Y%m%d")
        except (ValueError, TypeError):
            # ValueError: unexpected format; TypeError: value is not a string
            ap_date = "UNKNOWNDATE"

        # Sample name without its TIFF extension; falls back to the filename
        raw_name = str(_cz_value(metadata, 'sv_file_name', filename))
        stem, extension = os.path.splitext(raw_name)
        if extension.lower() in ('.tif', '.tiff'):
            raw_name = stem
        sv_file_name = clean(raw_name)

        # Magnification: numeric values get an "X" suffix, others are sanitized
        ap_mag_val = _cz_value(metadata, 'ap_mag', '')
        ap_mag = (
            f"{ap_mag_val}X"
            if isinstance(ap_mag_val, (int, float))
            else clean(str(ap_mag_val))
        )

        # Generate new standardized filename
        new_name = f"{dp_detector_type}_{ap_date}_{sv_file_name}_{ap_mag}_{index:03d}.tif"
        new_path = os.path.join(output_folder, new_name)

        # Copy the original image to the new location with the new name
        copyfile(original_path, new_path)

        print(f"✅ Copied: {filename} → {new_name}")
        index += 1

    except Exception as e:
        # Per-file boundary: report the problem and continue with the next
        # file so one unreadable image does not abort the whole batch
        print(f"❌ Error with {filename}: {e}")


The script below is designed to evaluate the visual quality of SEM (Scanning Electron Microscopy) images using a no-reference metric called BRISQUE, and to extract associated image metadata embedded within the TIFF files.

In [None]:
# Mount Google Drive to access SEM image files
from google.colab import drive
drive.mount('/content/drive')

# Folder containing the SEM .tif images; the results CSV is written here too.
# Keep the trailing slash: the function below builds its glob pattern by
# plain string concatenation.
folder_path = '/content/drive/My Drive/EOD_2_SN/SN/'

# Install required libraries for image processing and BRISQUE computation.
# The leading "!" is an IPython/Colab shell escape, so this line only works
# inside a notebook, not in a plain Python script.
!pip install piq torch torchvision pillow pandas

# Import necessary Python libraries
import torch
import os
from PIL import Image
import torchvision.transforms as T
import piq
from glob import glob
import pandas as pd
import tifffile as tiff


def make_metadata_with_brisque_score_tiff(
    folder_path, mode, output_name='brisque_scores_with_metadata.csv'
):
    """
    Compute BRISQUE quality scores for a batch of TIFF SEM images
    and extract selected metadata from each image. Save the combined
    results to a CSV file inside ``folder_path``.

    Args:
        folder_path (str): Path to the folder containing TIFF files
            (with or without a trailing slash).
        mode (str): Write mode ('w' for write, 'a' for append). The header
            row is omitted when appending.
        output_name (str): File name of the CSV. The default matches the
            name the downstream merge/clustering/regression cells read.
    """
    # Metadata fields to retain
    keys_to_keep = [
        'image_name', 'ap_aperturesize', 'ap_wd',
        'sv_file_name', 'ap_actualkv'
    ]

    # Get all .tif files in the folder; os.path.join makes the pattern
    # correct whether or not folder_path ends with a separator
    files = glob(os.path.join(folder_path, '*.tif'))

    scores = []           # BRISQUE score per file, same order as `files`
    metadata_list = []    # Extracted metadata per file, same order as `files`

    # The transform is stateless, so build it once instead of once per image
    to_tensor = T.ToTensor()

    for image_path in files:
        print(f"Processing: {image_path}")

        # Open image and convert to RGB; the context manager releases the
        # file handle as soon as the pixel data has been converted
        with Image.open(image_path) as raw_image:
            image = raw_image.convert('RGB')
        width, height = image.size

        # Crop image: keep full width but limit height to 691 pixels
        cropped_image = image.crop((0, 0, width, min(691, height)))

        # Convert image to tensor (shape: [1, C, H, W])
        img_tensor = to_tensor(cropped_image).unsqueeze(0)

        # Compute BRISQUE score using PIQ
        score = piq.brisque(img_tensor, data_range=1.0)
        scores.append(score.item())

        # Find the first non-empty CZ_SEM tag across the TIFF pages
        cz_sem_metadata = None
        with tiff.TiffFile(image_path) as tif:
            for page in tif.pages:
                tag = page.tags.get('CZ_SEM')
                if tag is not None and tag.value:
                    cz_sem_metadata = tag.value
                    break

        # Every requested key is present in the row; None marks "not found"
        parsed_metadata = dict.fromkeys(keys_to_keep)
        if isinstance(cz_sem_metadata, dict):
            for key in keys_to_keep:
                value = cz_sem_metadata.get(key)
                # Entries are tuples whose second element is the value; only
                # index it when that element actually exists
                if isinstance(value, tuple) and len(value) > 1:
                    value = value[1]
                parsed_metadata[key] = value

        metadata_list.append(parsed_metadata)

    # Convert metadata and scores to DataFrames
    metadata_df = pd.DataFrame(metadata_list)
    score_df = pd.DataFrame({'File': files, 'Score': scores})

    # Combine scores and metadata into one table (rows align by position)
    result_df = pd.concat([score_df, metadata_df], axis=1)

    # Write header only when mode is not append
    header = mode != 'a'

    # Save the result to CSV
    result_df.to_csv(
        os.path.join(folder_path, output_name),
        index=False,
        header=header,
        mode=mode
    )


# Score every .tif in folder_path and export the results; mode='w' starts a
# fresh CSV with a header row (use mode='a' to append without a header)
make_metadata_with_brisque_score_tiff(folder_path, mode='w')


The script below automates the process of quality assessment and metadata extraction for Scanning Electron Microscopy (SEM) images saved in TIFF format. It computes a no-reference image quality score using the NIQE (Natural Image Quality Evaluator) metric and retrieves important metadata from each image’s CZ_SEM tag.




In [None]:
import os
from glob import glob

import torch
import pandas as pd
import tifffile as tiff
from skimage import io, img_as_float
import pyiqa


# Load NIQE model from pyiqa for no-reference image quality assessment.
# Created once at module level so the same metric object is reused for every
# image scored by the function below.
niqe_model = pyiqa.create_metric('niqe')


def make_metadata_with_niqe_score_tiff(folder_path, mode):
    """
    Computes NIQE scores for TIFF SEM images and extracts associated metadata.

    Parameters:
        folder_path (str): Path to the folder containing .tif SEM images.
        mode (str): 'w' to write a new CSV, 'a' to append to existing CSV
            (the header row is omitted when appending).

    Output:
        Saves a CSV file named 'niqe_scores_with_metadata.csv' to the same
        folder. Images whose score could not be computed get an empty Score.
    """
    # Define the metadata fields to extract from CZ_SEM tag
    keys_to_keep = ['image_name', 'ap_aperturesize', 'ap_wd', 'sv_file_name', 'ap_actualkv']

    # Get all .tif files in the specified folder
    files = glob(os.path.join(folder_path, '*.tif'))

    scores = []         # NIQE score (or None) per file, same order as `files`
    metadata_list = []  # Parsed TIFF metadata per file, same order as `files`

    # Iterate through each image file
    for img_path in files:
        print(f"Processing: {img_path}")

        # Exactly one score entry is appended per file, whatever happens in
        # the try block, so scores always stay aligned with `files`
        score = None
        try:
            image = io.imread(img_path)
            if image.ndim == 3:
                # Keep only the first channel of the last axis (no weighted
                # grayscale conversion is performed)
                image = image[:, :, 0]
            image = img_as_float(image)  # Convert to float [0,1]
            image_tensor = torch.tensor(image).unsqueeze(0).unsqueeze(0)  # Shape: [1, 1, H, W]

            # Calculate NIQE score
            score = niqe_model(image_tensor).item()
            print(f"{os.path.basename(img_path)}: NIQE = {score:.4f}")
        except Exception as e:
            # Best effort: report the failure and record a missing score
            print(f"Error processing {img_path}: {e}")
        scores.append(score)

        # --- Extract Metadata from TIFF tags ---
        # Find the first non-empty CZ_SEM tag across the TIFF pages
        cz_sem_metadata = None
        with tiff.TiffFile(img_path) as tif:
            for page in tif.pages:
                tag = page.tags.get('CZ_SEM')
                if tag is not None and tag.value:
                    cz_sem_metadata = tag.value
                    break

        # Every requested key is present in the row; None marks "not found"
        parsed_metadata = dict.fromkeys(keys_to_keep)
        if isinstance(cz_sem_metadata, dict):
            for key in keys_to_keep:
                value = cz_sem_metadata.get(key)
                # Entries are tuples whose second element is the value; only
                # index it when that element actually exists
                if isinstance(value, tuple) and len(value) > 1:
                    value = value[1]
                parsed_metadata[key] = value

        metadata_list.append(parsed_metadata)

    # --- Build DataFrame and Save Results ---

    # Create DataFrames from scores and metadata
    metadata_df = pd.DataFrame(metadata_list)
    scores_df = pd.DataFrame({
        'File': files,
        'Score': scores
    })

    # Merge (rows align by position) and write to CSV
    result_df = pd.concat([scores_df, metadata_df], axis=1)
    output_csv_path = os.path.join(folder_path, 'niqe_scores_with_metadata.csv')

    result_df.to_csv(
        output_csv_path,
        index=False,
        mode=mode,
        header=(mode != 'a')
    )

    print(f"\n✅ NIQE scores saved to: {output_csv_path}")


# Score every .tif in the SN folder; mode='w' starts a fresh CSV with a
# header row (use mode='a' to append without a header)
make_metadata_with_niqe_score_tiff('/content/drive/My Drive/EOD_2_SN/SN/', mode='w')


This script merges two CSV files—one containing BRISQUE scores and the other containing NIQE scores—for a set of SEM .tif images. It matches the records by image filename (File), combines quality scores and key SEM metadata (e.g., aperture size, working distance, voltage), and saves the result as a single, clean CSV file.

In [None]:
from google.colab import drive
import pandas as pd
import os

# 🚀 Make Google Drive available under /content/drive
drive.mount('/content/drive')

# 📁 Folder that holds both input CSVs and receives the combined output
folder_path = '/content/drive/My Drive/EOD_2_SN/SN/'

# 📄 Locations of the two score tables
brisque_path = os.path.join(folder_path, 'brisque_scores_with_metadata.csv')
niqe_path = os.path.join(folder_path, 'niqe_scores_with_metadata.csv')

# 📥 Read both tables
brisque_df = pd.read_csv(brisque_path)
niqe_df = pd.read_csv(niqe_path)

# 🔗 Join on 'File'. Only the score is taken from the NIQE table (metadata
# comes from the BRISQUE table); the clashing 'Score' columns are
# disambiguated with suffixes.
niqe_scores = niqe_df[['File', 'Score']]
merged_df = brisque_df.merge(
    niqe_scores,
    on='File',
    suffixes=('_BRISQUE', '_NIQE')
)

# 🏷️ Human-readable column labels
column_labels = {
    'File': 'Image_File',
    'Score_BRISQUE': 'BRISQUE_Score',
    'Score_NIQE': 'NIQE_Score',
    'ap_aperturesize': 'Aperture_Size',
    'ap_wd': 'Working_Distance',
    'ap_actualkv': 'Accelerating_Voltage',
    'sv_file_name': 'Sample_Name'
}
merged_df = merged_df.rename(columns=column_labels)

# 🎯 Columns (and their order) of the final table
output_columns = [
    'Image_File', 'Sample_Name', 'Accelerating_Voltage',
    'Working_Distance', 'Aperture_Size',
    'BRISQUE_Score', 'NIQE_Score'
]
final_df = merged_df[output_columns]

# 💾 Write the combined dataset next to the inputs
output_path = os.path.join(folder_path, 'combined_sem_dataset.csv')
final_df.to_csv(output_path, index=False)

# ✅ Confirmation message
print(f"✅ Combined dataset saved to: {output_path}")


This code analyzes and clusters SEM image quality data based on BRISQUE scores using K-Means clustering, and evaluates the importance of imaging parameters (aperture size, working distance, and accelerating voltage) using a Random Forest regressor. The goal is to uncover patterns in image quality and identify which parameters most influence it.

In [None]:
# 📦 Import necessary libraries
from google.colab import drive
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
import matplotlib.pyplot as plt
import seaborn as sns
from mpl_toolkits.mplot3d import Axes3D

# 🚀 Mount Google Drive to access dataset
drive.mount('/content/drive')

# 📄 Load the dataset containing BRISQUE scores and SEM metadata
data = pd.read_csv('/content/drive/MyDrive/EOD_2_SN/SN/brisque_scores_with_metadata.csv')

# 🏷️ Rename columns for simplicity and consistency
data = data.rename(columns={
    'ap_aperturesize': 'Aperture_Size',
    'ap_wd': 'Working_Distance',
    'ap_actualkv': 'Accelerating_Voltage',
    'Score': 'BRISQUE_Score'
})

# 🎯 Numerical columns used for clustering and analysis
analysis_columns = [
    'Aperture_Size', 'Working_Distance',
    'Accelerating_Voltage', 'BRISQUE_Score'
]

# 🧹 Drop rows with missing values: images without CZ_SEM metadata are stored
# with empty fields, and KMeans / RandomForest refuse NaN input. The index is
# reset so the cluster labels assigned below line up with the rows.
rows_before = len(data)
data = data.dropna(subset=analysis_columns).reset_index(drop=True)
if len(data) < rows_before:
    print(f"Dropped {rows_before - len(data)} row(s) with missing values")
if data.empty:
    raise ValueError("No complete rows left to analyze")

data_cleaned = data[analysis_columns]

# ⚖️ Standardize the data for clustering (zero mean, unit variance)
scaler = StandardScaler()
data_scaled = scaler.fit_transform(data_cleaned)

# 📈 Use the elbow method to determine optimal number of clusters (k).
# k cannot exceed the number of samples, so cap the range for tiny datasets.
inertia = []
k_range = range(1, min(10, len(data_cleaned)) + 1)

for k in k_range:
    kmeans = KMeans(n_clusters=k, random_state=42, n_init=10)
    kmeans.fit(data_scaled)
    inertia.append(kmeans.inertia_)

# 📊 Plot elbow curve to visualize optimal k
plt.figure(figsize=(8, 5))
plt.plot(k_range, inertia, marker='o')
plt.xlabel('Number of Clusters (k)')
plt.ylabel('Inertia')
plt.title('Elbow Method for Optimal k (BRISQUE)')
plt.show()

# 🔧 Set optimal number of clusters (as identified from elbow plot),
# capped at the number of available samples
optimal_k = min(4, len(data_cleaned))

# ⚙️ Perform K-Means clustering
kmeans = KMeans(n_clusters=optimal_k, random_state=42, n_init=10)
clusters = kmeans.fit_predict(data_scaled)
data['Cluster'] = clusters  # Add cluster labels to dataset

# 🌲 Use Random Forest to determine feature importance for BRISQUE score
X = data_cleaned.drop(columns=['BRISQUE_Score'])  # Features
y = data_cleaned['BRISQUE_Score']  # Target

model = RandomForestRegressor(random_state=42)
model.fit(X, y)

# 🔍 Get feature importance values
feature_importances = pd.Series(model.feature_importances_, index=X.columns)

# 🖨️ Print feature importance ranking
print("Feature Importance Ranking (Random Forest - BRISQUE):")
print(feature_importances.sort_values(ascending=False))

# 📉 2D visualization of clusters using BRISQUE Score and Working Distance
plt.figure(figsize=(8, 6))
sns.scatterplot(
    x=data['BRISQUE_Score'],
    y=data['Working_Distance'],
    hue=data['Cluster'],
    palette='viridis',
    s=100
)
plt.xlabel('BRISQUE Score')
plt.ylabel('Working Distance')
plt.title('K-Means Clustering (BRISQUE Score vs Working Distance)')
plt.legend(title='Cluster')
plt.show()

# 🔺 3D visualization using top 2 features + BRISQUE Score
top_features = feature_importances.nlargest(2).index.tolist()

fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(111, projection='3d')

scatter = ax.scatter(
    data[top_features[0]],
    data[top_features[1]],
    data['BRISQUE_Score'],
    c=data['Cluster'],
    cmap='viridis',
    s=100
)

ax.set_xlabel(top_features[0])
ax.set_ylabel(top_features[1])
ax.set_zlabel('BRISQUE Score')
ax.set_title('3D Cluster Visualization (Top Features - BRISQUE)')
plt.colorbar(scatter, label='Cluster')
plt.show()


This script analyzes and clusters SEM image quality data using K-Means clustering, based on NIQE scores. It also uses a Random Forest regressor to evaluate the importance of SEM imaging parameters (aperture size, working distance, and accelerating voltage) in determining image quality.



In [None]:
# 📦 Import required libraries
from google.colab import drive
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
import matplotlib.pyplot as plt
import seaborn as sns
from mpl_toolkits.mplot3d import Axes3D

# 🚀 Mount Google Drive to access the dataset
drive.mount('/content/drive')

# 📄 Load the CSV file containing NIQE scores and SEM metadata
data = pd.read_csv('/content/drive/MyDrive/EOD_2_SN/SN/niqe_scores_with_metadata.csv')

# 🏷️ Rename columns for clarity and consistency
data = data.rename(columns={
    'ap_aperturesize': 'Aperture_Size',
    'ap_wd': 'Working_Distance',
    'ap_actualkv': 'Accelerating_Voltage',
    'Score': 'NIQE_Score'
})

# 🎯 Numeric columns used for clustering and modeling
analysis_columns = [
    'Aperture_Size',
    'Working_Distance',
    'Accelerating_Voltage',
    'NIQE_Score'
]

# 🧹 Drop rows with missing values: the NIQE export leaves the score empty for
# images that failed to process and the metadata empty for images without a
# CZ_SEM tag, and KMeans / RandomForest refuse NaN input. The index is reset
# so the cluster labels assigned below line up with the rows.
rows_before = len(data)
data = data.dropna(subset=analysis_columns).reset_index(drop=True)
if len(data) < rows_before:
    print(f"Dropped {rows_before - len(data)} row(s) with missing values")
if data.empty:
    raise ValueError("No complete rows left to analyze")

data_cleaned = data[analysis_columns]

# ⚖️ Standardize the features to have zero mean and unit variance
scaler = StandardScaler()
data_scaled = scaler.fit_transform(data_cleaned)

# 📈 Use the Elbow Method to determine the optimal number of clusters (k).
# k cannot exceed the number of samples, so cap the range for tiny datasets.
inertia = []
k_range = range(1, min(10, len(data_cleaned)) + 1)

for k in k_range:
    kmeans = KMeans(n_clusters=k, random_state=42, n_init=10)
    kmeans.fit(data_scaled)
    inertia.append(kmeans.inertia_)

# 📊 Plot the inertia to find the elbow point
plt.figure(figsize=(8, 5))
plt.plot(k_range, inertia, marker='o')
plt.xlabel('Number of Clusters (k)')
plt.ylabel('Inertia')
plt.title('Elbow Method for Optimal k (NIQE)')
plt.show()

# 🔧 Select the optimal number of clusters (e.g., from elbow plot),
# capped at the number of available samples
optimal_k = min(4, len(data_cleaned))

# ⚙️ Apply K-Means clustering with optimal k
kmeans = KMeans(n_clusters=optimal_k, random_state=42, n_init=10)
clusters = kmeans.fit_predict(data_scaled)

# 🧬 Add cluster labels to the (filtered) dataset
data['Cluster'] = clusters

# 🌲 Train a Random Forest to identify important features affecting NIQE score
X = data_cleaned.drop(columns=['NIQE_Score'])  # Input features
y = data_cleaned['NIQE_Score']  # Target variable

model = RandomForestRegressor(random_state=42)
model.fit(X, y)

# 🔍 Calculate feature importances
feature_importances = pd.Series(model.feature_importances_, index=X.columns)

# 🖨️ Print feature ranking
print("Feature Importance Ranking (Random Forest - NIQE):")
print(feature_importances.sort_values(ascending=False))

# 📉 2D scatter plot to visualize clusters
plt.figure(figsize=(8, 6))
sns.scatterplot(
    x=data['NIQE_Score'],
    y=data['Working_Distance'],
    hue=data['Cluster'],
    palette='viridis',
    s=100
)
plt.xlabel('NIQE Score')
plt.ylabel('Working Distance')
plt.title('K-Means Clustering (NIQE Score vs Working Distance)')
plt.legend(title='Cluster')
plt.show()

# 📈 3D scatter plot using top 2 features + NIQE score
top_features = feature_importances.nlargest(2).index.tolist()

fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(111, projection='3d')

scatter = ax.scatter(
    data[top_features[0]],
    data[top_features[1]],
    data['NIQE_Score'],
    c=data['Cluster'],
    cmap='viridis',
    s=100
)

ax.set_xlabel(top_features[0])
ax.set_ylabel(top_features[1])
ax.set_zlabel('NIQE Score')
ax.set_title('3D Cluster Visualization (Top Features - NIQE)')
plt.colorbar(scatter, label='Cluster')
plt.show()


This script performs a 3D surface regression analysis on SEM image quality data, using BRISQUE scores as the quality metric. It models the relationship between the imaging parameters (aperture size, working distance, and accelerating voltage) and the resulting image quality using a second-degree polynomial function.

In [None]:
# 📦 Import required libraries
from google.colab import drive
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from scipy.optimize import curve_fit

# 🚀 Mount Google Drive to access files
drive.mount('/content/drive')

# 📁 Path to the BRISQUE metadata CSV
file_path = '/content/drive/MyDrive/EOD_2_SN/SN/brisque_scores_with_metadata.csv'

# 📌 Step 1: Load data from CSV
df = pd.read_csv(file_path)

# Keep only complete rows: images without CZ_SEM metadata are exported with
# empty fields, and curve_fit raises on NaN input
model_columns = ['ap_aperturesize', 'ap_wd', 'ap_actualkv', 'Score']
df = df.dropna(subset=model_columns)

# The quadratic surface has 10 coefficients, so at least 10 observations are
# required; fail early with a clear message instead of an opaque fit error
if len(df) < 10:
    raise ValueError(
        f"Need at least 10 complete rows to fit the surface, found {len(df)}"
    )

# 📌 Step 2: Extract independent and dependent variables
x_data = df['ap_aperturesize'].values              # Aperture size (µm)
y_data = df['ap_wd'].values                         # Working distance (mm)
z_data = df['ap_actualkv'].values                  # Accelerating voltage (kV)
score_data = df['Score'].values                    # BRISQUE image quality score (lower is better)

# 📌 Step 3: Define a 3D second-degree polynomial regression surface
def surface_function(X, a, b, c, d, e, f, g, h, i, j):
    """
    Evaluate a full second-degree polynomial in three variables.

    Args:
        X: Sequence of three scalars or equally shaped arrays ``(x, y, z)``.
        a, b, c: Coefficients of the squared terms x², y², z².
        d, e, f: Coefficients of the cross terms x·y, x·z, y·z.
        g, h, i: Coefficients of the linear terms x, y, z.
        j: Constant offset.

    Returns:
        The polynomial value, with the same shape as the inputs.
    """
    x, y, z = X
    terms = (
        a * x ** 2, b * y ** 2, c * z ** 2,   # pure quadratic
        d * x * y, e * x * z, f * y * z,      # interactions
        g * x, h * y, i * z,                  # linear
        j,                                    # intercept
    )
    # Accumulate strictly left to right
    total = terms[0]
    for term in terms[1:]:
        total = total + term
    return total

# 📌 Step 4: Estimate the ten polynomial coefficients from the observations
predictors = (x_data, y_data, z_data)
params, _ = curve_fit(surface_function, predictors, score_data)

# Individual names for the coefficients, used in the report below
a, b, c, d, e, f, g, h, i, j = params

# 🖨️ Report the fitted coefficients
print('🔹 Optimized Model Coefficients:')
print(f'   a={a}, b={b}, c={c}, d={d}, e={e}, f={f}, g={g}, h={h}, i={i}, j={j}')

# 📌 Step 5: Sample each parameter at 30 evenly spaced points spanning its
# observed range, then evaluate the fitted surface on the resulting 3D grid
x_range, y_range, z_range = (
    np.linspace(min(values), max(values), 30)
    for values in predictors
)

X, Y, Z = np.meshgrid(x_range, y_range, z_range)
Scores = surface_function((X, Y, Z), *params)

# 📌 Step 6: Grid point with the smallest predicted BRISQUE score
optimal_idx = np.unravel_index(Scores.argmin(), Scores.shape)
optimal_x, optimal_y, optimal_z = X[optimal_idx], Y[optimal_idx], Z[optimal_idx]
optimal_score = Scores[optimal_idx]

# 🖨️ Report the best settings found on the grid
print('✅ Optimal Conditions:')
print(f'   ap_aperturesize = {optimal_x}')
print(f'   ap_wd = {optimal_y}')
print(f'   ap_actualkv = {optimal_z}')
print(f'   Lowest Score (Best Quality) = {optimal_score}')

# 📌 Step 7: A surface needs two free variables, so hold the accelerating
# voltage at its mean and evaluate over aperture size × working distance
z_fixed = np.mean(z_data)
X_plot, Y_plot = np.meshgrid(x_range, y_range)
z_plane = np.full_like(X_plot, z_fixed)
Z_plot = surface_function((X_plot, Y_plot, z_plane), *params)

# 📌 Step 8: Draw the measurements and the fitted surface together
fig = plt.figure(figsize=(10, 7))
ax = fig.add_subplot(111, projection='3d')

# Measured scores (all voltages) as red markers
ax.scatter(x_data, y_data, score_data, color='red', label='Real Data')

# Fitted surface at the fixed voltage, semi-transparent so points stay visible
ax.plot_surface(X_plot, Y_plot, Z_plot, cmap='viridis', alpha=0.7)

# Axis labels and title
ax.set_xlabel('ap_aperturesize')
ax.set_ylabel('ap_wd')
ax.set_zlabel('Score')
ax.set_title(f'Surface Regression (Fixed ap_actualkv = {z_fixed:.2f})')

# Legend and display
ax.legend()
plt.show()


This code fits a 3D polynomial regression model to predict NIQE image quality scores based on SEM imaging parameters: aperture size, working distance, and accelerating voltage. It finds the optimal parameter combination that minimizes the NIQE score and visualizes the regression surface along with actual data in a 3D plot.

In [None]:
# 📦 Import required libraries
from google.colab import drive
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from scipy.optimize import curve_fit

# 🚀 Mount Google Drive
drive.mount('/content/drive')

# 📁 Path to the NIQE metadata CSV
file_path = '/content/drive/MyDrive/EOD_2_SN/SN/niqe_scores_with_metadata.csv'

# 📌 Step 1: Load data from CSV
df = pd.read_csv(file_path)

# Keep only complete rows: the NIQE export leaves the score empty for images
# that failed to process (and the metadata empty for images without a CZ_SEM
# tag), and curve_fit raises on NaN input
model_columns = ['ap_aperturesize', 'ap_wd', 'ap_actualkv', 'Score']
df = df.dropna(subset=model_columns)

# The quadratic surface has 10 coefficients, so at least 10 observations are
# required; fail early with a clear message instead of an opaque fit error
if len(df) < 10:
    raise ValueError(
        f"Need at least 10 complete rows to fit the surface, found {len(df)}"
    )

# 📌 Step 2: Extract variables
x_data = df['ap_aperturesize'].values             # Aperture size (µm)
y_data = df['ap_wd'].values                        # Working distance (mm)
z_data = df['ap_actualkv'].values                 # Accelerating voltage (kV)
score_data = df['Score'].values                   # NIQE image quality score

# 📌 Step 3: Define a 3D second-degree polynomial regression surface
def surface_function(X, a, b, c, d, e, f, g, h, i, j):
    """
    Evaluate a full second-degree polynomial in three variables.

    Args:
        X: Sequence of three scalars or equally shaped arrays ``(x, y, z)``.
        a, b, c: Coefficients of the squared terms x², y², z².
        d, e, f: Coefficients of the cross terms x·y, x·z, y·z.
        g, h, i: Coefficients of the linear terms x, y, z.
        j: Constant offset.

    Returns:
        The polynomial value, with the same shape as the inputs.
    """
    x, y, z = X

    # Squared terms
    value = a * x ** 2
    value = value + b * y ** 2
    value = value + c * z ** 2

    # Pairwise interaction terms
    value = value + d * x * y
    value = value + e * x * z
    value = value + f * y * z

    # Linear terms
    value = value + g * x
    value = value + h * y
    value = value + i * z

    # Constant offset
    return value + j

# 📌 Step 4: Least-squares fit of the quadratic surface to the NIQE scores
observations = (x_data, y_data, z_data)
params, _ = curve_fit(surface_function, observations, score_data)

# 📌 Step 5: Name the coefficients and print them
a, b, c, d, e, f, g, h, i, j = params
print('🔹 Optimized Model Coefficients:')
print(f'   a={a}, b={b}, c={c}, d={d}, e={e}, f={f}, g={g}, h={h}, i={i}, j={j}')

# 📌 Step 6: 30-point axis per parameter across its observed range, combined
# into a 3D grid on which the fitted surface is evaluated
grid_points = 30
x_range = np.linspace(min(x_data), max(x_data), grid_points)
y_range = np.linspace(min(y_data), max(y_data), grid_points)
z_range = np.linspace(min(z_data), max(z_data), grid_points)

X, Y, Z = np.meshgrid(x_range, y_range, z_range)
Scores = surface_function((X, Y, Z), *params)

# 📌 Step 7: Grid location of the lowest predicted NIQE score
flat_position = np.argmin(Scores, axis=None)
optimal_idx = np.unravel_index(flat_position, Scores.shape)
optimal_x = X[optimal_idx]
optimal_y = Y[optimal_idx]
optimal_z = Z[optimal_idx]
optimal_score = Scores[optimal_idx]

# 📌 Step 8: Report the best settings found on the grid
print('✅ Optimal Conditions:')
print(f'   ap_aperturesize = {optimal_x}')
print(f'   ap_wd = {optimal_y}')
print(f'   ap_actualkv = {optimal_z}')
print(f'   Lowest Score (Best Quality) = {optimal_score}')

# 📌 Step 9: Slice the model at the mean accelerating voltage so it can be
# drawn as a surface over aperture size × working distance
z_fixed = np.mean(z_data)
X_plot, Y_plot = np.meshgrid(x_range, y_range)
voltage_plane = np.full_like(X_plot, z_fixed)
Z_plot = surface_function((X_plot, Y_plot, voltage_plane), *params)

# 📌 Step 10: Show measurements and fitted surface in one 3D figure
fig = plt.figure(figsize=(10, 7))
ax = fig.add_subplot(111, projection='3d')

# Measured scores (all voltages) as red markers
ax.scatter(x_data, y_data, score_data, color='red', label='Real Data')

# Fitted surface at the fixed voltage, semi-transparent so points stay visible
ax.plot_surface(X_plot, Y_plot, Z_plot, cmap='viridis', alpha=0.7)

# Axis labels and title
ax.set_xlabel('ap_aperturesize')
ax.set_ylabel('ap_wd')
ax.set_zlabel('Score')
ax.set_title(f'Surface Regression (Fixed ap_actualkv = {z_fixed:.2f})')

# Legend and display
ax.legend()
plt.show()
