# Biophotonic chips quality identification

## Import the required packages

In [2]:
import cv2
import matplotlib.pyplot as plt
import os
import pandas as pd
import numpy as np
import seaborn as sns

In [None]:
class Automatic_SampleSelection:
    """Find roughly square samples in an image and crop a central ROI from each.

    The image is thresholded on its red channel, external contours are
    extracted, and every contour whose bounding box passes the size and
    squareness test is treated as a sample. For each sample a centred ROI is
    written to ``output_folder`` as ``ROI<n>.jpg``, the sample and ROI are
    shown with matplotlib, and both rectangles are drawn onto ``self.image``.
    """

    # Threshold applied to the red channel when building the binary mask.
    RED_THRESHOLD = 127
    # Minimum bounding-box width and height, in pixels, for a sample.
    MIN_SIDE = 50
    # Largest allowed |width - height|, in pixels, for a sample.
    MAX_SIDE_DIFF = 15

    def __init__(self, image_path, output_folder, SelectSize=0.1):
        """Load the image and remember where ROI crops should be written.

        Args:
            image_path: Path of the image to analyse (read with ``cv2.imread``).
            output_folder: Folder that receives the ``ROI<n>.jpg`` files.
            SelectSize: Half-size of the ROI as a fraction of the sample's
                bounding box (0.1 gives an ROI spanning 20% of each side).

        Raises:
            OSError: If the image cannot be read.
        """
        self.image = cv2.imread(image_path)
        if self.image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise OSError(f'Could not read image: {image_path}')
        self.output_folder = output_folder
        self.roi_indices = []
        self.SelectSize = SelectSize

    @staticmethod
    def _roi_geometry(x, y, w, h, select_size):
        """Return (center_x, center_y, half_width, half_height) of the ROI for a box."""
        return x + w // 2, y + h // 2, int(select_size * w), int(select_size * h)

    def Auto_Selection(self):
        """Detect samples, save and display their ROIs, and annotate the image.

        Side effects: writes one JPEG per ROI into ``self.output_folder``,
        writes ``image_coord.csv`` into the current working directory, draws
        on ``self.image`` in place, appends one entry per ROI to
        ``self.roi_indices`` and opens matplotlib figures.

        Raises:
            OSError: If an ROI image cannot be written.
        """
        # OpenCV stores colour images as BGR, so index 2 is the red channel.
        red_channel = self.image[:, :, 2]
        _, thresh = cv2.threshold(red_channel, self.RED_THRESHOLD, 255, cv2.THRESH_BINARY)

        # findContours returns (contours, hierarchy) or (image, contours,
        # hierarchy) depending on the OpenCV version; [-2] is the contour
        # list in both layouts.
        contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

        # Order the bounding boxes by y first, then x (row-major pixel index
        # of the top-left corner). Each box is computed only once.
        image_width = self.image.shape[1]
        boxes = sorted(
            (cv2.boundingRect(contour) for contour in contours),
            key=lambda box: box[1] * image_width + box[0],
        )

        # cv2.imwrite does not create missing folders.
        if self.output_folder:
            os.makedirs(self.output_folder, exist_ok=True)

        centerX = []
        centerY = []
        height = []
        width = []

        for x, y, w, h in boxes:
            # Keep only boxes that are large enough and close to square.
            if w < self.MIN_SIDE or h < self.MIN_SIDE or abs(w - h) > self.MAX_SIDE_DIFF:
                continue

            cen_x, cen_y, wid, hei = self._roi_geometry(x, y, w, h, self.SelectSize)
            roi_number = len(self.roi_indices)

            Sample = self.image[y:y + h, x:x + w]
            # Clamp the start indices: a negative start would make NumPy
            # count from the opposite edge and return the wrong region.
            roi = self.image[max(cen_y - hei, 0):cen_y + hei,
                             max(cen_x - wid, 0):cen_x + wid]

            centerX.append(cen_x)
            centerY.append(cen_y)
            height.append(hei)
            width.append(wid)

            # Save the ROI before anything is drawn onto the image.
            filename = 'ROI{}.jpg'.format(roi_number)
            output_path = os.path.join(self.output_folder, filename)
            if not cv2.imwrite(output_path, roi):
                raise OSError(f'Could not write ROI image: {output_path}')

            # Show the full sample, then its ROI, each labelled with its number.
            plt.imshow(cv2.cvtColor(Sample, cv2.COLOR_BGR2RGB))
            plt.title('sample {}'.format(roi_number))
            plt.show()

            plt.imshow(cv2.cvtColor(roi, cv2.COLOR_BGR2RGB))
            plt.title('ROI {}'.format(roi_number))
            plt.show()

            # Outline the sample and its ROI on the image, and label the
            # sample with its number just above the box.
            cv2.rectangle(self.image, (x, y), (x + w, y + h), (0, 255, 0), 2)
            cv2.rectangle(self.image, (cen_x - wid, cen_y - hei),
                          (cen_x + wid, cen_y + hei), (0, 0, 255), 2)
            cv2.putText(self.image, str(roi_number), (x, y - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 0, 0), 2)

            # The list length is the running ROI counter.
            self.roi_indices.append(1)

        # Show the annotated image and record the ROI geometry.
        plt.imshow(cv2.cvtColor(self.image, cv2.COLOR_BGR2RGB))
        plt.show()
        image_coor = pd.DataFrame({'centerX': centerX, 'centerY': centerY,
                                   'height': height, 'width': width})
        image_coor.to_csv('image_coord.csv')
        print(image_coor)

In [None]:
class ImageProcessor:
    """Load one image, derive a per-pixel "wavelength" value and plot it.

    Construction does all the work: the image is read, converted to RGB
    channel order, every pixel is mapped to a wavelength estimate, and a
    violin plot of those estimates is shown. ``self.df`` holds the resulting
    ``'Wavelength'`` Series (one value per pixel, in row-major order).
    """

    def __init__(self, image_path):
        """Read ``image_path`` and process it immediately.

        Raises:
            OSError: If the image cannot be read.
        """
        self.image_path = image_path
        self.image_name = os.path.basename(image_path)
        self.image = cv2.imread(image_path)
        if self.image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise OSError(f"Could not read image: {image_path}")
        self.rgb_data = self.extract_RGB_data()
        self.df = self.process_image()

    def extract_RGB_data(self):
        """Return the image as an (H, W, 3) array in R, G, B channel order.

        ``self.image`` is in OpenCV's B, G, R order, so the last axis is
        reversed. Stacking the channels and reshaping, as was done
        previously, does not transpose the data and mixed values from
        different pixels into one triplet.
        """
        return self.image[:, :, ::-1].copy()

    def rgb_to_wavelength(self, rgb):
        """Map one RGB triplet to ``(wavelength, error)``.

        The wavelength is ``(max - min) * 100 + 400`` computed on the
        channel values scaled to [0, 1]. The error is 100 times the standard
        deviation of the unscaled channel values.
        """
        normalized_rgb = [val / 255.0 for val in rgb]

        max_val = max(normalized_rgb)
        min_val = min(normalized_rgb)
        wavelength = (max_val - min_val) * 100 + 400

        # NOTE: the spread is taken on the raw values, not the normalised ones.
        std_dev = np.std(rgb)
        error = std_dev * 100

        return wavelength, error

    @staticmethod
    def _pixel_wavelengths(rgb_data):
        """Vectorised ``rgb_to_wavelength`` over every pixel, in row-major order."""
        pixels = np.asarray(rgb_data, dtype=np.float64)
        pixels = pixels.reshape(-1, pixels.shape[-1])
        normalized = pixels / 255.0
        wavelengths = (normalized.max(axis=1) - normalized.min(axis=1)) * 100 + 400
        errors = pixels.std(axis=1) * 100
        return wavelengths, errors

    def process_image(self):
        """Compute per-pixel wavelengths, show a violin plot, return the Series.

        Returns:
            The ``'Wavelength'`` column of the per-pixel DataFrame. The
            ``'Error'`` column is computed but not returned.
        """
        # One NumPy pass instead of a Python loop over every pixel.
        wavelengths, errors = self._pixel_wavelengths(self.rgb_data)

        df = pd.DataFrame({'Wavelength': wavelengths, 'Error': errors})

        # Plot the wavelength distribution, titled with the file name.
        ax = sns.violinplot(x=df['Wavelength'])
        ax.set_title(self.image_name)
        plt.show()
        return df['Wavelength']

class FolderProcessor:
    """Run ``ImageProcessor`` on every image in a folder and compare the results.

    ``self.overall_data`` is a DataFrame with one column of per-pixel
    wavelengths per image. Column names are the file names with their first
    three characters and their four-character extension removed.
    """

    def __init__(self, folder_path):
        """Process all ``.jpg`` / ``.png`` files found directly in ``folder_path``."""
        self.folder_path = folder_path
        self.overall_data = self.process_images()

    @staticmethod
    def _combine_columns(columns):
        """Join a ``{name: Series}`` mapping into one DataFrame, padding with NaN."""
        if not columns:
            return pd.DataFrame()
        # Assigning Series one by one aligns on the first column's index and
        # silently drops the extra rows of larger images. concat keeps them all.
        return pd.concat(columns, axis=1)

    def process_images(self):
        """Build the combined wavelength table and show a violin plot of it.

        Returns:
            DataFrame with one column per image. It is empty when the folder
            holds no matching files, and no plot is drawn in that case.
        """
        columns = {}
        # os.listdir order is arbitrary; sort for a reproducible column order.
        for filename in sorted(os.listdir(self.folder_path)):
            if not filename.endswith((".jpg", ".png")):
                continue
            image_path = os.path.join(self.folder_path, filename)
            image_processor = ImageProcessor(image_path)
            # Drop the 3-character prefix (e.g. "ROI") and the extension.
            columns[filename[3:-4]] = image_processor.df

        overall_data = self._combine_columns(columns)
        if overall_data.empty:
            return overall_data

        # Create a larger figure
        plt.figure(figsize=(10, 8))

        # Plot all the data
        ax = sns.violinplot(data=overall_data, inner='stick')
        ax.set(xlabel='Sample', ylabel='Wavelength')
        plt.show()

        return overall_data


In [None]:
class ColorUniformityAnalyzer:
    """Score how uniform the colour of a single image is.

    The score is the mean, over the three colour channels, of the standard
    deviation of that channel across all pixels.
    """

    def __init__(self, image_path):
        """Remember the image path; the image is read when the score is requested."""
        self.image_path = image_path

    @staticmethod
    def _score_pixels(pixels):
        """Return the mean of the per-channel standard deviations of an (N, 3) array."""
        return np.mean(np.std(pixels, axis=0))

    def calculate_color_uniformity(self):
        """Compute, plot, print and return the uniformity score of the image.

        Returns:
            The score, which is also stored as ``self.uniformity_score``.

        Raises:
            OSError: If the image cannot be read.
        """
        image = cv2.imread(self.image_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise OSError(f'Could not read image: {self.image_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # One row per pixel, one column per colour channel.
        pixels = image.reshape(-1, 3)

        # Must be set before plotting: the histogram annotation reads it.
        self.uniformity_score = self._score_pixels(pixels)

        self._plot_histogram(pixels)

        print('Color Uniformity Score:', self.uniformity_score)

        return self.uniformity_score

    def _plot_histogram(self, pixels):
        """Plot a 256-bin histogram of all channel values, annotated with the score."""
        file_name = os.path.basename(self.image_path)

        # All three channels are pooled into a single histogram.
        hist, bins = np.histogram(pixels.flatten(), bins=256, range=[0, 256])

        plt.figure()
        plt.title('Color Histogram - ' + file_name)
        plt.xlabel('Pixel Value')
        plt.ylabel('Frequency')
        plt.plot(hist, color='gray')
        plt.xlim([0, 256])
        # Place the annotation at 90% of the tallest bin.
        plt.text(10, np.max(hist) - np.max(hist) * 0.1, f'Uniformity Score: {self.uniformity_score:.2f}', fontsize=12, color='black')
        plt.show()
        
class FolderProcessorUniformaty:
    """Score every image in a folder with ``ColorUniformityAnalyzer`` and save a CSV.

    The resulting table has a ``'File Name'`` column and one score column
    named after the selection size, and is written to
    ``<output_dir>/uniformity_df_<SelectSize>.csv``.
    """

    # Output location used when the caller does not supply one.
    DEFAULT_OUTPUT_DIR = 'C:/Users/james/Desktop/Surface plasma/Uniformaty_data/'

    def __init__(self, folder_path, automatic_selection, output_dir=DEFAULT_OUTPUT_DIR):
        """Process ``folder_path`` immediately.

        Args:
            folder_path: Folder containing the ``.jpg`` / ``.png`` images to score.
            automatic_selection: Object whose ``SelectSize`` attribute labels
                the score column and the CSV file.
            output_dir: Existing folder that receives the CSV file.
        """
        self.folder_path = folder_path
        self.select_size = automatic_selection.SelectSize
        self.output_dir = output_dir
        self.overall_data = self.process_images()

    def process_images(self):
        """Score all images, write the CSV file and return the table."""
        fileNames = []  # image names without extension
        uniformity = []  # matching uniformity scores

        # os.listdir order is arbitrary; sort for a reproducible row order.
        for filename in sorted(os.listdir(self.folder_path)):
            if not filename.endswith((".jpg", ".png")):
                continue
            image_path = os.path.join(self.folder_path, filename)

            # The analyzer prints the score itself, so it is not printed again here.
            analyzer = ColorUniformityAnalyzer(image_path)
            uniformity_score = analyzer.calculate_color_uniformity()

            fileNames.append(os.path.splitext(filename)[0])
            uniformity.append(uniformity_score)

        # The score column is named after the selection size so that the CSVs
        # of different sizes can later be combined into one table.
        df = pd.DataFrame({'File Name': fileNames, str(self.select_size): uniformity})

        csv_name = 'uniformity_df_' + str(self.select_size) + '.csv'
        df.to_csv(os.path.join(self.output_dir, csv_name), index=False)

        return df

The code above is a Python implementation of image processing and analysis tasks for a scientific setting. It supports automated sample selection, wavelength analysis, and color uniformity assessment of images. The classes and their functionality are described below:

1. `Automatic_SampleSelection`: This class addresses the automated selection of samples from an input image using specific criteria. It undertakes essential steps, such as extracting the red channel from the image, applying thresholding techniques to produce a binary image, and subsequently identifying contours. By considering size and aspect ratio, it proceeds to select rectangular regions of interest (ROIs) that satisfy predefined conditions. These ROIs are saved as individual images and visualized using the widely adopted `matplotlib.pyplot` library.

2. `ImageProcessor`: Within this class, the provided code encompasses the processing of a single image. It effectively extracts the RGB data and performs conversions to wavelength values. This step is followed by the calculation of dominant wavelengths and error estimations, providing insight into the accuracy of the wavelength estimation process. The resultant wavelength and error data are organized in a structured manner using a pandas dataframe. Additionally, the visualization of the wavelength distribution is achieved through the utilization of the expressive `seaborn` library, employing a violin plot.

3. `FolderProcessor`: The role of this class lies in managing multiple images within a specified folder. It engages in a systematic iteration over the files contained within the designated directory. By employing the capabilities of the `ImageProcessor` class, each image is individually processed, and its RGB data is extracted and converted into wavelength values. The collective wavelength data for all images is then structured within a pandas dataframe. Finally, an informative violin plot is generated to illustrate the overall distribution of wavelengths across the entire collection of images.

4. `ColorUniformityAnalyzer`: This class offers a comprehensive means of assessing color uniformity in individual images. The initial step involves loading the image data and subsequently reshaping it into a flattened array. By calculating the standard deviation of color values, an evaluation of color uniformity is obtained. The mean of the standard deviations across the color channels is utilized as a representative uniformity score. Furthermore, the code includes the generation of a histogram plot depicting pixel values, with the resulting uniformity score prominently displayed as text, thereby enhancing the visual representation of the analysis.

5. `FolderProcessorUniformaty`: This class extends the functionality of the folder processing operation to encompass the evaluation of color uniformity across a collection of images. It iterates through the files within the designated folder, employing the `ColorUniformityAnalyzer` class to calculate the color uniformity score for each image. The resulting uniformity scores, along with the corresponding file names, are collected and organized within lists. Subsequently, a pandas dataframe is constructed using these lists, facilitating further analysis and interpretation. Finally, the resulting dataframe is preserved in a CSV file, ensuring reproducibility and ease of access to the collected uniformity data.

In conclusion, this codebase serves as a valuable academic resource, providing a practical implementation of image processing and analysis techniques. By automating sample selection, facilitating wavelength analysis, and enabling color uniformity assessment, it enhances research capabilities within scientific domains. The code embraces well-established libraries, such as `matplotlib.pyplot`, `seaborn`, and `pandas`, which are widely recognized within the academic community for their utility in data visualization, analysis, and storage.

## Execute the code and analyze the data

In [None]:
from Automatic_SampleSelection import *
import os
import pandas as pd

# Source photograph and the folder that receives the cropped ROI images
image_path = 'C:/Users/james/Desktop/Surface plasma/Oil_water_1/water001.JPG'
output_folder = 'C:/Users/james/Desktop/Surface plasma/Oil_water_1/ROIs_1'

# Sweep SelectSize from 0.10 to 0.50 in steps of 0.01 to test its effect
Selectsize_li = [percent / 100 for percent in range(10, 51)]
for num in Selectsize_li:
    # Detect the samples and write one ROI image per sample for this size
    analyzer = Automatic_SampleSelection(image_path, output_folder, SelectSize=num)
    analyzer.Auto_Selection()

    # Score the ROI images that were just written
    folder_path = "C:/Users/james/Desktop/Surface plasma/Oil_water_1/ROIs_1"
    folder_processor = FolderProcessorUniformaty(folder_path, analyzer)

    # Keep and display the table of scores for this size
    overall_data = folder_processor.overall_data
    print(overall_data)


### The effect of the selection size

In [None]:
import glob
import pandas as pd

folder_path = "C:/Users/james/Desktop/Surface plasma/Uniformaty_data"  # Specify the folder path where CSV files are located

# Get the CSV file paths in the folder; sorted so the load order does not
# depend on the file system
csv_files = sorted(glob.glob(folder_path + "/*.csv"))
if not csv_files:
    # pd.concat would otherwise fail with "No objects to concatenate"
    raise FileNotFoundError(f"No CSV files found in {folder_path}")

# Read each CSV file into its own DataFrame
dfs = [pd.read_csv(csv_file) for csv_file in csv_files]

# Concatenate all DataFrames into a single DataFrame
combined_df = pd.concat(dfs)

# Each CSV contributes one score column; after concatenation the other
# columns are NaN for its rows, so summing per file name collapses the rows
# into one row per file with all score columns filled.
merged_df = combined_df.groupby('File Name').sum()

# Reset the index to make the 'File Name' column a regular column
merged_df = merged_df.reset_index()

# Order the score columns by numeric selection size rather than by the order
# the files happened to be loaded in. Headers that are not numbers keep their
# load order.
size_columns = list(merged_df.columns[1:])
try:
    size_columns = sorted(size_columns, key=float)
    sizes_are_numeric = True
except ValueError:
    sizes_are_numeric = False
merged_df = merged_df[['File Name'] + size_columns]

print(merged_df)

# Visualize the DataFrame

import matplotlib.pyplot as plt
import numpy as np

# Plot against the real selection sizes when the headers are numeric, so the
# x axis is a proper numeric scale instead of one category per column.
if sizes_are_numeric:
    x_axis = np.array([float(column) for column in size_columns])
else:
    x_axis = np.array(size_columns)
roi_names = merged_df['File Name']

fig, ax = plt.subplots(figsize=(15, 10))
for i, roi in enumerate(roi_names):
    # One line per ROI: its score at every selection size
    values = merged_df.iloc[i, 1:].values.flatten()
    ax.plot(x_axis, values, 'o--', alpha=0.8, label=roi)

ax.set_xlabel('Selection Size')
ax.set_ylabel('Uniformity score')
ax.set_title('The effect of selection size on Uniformaty score')
ax.legend()

plt.show()


In [None]:
import glob
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np

%matplotlib notebook

folder_path = "C:/Users/james/Desktop/Surface plasma/Uniformaty_data"  # Specify the folder path where CSV files are located

# Get the CSV file paths in the folder; sorted so the load order does not
# depend on the file system
csv_files = sorted(glob.glob(folder_path + "/*.csv"))
if not csv_files:
    # pd.concat would otherwise fail with "No objects to concatenate"
    raise FileNotFoundError(f"No CSV files found in {folder_path}")

# Read each CSV file into its own DataFrame
dfs = [pd.read_csv(csv_file) for csv_file in csv_files]

# Concatenate all DataFrames into a single DataFrame
combined_df = pd.concat(dfs)

# Each CSV contributes one score column; summing per file name collapses the
# concatenated rows into one row per file with all score columns filled.
merged_df = combined_df.groupby('File Name').sum()

# Reset the index to make the 'File Name' column a regular column
merged_df = merged_df.reset_index()

# Order the score columns by numeric selection size so the surface runs along
# increasing size. Headers that are not numbers keep their load order and are
# plotted by position.
size_columns = list(merged_df.columns[1:])
try:
    size_columns = sorted(size_columns, key=float)
    x_axis = np.array([float(column) for column in size_columns])
except ValueError:
    x_axis = np.arange(len(size_columns))

# Visualize the DataFrame in 3D

fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(111, projection='3d')

# Files are plotted by row position along the y axis
y_axis = np.arange(len(merged_df['File Name']))
X, Y = np.meshgrid(x_axis, y_axis)
Z = merged_df[size_columns].to_numpy(dtype=float)

surface = ax.plot_surface(X, Y, Z, cmap='viridis')

ax.set_xlabel('Selection Size')
ax.set_ylabel('File Name')
ax.set_zlabel('Uniformity Score')
ax.set_title('The effect of selection size on Uniformity score')

# Add interactive rotation
def update_plot(elev, azim):
    ax.view_init(elev=elev, azim=azim)


def _on_motion(event):
    # xdata / ydata are None while the pointer is outside the axes; passing
    # None on to view_init is not a real viewing angle (NOTE(review): per the
    # matplotlib docs it falls back to the initial view - confirm), so those
    # events are ignored.
    if event.inaxes is ax and event.xdata is not None and event.ydata is not None:
        update_plot(event.ydata, event.xdata)


# Connect the handler to the mouse-move event
fig.canvas.mpl_connect('motion_notify_event', _on_motion)

plt.show()
