<a href="https://colab.research.google.com/github/amitgal21/Final_Project/blob/main/dataset_statistics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

At this stage, we present all the code snippets that generate statistics for our dataset. The operations here are conducted using the data extracted from our segmentation codes. We utilize image processing algorithms with well-known libraries such as NumPy, Keras, scikit-learn, JSON, and others.


In [None]:
from google.colab import drive
drive.mount('/content/drive')
# Connect to Google Drive so that our dataset and our results files can be loaded

The purpose of the code is to calculate the percentage of bacterial area in grayscale images relative to the total area of the image using image processing techniques. This allows us to determine the ratio between the bacterial area and the total area of the image, thus estimating the quantity and size of bacteria present in the image.

In [None]:
# Code that computes the percentage of the bacterial area relative to the total image area
import cv2
import numpy as np
import os

def calculate_bacterial_area_percentage(image_path):
    """Print the share of pure-white pixels in the image at ``image_path``.

    The file is read as a single-channel grayscale image. Pixels whose value
    is exactly 255 are counted as bacteria, and the count is reported as a
    percentage of all pixels in the image.

    Args:
        image_path: Path of the image file to analyse.

    Returns:
        None. The result (or a load-error message) is printed.
    """
    mask = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

    # cv2.imread signals an unreadable file by returning None
    if mask is None:
        print(f"Error loading image: {image_path}")
        return

    # White pixels (value 255) over the total pixel count, as a percentage
    white_pixels = np.sum(mask == 255)
    pixel_count = mask.size
    share = (white_pixels / pixel_count) * 100

    file_name = os.path.basename(image_path)
    print(f"Bacterial area percentage in {file_name}: {share:.2f}%")

def process_directory_recursively(directory_path):
    """Report the bacterial area of every segmentation image under a directory.

    Walks ``directory_path`` and all of its sub-directories. A file is handed
    to ``calculate_bacterial_area_percentage`` only when its name contains
    "_segmentation" and ends with one of the supported image extensions
    (compared case-insensitively).

    Args:
        directory_path: Root directory to scan.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg', '.tif', '.tiff')

    for folder, _subfolders, filenames in os.walk(directory_path):
        for filename in filenames:
            # Skip anything that is not a segmentation mask
            if "_segmentation" not in filename:
                continue
            # Skip non-image files
            if not filename.lower().endswith(image_suffixes):
                continue
            calculate_bacterial_area_percentage(os.path.join(folder, filename))

# Run the function on the directory containing the segmented images
# NOTE(review): "Datset3" looks like a misspelling of "Dataset3"; it is left as-is
# because it has to match the actual folder name on Drive — confirm.
directory_path = '/content/drive/MyDrive/Part_B/Datset3/Learn'
# Prints one percentage line per segmentation image found under directory_path
process_directory_recursively(directory_path)



The provided code calculates the contrast of images in grayscale by computing the standard deviation of pixel intensities. It then recursively processes a directory, considering only common image formats. The contrast values are stored in a list and used to create a histogram using Matplotlib.

This code segment allows analysis of the distribution of contrast within the dataset and provides insights into variations in contrast among different images.

In [None]:
import cv2
import numpy as np
import os
import matplotlib.pyplot as plt

def calculate_image_contrast(image_path):
    """Return the contrast of the image at ``image_path``.

    The file is read as a single-channel grayscale image and its contrast is
    taken to be the standard deviation of the pixel intensities.

    Args:
        image_path: Path of the image file to analyse.

    Returns:
        The standard deviation of the pixel values, or ``None`` (after
        printing an error message) when the image cannot be loaded.
    """
    gray = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

    if gray is not None:
        # Spread of intensities around the mean is used as the contrast measure
        return np.std(gray)

    print(f"Error loading image: {image_path}")
    return None

def process_directory_recursively_for_contrast(directory_path):
    """Collect the contrast of every non-segmentation image under a directory.

    Walks ``directory_path`` and all of its sub-directories. Files whose name
    contains "_segmentation", and files without a supported image extension
    (compared case-insensitively), are ignored. Images that fail to load are
    skipped as well.

    Args:
        directory_path: Root directory to scan.

    Returns:
        A list with one contrast value per successfully loaded image, in the
        order the files were visited.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg', '.tif', '.tiff')
    collected = []

    for folder, _subfolders, filenames in os.walk(directory_path):
        for filename in filenames:
            is_mask = "_segmentation" in filename
            is_image = filename.lower().endswith(image_suffixes)
            if is_mask or not is_image:
                continue

            value = calculate_image_contrast(os.path.join(folder, filename))
            # None means the image could not be read; leave it out
            if value is not None:
                collected.append(value)

    return collected

# Update the directory path to your images location
# NOTE(review): "Datset3" looks like a misspelling of "Dataset3"; it is left as-is
# because it has to match the actual folder name on Drive — confirm.
directory_path = '/content/drive/MyDrive/Part_B/Datset3/Learn'
# One contrast value (std of grayscale intensities) per non-segmentation image
contrast_values = process_directory_recursively_for_contrast(directory_path)

# Create a histogram of the contrast values
plt.hist(contrast_values, bins=20, color='blue', edgecolor='black')
plt.title('Histogram of Image Contrast')
plt.xlabel('Contrast Value')
plt.ylabel('Frequency')
plt.show()



The code calculates the percentage of each image's area that is covered by bacteria. Each segmentation mask is loaded in grayscale, the white pixels (value 255, representing bacteria) are counted, and the count is divided by the total number of pixels in the image. The code recursively traverses every folder under the provided directory and performs the calculation only on files in a supported image format whose names contain "_segmentation" (i.e., the segmentation masks). The resulting bacterial area percentages are then used to create a histogram using the Matplotlib library.

In [None]:
import cv2
import numpy as np
import os
import matplotlib.pyplot as plt

def calculate_bacterial_area_percentage(image_path):
    """Return the percentage of pure-white pixels in the image at ``image_path``.

    The file is read as a single-channel grayscale image; pixels whose value
    is exactly 255 are treated as bacteria.

    Args:
        image_path: Path of the image file to analyse.

    Returns:
        The white-pixel count divided by the total pixel count, times 100,
        or ``None`` (after printing an error message) when the image cannot
        be loaded.
    """
    mask = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

    # cv2.imread signals an unreadable file by returning None
    if mask is None:
        print(f"Error loading image: {image_path}")
        return None

    white_pixels = np.sum(mask == 255)  # bacteria are the white pixels
    pixel_count = mask.size  # total area of the image in pixels

    return (white_pixels / pixel_count) * 100

def process_directory_recursively(directory_path):
    """Plot a histogram of bacterial area percentages for a directory tree.

    Walks ``directory_path`` and all of its sub-directories, computes the
    bacterial area percentage of every file whose name contains
    "_segmentation" and ends with a supported image extension (compared
    case-insensitively), and shows the distribution as a 20-bin histogram.
    Images that fail to load are left out.

    Args:
        directory_path: Root directory to scan.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg', '.tif', '.tiff')
    percentages = []

    for folder, _subfolders, filenames in os.walk(directory_path):
        # Keep only the segmentation masks stored in a supported image format
        mask_names = [
            name for name in filenames
            if "_segmentation" in name and name.lower().endswith(image_suffixes)
        ]
        for name in mask_names:
            value = calculate_bacterial_area_percentage(os.path.join(folder, name))
            if value is None:
                continue  # unreadable image
            percentages.append(value)

    # Everything has been collected; draw the distribution
    plt.hist(percentages, bins=20, color='blue', alpha=0.7)
    plt.title('Distribution of Bacterial Area Percentages')
    plt.xlabel('Bacterial Area Percentage')
    plt.ylabel('Frequency')
    plt.show()

# Example directory path
# NOTE(review): "Datset3" looks like a misspelling of "Dataset3"; it is left as-is
# because it has to match the actual folder name on Drive — confirm.
directory_path = '/content/drive/MyDrive/Part_B/Datset3/Learn'
# Collects the percentages of all segmentation images and shows their histogram
process_directory_recursively(directory_path)



The provided code performs texture correlation analysis. It calculates the texture correlation for each image using the Gray Level Co-occurrence Matrix (GLCM) method and stores the results in a DataFrame. Finally, it saves this DataFrame to a CSV file.







In [None]:

# Mount Google Drive at /content/drive; the paths used below are read from and written to it
from google.colab import drive
drive.mount('/content/drive')

# Import the necessary libraries
import os

import pandas as pd  # For data manipulation and saving
from skimage import io, color  # For image loading and conversion

# For texture analysis. scikit-image 0.19 renamed greycomatrix/greycoprops to
# graycomatrix/graycoprops and later releases dropped the old spelling, so
# import whichever pair exists and expose both spellings.
try:
    from skimage.feature import graycomatrix, graycoprops
    greycomatrix, greycoprops = graycomatrix, graycoprops
except ImportError:
    from skimage.feature import greycomatrix, greycoprops  # For texture analysis
    graycomatrix, graycoprops = greycomatrix, greycoprops

# Function to calculate the texture correlation of an image
def calculate_correlation(image_path):
    """Return the GLCM texture correlation of the image at ``image_path``.

    The image is reduced to an 8-bit grayscale array, a gray-level
    co-occurrence matrix is built for horizontally adjacent pixels
    (distance 1, angle 0, 256 levels, symmetric and normalised), and the
    matrix's 'correlation' property is returned.

    Args:
        image_path: Path of an image readable by ``skimage.io.imread``.

    Returns:
        The correlation value for the single distance/angle pair.

    Raises:
        Any exception raised by scikit-image for unreadable files or
        unsupported array layouts is propagated to the caller.
    """
    # Local import: the only name this function needs beyond the notebook's
    # existing imports, and it comes from the same package (scikit-image).
    from skimage.util import img_as_ubyte

    # Load the image from the specified path
    image = io.imread(image_path)

    if image.ndim > 2:
        # rgb2gray accepts exactly three channels, so blend an alpha channel
        # away first instead of letting RGBA files fail.
        if image.shape[-1] == 4:
            image = color.rgba2rgb(image)
        # rgb2gray yields floats in [0, 1]; scale back to the 0..255 range
        gray_image = (color.rgb2gray(image) * 255).astype('uint8')
    elif image.dtype == 'uint8':
        # Already 8-bit grayscale: use as-is, no float round-trip needed
        gray_image = image
    else:
        # 16-bit or float grayscale: rescale to 8 bits. Dividing by 255 and
        # casting would wrap around (uint16) or collapse to zero (float).
        gray_image = img_as_ubyte(image)

    # Calculate the GLCM and the correlation property
    glcm = greycomatrix(gray_image, distances=[1], angles=[0], levels=256, symmetric=True, normed=True)
    return greycoprops(glcm, 'correlation')[0, 0]

# Function to process multiple images in a directory and calculate their texture correlation
def process_images(directory_path):
    """
    Processes a directory of images, calculating and collecting their texture correlation values.

    Walks ``directory_path`` and all of its sub-directories and calls
    ``calculate_correlation`` on every file with a supported image extension
    (compared case-insensitively). An image that raises is reported with a
    printed message and skipped, so one bad file does not abort the run.

    Args:
        directory_path: Root directory to scan.

    Returns:
        A DataFrame with the columns 'Image' (full path) and 'Correlation',
        one row per successfully processed image. The two columns are present
        even when no image was processed, so code that reads
        ``df['Correlation']`` or the saved CSV keeps working on an empty result.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg', '.tif', '.tiff')
    data = []  # One {'Image', 'Correlation'} record per processed image

    # Walk through the directory, accessing each image file
    for root, dirs, files in os.walk(directory_path):
        for file in files:
            if not file.lower().endswith(image_suffixes):
                continue

            full_path = os.path.join(root, file)  # Get the full image path
            try:
                correlation = calculate_correlation(full_path)
            except Exception as e:
                # Deliberate best effort: report the failing file and move on
                print(f"Error processing {full_path}: {e}")
            else:
                data.append({'Image': full_path, 'Correlation': correlation})

    # Explicit columns keep the schema stable when ``data`` is empty
    return pd.DataFrame(data, columns=['Image', 'Correlation'])

# Specify the directory path containing the images to be processed
# NOTE(review): "Datset3" looks like a misspelling of "Dataset3"; it is left as-is
# because it has to match the actual folder name on Drive — confirm.
directory_path = '/content/drive/MyDrive/Part_B/Datset3/Learn'

# Process the images and obtain the DataFrame with correlations
df = process_images(directory_path)

# Define the path for saving the DataFrame as a CSV file
# NOTE(review): this path uses "My Drive" while the dataset path above uses
# "MyDrive" — confirm that both spellings resolve on the mounted Drive.
csv_path = '/content/drive/My Drive/image_correlations.csv'
df.to_csv(csv_path, index=False)  # Save the DataFrame to CSV
print(f"Saved the correlation data to {csv_path}")  # Confirmation message


In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Read the CSV file into a DataFrame
# (same path the texture-correlation cell writes its results to)
df = pd.read_csv('/content/drive/My Drive/image_correlations.csv')

# Create a histogram of correlation values
plt.figure(figsize=(10, 6))
# 30 bins over the 'Correlation' column, one value per processed image
plt.hist(df['Correlation'], bins=30, color='blue', alpha=0.7)
plt.title('Distribution of Texture Correlation in Images')
plt.xlabel('Correlation')
plt.ylabel('Frequency')
plt.grid(True)
plt.show()