In [None]:
# !pip install opencv-python


In [None]:
# !pip install pytesseract


In [9]:
# !pip install opencv-python
# !pip install scikit-image
# !pip install pytesseract


In [8]:
import os
import cv2
from skimage import color
import pytesseract

def extract_features(image_path):
    # Check if file exists
    if not os.path.exists(image_path):
        print(f"Error: {image_path} does not exist")
        return None
    
    # Load image
    img = cv2.imread(image_path)
    
    # Check if image is loaded successfully
    if img is None:
        print(f"Error: Unable to load {image_path}")
        return None
    
    # Preprocess image (resize, convert to grayscale, etc.)
    img_resized = cv2.resize(img, (224, 224))
    img_gray = cv2.cvtColor(img_resized, cv2.COLOR_BGR2GRAY)
    
    # Color features
    mean_color = img_resized.mean(axis=(0, 1))
    
    # Shape features (example using contours)
    contours, _ = cv2.findContours(img_gray, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    num_contours = len(contours)
    
    # Edge features (example using Canny edge detector)
    edges = cv2.Canny(img_gray, threshold1=30, threshold2=100)
    num_edges = edges.sum()
    
    # Text features using OCR
    text = pytesseract.image_to_string(img_gray)
    
    # Combine features into a dictionary
    features = {
        'mean_color': mean_color.tolist(),
        'num_contours': num_contours,
        'num_edges': num_edges,
        'text': text
    }
    
    return features

def process_images_in_folder(folder_path):
    # Check if folder exists
    if not os.path.isdir(folder_path):
        print(f"Error: {folder_path} is not a valid directory")
        return
    
    # Get list of all files in the folder
    image_files = [f for f in os.listdir(folder_path) if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
    
    # Dictionary to store features of all images
    all_features = {}
    
    # Iterate over all image files and extract features
    for image_file in image_files:
        image_path = os.path.join(folder_path, image_file)
        features = extract_features(image_path)
        if features is not None:
            all_features[image_file] = features
    
    return all_features

# Example usage:
folder_path = 'Costco Water -samples'
features_of_all_images = process_images_in_folder(folder_path)
print(features_of_all_images)


{'0.jpg': {'mean_color': [119.8746412627551, 118.12143255739795, 129.38588169642858], 'num_contours': 1, 'num_edges': 1379040, 'text': ''}, '1.jpg': {'mean_color': [120.31483179209184, 118.46233258928571, 129.44086814413265], 'num_contours': 1, 'num_edges': 1453755, 'text': ''}, '10.jpg': {'mean_color': [119.33934550382654, 117.54308832908163, 128.58235012755102], 'num_contours': 1, 'num_edges': 1320645, 'text': ''}, '100.jpg': {'mean_color': [107.66802853954081, 108.69232302295919, 119.9598612882653], 'num_contours': 1, 'num_edges': 1001130, 'text': ''}, '101.jpg': {'mean_color': [107.86571269132654, 109.13089923469387, 119.79318797831633], 'num_contours': 1, 'num_edges': 992715, 'text': ''}, '102.jpg': {'mean_color': [107.99029416454081, 109.45792809311224, 119.79356664540816], 'num_contours': 1, 'num_edges': 981495, 'text': ''}, '103.jpg': {'mean_color': [108.30929129464286, 110.03617267219387, 120.25946667729592], 'num_contours': 1, 'num_edges': 1021275, 'text': ''}, '104.jpg': {'m

In [2]:
import csv

def save_features_to_csv(features_dict, folder_path):
    # Set the output CSV path to be in the same folder as your images
    output_csv_path = os.path.join(folder_path, 'extracted_features.csv')
    
    # Open CSV file for writing
    with open(output_csv_path, mode='w', newline='') as file:
        # Create CSV writer
        writer = csv.writer(file)

        # Write header row to CSV file
        writer.writerow(["Image", "Mean_Color", "Num_Contours", "Num_Edges", "Text"])

        # Iterate over features_dict and write each row to the CSV file
        for image, features in features_dict.items():
            # Prepare row data
            row = [
                image,
                features['mean_color'],
                features['num_contours'],
                features['num_edges'],
                features['text'].replace('\n', ' ')  # Replace newline characters in text to avoid breaking the CSV format
            ]
            # Write row to CSV file
            writer.writerow(row)

# Usage example:
folder_path = 'Costco Water -samples'  # This should be the path to your images
features_of_all_images = process_images_in_folder(folder_path)  # You already have this line in your code
save_features_to_csv(features_of_all_images, folder_path)  # Call the function to save features to CSV
