In [5]:
#save images as HOG features
import os
import pandas as pd
from skimage.feature import hog
from skimage.io import imread
from skimage.color import rgb2gray

def extract_hog_features(image_folder, output_csv, 
                         orientations=9, pixels_per_cell=(8, 8), 
                         cells_per_block=(2, 2), filter_keyword="_grayscale"):
    features = []
    image_names = []
    
    for image_name in os.listdir(image_folder):
        if filter_keyword in image_name:
            image_path = os.path.join(image_folder, image_name)
            try:
                image = imread(image_path)
                
                if len(image.shape) == 3:
                    image = rgb2gray(image)
                hog_features = hog(image, 
                                   orientations=orientations, 
                                   pixels_per_cell=pixels_per_cell, 
                                   cells_per_block=cells_per_block, 
                                   block_norm='L2-Hys', 
                                   visualize=False)
                
                features.append(hog_features)
                image_names.append(image_name)
                print(f"Processed: {image_name}")
            except Exception as e:
                print(f"Error processing '{image_name}': {e}")

    df = pd.DataFrame(features)
    df.insert(0, 'image_name', image_names)
    
    df.to_csv(output_csv, index=False)
    print(f"HOG features saved to {output_csv}")

input_folder = 'train_new_ims' 
output_file = 'hog_features_grayscale.csv' 

extract_hog_features(image_folder=input_folder, output_csv=output_file, filter_keyword="_grayscale")


Processed: 9e1d819_augmented_grayscale.jpg
Processed: 7db1b9e_augmented_grayscale.jpg
Processed: b877c16_augmented_grayscale.jpg
Processed: 3e67b7f_grayscale.jpg
Processed: 0abffee_augmented_grayscale.jpg
Processed: 9339f5a_grayscale.jpg
Processed: 45dd1f6_grayscale.jpg
Processed: 1f4db69_grayscale.jpg
Processed: ba9261c_grayscale.jpg
Processed: 9de70f5_grayscale.jpg
Processed: 67472ea_grayscale.jpg
Processed: a5d64e1_augmented_grayscale.jpg
Processed: 198d523_augmented_grayscale.jpg
Processed: 22d3ba9_grayscale.jpg
Processed: 31a7a43_augmented_grayscale.jpg
Processed: ae3906d_augmented_grayscale.jpg
Processed: ced9869_grayscale.jpg
Processed: 22bda9e_augmented_grayscale.jpg
Processed: 49fe936_grayscale.jpg
Processed: 3b569be_grayscale.jpg
Processed: 710b109_augmented_grayscale.jpg
Processed: 5587d60_grayscale.jpg
Processed: 945e536_augmented_grayscale.jpg
Processed: 18690db_grayscale.jpg
Processed: 46c19f9_grayscale.jpg
Processed: 58adbbf_augmented_grayscale.jpg
Processed: 3f672be_gra

In [2]:
#save histogram features
import os
import pandas as pd
from skimage.color import rgb2gray
from skimage.io import imread
import numpy as np

def extract_and_save_histogram_features(image_folder, output_csv, bins=32, filter_keyword="_grayscale"):
    histogram_features = []
    image_names = []

    for image_name in os.listdir(image_folder):
        if filter_keyword in image_name:
            image_path = os.path.join(image_folder, image_name)
            try:
                image = imread(image_path)

                if len(image.shape) == 3:
                    image = rgb2gray(image)

                hist, _ = np.histogram(image.ravel(), bins=bins, range=(0, 1))
                hist = hist / hist.sum()  # Normalize histogram

                histogram_features.append(hist)
                image_names.append(image_name)
                print(f"Processed: {image_name}")
            except Exception as e:
                print(f"Error processing {image_name}: {e}")

    hist_df = pd.DataFrame(histogram_features)
    hist_df.insert(0, 'image_name', image_names)
    hist_df.to_csv(output_csv, index=False)
    print(f"Histogram features saved to {output_csv}")


input_folder = 'train_new_ims' 
output_file = 'histogram_features_grayscale.csv'  

extract_and_save_histogram_features(image_folder=input_folder, output_csv=output_file, filter_keyword="_grayscale")


Processed: 9e1d819_augmented_grayscale.jpg
Processed: 7db1b9e_augmented_grayscale.jpg
Processed: b877c16_augmented_grayscale.jpg
Processed: 3e67b7f_grayscale.jpg
Processed: 0abffee_augmented_grayscale.jpg
Processed: 9339f5a_grayscale.jpg
Processed: 45dd1f6_grayscale.jpg
Processed: 1f4db69_grayscale.jpg
Processed: ba9261c_grayscale.jpg
Processed: 9de70f5_grayscale.jpg
Processed: 67472ea_grayscale.jpg
Processed: a5d64e1_augmented_grayscale.jpg
Processed: 198d523_augmented_grayscale.jpg
Processed: 22d3ba9_grayscale.jpg
Processed: 31a7a43_augmented_grayscale.jpg
Processed: ae3906d_augmented_grayscale.jpg
Processed: ced9869_grayscale.jpg
Processed: 22bda9e_augmented_grayscale.jpg
Processed: 49fe936_grayscale.jpg
Processed: 3b569be_grayscale.jpg
Processed: 710b109_augmented_grayscale.jpg
Processed: 5587d60_grayscale.jpg
Processed: 945e536_augmented_grayscale.jpg
Processed: 18690db_grayscale.jpg
Processed: 46c19f9_grayscale.jpg
Processed: 58adbbf_augmented_grayscale.jpg
Processed: 3f672be_gra

  hist = hist / hist.sum()  # Normalize histogram


Processed: 91789c4_augmented_grayscale.jpg
Processed: 3b10ae3_grayscale.jpg
Processed: 8f07ee8_augmented_grayscale.jpg
Processed: b99d101_augmented_grayscale.jpg
Processed: b3f7c92_grayscale.jpg
Processed: 2c1416a_augmented_grayscale.jpg
Processed: 0ba15df_augmented_grayscale.jpg
Processed: 5385aac_augmented_grayscale.jpg
Processed: 2a809e9_augmented_grayscale.jpg
Processed: 8f0f06d_grayscale.jpg
Processed: 35a743d_grayscale.jpg
Processed: 49764d3_grayscale.jpg
Processed: 0413019_grayscale.jpg
Processed: 3213133_grayscale.jpg
Processed: 96a8144_grayscale.jpg
Processed: 4ff408a_augmented_grayscale.jpg
Processed: 9b1e708_augmented_grayscale.jpg
Processed: 97edcc7_augmented_grayscale.jpg
Processed: c1dbe9f_augmented_grayscale.jpg
Processed: bd27d22_augmented_grayscale.jpg
Processed: 7c3f5d1_augmented_grayscale.jpg
Processed: 911ffd6_grayscale.jpg
Processed: 06ecc48_augmented_grayscale.jpg
Processed: c1e06df_augmented_grayscale.jpg
Processed: a002d32_augmented_grayscale.jpg
Processed: 69f8

In [4]:
#combine the the hog and hist features into a single csv file
def combine_hog_histogram(hog_csv, histogram_csv, train_csv):
    hog_df = pd.read_csv(hog_csv)
    hist_df = pd.read_csv(histogram_csv)

    hog_df['image_name'] = hog_df['image_name'].str.replace('_gs', '', regex=False)
    hist_df['image_name'] = hist_df['image_name'].str.replace('_gs', '', regex=False)

    combined_df = pd.merge(hog_df, hist_df, on='image_name', how='inner')

    train_df = pd.read_csv(train_csv)
    final_df = pd.merge(combined_df, train_df, left_on='image_name', right_on='im_name', how='inner')

    X = final_df.iloc[:, 1:-1].values 
    y = final_df['label'].values     
    return X, y

def save_combined_features(hog_csv, histogram_csv, train_csv, output_csv):
    X, y = combine_hog_histogram(hog_csv, histogram_csv, train_csv)
    combined_df = pd.DataFrame(X)
    combined_df['label'] = y
    combined_df.to_csv(output_csv, index=False)
    print(f"Combined features saved to {output_csv}")

save_combined_features('hog_features.csv', 'histogram_features.csv', 'train.csv', 'combined_features.csv')


Combined features saved to combined_features.csv


In [10]:
combined_df = pd.read_csv('combined_features.csv')
print(combined_df.columns)


Index(['0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
       ...
       '8124', '8125', '8126', '8127', '8128', '8129', '8130', '8131', '8132',
       'label'],
      dtype='object', length=8134)
