In [None]:
import cv2
import numpy as np
import pandas as pd
import os
from skimage.feature import hog, local_binary_pattern
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from concurrent.futures import ProcessPoolExecutor

In [None]:
# Directory that the per-row 'filename' values are joined onto in later cells.
image_directory = 'data'

# Label table; later cells read its 'filename' and 'label' columns.
labels_df = pd.read_csv('label.csv')

In [None]:
def extract_edges(image):
    """Return the Canny edge map of ``image`` as a flat 1-D array.

    The detector is run with hysteresis thresholds 100 (lower) and 200 (upper).
    """
    return cv2.Canny(image, 100, 200).flatten()

# Function to extract ORB features
def extract_orb_features(image, max_features=128):
    """Return a fixed-length vector of ORB descriptors for ``image``.

    Args:
        image: Image handed to ``detectAndCompute`` of a default ORB detector.
        max_features: Number of descriptor rows kept in the output.

    Returns:
        The flattened descriptor matrix, truncated to the first
        ``max_features`` rows or padded below with zero rows so that it has
        exactly ``max_features`` rows. When the detector yields no descriptors
        at all, a zero vector of ``max_features * 32`` entries is returned.
    """
    _, desc = cv2.ORB_create().detectAndCompute(image, None)

    # No keypoints found: fall back to an all-zero vector.
    if desc is None:
        return np.zeros(max_features * 32)

    n_found = desc.shape[0]
    if n_found > max_features:
        # Too many descriptors: keep only the leading rows.
        desc = desc[:max_features, :]
    elif n_found < max_features:
        # Too few descriptors: append zero rows up to the target count.
        filler = np.zeros((max_features - n_found, desc.shape[1]))
        desc = np.vstack((desc, filler))

    return desc.flatten()

# Function to extract HOG features
def extract_hog_features(image):
    """Return the HOG descriptor of the grayscale version of a BGR image.

    The descriptor is computed with 24x24-pixel cells and 'L2-Hys' block
    normalisation; every other ``hog`` setting is left at its default.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return hog(grayscale, pixels_per_cell=(24, 24), block_norm='L2-Hys')

# Function to extract LBP features
def extract_lbp_features(image, radii=[1, 2, 3], n_points=8, method='uniform'):
    """Return concatenated, normalised LBP histograms for several radii.

    Args:
        image: BGR image; it is converted to grayscale first.
        radii: Radii at which the local binary pattern is computed.
        n_points: Number of sampling points passed to ``local_binary_pattern``.
        method: LBP method name passed to ``local_binary_pattern``.

    Returns:
        1-D array holding one histogram of ``n_points + 2`` bins per radius,
        in the order the radii were given. Each histogram is divided by its
        sum (plus a small epsilon to avoid division by zero).
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Integer bin edges 0, 1, ..., n_points + 2 (shared by every radius).
    bin_edges = np.arange(0, n_points + 3)

    collected = []
    for r in radii:
        codes = local_binary_pattern(gray, n_points, r, method=method)
        counts, _ = np.histogram(codes.ravel(), bins=bin_edges, range=(0, n_points + 2))
        counts = counts.astype("float")
        collected.extend(counts / (counts.sum() + 1e-7))

    return np.array(collected)

# Function to extract color histogram features
def extract_color_histogram(image):
    """Return the concatenated per-channel colour histograms of an image.

    For each of the three channels (index 0, 1, 2 - B, G, R for an OpenCV
    image) a 256-bin histogram over the range [0, 256) is computed and
    normalised with ``cv2.normalize``'s default settings. The three
    histograms are joined in channel order into one 768-element vector.
    """
    channel_hists = []
    for channel in range(3):
        counts = cv2.calcHist([image], [channel], None, [256], [0, 256])
        # Normalise in place (dst is the same array), then drop the column axis.
        channel_hists.append(cv2.normalize(counts, counts).flatten())

    return np.concatenate(channel_hists)



def extract_gftt_features(image, max_corners=100, quality_level=0.01, min_distance=10):
    """Return "good features to track" corner coordinates as a fixed-length vector.

    Args:
        image: BGR image; it is converted to grayscale first.
        max_corners: Maximum number of corners requested from OpenCV.
        quality_level: ``qualityLevel`` passed to ``cv2.goodFeaturesToTrack``.
        min_distance: ``minDistance`` passed to ``cv2.goodFeaturesToTrack``.

    Returns:
        1-D array of flattened (x, y) corner coordinates. When ``max_corners``
        is positive the vector always has ``max_corners * 2`` entries: images
        with fewer detected corners are zero-padded at the end, so vectors from
        different images can be stacked into one matrix. When no corner is
        found at all, an all-zero vector is returned.
    """
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    corners = cv2.goodFeaturesToTrack(
        gray_image,
        maxCorners=max_corners,
        qualityLevel=quality_level,
        minDistance=min_distance,
    )

    expected_len = max_corners * 2  # 2 coordinates per corner
    if corners is None:
        return np.zeros(expected_len)

    flat = corners.flatten()
    # The detector may return fewer corners than requested; pad with zeros so
    # the feature length does not depend on image content.
    if 0 < flat.size < expected_len:
        padded = np.zeros(expected_len, dtype=flat.dtype)
        padded[:flat.size] = flat
        flat = padded
    return flat

def extract_harris_features(image, block_size=2, ksize=3, k=0.04):
    """Return the min-max normalised Harris corner response as a flat vector.

    Args:
        image: BGR image; it is converted to grayscale first.
        block_size: ``blockSize`` passed to ``cv2.cornerHarris``.
        ksize: ``ksize`` passed to ``cv2.cornerHarris``.
        k: ``k`` passed to ``cv2.cornerHarris``.

    Returns:
        1-D array with one response value per pixel, rescaled to [0, 255].
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    response = cv2.cornerHarris(gray, blockSize=block_size, ksize=ksize, k=k)
    # Stretch the raw response to the 0..255 range.
    scaled = cv2.normalize(response, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX)
    return scaled.flatten()

def extract_gabor_features(image, ksize=31, sigma=4.0, theta=1.0, lambd=10.0, gamma=0.5, psi=0):
    """Return the response of one Gabor filter as a flat per-pixel vector.

    Args:
        image: BGR image; it is converted to grayscale first.
        ksize: Side length of the square Gabor kernel.
        sigma, theta, lambd, gamma, psi: Passed unchanged to
            ``cv2.getGaborKernel``.

    Returns:
        1-D array with one filtered value per pixel, in 8-bit depth.
    """
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gabor_kernel = cv2.getGaborKernel((ksize, ksize), sigma, theta, lambd, gamma, psi, ktype=cv2.CV_32F)
    # filter2D's second argument is a *depth* (CV_8U, CV_32F, ...), not a full
    # type; the previous CV_8UC3 is a 3-channel type constant that only worked
    # because its depth bits happen to equal CV_8U.
    filtered_image = cv2.filter2D(gray_image, cv2.CV_8U, gabor_kernel)
    return filtered_image.flatten()

def extract_sobel_features(image):
    """Return the min-max normalised Sobel gradient magnitude as a flat vector.

    The BGR image is converted to grayscale, first-order derivatives along x
    and y are taken with a 3x3 Sobel kernel in 64-bit float depth, and their
    magnitude is rescaled to [0, 255] before flattening.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    dx = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
    dy = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)

    magnitude = cv2.magnitude(dx, dy)
    scaled = cv2.normalize(magnitude, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX)
    return scaled.flatten()


# Combine all features into a single feature vector
def extract_combined_features(image, pca_model, pca_harris=None, pca_gabor=None, pca_sobel=None):
    """Build the full feature vector for one image.

    The per-pixel maps (Canny edges, Harris response, Gabor response, Sobel
    magnitude) are each reduced with a PCA model; the remaining descriptors
    are used as returned by their extractors.

    Previously the edge PCA (``pca_model``) was also applied to the Harris,
    Gabor and Sobel maps, projecting them onto components learned from edge
    maps. Each map is now reduced with its own model.

    Args:
        image: BGR image, already resized to the size the PCA models were
            fitted on.
        pca_model: Fitted PCA for the Canny edge map. Also the last-resort
            fallback for the other maps.
        pca_harris, pca_gabor, pca_sobel: Optional fitted PCA models for the
            Harris, Gabor and Sobel maps. When one is omitted, the
            notebook-level variable of the same name is used if it exists;
            otherwise ``pca_model`` is used, which matches the old behaviour.

    Returns:
        1-D array: edges (PCA), ORB, HOG, LBP, colour histogram, GFTT,
        Harris (PCA), Gabor (PCA), Sobel (PCA), concatenated in that order.
    """
    def _reduce(features, explicit_model, global_name):
        # Pick the most specific model available for this feature type.
        model = explicit_model
        if model is None:
            model = globals().get(global_name)
        if model is None:
            model = pca_model
        return model.transform([features])[0]

    edges = pca_model.transform([extract_edges(image)])[0]

    orb_features = extract_orb_features(image)
    hog_features = extract_hog_features(image)
    lbp_features = extract_lbp_features(image)
    color_histogram = extract_color_histogram(image)
    gftt_features = extract_gftt_features(image)

    harris_features = _reduce(extract_harris_features(image), pca_harris, 'pca_harris')
    gabor_features = _reduce(extract_gabor_features(image), pca_gabor, 'pca_gabor')
    sobel_features = _reduce(extract_sobel_features(image), pca_sobel, 'pca_sobel')

    combined_features = np.concatenate((
        edges,
        orb_features,
        hog_features,
        lbp_features,
        color_histogram,
        gftt_features,
        harris_features,
        gabor_features,
        sobel_features,
    ))
    return combined_features


In [None]:
# Load one sample image for interactive experiments with the extractors.
image_path = "data/Image_1.jpg"
image = cv2.imread(image_path)

# cv2.imread returns None instead of raising when a file cannot be read;
# fail here with a clear message rather than later inside cv2.resize.
if image is None:
    raise FileNotFoundError(f"Could not read sample image: {image_path}")

# Resize to the 250x200 size used by every other cell in this notebook.
resized_image = cv2.resize(image, (250, 200))

# # pca = PCA(n_components=100)
# # Extract combined features
# combined_features = extract_combined_features(resized_image)


# gftt_features = extract_gftt_features(resized_image)
# harris_features = extract_harris_features(resized_image)
# gabor_features = extract_gabor_features(resized_image)
# sobel_features = extract_sobel_features(resized_image)

# # Printing the shapes of each feature
# print(f"gftt features shape: {gftt_features.shape}")
# print(f"harris features shape: {harris_features.shape}")
# print(f"gabor features shape: {gabor_features.shape}")
# print(f"sobel features shape: {sobel_features.shape}")

In [None]:
# First pass over the dataset: gather one flattened Canny edge map per
# readable image. These rows are the training matrix for the edge PCA.
all_edges = []

for index, row in labels_df.iterrows():
    image_path = os.path.join(image_directory, row['filename'])
    image = cv2.imread(image_path)

    # cv2.imread yields None for files it cannot read; such rows are skipped.
    if image is not None:
        resized_image = cv2.resize(image, (250, 200))
        edges = extract_edges(resized_image)
        print(image_path, edges.shape, end='\r')
        all_edges.append(edges)

# Stack the per-image vectors into a (samples x pixels) matrix.
all_edges = np.array(all_edges)

# Fit a 100-component PCA and keep the projected training data as well.
pca = PCA(n_components=100)
all_edges_pca = pca.fit_transform(all_edges)

# Report the shape of the reduced data.
print(all_edges_pca.shape)

print(f"PCA model fitted. {all_edges_pca.shape[0]} samples with {all_edges_pca.shape[1]} features.")

In [None]:
# First pass over the dataset: gather one flattened Harris response map per
# readable image. These rows are the training matrix for the Harris PCA.
all_harris_features = []

for index, row in labels_df.iterrows():
    image_path = os.path.join(image_directory, row['filename'])
    image = cv2.imread(image_path)

    # cv2.imread yields None for files it cannot read; such rows are skipped.
    if image is not None:
        resized_image = cv2.resize(image, (250, 200))
        harris_features = extract_harris_features(resized_image)
        print(image_path, harris_features.shape, end='\r')
        all_harris_features.append(harris_features)

# Stack the per-image vectors into a (samples x pixels) matrix.
all_harris_features = np.array(all_harris_features)

# Fit a 100-component PCA and keep the projected training data as well.
pca_harris = PCA(n_components=100)
harris_features_pca = pca_harris.fit_transform(all_harris_features)

# Report the shape of the reduced data.
print(harris_features_pca.shape)

print(f"PCA model fitted on Harris features. {harris_features_pca.shape[0]} samples with {harris_features_pca.shape[1]} features.")

In [None]:
# First pass over the dataset: gather one flattened Gabor response map per
# readable image. These rows are the training matrix for the Gabor PCA.
all_gabor_features = []

for index, row in labels_df.iterrows():
    image_path = os.path.join(image_directory, row['filename'])
    image = cv2.imread(image_path)

    # cv2.imread yields None for files it cannot read; such rows are skipped.
    if image is not None:
        resized_image = cv2.resize(image, (250, 200))
        gabor_features = extract_gabor_features(resized_image)
        print(image_path, gabor_features.shape, end='\r')
        all_gabor_features.append(gabor_features)

# Stack the per-image vectors into a (samples x pixels) matrix.
all_gabor_features = np.array(all_gabor_features)

# Fit a 100-component PCA and keep the projected training data as well.
pca_gabor = PCA(n_components=100)
gabor_features_pca = pca_gabor.fit_transform(all_gabor_features)

# Report the shape of the reduced data.
print(gabor_features_pca.shape)

print(f"PCA model fitted on Gabor features. {gabor_features_pca.shape[0]} samples with {gabor_features_pca.shape[1]} features.")

In [None]:
# First pass over the dataset: gather one flattened Sobel magnitude map per
# readable image. These rows are the training matrix for the Sobel PCA.
all_sobel_features = []

for index, row in labels_df.iterrows():
    image_path = os.path.join(image_directory, row['filename'])
    image = cv2.imread(image_path)

    # cv2.imread yields None for files it cannot read; such rows are skipped.
    if image is not None:
        resized_image = cv2.resize(image, (250, 200))
        sobel_features = extract_sobel_features(resized_image)
        print(image_path, sobel_features.shape, end='\r')
        all_sobel_features.append(sobel_features)

# Stack the per-image vectors into a (samples x pixels) matrix.
all_sobel_features = np.array(all_sobel_features)

# Fit a 100-component PCA and keep the projected training data as well.
pca_sobel = PCA(n_components=100)
sobel_features_pca = pca_sobel.fit_transform(all_sobel_features)

# Report the shape of the reduced data.
print(sobel_features_pca.shape)

print(f"PCA model fitted on Sobel features. {sobel_features_pca.shape[0]} samples with {sobel_features_pca.shape[1]} features.")

In [None]:
# header_written = False

# with open('extracted_features_pca.csv', 'w') as csvfile:
#     for index, row in labels_df.iterrows():
#         image_path = os.path.join(image_directory, row['filename'])
#         image = cv2.imread(image_path)

#         # Ensure the image is loaded correctly
#         if image is None:
#             continue
        
#         # Resize the image if needed
#         resized_image = cv2.resize(image, (250, 200))
        
#         # Extract combined features
#         combined_features = extract_combined_features(resized_image, pca)
#         print(image_path, combined_features.shape, end='\r')

#         # Normalize features
#         scaler = StandardScaler()
#         X = scaler.fit_transform(combined_features)
        
#         # Convert features to a DataFrame row with label and filename
#         combined_row = np.append(X, [row['label'], row['filename']])
        
#         # Convert to DataFrame
#         combined_df = pd.DataFrame([combined_row])
        
#         # Write the row to the CSV, writing the header only once
#         if not header_written:
#             combined_df.to_csv(csvfile, header=['feature_' + str(i) for i in range(len(combined_features))] + ['label', 'filename'], index=False, mode='a')
#             header_written = True
#         else:
#             combined_df.to_csv(csvfile, header=False, index=False, mode='a')

# print("Feature extraction completed and saved to CSV.")

In [None]:
def process_image(row_tuple):
    """Extract the combined feature vector for one row of the label table.

    Relies on the notebook-level ``image_directory`` and the fitted edge PCA
    ``pca``.

    Args:
        row_tuple: ``(index, row)`` pair as produced by ``DataFrame.iterrows``;
            ``row`` must provide ``'filename'`` and ``'label'``.

    Returns:
        ``None`` when the image cannot be read. Otherwise a 1-D object array
        holding the feature values followed by the label and the filename.
        An object array is used because appending the label/filename strings
        to a float array with ``np.append`` coerces every feature value to a
        string.
    """
    _, row = row_tuple
    image_path = os.path.join(image_directory, row['filename'])
    image = cv2.imread(image_path)

    # cv2.imread returns None for unreadable files; the caller filters these.
    if image is None:
        return None

    resized_image = cv2.resize(image, (250, 200))

    # Extract combined features
    combined_features = extract_combined_features(resized_image, pca)
    print(image_path, combined_features.shape, end='\r')

    # Features first, then label and filename, each keeping its own type.
    record = np.empty(combined_features.size + 2, dtype=object)
    record[:-2] = combined_features
    record[-2] = row['label']
    record[-1] = row['filename']
    return record

In [None]:
# Extract features for every labelled image in parallel worker processes.
with ProcessPoolExecutor() as executor:
    # Each item handed to process_image is an (index, row) tuple from iterrows().
    results = list(executor.map(process_image, labels_df.iterrows()))

# process_image returns None for images that failed to load; drop those.
results = [result for result in results if result is not None]
if not results:
    raise ValueError("No images could be processed; check image_directory and label.csv.")

# pandas would silently pad rows of unequal length, shifting label/filename
# into feature columns. Refuse to continue instead of writing a corrupt file.
row_lengths = sorted({len(result) for result in results})
if len(row_lengths) != 1:
    raise ValueError(f"Inconsistent feature vector lengths across images: {row_lengths}")

features_list = list(results)

# Last two positions of every row are label and filename; the rest are features.
raw_df = pd.DataFrame(features_list)
n_features = raw_df.shape[1] - 2
feature_columns = ['feature_' + str(i) for i in range(n_features)]

# Normalize features using StandardScaler. Convert to float explicitly so the
# scaler never has to work on string/object columns.
scaler = StandardScaler()
scaled_features = scaler.fit_transform(raw_df.iloc[:, :n_features].astype(float))

# Assemble the output frame: typed feature columns plus label and filename.
features_df = pd.DataFrame(scaled_features, columns=feature_columns)
features_df['label'] = raw_df.iloc[:, -2].to_numpy()
features_df['filename'] = raw_df.iloc[:, -1].to_numpy()

# Write to CSV
features_df.to_csv('extracted_features_pca.csv', index=False)

print("Feature extraction completed and saved to CSV.")