In [28]:
import matplotlib.pyplot as plt
import numpy as np
import cv2 
import pandas as pd
import shutil
import os 
import torch
import torchvision.models as models
import torchvision.transforms as transforms
from glob import glob
from tqdm import tqdm
from skimage.feature import hog, local_binary_pattern
from PIL import Image

In [6]:
resnet18 = models.resnet18(pretrained=True)
# Remove the fully connected layer to get feature vectors
resnet18 = torch.nn.Sequential(*list(resnet18.children())[:-1])
resnet18.eval()  # Set the model to evaluation mode

# Step 2: Define Image Preprocessing
preprocess = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize image to 224x224
    transforms.ToTensor(),          # Convert image to tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize
])



In [4]:
dataDF = pd.read_csv("mixed_dataset_v1.csv")
print(dataDF.shape)
dataDF.head()

(2147, 4)


Unnamed: 0,filename,set,is_augmented,label
0,22_png.rf.31843c8a3d74795b58f1e718b5eed556.jpg,test,0,1
1,pose07_1_2_3.jpg,test,0,1
2,pose07_1_3_2.jpg,test,0,1
3,pose07_1_3_3.jpg,test,0,1
4,pose07_2_2_2.jpg,test,0,1


## Traditional methods

### Histogram of Oriented Gradients (HOG)

In [26]:
num_hog_features = 324
feature_columns = {f'hog_{i+1}': None for i in range(num_hog_features)}
hog_feature_df = pd.DataFrame(feature_columns, index=dataDF.index)

for idx, row in tqdm(dataDF.iterrows(), total=len(dataDF)):
    image_path = os.path.join("mixed_datasets", row["set"], "irex" if row["label"] == 1 else "others", row["filename"])

    # Load the image and convert to grayscale
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    resized_image = cv2.resize(image, (64, 64))
    hog_features, hog_image = hog(
        resized_image,
        orientations=9,  # Number of gradient orientations
        pixels_per_cell=(16, 16),  # Size of cell
        cells_per_block=(2, 2),  # Number of cells per block
        block_norm='L2-Hys',  # Block normalization
        visualize=True,  # Output HOG image
        feature_vector=True,  # Return features as a vector
        )
    
    # Assign Features to DataFrame
    for i, feature in enumerate(hog_features):
        column_name = f"hog_{i+1}"
        if column_name in hog_feature_df.columns:  # Check if the column exists
            hog_feature_df.at[idx, column_name] = feature  # Assign feature to the appropriate column

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2147/2147 [00:30<00:00, 69.93it/s]


In [27]:
hog_feature_df.head()

Unnamed: 0,hog_1,hog_2,hog_3,hog_4,hog_5,hog_6,hog_7,hog_8,hog_9,hog_10,...,hog_315,hog_316,hog_317,hog_318,hog_319,hog_320,hog_321,hog_322,hog_323,hog_324
0,0.238798,0.2066,0.238798,0.085922,0.094069,0.034114,0.028349,0.032334,0.178798,0.17969,...,0.113392,0.068071,0.087588,0.190218,0.075091,0.023938,0.017551,0.028246,0.025814,0.022683
1,0.105851,0.01003,0.033357,0.044995,0.074824,0.032313,0.0323,0.020061,0.010733,0.200561,...,0.023561,0.152509,0.050195,0.024987,0.007851,0.015462,0.007737,0.01355,0.022147,0.085864
2,0.063727,0.015057,0.020632,0.029595,0.046573,0.015193,0.030233,0.020654,0.00995,0.082676,...,0.035893,0.104215,0.039217,0.060119,0.061684,0.39137,0.057378,0.022726,0.041927,0.039418
3,0.096622,0.028979,0.04891,0.069154,0.103628,0.033462,0.0607,0.03516,0.018661,0.120624,...,0.12836,0.302174,0.062293,0.059455,0.094062,0.124957,0.041884,0.117814,0.122488,0.182452
4,0.047135,0.024314,0.024849,0.028498,0.039671,0.017994,0.021846,0.01525,0.001476,0.06503,...,0.019637,0.055286,0.016827,0.042773,0.029667,0.06137,0.018784,0.030997,0.009802,0.013862


### Local Binary Patterns (LBP)

In [37]:
num_lbp_features = 25
feature_columns = {f'lbp_{i+1}': None for i in range(num_lbp_features)}
lbp_feature_df = pd.DataFrame(feature_columns, index=dataDF.index)

for idx, row in tqdm(dataDF.iterrows(), total=len(dataDF)):
    image_path = os.path.join("mixed_datasets", row["set"], "irex" if row["label"] == 1 else "others", row["filename"])

    # Load the image and convert to grayscale
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    # Parameters for LBP
    radius = 3  # Radius of the circular neighborhood
    n_points = 8 * radius  # Number of points in the circular neighborhood
    lbp = local_binary_pattern(image, n_points, radius, method='uniform')
    n_bins = 25
    lbp_histogram, _ = np.histogram(lbp.ravel(), bins=n_bins, range=(0, n_bins))
    
    # Normalize the histogram
    lbp_histogram = lbp_histogram.astype("float")
    lbp_histogram /= (lbp_histogram.sum() + 1e-6)

    # Assign Features to DataFrame
    for i, feature in enumerate(lbp_histogram):
        column_name = f"lbp_{i+1}"
        if column_name in lbp_feature_df.columns:  # Check if the column exists
            lbp_feature_df.at[idx, column_name] = feature  # Assign feature to the appropriate column

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2147/2147 [01:24<00:00, 25.49it/s]


In [38]:
lbp_feature_df.head()

Unnamed: 0,lbp_1,lbp_2,lbp_3,lbp_4,lbp_5,lbp_6,lbp_7,lbp_8,lbp_9,lbp_10,...,lbp_16,lbp_17,lbp_18,lbp_19,lbp_20,lbp_21,lbp_22,lbp_23,lbp_24,lbp_25
0,0.036676,0.015209,0.017526,0.015434,0.013447,0.013036,0.013007,0.014729,0.017226,0.021559,...,0.031695,0.019352,0.023703,0.015943,0.016555,0.016249,0.018237,0.019196,0.015365,0.390446
1,0.011245,0.00501,0.006512,0.008269,0.008882,0.012505,0.01154,0.014267,0.019543,0.026662,...,0.042108,0.032932,0.03526,0.02227,0.022553,0.021554,0.017925,0.017509,0.008153,0.260338
2,0.012129,0.005386,0.006547,0.009159,0.0098,0.013452,0.011574,0.014371,0.01971,0.026402,...,0.041368,0.033457,0.038138,0.024235,0.02327,0.023674,0.020069,0.020011,0.009107,0.278141
3,0.012158,0.005547,0.006368,0.008708,0.009887,0.014128,0.013481,0.017335,0.022669,0.030499,...,0.042062,0.032839,0.035023,0.022692,0.022397,0.021317,0.017295,0.016775,0.007616,0.241974
4,0.012874,0.00579,0.006778,0.009142,0.009592,0.013747,0.011384,0.013545,0.018693,0.024478,...,0.039721,0.032643,0.039161,0.024923,0.026286,0.026078,0.022276,0.023368,0.010222,0.322063


## Extract Resnet18 features

In [39]:
num_re18_features = 512
feature_columns = {f'R18_{i+1}': None for i in range(num_re18_features)}
r18_feature_df = pd.DataFrame(feature_columns, index=dataDF.index)

for idx, row in tqdm(dataDF.iterrows(), total=len(dataDF)):
    image_path = os.path.join("mixed_datasets", row["set"], "irex" if row["label"] == 1 else "others", row["filename"])
    image = Image.open(image_path).convert("RGB")  # Open the image
    input_tensor = preprocess(image).unsqueeze(0)  # Preprocess and add batch dimension
    
    with torch.no_grad():
        features = resnet18(input_tensor).squeeze().numpy()  # Extract and convert to NumPy array

    # Assign Features to DataFrame
    for i, feature in enumerate(features):
        column_name = f"R18_{i+1}"
        if column_name in r18_feature_df.columns:  # Check if the column exists
            r18_feature_df.at[idx, column_name] = feature  # Assign feature to the appropriate column

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2147/2147 [01:14<00:00, 28.81it/s]


In [40]:
r18_feature_df.head()

Unnamed: 0,R18_1,R18_2,R18_3,R18_4,R18_5,R18_6,R18_7,R18_8,R18_9,R18_10,...,R18_503,R18_504,R18_505,R18_506,R18_507,R18_508,R18_509,R18_510,R18_511,R18_512
0,0.895254,1.66316,2.212232,1.54954,0.689803,0.645096,3.274418,0.210356,0.63256,0.632855,...,0.913866,0.524153,2.012202,1.042136,1.226978,0.258365,0.838587,0.108025,0.944895,0.414115
1,0.110175,0.458025,0.221522,2.122657,0.188917,0.664596,3.099096,0.339622,1.500386,0.945414,...,0.109936,0.047731,1.073908,1.094541,5.629246,0.0,0.070865,1.159087,0.58677,0.57986
2,0.089826,0.52339,0.419507,2.412418,0.08944,0.088878,1.607852,0.03349,0.949751,0.70368,...,0.204381,0.188164,0.494481,2.225845,5.9375,0.0,0.040817,0.642517,0.456618,0.256816
3,0.382237,0.361276,0.309558,2.206474,0.300386,0.36134,1.673388,0.03704,1.713342,0.971622,...,0.136055,0.144617,0.95204,1.355797,4.59034,0.087288,0.089996,1.743022,0.648363,0.576937
4,0.347494,0.131341,1.05267,1.545299,0.409324,0.619162,2.021487,0.033948,0.830327,0.813862,...,0.07489,0.5611,0.283085,1.32558,3.964945,0.0,0.225154,0.1803,0.003969,0.152804


## Merge all features

In [41]:
finalDF = pd.concat([dataDF, hog_feature_df, lbp_feature_df, r18_feature_df], axis=1)
finalDF.head()

Unnamed: 0,filename,set,is_augmented,label,hog_1,hog_2,hog_3,hog_4,hog_5,hog_6,...,R18_503,R18_504,R18_505,R18_506,R18_507,R18_508,R18_509,R18_510,R18_511,R18_512
0,22_png.rf.31843c8a3d74795b58f1e718b5eed556.jpg,test,0,1,0.238798,0.2066,0.238798,0.085922,0.094069,0.034114,...,0.913866,0.524153,2.012202,1.042136,1.226978,0.258365,0.838587,0.108025,0.944895,0.414115
1,pose07_1_2_3.jpg,test,0,1,0.105851,0.01003,0.033357,0.044995,0.074824,0.032313,...,0.109936,0.047731,1.073908,1.094541,5.629246,0.0,0.070865,1.159087,0.58677,0.57986
2,pose07_1_3_2.jpg,test,0,1,0.063727,0.015057,0.020632,0.029595,0.046573,0.015193,...,0.204381,0.188164,0.494481,2.225845,5.9375,0.0,0.040817,0.642517,0.456618,0.256816
3,pose07_1_3_3.jpg,test,0,1,0.096622,0.028979,0.04891,0.069154,0.103628,0.033462,...,0.136055,0.144617,0.95204,1.355797,4.59034,0.087288,0.089996,1.743022,0.648363,0.576937
4,pose07_2_2_2.jpg,test,0,1,0.047135,0.024314,0.024849,0.028498,0.039671,0.017994,...,0.07489,0.5611,0.283085,1.32558,3.964945,0.0,0.225154,0.1803,0.003969,0.152804


In [42]:
finalDF.to_csv("mixed_dataset_w_features_v1.csv", index=False)