In [12]:
import os
import numpy as np
from skimage.io import imread
from skimage.transform import resize

In [13]:
from skimage.feature import graycomatrix, graycoprops
from skimage.color import rgb2gray
from skimage import img_as_ubyte, feature
# print(skimage.__version__)
def extract_features(image):
    """Build a combined texture/gradient feature vector for one image.

    The image is reduced to 8-bit grayscale and two descriptors are computed
    from it and concatenated:

    * five grey-level co-occurrence matrix (GLCM) statistics -- contrast,
      dissimilarity, homogeneity, energy and correlation -- taken from a
      symmetric, normalised GLCM at distance 5 and angle 0;
    * a HOG descriptor (9 orientations, 8x8-pixel cells, 2x2-cell blocks,
      'L2-Hys' block normalisation).

    Args:
        image (ndarray): A colour image accepted by ``rgb2gray``, or an image
            that is already 2-D grayscale.

    Returns:
        ndarray: 1-D array holding the five GLCM statistics followed by the
        HOG descriptor.
    """
    # rgb2gray expects a trailing channel axis; an image that is already 2-D
    # has nothing to convert, so use it directly instead of failing.
    if np.ndim(image) == 2:
        image_gray = image
    else:
        image_gray = rgb2gray(image)

    # graycomatrix needs integer grey levels: rescale to 0-255 as uint8.
    image_gray = img_as_ubyte(image_gray)

    # One distance and one angle are requested, so each property is the single
    # entry [0, 0] of the array returned by graycoprops.
    glcm = graycomatrix(image_gray, distances=[5], angles=[0], levels=256, symmetric=True, normed=True)
    glcm_properties = ('contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation')
    haralick_features = np.array([graycoprops(glcm, prop)[0, 0] for prop in glcm_properties])

    # Histogram-of-oriented-gradients descriptor of the same grayscale image.
    hog_features = feature.hog(image_gray, orientations=9, pixels_per_cell=(8, 8), cells_per_block=(2, 2), block_norm='L2-Hys')

    # GLCM statistics first, then the HOG descriptor.
    return np.concatenate((haralick_features, hog_features))

In [14]:
def load_images_from_folder(folder_name, categories, image_size=(150, 150, 3)):
    """
    Load the images of each category, resize them, and extract a feature vector from each.

    Every entry of ``folder_name/<category>`` is read with ``imread``, resized to
    ``image_size`` and passed to ``extract_features``. Files that fail at any of
    these steps are reported and skipped. Missing category folders are reported
    and skipped as well.

    Args:
    folder_name (str): The folder in which the category sub-folders are stored.
    categories (list of str): The category (sub-folder) names; the position of a
        category in this list is used as its integer label.
    image_size (tuple): The shape to which each image is resized.

    Returns:
    flat_data_arr (list): One feature vector per successfully processed image.
    target_arr (list): The category index for each entry of flat_data_arr.
    img_resized (ndarray or None): The most recently resized image, or None if
        no image could be read and resized.
    """
    flat_data_arr = []
    target_arr = []
    # Bound up front so the return statement works even when no image is
    # processed (missing folders, empty folders, or every file failing).
    img_resized = None

    for i, category in enumerate(categories):
        print(f'Loading... category: {category}')
        category_path = os.path.join(folder_name, category)

        if not os.path.isdir(category_path):
            print(f'Category path {category_path} not found')
            continue

        # os.listdir returns entries in arbitrary order; sorting keeps the
        # sample order (and any seeded split made from it) reproducible.
        for img_file in sorted(os.listdir(category_path)):
            img_file_path = os.path.join(category_path, img_file)

            try:
                img_array = imread(img_file_path)
                img_resized = resize(img_array, image_size)
                features = extract_features(img_resized)
            except Exception as exc:
                # Best-effort loading: report the failing file and its cause,
                # then move on. KeyboardInterrupt/SystemExit are not caught.
                print(f'Error occurred while processing file {img_file_path}: {exc!r}')
                continue

            # Append both lists together so features and labels stay aligned.
            flat_data_arr.append(features)
            target_arr.append(i)

        print(f'Loaded category: {category} successfully')

    return flat_data_arr, target_arr, img_resized

In [15]:
# Usage: load both classes from 'dataset_18', then turn the returned lists
# into NumPy arrays for the cells that follow.
categories = ['covid', 'normal']
feature_rows, label_rows, img_resized = load_images_from_folder('dataset_18', categories)
flat_data = np.asarray(feature_rows)  # one row per loaded image
target = np.asarray(label_rows)       # index into `categories` for each row

Loading... category: covid
Loaded category: covid successfully
Loading... category: normal
Loaded category: normal successfully


In [23]:
from sklearn.model_selection import train_test_split
# Split the data into training, testing and validation sets.
# Step 1: hold out 30% of the samples, stratified by label.
x_train, x_holdout, y_train, y_holdout = train_test_split(
    flat_data,
    target,
    test_size=0.3,
    random_state=1,
    stratify=target,
)

# Step 2: divide the hold-out portion in half (again stratified) to get the
# test and validation sets.
x_test, x_val, y_test, y_val = train_test_split(
    x_holdout,
    y_holdout,
    test_size=0.5,
    random_state=1,
    stratify=y_holdout,
)

In [25]:
# Display the validation labels produced by the second split above.
y_val

array([0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1,
       0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1,
       1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1])