# Haralick Features Extraction - JMUBEN Dataset

In [1]:
import os, cv2
import numpy as np
import pandas as pd
import mahotas as mt
import matplotlib as plt
from tqdm import tqdm

In [2]:
import warnings
warnings.filterwarnings("ignore")

In [3]:
DATASETS_FOLDER = "./datasets"
PATH_JMUBEN = DATASETS_FOLDER + "/jmuben"
PATH_JMUBEN_GRAYSCALE = PATH_JMUBEN + "/grayscale"

## 1. Defining DataFrame to save features

In [4]:
columns = ["name"] + ["H" + str(i) for i in range(1,14)] + ["target"]
df = pd.DataFrame(columns=columns)
df.head()

Unnamed: 0,name,H1,H2,H3,H4,H5,H6,H7,H8,H9,H10,H11,H12,H13,target


## 2. Extracting Haralick Features from images

### 2.1. Defining function to extract Haralick Features

In [5]:
def extract_haralick_features(image):
    """Extract Haralick Features from image."""
    
    textures = mt.features.haralick(image)

    ht_mean = textures.mean(axis=0)
    return ht_mean

### 2.2. Extracting Haralick Features

In [6]:
directories = os.listdir(PATH_JMUBEN_GRAYSCALE)

In [7]:
for directory in directories:
    root = f"{PATH_JMUBEN_GRAYSCALE}/{directory}"
    files = next(os.walk(root))[2]
    files = sorted(files)
    
    for file in tqdm(files, "Extracting Haralick Features"):
        file_info = []
        file_info.append(file)
        
        image = cv2.imread(f"{root}/{file}")
        hfeatures = extract_haralick_features(image)
        file_info.extend(hfeatures)
        
        file_info.append(directory)
        
        df_new_line = pd.DataFrame([file_info], columns=columns)
        df = df.append(df_new_line)
    
    print(f"Extracted Haralick Features from all images from {directory} folder!")

Extracting Haralick Features: 100%|███████████████████████████████████████████████| 16978/16978 [08:59<00:00, 31.49it/s]


Extracted Haralick Features from all images from Miner folder!


Extracting Haralick Features: 100%|█████████████████████████████████████████████████| 6571/6571 [03:31<00:00, 31.03it/s]


Extracted Haralick Features from all images from Phoma folder!


Extracting Haralick Features: 100%|███████████████████████████████████████████████| 18983/18983 [07:28<00:00, 42.34it/s]


Extracted Haralick Features from all images from Healthy folder!


Extracting Haralick Features: 100%|█████████████████████████████████████████████████| 8336/8336 [03:19<00:00, 41.69it/s]


Extracted Haralick Features from all images from Leaf rust folder!


Extracting Haralick Features: 100%|█████████████████████████████████████████████████| 7681/7681 [03:47<00:00, 33.71it/s]

Extracted Haralick Features from all images from Cerscospora folder!





In [8]:
df.to_csv(f"{PATH_JMUBEN}/haralick_features.csv", header=True, index=False)

### 2.3. Visualizing DataFrame

In [9]:
df.head()

Unnamed: 0,name,H1,H2,H3,H4,H5,H6,H7,H8,H9,H10,H11,H12,H13,target
0,1 (1).jpg,0.001586,93.192069,0.96045,1177.657526,0.291168,249.064033,4617.438034,7.718249,10.967639,0.000635,3.740615,-0.391,0.995172,Miner
0,1 (10000).jpg,0.002062,97.0083,0.939763,805.227808,0.26165,211.017844,3123.902932,7.249024,10.589788,0.000597,3.729357,-0.336786,0.98596,Miner
0,1 (10001).jpg,0.000928,226.929118,0.967796,3523.245958,0.252188,295.117979,13866.054714,8.235735,11.816346,0.000534,4.198673,-0.393656,0.996458,Miner
0,1 (10002).jpg,0.002441,112.890236,0.986898,4306.632913,0.327462,302.522615,17113.641414,8.173147,11.235603,0.000619,3.749459,-0.458315,0.998801,Miner
0,1 (10003).jpg,0.000959,218.905658,0.965931,3213.701018,0.246127,281.627154,12635.898413,8.181094,11.783294,0.00053,4.202501,-0.385315,0.995845,Miner
