In [14]:
import numpy as np
from skimage.feature.texture import graycomatrix, graycoprops
import matplotlib.pyplot as plt
import os
from concurrent.futures import ProcessPoolExecutor, as_completed

# Working directory of the notebook; extracted features are written beneath it.
cwd = os.getcwd()

# NOTE: machine-specific absolute path to the volume holding the tile archives;
# adjust it when running on another machine.
dataset_partition = '/media/iittp/new volume'
tiles_dir = 'multispectral_validation_tiles/'
dataset_dir = os.path.join(dataset_partition, tiles_dir)
features_dir = os.path.join(cwd, 'multispectral_validation_glcm_features/')
os.makedirs(features_dir, exist_ok=True)

# One class per '<name>.npz' archive. os.listdir() returns entries in arbitrary
# order, so sort to keep the class order reproducible across runs and machines.
classes = sorted(os.path.splitext(f)[0] for f in os.listdir(dataset_dir) if f.endswith('.npz'))

In [15]:
# Display the class names derived from the .npz archives found in dataset_dir.
classes

['banana', 'elephant_foot', 'paddy', 'turmeric']

First, we will extract GLCM (gray-level co-occurrence matrix) texture features from each channel of every tile.

In [16]:
# Function to load .npz file
def load_images(npz_file):
    """Load the image tiles stored in an ``.npz`` archive.

    Args:
        npz_file: Path to an ``.npz`` archive containing a ``'tiles'`` array.

    Returns:
        The array stored under the ``'tiles'`` key.

    Raises:
        KeyError: If the archive has no ``'tiles'`` entry.
    """
    # np.load returns a lazy NpzFile that keeps the underlying zip file open.
    # Reading inside the context manager closes the handle as soon as the
    # array has been materialised in memory.
    with np.load(npz_file) as data:
        return data['tiles']

# Function to extract GLCM features from a single image
def extract_glcm_features(image, distances=[1], angles=[0, np.pi/4, np.pi/2, 3*np.pi/4]):
    """Compute a flat list of GLCM texture descriptors for one single-channel image.

    Args:
        image: 2-D image passed straight to ``graycomatrix``.
        distances: Pixel-pair distances forwarded to ``graycomatrix``.
        angles: Pixel-pair angles (radians) forwarded to ``graycomatrix``.

    Returns:
        A list of values in this order: the flattened ``graycoprops`` results
        for contrast, dissimilarity, homogeneity, correlation and energy,
        followed by three scalars computed over the whole co-occurrence array
        (entropy, variance, maximum probability).
    """
    # image needs to be in uint8 format for graycomatrix
    cooc = graycomatrix(image, distances=distances, angles=angles, symmetric=True, normed=True)

    # Properties provided by graycoprops, kept in a fixed order so the layout
    # of the feature vector is stable.
    prop_names = ('contrast', 'dissimilarity', 'homogeneity', 'correlation', 'energy')
    descriptors = []
    for prop_name in prop_names:
        descriptors.extend(graycoprops(cooc, prop_name).ravel())

    # Statistics taken over the entire co-occurrence array (all distances and
    # angles together); each contributes exactly one value.
    entropy = -np.sum(cooc * np.log2(cooc + 1e-10))  # 1e-10 avoids log2(0)
    descriptors.append(entropy)
    descriptors.append(np.var(cooc))
    descriptors.append(np.max(cooc))

    return descriptors

Next, we add manually calculated vegetation indices as features: NDVI, NDRE, GNDVI, SAVI, EVI2, and CVI.

In [17]:
def calculate_ndvi(NIR, Red):
    """Calculate NDVI from NIR and Red bands.

    Computes ``(NIR - Red) / (NIR + Red + 1e-4)`` element-wise.

    Inputs are promoted to float64 first: with unsigned integer bands
    (e.g. uint8 tiles) the subtraction would otherwise wrap around whenever
    ``Red > NIR`` and the sum could overflow, producing meaningless values.

    Args:
        NIR: Near-infrared band (scalar or array-like).
        Red: Red band, broadcastable against ``NIR``.

    Returns:
        float64 NDVI value(s); the 1e-4 term guards against division by zero.
    """
    NIR = np.asarray(NIR, dtype=np.float64)
    Red = np.asarray(Red, dtype=np.float64)
    return (NIR - Red) / (NIR + Red + 1e-4)
def calculate_ndre(NIR, RE):
    """Calculate NDRE from NIR and Red Edge bands.

    Computes ``(NIR - RE) / (NIR + RE + 1e-4)`` element-wise.

    Inputs are promoted to float64 first so that unsigned integer bands
    (e.g. uint8 tiles) cannot wrap around in the subtraction or overflow in
    the sum.

    Args:
        NIR: Near-infrared band (scalar or array-like).
        RE: Red-edge band, broadcastable against ``NIR``.

    Returns:
        float64 NDRE value(s); the 1e-4 term guards against division by zero.
    """
    NIR = np.asarray(NIR, dtype=np.float64)
    RE = np.asarray(RE, dtype=np.float64)
    return (NIR - RE) / (NIR + RE + 1e-4)
def calculate_gndvi(NIR, Green):
    """Calculate GNDVI from NIR and Green bands.

    Computes ``(NIR - Green) / (NIR + Green + 1e-4)`` element-wise.

    Inputs are promoted to float64 first so that unsigned integer bands
    (e.g. uint8 tiles) cannot wrap around in the subtraction or overflow in
    the sum.

    Args:
        NIR: Near-infrared band (scalar or array-like).
        Green: Green band, broadcastable against ``NIR``.

    Returns:
        float64 GNDVI value(s); the 1e-4 term guards against division by zero.
    """
    NIR = np.asarray(NIR, dtype=np.float64)
    Green = np.asarray(Green, dtype=np.float64)
    return (NIR - Green) / (NIR + Green + 1e-4)
def calculate_savi(NIR, Red, L=0.5):
    """Calculate SAVI from NIR and Red bands with soil adjustment factor L.

    Computes ``((NIR - Red) * (1 + L)) / (NIR + Red + L)`` element-wise.

    Inputs are promoted to float64 first so that unsigned integer bands
    (e.g. uint8 tiles) cannot wrap around in the subtraction or overflow in
    the sum.

    Args:
        NIR: Near-infrared band (scalar or array-like).
        Red: Red band, broadcastable against ``NIR``.
        L: Soil adjustment factor; it is also the only term keeping the
            denominator away from zero when both bands are zero.

    Returns:
        float64 SAVI value(s).
    """
    NIR = np.asarray(NIR, dtype=np.float64)
    Red = np.asarray(Red, dtype=np.float64)
    return ((NIR - Red) * (1 + L)) / (NIR + Red + L)
def calculate_evi2(NIR, Red, G=2.5, C1=6, L=10000):
    """Calculate EVI2 from NIR and Red bands.

    Computes ``G * (NIR - Red) / (NIR + C1 * Red + L + 1e-4)`` element-wise.

    Inputs are promoted to float64 first so that unsigned integer bands
    (e.g. uint8 tiles) cannot wrap around in ``NIR - Red`` or overflow in
    ``C1 * Red`` and the sum.

    NOTE(review): the commonly published two-band EVI uses C1 = 2.4 and L = 1
    on reflectance in [0, 1]; the defaults here differ and are left unchanged
    for compatibility -- confirm they match the scaling of the input data.

    Args:
        NIR: Near-infrared band (scalar or array-like).
        Red: Red band, broadcastable against ``NIR``.
        G: Gain factor applied to the band difference.
        C1: Weight of the red band in the denominator.
        L: Additive adjustment term in the denominator.

    Returns:
        float64 EVI2 value(s).
    """
    NIR = np.asarray(NIR, dtype=np.float64)
    Red = np.asarray(Red, dtype=np.float64)
    return G * (NIR - Red) / (NIR + C1 * Red + L + 1e-4)
def calculate_cvi(NIR, Green):
    """Calculate CVI from NIR and Green bands.

    Returns the element-wise ratio ``NIR / (Green + 1e-4)``; the small
    constant keeps the denominator non-zero where ``Green`` is zero.

    NOTE(review): this is the plain NIR/Green ratio, whereas the Chlorophyll
    Vegetation Index is usually defined as NIR * Red / Green**2 -- confirm
    which quantity is intended.
    """
    denominator = Green + 1e-4
    return NIR / denominator

def calculate_vegetation_indices(g, r, re, nir):
    """Evaluate every vegetation index for one set of spectral bands.

    Args:
        g: Green band.
        r: Red band.
        re: Red-edge band.
        nir: Near-infrared band.

    Returns:
        A list ``[NDVI, NDRE, GNDVI, SAVI, EVI2, CVI]``; each entry is whatever
        the corresponding ``calculate_*`` helper returns for the given bands.
    """
    # The order of this list defines the order of the index features.
    return [
        calculate_ndvi(nir, r),    # NDVI
        calculate_ndre(nir, re),   # NDRE
        calculate_gndvi(nir, g),   # GNDVI
        calculate_savi(nir, r),    # SAVI
        calculate_evi2(nir, r),    # EVI2
        calculate_cvi(nir, g),     # CVI
    ]

In [None]:
# Function to process all images in a .npz file and extract all features
def process_images(npz_file):
    """Build the feature matrix for every tile stored in an ``.npz`` archive.

    The feature vector of a tile is the concatenation of
      * the GLCM features of each channel (``extract_glcm_features``), and
      * one scalar per vegetation index: the mean over the tile of the
        per-pixel index map returned by ``calculate_vegetation_indices``.

    Reducing each index map to a scalar keeps every row a flat list of
    numbers, so the result is a regular 2-D numeric array rather than a
    ragged mix of scalars and H x W arrays.

    Args:
        npz_file: Path to an archive readable by ``load_images``; tiles are
            indexed as (tile, row, column, channel).

    Returns:
        ``np.ndarray`` of float64 with one row per tile and one column per
        feature.

    Raises:
        ValueError: If a tile is not 3-D or does not have exactly four
            channels (needed for the vegetation indices).
    """
    # Load images from the .npz file
    images = load_images(npz_file)

    all_features = []
    feature_length = None  # fixed by the first tile, then used as a sanity check

    for image_idx, image in enumerate(images):
        _, _, num_channels = image.shape
        if num_channels != 4:
            raise ValueError(
                f"Expected 4 channels (G, R, RE, NIR) for image {image_idx}, got {num_channels}."
            )

        # GLCM features of all channels (G, R, RE, NIR -> indices 0, 1, 2, 3),
        # concatenated in channel order.
        image_features = []
        for channel_idx in range(num_channels):
            image_features.extend(extract_glcm_features(image[:, :, channel_idx]))
        num_glcm_features = len(image_features)

        # Vegetation indices. Bands are converted to float first so integer
        # tiles cannot wrap around or overflow inside the index arithmetic.
        g, r, re, nir = (image[:, :, band].astype(np.float64) for band in range(4))
        index_maps = calculate_vegetation_indices(g, r, re, nir)
        # Summarise each per-pixel map by its mean: one scalar per index.
        vegetative_features = [float(np.mean(index_map)) for index_map in index_maps]
        image_features.extend(vegetative_features)
        num_vegetative_features = len(vegetative_features)

        # If it's the first image, set the feature length to compare against
        if feature_length is None:
            feature_length = len(image_features)
            print(f"GLCM: {num_glcm_features}, VEGE: {num_vegetative_features}")

        # Ensure all feature vectors are the same length
        if len(image_features) != feature_length:
            print(f"Warning: Feature length mismatch for image {image_idx}. Padding with zeros.")
            # If feature length is smaller, pad with zeros
            image_features.extend([0] * (feature_length - len(image_features)))
            print(f"GLCM: {num_glcm_features}, VEGE: {num_vegetative_features}")

        all_features.append(image_features)

    # Convert to a numpy array (each row is a set of features for one image)
    return np.asarray(all_features, dtype=np.float64)

: 

In [None]:
# Extract the features of every class and store them as one compressed
# archive per class, named after the class, inside features_dir.
for cls in classes:
    # Input tiles and output features share the '<class>.npz' file name.
    dataset_path = os.path.join(dataset_dir, f"{cls}.npz")
    outpath = os.path.join(features_dir, f'{cls}.npz')

    features = process_images(dataset_path)
    np.savez_compressed(outpath, features=features)

    print(f"Class: {cls}, Features Shape:", features.shape)

GLCM: 92, VEGE: 6
