# In [2]:
import os
import pandas as pd
import numpy as np
import cv2
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import MinMaxScaler
from sklearn.impute import SimpleImputer
import re

# === Helper: Geometric Feature Extractor ===
def extract_geometric_features(frame_np):
    """Compute shape descriptors of the hot regions in a single frame.

    The first row and first column are cropped away, zero-valued pixels are
    treated as missing and mean-imputed with ``SimpleImputer``, and every
    pixel above ``mean + 1.5 * std`` of the imputed frame is marked as "hot".
    The external contours of that binary mask are the blobs being measured.

    Args:
        frame_np: 2-D array of pixel values. It is not modified; all work is
            done on a private floating-point copy.

    Returns:
        A list of seven numbers, in this order:
        ``[num_blobs, largest_blob_area, mean_blob_area, eccentricity,
        extent, solidity, orientation]``.
        Eccentricity and orientation come from an ellipse fitted to the
        largest blob and stay 0 when that contour has fewer than 5 points.
        The list is all zeros when no blob is found or when the cropped frame
        is empty or contains no valid (non-zero, non-NaN) pixel.
    """
    no_blobs = [0, 0, 0, 0, 0, 0, 0]

    # Optional crop to remove noisy borders. Slicing yields a view, so take a
    # copy before writing NaNs; otherwise the caller's array would be
    # modified. Integer input is promoted to float because it cannot hold NaN.
    cropped = np.asarray(frame_np)[1:, 1:]
    if np.issubdtype(cropped.dtype, np.floating):
        frame = cropped.copy()
    else:
        frame = cropped.astype(np.float64)

    # Replace 0s with NaN so they are imputed instead of counted as readings.
    frame[frame == 0] = np.nan

    # Nothing to analyse: bail out before the imputer (which rejects empty
    # input and drops columns that have no observed value at all).
    if frame.size == 0 or np.isnan(frame).all():
        return no_blobs

    frame_imputed = SimpleImputer(strategy="mean").fit_transform(frame)

    # Thresholding for hot regions
    threshold_val = np.mean(frame_imputed) + 1.5 * np.std(frame_imputed)
    binary_mask = (frame_imputed > threshold_val).astype(np.uint8)

    # Contour detection. findContours returns (contours, hierarchy) in
    # OpenCV 2.x/4.x and (image, contours, hierarchy) in 3.x; index -2 is the
    # contour list in every case.
    contours = cv2.findContours(
        binary_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )[-2]

    num_blobs = len(contours)
    if num_blobs == 0:
        return no_blobs

    # Compute each area once and reuse it for the largest-blob lookup.
    areas = [cv2.contourArea(c) for c in contours]
    largest_idx = int(np.argmax(areas))  # first maximum, like max(..., key=)
    largest_blob = contours[largest_idx]
    largest_blob_area = areas[largest_idx]
    mean_blob_area = np.mean(areas)

    eccentricity = 0
    orientation = 0
    # cv2.fitEllipse needs at least 5 contour points.
    if len(largest_blob) >= 5:
        _center, axes, angle = cv2.fitEllipse(largest_blob)
        major, minor = max(axes), min(axes)
        if major != 0:
            eccentricity = np.sqrt(1 - (minor / major) ** 2)
        orientation = angle

    # Extent: blob area relative to its axis-aligned bounding box.
    _x, _y, w, h = cv2.boundingRect(largest_blob)
    rect_area = w * h
    extent = largest_blob_area / rect_area if rect_area else 0

    # Solidity: blob area relative to its convex hull.
    hull_area = cv2.contourArea(cv2.convexHull(largest_blob))
    solidity = largest_blob_area / hull_area if hull_area else 0

    return [
        num_blobs,
        largest_blob_area,
        mean_blob_area,
        eccentricity,
        extent,
        solidity,
        orientation,
    ]

# === Main Feature Extraction ===

# Path to your thermal frame folder: the directory that is scanned for the
# per-frame CSV files named "Frame_<number>.csv".
# NOTE(review): this is a machine-specific absolute path; adjust it before
# running on another machine.
frames_dir = "/Users/beyzabalota/Desktop/ENS_Graduation_Project-devBerke/data/images"


def extract_number(f):
    """Return the integer frame index embedded in a ``Frame_<n>.csv`` name.

    Names that do not contain the ``Frame_<digits>.csv`` pattern yield -1, so
    they sort ahead of every properly numbered frame.
    """
    found = re.search(r'Frame_(\d+)\.csv', f)
    if found is None:
        return -1
    return int(found.group(1))

# Collect the frame CSVs and order them by their numeric index so that
# Frame_2 comes before Frame_10 (plain string sorting would not do that).
frame_files = sorted(
    (f for f in os.listdir(frames_dir) if f.startswith("Frame_") and f.endswith(".csv")),
    key=extract_number,
)


feature_list = []

print(f"📂 Total Frames found: {len(frame_files)}")

for file_name in frame_files:
    file_path = os.path.join(frames_dir, file_name)

    # pd.read_csv raises EmptyDataError for a file with no data instead of
    # returning an empty DataFrame, so that case has to be caught explicitly
    # or one bad file aborts the whole run.
    try:
        frame_data = pd.read_csv(file_path, header=None)
    except pd.errors.EmptyDataError:
        print(f"⚠️ Skipping {file_name} – Empty frame")
        continue

    if frame_data.empty:
        print(f"⚠️ Skipping {file_name} – Empty frame")
        continue

    # Convert to numpy and clean: zeros are treated as missing readings.
    frame_np = frame_data.to_numpy(dtype=np.float32)
    frame_np[frame_np == 0] = np.nan

    # A frame without a single valid pixel cannot be imputed (the imputer
    # drops every column) and has no meaningful statistics.
    if np.isnan(frame_np).all():
        print(f"⚠️ Skipping {file_name} – No valid pixels")
        continue

    frame_np_imputed = SimpleImputer(strategy="mean").fit_transform(frame_np)
    flat_pixels = frame_np_imputed.ravel()

    # === Statistical features ===
    min_temp = np.min(frame_np_imputed)
    max_temp = np.max(frame_np_imputed)
    mean_temp = np.mean(frame_np_imputed)
    std_temp = np.std(frame_np_imputed)
    median_temp = np.median(frame_np_imputed)
    q1, q3 = np.percentile(frame_np_imputed, [25, 75])
    iqr = q3 - q1
    skewness = skew(flat_pixels)
    kurt = kurtosis(flat_pixels)
    # Same hot-pixel rule as the geometric extractor: mean + 1.5 * std.
    high_temp_pixels = np.sum(frame_np_imputed > (mean_temp + 1.5 * std_temp))

    # === Geometric features ===
    # The helper receives the NaN-marked (not yet imputed) frame and does its
    # own border crop and imputation.
    geo_features = extract_geometric_features(frame_np)

    # === Append to list ===
    feature_list.append([
        file_name,
        min_temp, max_temp, mean_temp, std_temp, median_temp,
        q1, q3, iqr, skewness, kurt, high_temp_pixels,
        *geo_features,
    ])

# === Save as DataFrame ===
# Column order must match the order of the values appended above.
columns = [
    "Frame", "Min_Temp", "Max_Temp", "Mean_Temp", "Std_Temp", "Median_Temp",
    "Q1", "Q3", "IQR", "Skewness", "Kurtosis", "High_Temp_Pixels",
    "Num_Blobs", "Largest_Blob_Area", "Mean_Blob_Area",
    "Eccentricity", "Extent", "Solidity", "Orientation"
]

df_features = pd.DataFrame(feature_list, columns=columns)
df_features.to_csv("thermal_geometric_features.csv", index=False)
print("✅ All features extracted. Saved as thermal_geometric_features.csv")


# Output:
# 📂 Total Frames found: 1564
# ✅ All features extracted. Saved as thermal_geometric_features.csv
