In [2]:
import pandas as pd
import os
import numpy as np
import cv2
from tqdm import tqdm

# Extract Picture Data

## Color Extraction

In [8]:
# Folder containing images
FOLDER_PATH = "resources/images"  # Change this to your folder path
OUTPUT_FILE = "resources/color_data.csv"

# Define color spaces and their channels
COLOR_SPACES = {
    "RGB": (None, ["R", "G", "B"]),
    "LAB": (cv2.COLOR_BGR2LAB, ["L", "A", "B"]),
    "HSV": (cv2.COLOR_BGR2HSV, ["H", "S", "V"]),
    "GRAY": (cv2.COLOR_BGR2GRAY, ["Gray"])
}

def extract_color(image, conversion_code):
    """Convert image to specified color space and return mean & std per channel."""
    img = cv2.cvtColor(image, conversion_code) if conversion_code else image
    # If the image has 3 dimensions (RGB, LAB, HSV), calculate mean and std for each channel
    if img.ndim == 3:
        return np.concatenate([np.mean(img, axis=(0, 1)), np.std(img, axis=(0, 1))]).astype(float)
    # If the image has only 1 channel (GRAY), return mean and std for the grayscale channel
    else:
        return [np.mean(img), np.std(img)]

# Get list of image files
image_files = [f for f in os.listdir(FOLDER_PATH) if f.lower().endswith(('png', 'jpg', 'jpeg', 'bmp', 'tiff'))]

# Process images with a progress bar
data = []
for file_name in tqdm(image_files, desc="Processing Images", unit="image"):
    image = cv2.imread(os.path.join(FOLDER_PATH, file_name))
    if image is not None:
        row = [file_name] + [val for space, (conv, _) in COLOR_SPACES.itempps() for val in extract_color(image, conv)]
        data.append(row)

# Generate column names dynamically
columns = ["Filename"] + [f"{stat}_{space}_{ch}" for space, (_, chs) in COLOR_SPACES.items() for stat in ["Mean", "Std"] for ch in chs]

# Save results to CSV
pd.DataFrame(data, columns=columns).to_csv(OUTPUT_FILE, index=False)
print(f"\nColor statistics extraction complete! Data saved to {OUTPUT_FILE}")


Processing Images:   0%|          | 0/337 [00:00<?, ?image/s]


AttributeError: 'dict' object has no attribute 'itempps'

## Texture Extraction

In [None]:
import os
import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm
from skimage.feature import graycomatrix, graycoprops, local_binary_pattern, hog

# Folder containing images
FOLDER_PATH = "resources/images"
OUTPUT_FILE = "resources/texture_data.csv"

# GLCM Features
GLCM_PROPS = ['contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation', 'ASM']

def extract_glcm_features(image_gray):
    """Extracts GLCM features from grayscale image."""
    glcm = graycomatrix(image_gray, distances=[1], angles=[0], levels=256, symmetric=True, normed=True)
    return [graycoprops(glcm, prop).flatten()[0] for prop in GLCM_PROPS]

def extract_lbp_features(image_gray):
    """Extracts Local Binary Pattern (LBP) histogram features."""
    lbp = local_binary_pattern(image_gray, P=8, R=1, method="uniform")
    hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0, 11), range=(0, 10))
    return hist.astype(float)

def extract_hog_features(image_gray):
    """Extracts Histogram of Oriented Gradients (HOG) features."""
    return hog(image_gray, pixels_per_cell=(8, 8), cells_per_block=(2, 2), feature_vector=True)

# Process all images in the folder
data = []
image_files = [f for f in os.listdir(FOLDER_PATH) if f.lower().endswith(('png', 'jpg', 'jpeg', 'bmp', 'tiff'))]

for file_name in tqdm(image_files, desc="Extracting Textures", unit="image"):
    image = cv2.imread(os.path.join(FOLDER_PATH, file_name), cv2.IMREAD_GRAYSCALE)
    if image is not None:
        glcm_features = extract_glcm_features(image)
        lbp_features = extract_lbp_features(image)
        hog_features = extract_hog_features(image)[:10]  # Reduce HOG feature size for storage

        row = [file_name] + glcm_features + lbp_features.tolist() + hog_features.tolist()
        data.append(row)

# Generate column names dynamically
columns = ["Filename"] + [f"GLCM_{prop}" for prop in GLCM_PROPS] + [f"LBP_{i}" for i in range(10)] + [f"HOG_{i}" for i in range(10)]

# Save to CSV
pd.DataFrame(data, columns=columns).to_csv(OUTPUT_FILE, index=False)
print(f"\nTexture extraction complete! Data saved to {OUTPUT_FILE}")


# Train and Evaluate

## Load Data

In [3]:
df_color = pd.read_csv('resources/color_data.csv')
df_texture = pd.read_csv('resources/texture_data.csv')
df_weight = pd.read_csv('resources/weight_data.csv')
df_color.sort_values(by='Filename', inplace=True, ignore_index=True)
df_texture.sort_values(by='Filename', inplace=True, ignore_index=True)
df_weight.sort_values(by='Filename', inplace=True, ignore_index=True)
df = pd.merge(df_weight, df_color, on='Filename')
df = pd.merge(df, df_texture, on='Filename')

    
df[['Day', 'Temp', 'Rep']] = df['Filename'].str.extract(r'(\d+)_(\d+)_(\d+)')
df[['Day', 'Temp', 'Rep']] = df[['Day', 'Temp', 'Rep']].astype(float).astype('Int64')
df["Yellow"] = ((df["Mean_RGB_R"] + df["Mean_RGB_G"]) - df["Mean_RGB_B"]) / 2
df["Cyan"] = ((df["Mean_RGB_G"] + df["Mean_RGB_B"]) - df["Mean_RGB_R"]) / 2
df["Magenta"] = df["Mean_RGB_R"] + df["Mean_RGB_B"]
df["Brightness"] = (df["Mean_RGB_R"] + df["Mean_RGB_G"] + df["Mean_RGB_B"]) / 3
df["Chroma"] = df[["Mean_RGB_R", "Mean_RGB_G", "Mean_RGB_B"]].max(axis=1) - df[["Mean_RGB_R", "Mean_RGB_G", "Mean_RGB_B"]].min(axis=1)

df.to_csv('resources/combined_data.csv', index=False)

In [4]:
df.columns

Index(['Filename', 'Weight', '%_Weight_Loss', 'Mean_RGB_R', 'Mean_RGB_G',
       'Mean_RGB_B', 'Std_RGB_R', 'Std_RGB_G', 'Std_RGB_B', 'Mean_LAB_L',
       'Mean_LAB_A', 'Mean_LAB_B', 'Std_LAB_L', 'Std_LAB_A', 'Std_LAB_B',
       'Mean_HSV_H', 'Mean_HSV_S', 'Mean_HSV_V', 'Std_HSV_H', 'Std_HSV_S',
       'Std_HSV_V', 'Mean_GRAY_Gray', 'Std_GRAY_Gray', 'GLCM_contrast',
       'GLCM_dissimilarity', 'GLCM_homogeneity', 'GLCM_energy',
       'GLCM_correlation', 'GLCM_ASM', 'LBP_0', 'LBP_1', 'LBP_2', 'LBP_3',
       'LBP_4', 'LBP_5', 'LBP_6', 'LBP_7', 'LBP_8', 'LBP_9', 'Day', 'Temp',
       'Rep', 'Yellow', 'Cyan', 'Magenta', 'Brightness', 'Chroma'],
      dtype='object')

## Linear Regression

In [5]:
# ======================
# 1) Define your feature groups
# ======================
features = {
    "RGB": [
        "Mean_RGB_R", "Std_RGB_R", "Mean_RGB_G", "Std_RGB_G", "Mean_RGB_B", "Std_RGB_B"
    ],
    "Lab": [
        "Mean_LAB_L", "Std_LAB_L", "Mean_LAB_A", "Std_LAB_A", "Mean_LAB_B", "Std_LAB_B"
    ],
    "HSV": [
        "Mean_HSV_H", "Std_HSV_H", "Mean_HSV_S", "Std_HSV_S", "Mean_HSV_V", "Std_HSV_V"
    ],
    "GRAY": [
        "Mean_GRAY_Gray", "Std_GRAY_Gray"
    ],
    "GLCM": [
        "GLCM_contrast", "GLCM_dissimilarity", "GLCM_homogeneity", 
        "GLCM_energy", "GLCM_correlation"
    ],
    "LBP": [
        "LBP_0", "LBP_1", "LBP_2", "LBP_3", "LBP_4", "LBP_5", "LBP_6", "LBP_7", "LBP_8", "LBP_9"
    ],
    "Temp": ["Temp"],
    "Yellow": ["Yellow"],
    "Cyan": ["Cyan"],
    "Magenta": ["Magenta"],
    "Brightness": ["Brightness"],
    "Chroma": ["Chroma"],
    "Day": ["Day"],
}

## Train and Test

### 0. Function for Process Image

In [4]:
import cv2

import numpy as np

def color_extractor(image_path):
    """Extract mean and std color statistics for a single image and return as a dictionary with name tags."""

    # Define color spaces and their channels
    COLOR_SPACES = {
        "RGB": (None, ["R", "G", "B"]),
        "LAB": (cv2.COLOR_BGR2LAB, ["L", "A", "B"]),
        "HSV": (cv2.COLOR_BGR2HSV, ["H", "S", "V"]),
        "GRAY": (cv2.COLOR_BGR2GRAY, ["Gray"])
    }

    def extract_color(image, conversion_code):
        """Convert image to specified color space and return mean & std per channel."""
        img = cv2.cvtColor(image, conversion_code) if conversion_code else image
        # If the image has 3 dimensions (RGB, LAB, HSV), calculate mean and std for each channel
        if img.ndim == 3:
            return np.concatenate([np.mean(img, axis=(0, 1)), np.std(img, axis=(0, 1))]).astype(float)
        # If the image has only 1 channel (GRAY), return mean and std for the grayscale channel
        else:
            return [np.mean(img), np.std(img)]

    # Read the image
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError(f"Image at {image_path} could not be loaded.")

    # Initialize the dictionary to store results with name tags
    color_stats_dict = {}

    # Extract color statistics for each color space
    rgb_means = None
    for space, (conv, channels) in COLOR_SPACES.items():
        stats = extract_color(image, conv)
        for i, channel in enumerate(channels):
            color_stats_dict[f"Mean_{space}_{channel}"] = float(stats[i])  # Convert to float
            color_stats_dict[f"Std_{space}_{channel}"] = float(stats[i + len(channels)])  # Convert to float
            if space == "RGB" and channel == "B":  # Make sure we capture RGB values
                rgb_means = stats[:3]  # Store the RGB means (R, G, B)

    # If RGB means are available, calculate the additional color features
    if rgb_means is not None:
        R, G, B = rgb_means
        # Compute additional color features and ensure they are float
        color_stats_dict["Yellow"] = float((R + G - B) / 2)
        color_stats_dict["Cyan"] = float((G + B - R) / 2)
        color_stats_dict["Magenta"] = float(R + B)
        color_stats_dict["Brightness"] = float((R + G + B) / 3)
        color_stats_dict["Chroma"] = float(max(R, G, B) - min(R, G, B))

    return color_stats_dict


In [5]:
import cv2
import numpy as np
from skimage.feature import graycomatrix, graycoprops, local_binary_pattern, hog

# GLCM Features
GLCM_PROPS = ['contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation', 'ASM']

def extract_glcm_features(image_gray):
    """Extracts GLCM features from grayscale image."""
    glcm = graycomatrix(image_gray, distances=[1], angles=[0], levels=256, symmetric=True, normed=True)
    return {f"GLCM_{prop}": float(graycoprops(glcm, prop).flatten()[0]) for prop in GLCM_PROPS}  # Convert np.float64 to float

def extract_lbp_features(image_gray):
    """Extracts Local Binary Pattern (LBP) histogram features."""
    lbp = local_binary_pattern(image_gray, P=8, R=1, method="uniform")
    hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0, 11), range=(0, 10))
    return {f"LBP_{i}": float(hist[i]) for i in range(10)}  # Convert all LBP values to float

def texture_extractor(image_path):
    """Extract texture features from a single image and return as a dictionary with labels and values."""
    
    # Read the image in grayscale
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        raise ValueError(f"Image at {image_path} could not be loaded.")

    # Extract texture features
    glcm_features = extract_glcm_features(image)
    lbp_features = extract_lbp_features(image)

    # Combine features into a single dictionary
    texture_features = {**glcm_features, **lbp_features}

    return texture_features


### 1. Train

In [8]:

from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
import joblib
import pandas as pd
import itertools


In [2]:
import joblib

class ModelLoader:
    def __init__(self, model, scaler=None, feature_names=None):
        self.model = model
        self.scaler = scaler
        self.feature_names = feature_names

    def fit(self, X, y):
        if self.scaler:
            X_scaled = self.scaler.fit_transform(X)
        else:
            X_scaled = X  # If no scaler, use raw data
        self.model.fit(X_scaled, y)

    def predict(self, X):
        if self.scaler:
            X_scaled = self.scaler.transform(X)
        else:
            X_scaled = X  # If no scaler, use raw data
        return self.model.predict(X_scaled)

    def save(self, filepath):
        """Save the model, scaler, and feature names with Joblib"""
        with open(filepath, "wb") as f:
            joblib.dump({
                "model": self.model,
                "scaler": self.scaler,
                "feature_names": self.feature_names
            }, f)
        print(f"✅ Model saved at {filepath}")

    @classmethod
    def load(cls, filepath):
        """Load the model with its scaler and feature names"""
        try:
            with open(filepath, "rb") as f:
                data = joblib.load(f)
            return cls(
                model=data["model"],
                scaler=data["scaler"],
                feature_names=data["feature_names"]
            )
        except Exception as e:
            print(f"❌ Error loading model: {e}")
            return None


In [10]:
import itertools
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
import joblib
from tqdm import tqdm
import logging

# ตั้งค่า logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger()

# สมมติว่า ModelLoader ถูกกำหนดไว้แล้ว
# สมมติว่ามี DataFrame ชื่อ df และ column เป้าหมายคือ "%_Weight_Loss"

# กำหนด feature groups
features = {
    "RGB": ["Mean_RGB_R", "Std_RGB_R", "Mean_RGB_G", "Std_RGB_G", "Mean_RGB_B", "Std_RGB_B"],
    "Lab": ["Mean_LAB_L", "Std_LAB_L", "Mean_LAB_A", "Std_LAB_A", "Mean_LAB_B", "Std_LAB_B"],
    "HSV": ["Mean_HSV_H", "Std_HSV_H", "Mean_HSV_S", "Std_HSV_S", "Mean_HSV_V", "Std_HSV_V"],
    "GRAY": ["Mean_GRAY_Gray", "Std_GRAY_Gray"],
    "GLCM": ["GLCM_contrast", "GLCM_dissimilarity", "GLCM_homogeneity", "GLCM_energy", "GLCM_correlation"],
    "LBP": ["LBP_0", "LBP_1", "LBP_2", "LBP_3", "LBP_4", "LBP_5", "LBP_6", "LBP_7", "LBP_8", "LBP_9"],
    "Temp": ["Temp"],
    "Yellow": ["Yellow"],
    "Cyan": ["Cyan"],
    "Magenta": ["Magenta"],
    "Brightness": ["Brightness"],
    "Chroma": ["Chroma"],
    "Day": ["Day"],
}

# สร้างรายชื่อกลุ่ม feature
feature_groups = list(features.keys())

# เตรียมข้อมูล
selected_cols = [col for group in features.values() for col in group]
X = df[selected_cols]
y = df["%_Weight_Loss"]

# แบ่งข้อมูล train-test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# บันทึก performance
performance_records = []

# ค่าที่ดีที่สุดเริ่มต้น
best_model = None
best_score = -float('inf')

logger.info("เริ่มกระบวนการฝึกโมเดลโดยใช้ feature group combinations...")

# วนลูปตามกลุ่ม feature
for r in tqdm(range(1, len(feature_groups) + 1), desc="Group Combinations", unit="group"):
    for group_combination in tqdm(itertools.combinations(feature_groups, r), desc="Training Models", unit="model", leave=False):
        selected_features = [feature for group in group_combination for feature in features[group]]

        logger.info(f"ฝึกโมเดลโดยใช้กลุ่ม feature: {group_combination}")

        # เลือก feature ตามกลุ่มที่กำหนด
        X_train_comb = X_train[selected_features]
        X_test_comb = X_test[selected_features]

        # สร้าง model
        model = LinearRegression()
        scaler = StandardScaler()
        model_with_features = ModelLoader(model, scaler, selected_features)

        try:
            # Train model
            model_with_features.fit(X_train_comb, y_train)

            # คำนวณค่า metric
            r2_train = model_with_features.model.score(scaler.transform(X_train_comb), y_train)
            y_pred_test = model_with_features.predict(X_test_comb)
            r2_test = r2_score(y_test, y_pred_test)
            mse_test = mean_squared_error(y_test, y_pred_test)

            # บันทึกผลลัพธ์
            performance_records.append({
                'Feature Group': group_combination,
                'R2_Train': r2_train,
                'R2_Test': r2_test,
                'MSE_Test': mse_test
            })

            logger.info(f"Model {group_combination} -> R2_Test: {r2_test:.4f}, MSE_Test: {mse_test:.4f}")

            # อัปเดต best model
            if r2_test > best_score:
                best_score = r2_test
                best_model = model_with_features

        except Exception as e:
            logger.error(f"เกิดข้อผิดพลาดในการฝึกโมเดลสำหรับ {group_combination}: {e}")

# บันทึกผลลัพธ์ลงไฟล์ CSV
performance_df = pd.DataFrame(performance_records)
performance_df.to_csv('output/model_performance.csv', index=False)

# บันทึกโมเดลที่ดีที่สุด
if best_model:
    best_model.save('output/best_model.pkl')
    logger.info("บันทึก best model ไปที่ 'output/best_model.pkl' สำเร็จ")
else:
    logger.warning("ไม่พบโมเดลที่ดีที่สุด")


2025-03-02 16:35:43,463 - INFO - เริ่มกระบวนการฝึกโมเดลโดยใช้ feature group combinations...
Group Combinations:   0%|          | 0/13 [00:00<?, ?group/s]2025-03-02 16:35:43,469 - INFO - ฝึกโมเดลโดยใช้กลุ่ม feature: ('RGB',)
2025-03-02 16:35:43,479 - INFO - Model ('RGB',) -> R2_Test: 0.6934, MSE_Test: 15.9050
2025-03-02 16:35:43,480 - INFO - ฝึกโมเดลโดยใช้กลุ่ม feature: ('Lab',)
2025-03-02 16:35:43,485 - INFO - Model ('Lab',) -> R2_Test: 0.7820, MSE_Test: 11.3078
2025-03-02 16:35:43,485 - INFO - ฝึกโมเดลโดยใช้กลุ่ม feature: ('HSV',)
2025-03-02 16:35:43,490 - INFO - Model ('HSV',) -> R2_Test: 0.5906, MSE_Test: 21.2346
2025-03-02 16:35:43,491 - INFO - ฝึกโมเดลโดยใช้กลุ่ม feature: ('GRAY',)
2025-03-02 16:35:43,498 - INFO - Model ('GRAY',) -> R2_Test: 0.5780, MSE_Test: 21.8906
2025-03-02 16:35:43,498 - INFO - ฝึกโมเดลโดยใช้กลุ่ม feature: ('GLCM',)
2025-03-02 16:35:43,504 - INFO - Model ('GLCM',) -> R2_Test: 0.5356, MSE_Test: 24.0904
2025-03-02 16:35:43,504 - INFO - ฝึกโมเดลโดยใช้กลุ่ม featu

✅ Model saved at output/best_model.pkl


In [None]:
import joblib
loaded_model = ModelLoader.load('output/best_model.pkl')
print("Feature names used:", loaded_model.feature_names)

color_data = color_extractor('resources/images/2_10_6.png')
texture_data= texture_extractor('resources/images/2_10_6.png')
data = {**color_data, **texture_data, "Temp": 10, "Day": 6}
print(data)

Feature names used: ['Mean_LAB_L', 'Std_LAB_L', 'Mean_LAB_A', 'Std_LAB_A', 'Mean_LAB_B', 'Std_LAB_B', 'Mean_HSV_H', 'Std_HSV_H', 'Mean_HSV_S', 'Std_HSV_S', 'Mean_HSV_V', 'Std_HSV_V', 'Mean_GRAY_Gray', 'Std_GRAY_Gray', 'GLCM_contrast', 'GLCM_dissimilarity', 'GLCM_homogeneity', 'GLCM_energy', 'GLCM_correlation', 'LBP_0', 'LBP_1', 'LBP_2', 'LBP_3', 'LBP_4', 'LBP_5', 'LBP_6', 'LBP_7', 'LBP_8', 'LBP_9', 'Magenta', 'Brightness']
{'Mean_RGB_R': 208.98544109473607, 'Std_RGB_R': 57.94502078034905, 'Mean_RGB_G': 216.9807366183484, 'Std_RGB_G': 40.520221546853385, 'Mean_RGB_B': 214.41141625654404, 'Std_RGB_B': 46.40129917906915, 'Mean_LAB_L': 219.45911463936102, 'Std_LAB_L': 40.311268901598304, 'Mean_LAB_A': 125.65788640051277, 'Std_LAB_A': 6.3675931665973975, 'Mean_LAB_B': 131.99258383982357, 'Std_LAB_B': 10.745423124052909, 'Mean_HSV_H': 58.43734687634466, 'Std_HSV_H': 61.97962283891342, 'Mean_HSV_S': 17.724611896245698, 'Std_HSV_S': 41.88510025268844, 'Mean_HSV_V': 217.86581386931655, 'Std_HSV

In [11]:
for feature in loaded_model.feature_names:
    if feature not in data:
        data[feature] = None
data = {feature: data.get(feature, None) for feature in loaded_model.feature_names}
print(data)

{'Mean_LAB_L': 219.45911463936102, 'Std_LAB_L': 40.311268901598304, 'Mean_LAB_A': 125.65788640051277, 'Std_LAB_A': 6.3675931665973975, 'Mean_LAB_B': 131.99258383982357, 'Std_LAB_B': 10.745423124052909, 'Mean_HSV_H': 58.43734687634466, 'Std_HSV_H': 61.97962283891342, 'Mean_HSV_S': 17.724611896245698, 'Std_HSV_S': 41.88510025268844, 'Mean_HSV_V': 217.86581386931655, 'Std_HSV_V': 40.555092973645586, 'Mean_GRAY_Gray': 215.30469506373709, 'Std_GRAY_Gray': 44.04511401883166, 'GLCM_contrast': 33.49108095566873, 'GLCM_dissimilarity': 1.7650954054732615, 'GLCM_homogeneity': 0.6980660415107351, 'GLCM_energy': 0.30064626476243356, 'GLCM_correlation': 0.9914208211744174, 'LBP_0': 415233.0, 'LBP_1': 897636.0, 'LBP_2': 288535.0, 'LBP_3': 1180634.0, 'LBP_4': 984857.0, 'LBP_5': 1825841.0, 'LBP_6': 962754.0, 'LBP_7': 1218903.0, 'LBP_8': 8315832.0, 'LBP_9': 1758095.0, 'Magenta': 423.3968573512801, 'Brightness': 213.45919798987617}


In [10]:
import pandas as pd
loaded_model.predict(pd.DataFrame([data]))

array([5.06389931])