In [2]:
import pandas as pd
import os
import numpy as np
import cv2
from tqdm import tqdm

# Extract Picture Data

## Color Extraction

In [None]:
# Folder containing images
FOLDER_PATH = "resources/images"  # Change this to your folder path
# CSV file that receives one row of color statistics per image
OUTPUT_FILE = "resources/color_data.csv"

# Define color spaces and their channels
# Maps a color-space label to (OpenCV conversion code, channel labels).
# A conversion code of None means the image is analysed exactly as loaded.
# NOTE(review): every other conversion code here is a BGR2* code, i.e. the loaded
# image is treated as BGR. With no conversion, the "RGB" entry would then label
# channel 0 (blue) as "R" and channel 2 (red) as "B" - confirm whether
# cv2.COLOR_BGR2RGB was intended before relying on the R/B column names.
COLOR_SPACES = {
    "RGB": (None, ["R", "G", "B"]),
    "LAB": (cv2.COLOR_BGR2LAB, ["L", "A", "B"]),
    "HSV": (cv2.COLOR_BGR2HSV, ["H", "S", "V"]),
    "GRAY": (cv2.COLOR_BGR2GRAY, ["Gray"])
}

def extract_color(image, conversion_code):
    """Return per-channel mean and standard deviation of an image in a color space.

    Args:
        image: Pixel array as loaded; channels, if any, are on the last axis.
        conversion_code: OpenCV color-conversion code handed to ``cv2.cvtColor``,
            or ``None`` to analyse ``image`` without converting it.

    Returns:
        1-D float ``numpy.ndarray`` holding all channel means followed by all
        channel standard deviations (``[mean, std]`` for a single-channel result).
    """
    # Compare against None explicitly: OpenCV conversion codes start at 0, so a
    # truthiness test would silently skip a legitimate conversion with code 0.
    img = image if conversion_code is None else cv2.cvtColor(image, conversion_code)

    if img.ndim == 3:
        # Multi-channel result: reduce over the two spatial axes, keep the channel axis
        stats = np.concatenate([np.mean(img, axis=(0, 1)), np.std(img, axis=(0, 1))])
    else:
        # Single-channel result (e.g. grayscale): one mean and one std for the whole image
        stats = [np.mean(img), np.std(img)]

    # Always hand back the same container type regardless of channel count
    return np.asarray(stats, dtype=float)

# Get list of image files (matched by case-insensitive filename suffix)
image_files = [f for f in os.listdir(FOLDER_PATH) if f.lower().endswith(('png', 'jpg', 'jpeg', 'bmp', 'tiff'))]

# Process images with a progress bar: one row of statistics per readable image
data = []
for file_name in tqdm(image_files, desc="Processing Images", unit="image"):
    image = cv2.imread(os.path.join(FOLDER_PATH, file_name))
    if image is None:
        # The file could not be loaded as an image; leave it out of the CSV
        continue

    # Fixed: the original called the non-existent `COLOR_SPACES.itempps()`, which
    # raised AttributeError on the first readable image. Only the conversion code
    # of each entry is needed here, so iterate over the dict values.
    row = [file_name]
    for conv, _ in COLOR_SPACES.values():
        row.extend(extract_color(image, conv))
    data.append(row)

# Generate column names dynamically.
# Order must mirror extract_color's output: per color space, all means first, then all stds.
columns = ["Filename"] + [f"{stat}_{space}_{ch}" for space, (_, chs) in COLOR_SPACES.items() for stat in ["Mean", "Std"] for ch in chs]

# Save results to CSV
pd.DataFrame(data, columns=columns).to_csv(OUTPUT_FILE, index=False)
print(f"\nColor statistics extraction complete! Data saved to {OUTPUT_FILE}")


## Texture Extraction

In [None]:
import os
import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm
from skimage.feature import graycomatrix, graycoprops, local_binary_pattern, hog

# Input image directory and destination CSV for the texture features
FOLDER_PATH = "resources/images"
OUTPUT_FILE = "resources/texture_data.csv"

# GLCM properties to read out, in the order they appear in the output
GLCM_PROPS = ['contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation', 'ASM']

def extract_glcm_features(image_gray):
    """Compute one value per entry of GLCM_PROPS from a grayscale image.

    A single co-occurrence matrix is built (distance 1, angle 0, 256 levels,
    symmetric and normalised); each property is then read from it.

    Returns:
        list with one value per property, in GLCM_PROPS order.
    """
    cooccurrence = graycomatrix(
        image_gray,
        distances=[1],
        angles=[0],
        levels=256,
        symmetric=True,
        normed=True,
    )
    values = []
    for prop in GLCM_PROPS:
        # One distance and one angle were requested; take the first element of the result
        values.append(graycoprops(cooccurrence, prop).flatten()[0])
    return values

def extract_lbp_features(image_gray):
    """Histogram the uniform LBP codes (P=8, R=1) of a grayscale image.

    Returns:
        float array of 10 bin counts for the integer edges 0..10. The counts are
        raw (not normalised), so their magnitude grows with the number of pixels.
    """
    codes = local_binary_pattern(image_gray, P=8, R=1, method="uniform")
    bin_edges = np.arange(0, 11)
    counts, _unused_edges = np.histogram(codes.ravel(), bins=bin_edges, range=(0, 10))
    return counts.astype(float)

def extract_hog_features(image_gray):
    """Compute the HOG descriptor of a grayscale image as a flat feature vector.

    Uses 8x8-pixel cells and 2x2-cell blocks; all other HOG settings are left
    at the library defaults.
    """
    descriptor = hog(
        image_gray,
        pixels_per_cell=(8, 8),
        cells_per_block=(2, 2),
        feature_vector=True,
    )
    return descriptor

# Build one texture-feature row per readable image in FOLDER_PATH
data = []
image_files = [
    f
    for f in os.listdir(FOLDER_PATH)
    if f.lower().endswith(('png', 'jpg', 'jpeg', 'bmp', 'tiff'))
]

for file_name in tqdm(image_files, desc="Extracting Textures", unit="image"):
    image = cv2.imread(os.path.join(FOLDER_PATH, file_name), cv2.IMREAD_GRAYSCALE)
    if image is None:
        # File could not be loaded as an image: leave it out of the table
        continue

    glcm_features = extract_glcm_features(image)
    lbp_features = extract_lbp_features(image)
    # Keep only the first 10 HOG values to limit the stored feature size
    hog_features = extract_hog_features(image)[:10]

    data.append([file_name, *glcm_features, *lbp_features.tolist(), *hog_features.tolist()])

# Column names, in the same order the row values were appended
columns = ["Filename"]
columns += [f"GLCM_{prop}" for prop in GLCM_PROPS]
columns += [f"{prefix}_{i}" for prefix in ("LBP", "HOG") for i in range(10)]

# Write the feature table to CSV
texture_table = pd.DataFrame(data, columns=columns)
texture_table.to_csv(OUTPUT_FILE, index=False)
print(f"\nTexture extraction complete! Data saved to {OUTPUT_FILE}")


# Train and Evaluate

## Load Data

In [2]:
# Load the three per-image tables, each ordered by Filename with a fresh index
df_color, df_texture, df_weight = (
    pd.read_csv(csv_path).sort_values(by='Filename', ignore_index=True)
    for csv_path in (
        'resources/color_data.csv',
        'resources/texture_data.csv',
        'resources/weight_loss_data.csv',
    )
)

# Join weight, color and texture rows on Filename (default inner join:
# a file missing from any one table is dropped from the result)
df = df_weight.merge(df_color, on='Filename').merge(df_texture, on='Filename')

# The first "<digits>_<digits>_<digits>" run in each filename is read as Day, Temp, Rep;
# going through float lets non-matching names become <NA> in the nullable Int64 columns
df[['Day', 'Temp', 'Rep']] = (
    df['Filename']
    .str.extract(r'(\d+)_(\d+)_(\d+)')
    .astype(float)
    .astype('Int64')
)

# Derived color features computed from the mean channel values.
# NOTE(review): Magenta is R + B, whereas Yellow and Cyan follow the pattern
# (x + y - z) / 2. color_extractor uses the same R + B formula, so the two are
# consistent with each other - confirm whether (R + B - G) / 2 was intended.
df = df.assign(
    Yellow=lambda t: ((t["Mean_RGB_R"] + t["Mean_RGB_G"]) - t["Mean_RGB_B"]) / 2,
    Cyan=lambda t: ((t["Mean_RGB_G"] + t["Mean_RGB_B"]) - t["Mean_RGB_R"]) / 2,
    Magenta=lambda t: t["Mean_RGB_R"] + t["Mean_RGB_B"],
    Brightness=lambda t: (t["Mean_RGB_R"] + t["Mean_RGB_G"] + t["Mean_RGB_B"]) / 3,
    Chroma=lambda t: (
        t[["Mean_RGB_R", "Mean_RGB_G", "Mean_RGB_B"]].max(axis=1)
        - t[["Mean_RGB_R", "Mean_RGB_G", "Mean_RGB_B"]].min(axis=1)
    ),
)

# Persist the combined table for later use
df.to_csv('resources/combined_data.csv', index=False)

In [3]:
# Display the column names of the merged feature table
df.columns

Index(['Filename', 'Weight', '%_Weight_Loss', 'Mean_RGB_R', 'Mean_RGB_G',
       'Mean_RGB_B', 'Std_RGB_R', 'Std_RGB_G', 'Std_RGB_B', 'Mean_LAB_L',
       'Mean_LAB_A', 'Mean_LAB_B', 'Std_LAB_L', 'Std_LAB_A', 'Std_LAB_B',
       'Mean_HSV_H', 'Mean_HSV_S', 'Mean_HSV_V', 'Std_HSV_H', 'Std_HSV_S',
       'Std_HSV_V', 'Mean_GRAY_Gray', 'Std_GRAY_Gray', 'GLCM_contrast',
       'GLCM_dissimilarity', 'GLCM_homogeneity', 'GLCM_energy',
       'GLCM_correlation', 'GLCM_ASM', 'LBP_0', 'LBP_1', 'LBP_2', 'LBP_3',
       'LBP_4', 'LBP_5', 'LBP_6', 'LBP_7', 'LBP_8', 'LBP_9', 'Day', 'Temp',
       'Rep', 'Yellow', 'Cyan', 'Magenta', 'Brightness', 'Chroma'],
      dtype='object')

## Linear Regression

In [4]:
# ======================
# 1) Define your feature groups
# ======================
# Maps a group name to the DataFrame columns that belong to it.
# NOTE(review): the merged table also has 'GLCM_ASM' and 'Rep' columns, which are
# not part of any group here - confirm that leaving them out is intentional.
# NOTE(review): an identical `features` dict is defined again in the training cell.
features = {
    "RGB": [
        "Mean_RGB_R", "Std_RGB_R", "Mean_RGB_G", "Std_RGB_G", "Mean_RGB_B", "Std_RGB_B"
    ],
    "Lab": [
        "Mean_LAB_L", "Std_LAB_L", "Mean_LAB_A", "Std_LAB_A", "Mean_LAB_B", "Std_LAB_B"
    ],
    "HSV": [
        "Mean_HSV_H", "Std_HSV_H", "Mean_HSV_S", "Std_HSV_S", "Mean_HSV_V", "Std_HSV_V"
    ],
    "GRAY": [
        "Mean_GRAY_Gray", "Std_GRAY_Gray"
    ],
    "GLCM": [
        "GLCM_contrast", "GLCM_dissimilarity", "GLCM_homogeneity", 
        "GLCM_energy", "GLCM_correlation"
    ],
    "LBP": [
        "LBP_0", "LBP_1", "LBP_2", "LBP_3", "LBP_4", "LBP_5", "LBP_6", "LBP_7", "LBP_8", "LBP_9"
    ],
    # Single-column groups
    "Temp": ["Temp"],
    "Yellow": ["Yellow"],
    "Cyan": ["Cyan"],
    "Magenta": ["Magenta"],
    "Brightness": ["Brightness"],
    "Chroma": ["Chroma"],
    "Day": ["Day"],
}

## Train and Test

### 0. Functions for Processing Images

In [5]:
import cv2
import numpy as np

def color_extractor(image_path):
    """Compute named color statistics for the image stored at ``image_path``.

    For each color space the per-channel mean and standard deviation are stored
    under ``Mean_<space>_<channel>`` / ``Std_<space>_<channel>``. Five derived
    values (Yellow, Cyan, Magenta, Brightness, Chroma) are then computed from the
    three "RGB" channel means. Every value is a plain Python float.

    Raises:
        ValueError: if the image cannot be loaded.

    NOTE(review): the "RGB" entry applies no conversion while every other entry
    uses a BGR2* code, so the R/B labels may be swapped relative to the true
    channels - confirm. Magenta is R + B (no "- G", no "/ 2"), unlike Yellow and
    Cyan; this matches how the training table is built, so change both together.
    """
    # (label, conversion code or None for "use pixels as loaded", channel labels)
    spaces = (
        ("RGB", None, ("R", "G", "B")),
        ("LAB", cv2.COLOR_BGR2LAB, ("L", "A", "B")),
        ("HSV", cv2.COLOR_BGR2HSV, ("H", "S", "V")),
        ("GRAY", cv2.COLOR_BGR2GRAY, ("Gray",)),
    )

    image = cv2.imread(image_path)
    if image is None:
        raise ValueError(f"Image at {image_path} could not be loaded.")

    def channel_stats(code):
        """Return (means, stds) per channel after the optional conversion."""
        converted = image if not code else cv2.cvtColor(image, code)
        if converted.ndim != 3:
            # Single-channel result: one mean / std for the whole image
            return [np.mean(converted)], [np.std(converted)]
        return np.mean(converted, axis=(0, 1)), np.std(converted, axis=(0, 1))

    features_by_name = {}
    rgb_means = None
    for space, code, labels in spaces:
        means, stds = channel_stats(code)
        for label, mean_value, std_value in zip(labels, means, stds):
            features_by_name[f"Mean_{space}_{label}"] = float(mean_value)
            features_by_name[f"Std_{space}_{label}"] = float(std_value)
        if space == "RGB":
            # Keep the three channel means for the derived features below
            rgb_means = means

    R, G, B = rgb_means
    features_by_name["Yellow"] = float((R + G - B) / 2)
    features_by_name["Cyan"] = float((G + B - R) / 2)
    features_by_name["Magenta"] = float(R + B)
    features_by_name["Brightness"] = float((R + G + B) / 3)
    features_by_name["Chroma"] = float(max(R, G, B) - min(R, G, B))

    return features_by_name


In [6]:
import cv2
import numpy as np
from skimage.feature import graycomatrix, graycoprops, local_binary_pattern, hog

# GLCM properties to read out, in the order they appear in the output
GLCM_PROPS = ['contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation', 'ASM']

def extract_glcm_features(image_gray):
    """Compute the GLCM properties of a grayscale image as a name -> float dict.

    A single co-occurrence matrix is built (distance 1, angle 0, 256 levels,
    symmetric and normalised); each property in GLCM_PROPS is stored under
    the key ``GLCM_<property>``.

    NOTE(review): this redefines the list-returning function of the same name
    from the texture-extraction cell; re-running that cell after this one would
    pick up this dict-returning version.
    """
    cooccurrence = graycomatrix(
        image_gray,
        distances=[1],
        angles=[0],
        levels=256,
        symmetric=True,
        normed=True,
    )
    named_values = {}
    for prop in GLCM_PROPS:
        # One distance and one angle were requested; take the first element, as a plain float
        named_values[f"GLCM_{prop}"] = float(graycoprops(cooccurrence, prop).flatten()[0])
    return named_values

def extract_lbp_features(image_gray):
    """Histogram the uniform LBP codes (P=8, R=1) of a grayscale image.

    Returns:
        dict mapping ``LBP_0`` .. ``LBP_9`` to the raw (non-normalised) count of
        each of the 10 bins, as plain Python floats.

    NOTE(review): this redefines the array-returning function of the same name
    from the texture-extraction cell.
    """
    codes = local_binary_pattern(image_gray, P=8, R=1, method="uniform")
    counts, _unused_edges = np.histogram(codes.ravel(), bins=np.arange(0, 11), range=(0, 10))
    return {f"LBP_{index}": float(count) for index, count in enumerate(counts)}

def texture_extractor(image_path):
    """Load an image as grayscale and return its GLCM and LBP features in one dict.

    Raises:
        ValueError: if the image cannot be loaded.
    """
    gray = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if gray is None:
        raise ValueError(f"Image at {image_path} could not be loaded.")

    # GLCM entries first, then LBP entries (an LBP key would win on a name clash)
    combined = dict(extract_glcm_features(gray))
    combined.update(extract_lbp_features(gray))
    return combined


### 1. Train

In [7]:

from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
import joblib
import pandas as pd
import itertools


In [8]:
class ModelWithFeatures:
    """Bundle an estimator with its scaler and the feature names it was trained on.

    Keeping the three together means a saved model carries everything needed to
    turn a table of raw features into predictions.
    """

    def __init__(self, model, scaler, feature_names):
        # model: estimator exposing fit(X, y) and predict(X)
        # scaler: transformer exposing fit_transform(X) and transform(X)
        # feature_names: ordered column names the model expects (may be None)
        self.model = model
        self.scaler = scaler
        self.feature_names = feature_names

    def _select_features(self, X):
        """Return X restricted to, and ordered by, ``feature_names`` when X has labelled columns."""
        if self.feature_names is not None and hasattr(X, "columns"):
            # Callers may pass extra or differently ordered columns; a missing
            # column raises KeyError here instead of a scaler error later.
            return X[list(self.feature_names)]
        # Unlabelled input (e.g. a NumPy array) is used as given
        return X

    def fit(self, X, y):
        """Fit the scaler on X, then fit the model on the scaled features."""
        X_scaled = self.scaler.fit_transform(self._select_features(X))
        self.model.fit(X_scaled, y)

    def predict(self, X):
        """Scale X with the already-fitted scaler and return the model's predictions."""
        X_scaled = self.scaler.transform(self._select_features(X))
        return self.model.predict(X_scaled)

    def save(self, filepath):
        """Serialise this object (model, scaler and feature names) to ``filepath``."""
        import os

        # Create the parent directory so saving into a fresh location does not fail
        if isinstance(filepath, (str, os.PathLike)):
            parent = os.path.dirname(filepath)
            if parent:
                os.makedirs(parent, exist_ok=True)
        joblib.dump(self, filepath)

    @classmethod
    def load(cls, filepath):
        """Load an object previously written by ``save``.

        NOTE: joblib.load unpickles the file, which can execute arbitrary code;
        only load files from a trusted source.
        """
        return joblib.load(filepath)

In [3]:
import itertools
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
import joblib
from tqdm import tqdm
import logging

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger()

# Assumes ModelWithFeatures has already been defined
# Assumes a DataFrame named df exists and that the target column is "%_Weight_Loss"

# Define the feature groups (group name -> DataFrame columns)
features = {
    "RGB": ["Mean_RGB_R", "Std_RGB_R", "Mean_RGB_G", "Std_RGB_G", "Mean_RGB_B", "Std_RGB_B"],
    "Lab": ["Mean_LAB_L", "Std_LAB_L", "Mean_LAB_A", "Std_LAB_A", "Mean_LAB_B", "Std_LAB_B"],
    "HSV": ["Mean_HSV_H", "Std_HSV_H", "Mean_HSV_S", "Std_HSV_S", "Mean_HSV_V", "Std_HSV_V"],
    "GRAY": ["Mean_GRAY_Gray", "Std_GRAY_Gray"],
    "GLCM": ["GLCM_contrast", "GLCM_dissimilarity", "GLCM_homogeneity", "GLCM_energy", "GLCM_correlation"],
    "LBP": ["LBP_0", "LBP_1", "LBP_2", "LBP_3", "LBP_4", "LBP_5", "LBP_6", "LBP_7", "LBP_8", "LBP_9"],
    "Temp": ["Temp"],
    "Yellow": ["Yellow"],
    "Cyan": ["Cyan"],
    "Magenta": ["Magenta"],
    "Brightness": ["Brightness"],
    "Chroma": ["Chroma"],
    "Day": ["Day"],
}

# List of feature-group names
feature_groups = list(features.keys())

# Prepare the data
selected_cols = [col for group in features.values() for col in group]
X = df[selected_cols]
y = df["%_Weight_Loss"]

# Train/test split (fixed seed so the split is reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Per-model performance records
performance_records = []

# Best model found so far
best_model = None
best_score = -float('inf')

logger.info("เริ่มกระบวนการฝึกโมเดลโดยใช้ feature group combinations...")

# Try every non-empty combination of feature groups (2**13 - 1 = 8191 models for 13 groups).
# NOTE(review): models are ranked by R2 on the same test split that is reported, so the
# winning R2_Test is an optimistic estimate - consider a validation split or cross-validation.
for r in tqdm(range(1, len(feature_groups) + 1), desc="Group Combinations", unit="group"):
    for group_combination in tqdm(itertools.combinations(feature_groups, r), desc="Training Models", unit="model", leave=False):
        selected_features = [feature for group in group_combination for feature in features[group]]

        logger.info(f"ฝึกโมเดลโดยใช้กลุ่ม feature: {group_combination}")

        # Restrict both splits to the columns of this combination
        X_train_comb = X_train[selected_features]
        X_test_comb = X_test[selected_features]

        # Build a fresh model and scaler for this combination
        model = LinearRegression()
        scaler = StandardScaler()
        model_with_features = ModelWithFeatures(model, scaler, selected_features)

        try:
            # Train the model
            model_with_features.fit(X_train_comb, y_train)

            # Compute the metrics. Both scores go through the wrapper's predict so
            # that training and test data are scaled by exactly the same code path.
            r2_train = r2_score(y_train, model_with_features.predict(X_train_comb))
            y_pred_test = model_with_features.predict(X_test_comb)
            r2_test = r2_score(y_test, y_pred_test)
            mse_test = mean_squared_error(y_test, y_pred_test)

            # Record the result
            performance_records.append({
                'Feature Group': group_combination,
                'R2_Train': r2_train,
                'R2_Test': r2_test,
                'MSE_Test': mse_test
            })

            logger.info(f"Model {group_combination} -> R2_Test: {r2_test:.4f}, MSE_Test: {mse_test:.4f}")

            # Update the best model
            if r2_test > best_score:
                best_score = r2_test
                best_model = model_with_features

        except Exception as e:
            # Best effort: log the failure and move on to the next combination
            logger.error(f"เกิดข้อผิดพลาดในการฝึกโมเดลสำหรับ {group_combination}: {e}")

# Write the performance table to CSV
performance_df = pd.DataFrame(performance_records)
performance_df.to_csv('model_performance.csv', index=False)

# Save the best model.
# Compare against None explicitly rather than relying on object truthiness.
if best_model is not None:
    # Make sure the target directory exists before writing into it
    # (`os` is imported in the notebook's first cell).
    os.makedirs('output', exist_ok=True)
    best_model.save('output/best_model.pkl')
    logger.info("บันทึก best model ไปที่ 'output/best_model.pkl' สำเร็จ")
else:
    logger.warning("ไม่พบโมเดลที่ดีที่สุด")


NameError: name 'df' is not defined

In [None]:
# Load the model written by the training cell.
# Fixed: the original read 'output/model.pkl', a file no cell in this notebook
# produces; the training cell saves its result to 'output/best_model.pkl'.
loaded_model = ModelWithFeatures.load('output/best_model.pkl')
print("Feature names used:", loaded_model.feature_names)

# Extract color and texture features for one sample image.
# Its name "0_20_5" follows the <Day>_<Temp>_<Rep> pattern parsed when the
# training table is built, hence Day = 0 and Temp = 20 below.
sample_image = 'resources/images/0_20_5.png'
color_data = color_extractor(sample_image)
texture_data = texture_extractor(sample_image)
data = {**color_data, **texture_data, "Temp": 20, "Day": 0}
print(data)

Feature names used: ['Mean_RGB_R', 'Std_RGB_R', 'Mean_RGB_G', 'Std_RGB_G', 'Mean_RGB_B', 'Std_RGB_B', 'Mean_LAB_L', 'Std_LAB_L', 'Mean_LAB_A', 'Std_LAB_A', 'Mean_LAB_B', 'Std_LAB_B', 'Mean_HSV_H', 'Std_HSV_H', 'Mean_HSV_S', 'Std_HSV_S', 'Mean_HSV_V', 'Std_HSV_V', 'Mean_GRAY_Gray', 'Std_GRAY_Gray', 'GLCM_contrast', 'GLCM_dissimilarity', 'GLCM_homogeneity', 'GLCM_energy', 'GLCM_correlation', 'LBP_0', 'LBP_1', 'LBP_2', 'LBP_3', 'LBP_4', 'LBP_5', 'LBP_6', 'LBP_7', 'LBP_8', 'LBP_9', 'Temp', 'Yellow', 'Cyan', 'Magenta', 'Brightness', 'Chroma', 'Day']
{'Mean_RGB_R': 197.63762544598035, 'Std_RGB_R': 72.37631332056615, 'Mean_RGB_G': 209.1521779640885, 'Std_RGB_G': 52.2815443680636, 'Mean_RGB_B': 204.96656839411216, 'Std_RGB_B': 59.52331688780798, 'Mean_LAB_L': 211.24206311854562, 'Std_LAB_L': 52.17639228085439, 'Mean_LAB_A': 124.40548768735657, 'Std_LAB_A': 7.3633681196553455, 'Mean_LAB_B': 133.7470580984653, 'Std_LAB_B': 12.262759959173898, 'Mean_HSV_H': 50.377384370069564, 'Std_HSV_H': 55.247

In [None]:
# Keep exactly the features the model was trained on, in training order.
# A feature that was not extracted is reported right away: filling it with None
# (as before) only postpones the failure to a less readable error in predict().
missing_features = [feature for feature in loaded_model.feature_names if feature not in data]
if missing_features:
    raise KeyError(f"Features required by the model are missing: {missing_features}")

data = {feature: data[feature] for feature in loaded_model.feature_names}
print(data)


{'Mean_RGB_R': 197.63762544598035, 'Std_RGB_R': 72.37631332056615, 'Mean_RGB_G': 209.1521779640885, 'Std_RGB_G': 52.2815443680636, 'Mean_RGB_B': 204.96656839411216, 'Std_RGB_B': 59.52331688780798, 'Mean_LAB_L': 211.24206311854562, 'Std_LAB_L': 52.17639228085439, 'Mean_LAB_A': 124.40548768735657, 'Std_LAB_A': 7.3633681196553455, 'Mean_LAB_B': 133.7470580984653, 'Std_LAB_B': 12.262759959173898, 'Mean_HSV_H': 50.377384370069564, 'Std_HSV_H': 55.24777561809316, 'Mean_HSV_S': 26.771953662865748, 'Std_HSV_S': 51.623198702150475, 'Mean_HSV_V': 209.88857315422405, 'Std_HSV_V': 52.38797213100764, 'Mean_GRAY_Gray': 206.5764447298121, 'Std_GRAY_Gray': 56.531852008036616, 'GLCM_contrast': 27.928156159212374, 'GLCM_dissimilarity': 1.800941837631362, 'GLCM_homogeneity': 0.6779457129401885, 'GLCM_energy': 0.3581718393771594, 'GLCM_correlation': 0.9956542055843305, 'LBP_0': 452916.0, 'LBP_1': 863278.0, 'LBP_2': 359750.0, 'LBP_3': 1109475.0, 'LBP_4': 1140688.0, 'LBP_5': 1590486.0, 'LBP_6': 943676.0, 'L

In [None]:
# Wrap the feature dict in a one-row DataFrame and predict with the loaded model
# (presumably the "%_Weight_Loss" target used in training - confirm for the loaded file)
loaded_model.predict(pd.DataFrame([data]))

array([17.20768328])