## Step 0: Load data and build feature combinations

In [33]:
import itertools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
import xgboost as xgb
import lightgbm as lgb
from sklearn.metrics import r2_score
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam
import warnings

# Silence every warning category for the remainder of the session.
warnings.filterwarnings("ignore")

# Read the raw measurement table.
df = pd.read_csv("resources/color_texture_weight_data.csv")

# Named groups of feature columns; a group is always selected as a whole.
feature_groups = dict(
    RGB=["Mean_RGB_R", "Mean_RGB_G", "Mean_RGB_B", "Std_RGB_R", "Std_RGB_G", "Std_RGB_B"],
    LAB=["Mean_LAB_L", "Mean_LAB_A", "Mean_LAB_B", "Std_LAB_L", "Std_LAB_A", "Std_LAB_B"],
    HSV=["Mean_HSV_H", "Mean_HSV_S", "Mean_HSV_V", "Std_HSV_H", "Std_HSV_S", "Std_HSV_V"],
    GLCM=[
        "GLCM_contrast", "GLCM_dissimilarity", "GLCM_homogeneity",
        "GLCM_energy", "GLCM_correlation", "GLCM_ASM",
    ],
    LBP=["LBP_0", "LBP_1", "LBP_2", "LBP_3", "LBP_4", "LBP_5", "LBP_6", "LBP_7", "LBP_8", "LBP_9"],
    # The remaining groups each consist of one column named like the group.
    **{name: [name] for name in ("Yellow", "Cyan", "Magenta", "Brightness", "Chroma")},
)

# Every non-empty subset of groups, paired with the flattened column list it
# selects. Entries are (tuple_of_group_names, list_of_columns), ordered by
# subset size and then by itertools.combinations order.
group_names = list(feature_groups)
feature_comb = [
    (combo, [col for name in combo for col in feature_groups[name]])
    for size in range(1, len(group_names) + 1)
    for combo in itertools.combinations(group_names, size)
]

# Average every measurement column per (Rep, Temp, Day); "Filename" is left
# out of the aggregation and the three key columns come back as regular
# columns after reset_index().
id_cols = ["Rep", "Temp", "Day"]
skip_cols = set(id_cols) | {"Filename"}
agg_spec = {"%_Weight_Loss": "mean"}
agg_spec.update((col, "mean") for col in df.columns if col not in skip_cols)
grouped_data = df.groupby(id_cols).agg(agg_spec).reset_index()

print("Number of feature combinations:", len(feature_comb))

Number of feature combinations: 1023


In [15]:
# Re-read the CSV and rebind `df` to a copy without the 'Filename' and
# 'Weight' columns, then show the remaining column index as the cell output.
df = pd.read_csv('resources/color_texture_weight_data.csv').drop(columns=['Filename', 'Weight'])
df.columns

Index(['%_Weight_Loss', 'Mean_RGB_R', 'Mean_RGB_G', 'Mean_RGB_B', 'Std_RGB_R',
       'Std_RGB_G', 'Std_RGB_B', 'Mean_LAB_L', 'Mean_LAB_A', 'Mean_LAB_B',
       'Std_LAB_L', 'Std_LAB_A', 'Std_LAB_B', 'Mean_HSV_H', 'Mean_HSV_S',
       'Mean_HSV_V', 'Std_HSV_H', 'Std_HSV_S', 'Std_HSV_V', 'Mean_GRAY_Gray',
       'Std_GRAY_Gray', 'GLCM_contrast', 'GLCM_dissimilarity',
       'GLCM_homogeneity', 'GLCM_energy', 'GLCM_correlation', 'GLCM_ASM',
       'LBP_0', 'LBP_1', 'LBP_2', 'LBP_3', 'LBP_4', 'LBP_5', 'LBP_6', 'LBP_7',
       'LBP_8', 'LBP_9', 'Day', 'Temp', 'Rep', 'Yellow', 'Cyan', 'Magenta',
       'Brightness', 'Chroma'],
      dtype='object')

## Step 1: Model training helpers and forecasting sweep

In [None]:
def train_arima(y_train, y_test):
    """Fit an ARIMA(5, 1, 0) model on ``y_train`` and score its forecast.

    Parameters
    ----------
    y_train : sequence of numbers
        Training observations handed to ``ARIMA``.
    y_test : sequence of numbers
        Held-out observations; ``len(y_test)`` is the forecast horizon.

    Returns
    -------
    float or None
        ``r2_score(y_test, forecast)``, or ``None`` when fitting, forecasting
        or scoring raises an ordinary exception.
    """
    try:
        fitted = ARIMA(y_train, order=(5, 1, 0)).fit()
        forecast = fitted.forecast(steps=len(y_test))
        return r2_score(y_test, forecast)
    except Exception:
        # Best-effort scoring: a model that cannot be fitted simply gets no
        # score. `Exception` (rather than a bare `except`) lets
        # KeyboardInterrupt / SystemExit propagate so a long sweep can be
        # stopped from the notebook.
        return None

In [None]:
def train_sarima(y_train, y_test):
    """Fit a SARIMAX(1, 1, 1)x(1, 1, 1, 7) model on ``y_train`` and score its forecast.

    Parameters
    ----------
    y_train : sequence of numbers
        Training observations handed to ``SARIMAX``.
    y_test : sequence of numbers
        Held-out observations; ``len(y_test)`` is the forecast horizon.

    Returns
    -------
    float or None
        ``r2_score(y_test, forecast)``, or ``None`` when fitting, forecasting
        or scoring raises an ordinary exception.
    """
    try:
        spec = SARIMAX(y_train, order=(1, 1, 1), seasonal_order=(1, 1, 1, 7))
        # disp=False keeps the optimizer from printing its convergence log on
        # every call, which would flood the cell output in a sweep.
        fitted = spec.fit(disp=False)
        forecast = fitted.forecast(steps=len(y_test))
        return r2_score(y_test, forecast)
    except Exception:
        # Best-effort scoring: failures yield no score. `Exception` (not a
        # bare `except`) lets KeyboardInterrupt / SystemExit propagate.
        return None

In [None]:
def train_lstm(y_train, y_test):
    """Train a two-layer LSTM as a lag-1 predictor and score it on ``y_test``.

    Both series are min-max scaled with a scaler fitted on ``y_train`` only.
    Each scaled value is used as the single-step input for predicting the next
    value, so a series of length ``k`` yields ``k - 1`` (input, target) pairs.
    Predictions for the test inputs are mapped back to the original scale and
    compared with ``y_test`` from its second element onward.

    Parameters
    ----------
    y_train : sequence of numbers
        Training observations.
    y_test : sequence of numbers
        Held-out observations.

    Returns
    -------
    float or None
        ``r2_score`` of the de-scaled predictions against ``y_test[1:]``, or
        ``None`` when either split has fewer than two points or when any step
        raises an ordinary exception.

    Notes
    -----
    No random seed is set here, so repeated calls can return different scores.
    """
    try:
        # A lag-1 setup needs at least two points per split to form one pair.
        if len(y_train) < 2 or len(y_test) < 2:
            return None

        scaler = MinMaxScaler()
        train_scaled = scaler.fit_transform(np.array(y_train).reshape(-1, 1))
        test_scaled = scaler.transform(np.array(y_test).reshape(-1, 1))

        # Inputs are values at t, targets are values at t + 1.
        X_train_lstm, y_train_lstm = train_scaled[:-1], train_scaled[1:]
        X_test_lstm = test_scaled[:-1]

        # Reshape to (samples, timesteps=1, features=1).
        X_train_lstm = X_train_lstm.reshape((X_train_lstm.shape[0], X_train_lstm.shape[1], 1))
        X_test_lstm = X_test_lstm.reshape((X_test_lstm.shape[0], X_test_lstm.shape[1], 1))

        model = Sequential([
            LSTM(50, activation='relu', return_sequences=True, input_shape=(X_train_lstm.shape[1], 1)),
            LSTM(50, activation='relu'),
            Dense(1)
        ])
        model.compile(optimizer=Adam(learning_rate=0.01), loss='mse')
        model.fit(X_train_lstm, y_train_lstm, epochs=50, verbose=0, batch_size=8)

        # verbose=0 avoids a progress bar per call when used inside a sweep.
        pred = model.predict(X_test_lstm, verbose=0)
        pred = scaler.inverse_transform(pred)

        # Positional slice via NumPy so the target does not depend on the
        # input's index labels.
        actual = np.asarray(y_test)[1:]
        return r2_score(actual, pred.flatten())
    except Exception:
        # Best-effort scoring: failures yield no score. `Exception` (not a
        # bare `except`) lets KeyboardInterrupt / SystemExit propagate.
        return None

In [None]:
def train_xgb_lgb(X_train, X_test, y_train, y_test, model_cls):
    """Fit a gradient-boosting regressor and return its R² on the test split.

    Parameters
    ----------
    X_train, X_test : feature tables
        Passed unchanged to ``model.fit`` / ``model.predict``.
    y_train, y_test : sequences of numbers
        Training targets and held-out targets.
    model_cls : callable
        Zero-argument-constructible regressor class exposing ``fit`` and
        ``predict`` (used in this notebook with ``xgb.XGBRegressor`` and
        ``lgb.LGBMRegressor``).

    Returns
    -------
    float or None
        ``r2_score(y_test, predictions)``, or ``None`` when construction,
        fitting, prediction or scoring raises an ordinary exception.
    """
    try:
        # Use each library's default objective. The previous hard-coded
        # "reg:squarederror" is an XGBoost objective name (and XGBRegressor's
        # documented default); passing it to other regressors such as
        # LightGBM is expected to fail, which the old bare `except` silently
        # turned into a missing score. NOTE(review): confirm the LightGBM
        # default ("regression", i.e. L2) is the intended loss.
        model = model_cls()
        model.fit(X_train, y_train)
        pred = model.predict(X_test)
        return r2_score(y_test, pred)
    except Exception:
        # Best-effort scoring: failures yield no score. `Exception` (not a
        # bare `except`) lets KeyboardInterrupt / SystemExit propagate.
        return None


In [None]:
# Forecasting sweep: for every (Rep, Temp) series, score each model on a
# chronological 80/20 split and record (a) one row per feature combination
# and (b) the single best-scoring model/combination per series.

# Results storage
results = []
best_results = []

# Score columns considered when picking the best model. Only models that are
# actually trained below are listed (there is no Prophet trainer in this
# notebook, so looking up a "Prophet" score raised KeyError).
model_names = ["ARIMA", "SARIMA", "LSTM", "XGBoost", "LightGBM"]

# Iterate through each (Rep, Temp) group
for (rep, temp), sub_df in grouped_data.groupby(["Rep", "Temp"]):

    # Order by day once per group; the split below is chronological.
    sub_df = sub_df.sort_values("Day")
    y = sub_df["%_Weight_Loss"]

    train_size = int(len(sub_df) * 0.8)
    y_train, y_test = y.iloc[:train_size], y.iloc[train_size:]

    # ARIMA, SARIMA and the LSTM only see the target series, so their scores
    # cannot depend on the feature combination. Fit them once per group
    # instead of once per combination.
    univariate_scores = {
        "ARIMA": train_arima(y_train, y_test),
        "SARIMA": train_sarima(y_train, y_test),
        "LSTM": train_lstm(y_train, y_test),
    }

    best_model = None
    best_r2 = -np.inf  # Set a low starting value

    # `feature_comb` is the list of (group_names, columns) built in the setup cell.
    for feature_group, selected_features in feature_comb:
        X = sub_df[selected_features]
        X_train, X_test = X.iloc[:train_size], X.iloc[train_size:]

        model_scores = {
            "Rep": rep,
            "Temp": temp,
            "Feature_Group": feature_group,
            **univariate_scores,
            "XGBoost": train_xgb_lgb(X_train, X_test, y_train, y_test, xgb.XGBRegressor),
            "LightGBM": train_xgb_lgb(X_train, X_test, y_train, y_test, lgb.LGBMRegressor),
        }

        # Save results
        results.append(model_scores)

        # Identify the best model; a score of None means the model failed.
        for model_name in model_names:
            score = model_scores.get(model_name)
            if score is not None and score > best_r2:
                best_r2 = score
                best_model = {**model_scores, "Best_Model": model_name, "Best_R2": best_r2}

    if best_model is not None:
        best_results.append(best_model)

# Convert results to DataFrame and save
results_df = pd.DataFrame(results)
results_df.to_csv("forecasting_results.csv", index=False)

best_results_df = pd.DataFrame(best_results)
best_results_df.to_csv("best_forecasting_results.csv", index=False)

print("Forecasting completed! Results saved to 'forecasting_results.csv' and 'best_forecasting_results.csv'")