In [20]:
import pandas as pd

df = pd.read_csv('jena_climate_2009_2016.csv')

time = pd.to_datetime(df.pop('Date Time'), format='%d.%m.%Y %H:%M:%S')
series = df['T (degC)'][5::6]
series.index = time[5::6]

import numpy as np

# function for generating the lagged matrix
def split_sequence(sequence, window_size):
    X = []
    y = []
    # for all indexes
    for i in range(len(sequence)):
        end_idx = i + window_size
        # exit condition
        if end_idx > len(sequence) - 1:
            break
        # get X and Y values
        seq_x, seq_y = sequence[i:end_idx], sequence[end_idx]
        X.append(seq_x)
        y.append(seq_y)
    return np.array(X), np.array(y)


train = series[:-int(len(series)/10)]
test = series[-int(len(series)/10):]
X_train, y_train = split_sequence(train, window_size=24)

import statsmodels.api as sm

# train Ordinary Least Squares model
X_train = sm.add_constant(X_train)
model = sm.OLS(y_train, X_train)
result = model.fit()

print(result.summary())

  seq_x, seq_y = sequence[i:end_idx], sequence[end_idx]


                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.992
Model:                            OLS   Adj. R-squared:                  0.992
Method:                 Least Squares   F-statistic:                 3.376e+05
Date:                Tue, 04 Mar 2025   Prob (F-statistic):               0.00
Time:                        15:06:51   Log-Likelihood:                -70605.
No. Observations:               63058   AIC:                         1.413e+05
Df Residuals:                   63033   BIC:                         1.415e+05
Df Model:                          24                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0317      0.005      6.986      0.0

In [28]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, RandomizedSearchCV

# Load dataset
df = pd.read_csv("resources/color_texture_weight_data.csv")

# Define selected features
selected_features = [
    "Mean_RGB_R", "Mean_RGB_G", "Mean_RGB_B", "Std_RGB_R", "Std_RGB_G", "Std_RGB_B",
    "Mean_LAB_L", "Mean_LAB_A", "Mean_LAB_B", "Std_LAB_L", "Std_LAB_A", "Std_LAB_B",
    "Mean_HSV_H", "Mean_HSV_S", "Mean_HSV_V", "Std_HSV_H", "Std_HSV_S", "Std_HSV_V",
    "Mean_GRAY_Gray", "Std_GRAY_Gray",
    "GLCM_contrast", "GLCM_dissimilarity", "GLCM_homogeneity", "GLCM_energy", "GLCM_correlation", "GLCM_ASM",
    "LBP_0", "LBP_1", "LBP_2", "LBP_3", "LBP_4", "LBP_5", "LBP_6", "LBP_7", "LBP_8", "LBP_9",
    "Yellow", "Cyan", "Magenta", "Brightness", "Chroma",
    "Day", "Temp", "Rep"
]

# Filter dataset for Day 0 to 8
df_filtered = df[df["Day"] <= 8][selected_features + ["%_Weight_Loss"]]

# Identify unique tracking experiments (Temp, Rep)
unique_experiments = df_filtered[["Temp", "Rep"]].drop_duplicates()

# Split 80% training and 20% testing based on unique experiments
train_experiments, test_experiments = train_test_split(
    unique_experiments, test_size=0.2, random_state=42
)

# Create train and test sets based on selected experiments
df_train = df_filtered.merge(train_experiments, on=["Temp", "Rep"])
df_test = df_filtered.merge(test_experiments, on=["Temp", "Rep"])

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(df_train[selected_features])
y_train = df_train["%_Weight_Loss"]

X_test = scaler.transform(df_test[selected_features])
y_test = df_test["%_Weight_Loss"]

# Train the model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate model performance
train_score = model.score(X_train, y_train)
test_score = model.score(X_test, y_test)

print(f"Training Score: {train_score}")
print(f"Testing Score: {test_score}")

# Make predictions
y_pred = model.predict(X_test)

# Create a DataFrame for actual vs predicted values
predictions_df = pd.DataFrame({"Actual %_Weight_Loss": y_test.values, "Predicted %_Weight_Loss": y_pred})

# Display predictions
print(predictions_df.head())


Training Score: 0.9747823877305761
Testing Score: 0.894765216952948
   Actual %_Weight_Loss  Predicted %_Weight_Loss
0                   0.0                      0.0
1                   0.0                      0.0
2                   0.0                      0.0
3                   0.0                      0.0
4                   0.0                      0.0


In [27]:
!!python3 -m pip install tensorflow[and-cuda]



In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional, Attention
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
import keras_tuner as kt
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping

# Load dataset
df = pd.read_csv("resources/color_texture_weight_data.csv")

# Define feature groups
feature_groups = {
    "RGB Features": ["Mean_RGB_R", "Mean_RGB_G", "Mean_RGB_B", "Std_RGB_R", "Std_RGB_G", "Std_RGB_B"],
    "LAB Features": ["Mean_LAB_L", "Mean_LAB_A", "Mean_LAB_B", "Std_LAB_L", "Std_LAB_A", "Std_LAB_B"],
    "HSV Features": ["Mean_HSV_H", "Mean_HSV_S", "Mean_HSV_V", "Std_HSV_H", "Std_HSV_S", "Std_HSV_V"],
    "Grayscale Features": ["Mean_GRAY_Gray", "Std_GRAY_Gray"],
    "Texture Features": ["GLCM_contrast", "GLCM_dissimilarity", "GLCM_homogeneity", "GLCM_energy", "GLCM_correlation", "GLCM_ASM"],
    "LBP Features": ["LBP_0", "LBP_1", "LBP_2", "LBP_3", "LBP_4", "LBP_5", "LBP_6", "LBP_7", "LBP_8", "LBP_9"],
    "Color Space Features": ["Yellow", "Cyan", "Magenta", "Brightness", "Chroma"],
    "Environmental Variables": ["Day", "Temp", "Rep"]
}

# Process PCA for each feature group and evaluate importance
important_feature_groups = {}
explained_variances = {}
df_pca_all = pd.DataFrame()

for group, features in feature_groups.items():
    df_group = df[features].dropna()
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(df_group)
    
    pca = PCA(n_components=0.95)  # Retain 95% variance
    X_pca = pca.fit_transform(X_scaled)
    explained_variance = sum(pca.explained_variance_ratio_)
    explained_variances[group] = explained_variance
    
    if explained_variance > 0.05:  # Keep groups explaining significant variance
        important_feature_groups[group] = features
        pca_features = [f"{group}_PC{i+1}" for i in range(X_pca.shape[1])]
        df_pca_group = pd.DataFrame(X_pca, columns=pca_features)
        df_pca_all = pd.concat([df_pca_all, df_pca_group], axis=1)

# Create sequences for LSTM
def create_sequences(data, target_column, time_steps=10):
    X, y = [], []
    for i in range(len(data) - time_steps):
        X.append(data.iloc[i:i+time_steps].values)
        y.append(data[target_column].iloc[i+time_steps])
    return np.array(X), np.array(y)

# Prepare dataset for LSTM
df_pca_all["%_Weight_Loss"] = df["%_Weight_Loss"].values
time_steps = 10
X, y = create_sequences(df_pca_all, "%_Weight_Loss", time_steps)

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Hyperparameter tuning with Keras Tuner
def build_model(hp):
    model = Sequential([
        Bidirectional(LSTM(
            hp.Int('units1', min_value=64, max_value=256, step=32), activation='relu',
            return_sequences=True, kernel_regularizer=l2(hp.Choice('l2_1', [0.0001, 0.001, 0.01])),
            input_shape=(time_steps, X.shape[2])
        )),
        Dropout(hp.Float('dropout1', min_value=0.2, max_value=0.5, step=0.1)),
        
        Bidirectional(LSTM(
            hp.Int('units2', min_value=32, max_value=128, step=32), activation='relu',
            return_sequences=True, kernel_regularizer=l2(hp.Choice('l2_2', [0.0001, 0.001, 0.01]))
        )),
        Dropout(hp.Float('dropout2', min_value=0.2, max_value=0.5, step=0.1)),
        
        LSTM(
            hp.Int('units3', min_value=16, max_value=64, step=16), activation='relu',
            kernel_regularizer=l2(hp.Choice('l2_3', [0.0001, 0.001, 0.01]))
        ),
        Dropout(hp.Float('dropout3', min_value=0.2, max_value=0.5, step=0.1)),
        
        Dense(1, kernel_regularizer=l2(hp.Choice('l2_dense', [0.0001, 0.001, 0.01])))
    ])
    
    model.compile(optimizer=Adam(learning_rate=hp.Choice('learning_rate', [0.0001, 0.0005, 0.001])), loss='mse')
    return model

tuner = kt.RandomSearch(
    build_model, objective='val_loss', max_trials=10, executions_per_trial=1, directory='tuning_results'
)

tuner.search(X_train, y_train, epochs=50, validation_data=(X_test, y_test))
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

# Train the LSTM model with best hyperparameters
model = tuner.hypermodel.build(best_hps)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

history = model.fit(X_train, y_train, epochs=150, batch_size=16, validation_data=(X_test, y_test), callbacks=[reduce_lr, early_stopping])

# Make predictions
y_pred = model.predict(X_test)

# Evaluate model performance
loss = model.evaluate(X_test, y_test)
r2 = r2_score(y_test, y_pred)
print(f"Model Loss (MSE): {loss}")
print(f"R2 Score: {r2}")

Trial 10 Complete [00h 00m 28s]
val_loss: 11.744100570678711

Best val_loss So Far: 9.604065895080566
Total elapsed time: 00h 04m 35s
Epoch 1/150
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 513ms/step - loss: 65.2426 - val_loss: 34.3081 - learning_rate: 5.0000e-04
Epoch 2/150
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - loss: 38.3896 - val_loss: 27.2683 - learning_rate: 5.0000e-04
Epoch 3/150
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 34.6017 - val_loss: 17.9158 - learning_rate: 5.0000e-04
Epoch 4/150
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 27.4037 - val_loss: 17.6746 - learning_rate: 5.0000e-04
Epoch 5/150
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - loss: 22.8046 - val_loss: 18.7992 - learning_rate: 5.0000e-04
Epoch 6/150
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 13.7079 - val_loss: 

: 