In [24]:
import pandas as pd
import os

# --- Constants for CWSI Calculation ---
# Lower Limit (LL) of the temperature difference (Tobj - Tair) for non-stressed conditions
LOWER_LIMIT = -2.06

# Upper Limit (UL) of the temperature difference (Tobj - Tair) for maximum stress/no-transpiration
UPPER_LIMIT = -1.33

# Columns that calculate_cwsi itself writes. They are skipped when locating the
# temperature inputs, so re-running on an already-updated file stays correct.
_DERIVED_COLUMNS = ('Delta_T', 'CWSI')


def calculate_cwsi(filepath, lower_limit=None, upper_limit=None):
    """
    Add 'Delta_T' and 'CWSI' columns to a CSV file, rewriting it in place.

    The CSV is read, the temperature difference and the Crop Water Stress
    Index are computed, and the DataFrame (original columns plus 'Delta_T'
    and 'CWSI') is written back to ``filepath``.

    CWSI Formula: CWSI = (dT - LL) / (UL - LL), rounded to 3 decimals and
    clipped to [0, 1], where dT is (Object_Temp - Ambient_Temp).

    Column identification is positional: ignoring any 'Delta_T' / 'CWSI'
    columns left by a previous run, the second-to-last column is taken as the
    ambient temperature and the last column as the object temperature.
    Ignoring the derived columns is what makes the function safe to run more
    than once on the same file.

    Args:
        filepath: Path of the CSV file to read and overwrite.
        lower_limit: Optional LL override; defaults to LOWER_LIMIT.
        upper_limit: Optional UL override; defaults to UPPER_LIMIT.

    Returns:
        None. Problems are reported with print() and leave the file untouched.
    """
    # Resolve the limits at call time so that edits to the module-level
    # constants are picked up without re-defining the function.
    ll = LOWER_LIMIT if lower_limit is None else lower_limit
    ul = UPPER_LIMIT if upper_limit is None else upper_limit

    print(f"Starting CWSI calculation for file: '{filepath}'")

    if not os.path.exists(filepath):
        print(f"Error: Input file not found at '{filepath}'")
        return

    # A zero or negative span would silently produce inf/NaN or an inverted index.
    if ul <= ll:
        print(f"Error: Upper limit ({ul}) must be greater than lower limit ({ll}).")
        return

    try:
        # Load the CSV file into a pandas DataFrame
        df = pd.read_csv(filepath)

        # Only original input columns are candidates for the temperature data.
        input_columns = [col for col in df.columns if col not in _DERIVED_COLUMNS]

        if len(input_columns) < 2:
            print("Error: The CSV must contain at least two columns for temperature data.")
            return

        # Identify the columns: second-to-last is Ambient, last is Object
        ambient_temp_col = input_columns[-2]
        object_temp_col = input_columns[-1]

        # Print identified columns for user confirmation
        print(f"Identified Ambient Temperature Column: '{ambient_temp_col}'")
        print(f"Identified Object Temperature Column: '{object_temp_col}'")

        # Non-numeric entries become NaN instead of raising.
        ambient_temps = pd.to_numeric(df[ambient_temp_col], errors='coerce')
        object_temps = pd.to_numeric(df[object_temp_col], errors='coerce')

        # 1. Temperature difference (kept as an intermediate column for transparency)
        df['Delta_T'] = object_temps - ambient_temps

        # 2. CWSI = (Delta_T - LL) / (UL - LL), rounded and clipped to [0, 1]
        cwsi = (df['Delta_T'] - ll) / (ul - ll)
        df['CWSI'] = cwsi.round(3).clip(lower=0.0, upper=1.0)

        # Surface rows that the coercion above turned into NaN.
        unusable_rows = int(df['CWSI'].isna().sum())
        if unusable_rows:
            print(f"Warning: {unusable_rows} row(s) have missing or non-numeric "
                  "temperatures; their CWSI is NaN.")

        # Overwrite the original file with the original data plus the derived columns.
        df.to_csv(filepath, index=False)

        print(f"\nCWSI calculation complete. The file '{filepath}' has been updated.")
        print("A new 'CWSI' column and an intermediate 'Delta_T' column have been appended.")

        print("\n--- Updated File Preview (Original Columns + New Data) ---")
        print(df.head())

    except Exception as e:
        # Best-effort notebook helper: report the problem rather than raising.
        print(f"An unexpected error occurred during processing: {e}")

# --- Script Execution ---
# Runs the CWSI calculation when this cell/script is executed directly.
# Note: calculate_cwsi rewrites the input file in place.
if __name__ == "__main__":
    # Define the input file path (this file will be updated)
    input_file = 'pls.csv'
    
    # Run the calculation
    calculate_cwsi(input_file)


Starting CWSI calculation for file: 'pls.csv'
Identified Ambient Temperature Column: 'Ambient_Temp'
Identified Object Temperature Column: 'Object_Temp'

CWSI calculation complete. The file 'pls.csv' has been updated.
A new 'CWSI' column and an intermediate 'Delta_T' column have been appended.

--- Updated File Preview (Original Columns + New Data) ---
    R   G   B  Ambient_Temp  Object_Temp  Delta_T   CWSI
0  33  75  24         26.71        25.37    -1.34  0.986
1  30  79  28         26.71        25.41    -1.30  1.000
2  27  79  30         26.71        25.37    -1.34  0.986
3  30  76  32         26.71        25.41    -1.30  1.000
4  31  82  31         26.71        25.37    -1.34  0.986


In [27]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.preprocessing import StandardScaler
import numpy as np

# --- CONFIGURATION ---
INPUT_FILEPATH = 'pls.csv'
TARGET_VARIABLE = 'CWSI'
# Intermediate column excluded from the features (dropped only if present).
VARIABLE_TO_DROP = 'Delta_T'

# To switch models:
# Set to 'Linear' for standard OLS (Ordinary Least Squares) Regression
# Set to 'Ridge' for Ridge Regression (L2 Regularization) to combat overfitting
# Any value other than 'Ridge' selects the standard linear model.
MODEL_TYPE = 'Ridge'

# Ridge Hyperparameter: Controls the strength of the penalty.
# A higher alpha means stronger regularization (coefficients are more penalized).
RIDGE_ALPHA = 1.0 
# ---------------------

def run_linear_regression(filepath):
    """
    Train and evaluate a Linear or Ridge regression on the CSV at ``filepath``.

    Steps: load the CSV, use TARGET_VARIABLE as the target and every other
    column except VARIABLE_TO_DROP as features, split 80/20 with a fixed seed,
    standardize the features (scaler fitted on the training split only), fit
    the model selected by MODEL_TYPE, persist model and scaler with joblib,
    then print test-set metrics and the coefficients.

    Args:
        filepath: Path to the input CSV file.

    Returns:
        None. A missing file or a missing target column is reported with
        print() and the function returns early.

    Side effects:
        Writes 'linear_regression_model.joblib' and
        'linear_regression_scaler.joblib' to the current working directory.
    """
    # Imported here because the cell's import block does not provide joblib;
    # without this the persistence step raises NameError on a fresh kernel.
    import joblib

    print(f"--- Loading data from {filepath} ---")
    try:
        df = pd.read_csv(filepath)
    except FileNotFoundError:
        print(f"Error: File not found at {filepath}. Please ensure the CSV is in the same directory.")
        return
    
    # 1. Prepare Features (X) and Target (Y)
    if TARGET_VARIABLE not in df.columns:
        print(f"Error: Target variable '{TARGET_VARIABLE}' not found.")
        return
    
    Y = df[TARGET_VARIABLE]
    # errors='ignore' keeps this working when the intermediate column is absent.
    # NOTE(review): the remaining features still include the two temperature
    # columns that CWSI appears to be derived from (see the CWSI cell) --
    # confirm this is intended, since it makes the target largely recoverable.
    X = df.drop(columns=[TARGET_VARIABLE, VARIABLE_TO_DROP], errors='ignore')
    
    print(f"Features (X) used in the model: {list(X.columns)}")
    print(f"Target (Y) variable: {TARGET_VARIABLE}")
    print("-" * 50)


    # 2. Split the data
    X_train_raw, X_test_raw, Y_train, Y_test = train_test_split(
        X, Y, test_size=0.2, random_state=42
    )

    # 3. Standardize the features (CRITICAL for Regularization)
    # Ridge regression is sensitive to the scale of features.
    # The scaler is fitted on the training split only to avoid test-set leakage.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train_raw)
    X_test_scaled = scaler.transform(X_test_raw)

    print("Data standardized for effective regularization.")
    print("-" * 50)
    
    # 4. Initialize and Train the Model
    if MODEL_TYPE == 'Ridge':
        print(f"Training Ridge Regression Model with alpha={RIDGE_ALPHA}...")
        model = Ridge(alpha=RIDGE_ALPHA)
    else:
        print("Training Standard Linear Regression Model...")
        model = LinearRegression()
        
    model.fit(X_train_scaled, Y_train)

    # Persist the model together with its scaler: predictions on new data
    # must be scaled with the same fitted scaler.
    joblib.dump(model, 'linear_regression_model.joblib')
    joblib.dump(scaler, 'linear_regression_scaler.joblib')
    # "\n" (not "\\n") so a blank line is printed instead of a literal backslash-n.
    print("\nModel and Scaler successfully saved to 'linear_regression_model.joblib' and 'linear_regression_scaler.joblib'.")
    
    print("--- Model Training Complete ---")
    print(f"Trained on {len(X_train_scaled)} samples, testing on {len(X_test_scaled)} samples.")
    print("-" * 50)


    # 5. Evaluate the Model Performance
    Y_pred = model.predict(X_test_scaled)
    
    r_squared = r2_score(Y_test, Y_pred)
    mse = mean_squared_error(Y_test, Y_pred)
    rmse = np.sqrt(mse)
    
    print("--- Model Performance on Test Data ---")
    print(f"R-squared (Coefficient of Determination): {r_squared:.4f}")
    print(f"Mean Squared Error (MSE): {mse:.4f}")
    print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
    print("-" * 50)


    # 6. Print Model Coefficients
    print("--- Model Coefficients ---")
    
    # NOTE: Coefficients here are for the SCALED features
    print(f"Intercept: {model.intercept_:.4f}")
    
    # Label each coefficient with its feature name (same order as X's columns).
    coefficients = pd.Series(model.coef_, index=X.columns)
    print("\nFeature Coefficients (on Scaled Data):")
    print(coefficients.to_string())
    
    if MODEL_TYPE == 'Ridge':
        print("\nNote: The coefficients are smaller due to the Ridge (L2) penalty.")


# Entry point: train and evaluate the regression on the configured CSV
# when this cell/script is executed directly.
if __name__ == "__main__":
    run_linear_regression(INPUT_FILEPATH)


--- Loading data from pls.csv ---
Features (X) used in the model: ['R', 'G', 'B', 'Ambient_Temp', 'Object_Temp']
Target (Y) variable: CWSI
--------------------------------------------------
Data standardized for effective regularization.
--------------------------------------------------
Training Ridge Regression Model with alpha=1.0...
\nModel and Scaler successfully saved to 'linear_regression_model.joblib' and 'linear_regression_scaler.joblib'.
--- Model Training Complete ---
Trained on 320 samples, testing on 80 samples.
--------------------------------------------------
--- Model Performance on Test Data ---
R-squared (Coefficient of Determination): 0.9201
Mean Squared Error (MSE): 0.0124
Root Mean Squared Error (RMSE): 0.1113
--------------------------------------------------
--- Model Coefficients ---
Intercept: 0.6398

Feature Coefficients (on Scaled Data):
R               0.003825
G               0.125850
B              -0.168224
Ambient_Temp   -0.270247
Object_Temp     0.2443

In [26]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
import numpy as np

# joblib is used below to persist the model and scaler. It is imported here
# because this cell's import block does not provide it; without this the cell
# raises NameError on a fresh kernel.
import joblib

# Load the dataset
df = pd.read_csv('pls.csv')

# 1. Define Features (X) and Target (y)
# Features: RGB, AmbientTemp, ObjectTemp
# NOTE(review): CWSI in pls.csv appears to be derived from
# Object_Temp - Ambient_Temp (see the CWSI cell), so the target is largely
# recoverable from these two features -- confirm this is intended.
features = ['R', 'G', 'B', 'Ambient_Temp', 'Object_Temp']
X = df[features].values
# Target: CWSI
y = df['CWSI'].values

# 2. Split data into training and testing sets (80/20, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# 3. Feature Scaling
# SVR is sensitive to feature scaling, so we apply StandardScaler.
# We fit the scaler only on the training data.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 4. Create and Train the SVR Model
# kernel='rbf' specifies the Radial Basis Function kernel.
# C, gamma and epsilon are hyperparameters that often need tuning; the values
# below are hand-picked starting points, not scikit-learn's defaults.
svr_rbf = SVR(kernel='rbf', C=100, gamma=0.1, epsilon=0.1)
print("Training the SVR (RBF) model...")
svr_rbf.fit(X_train_scaled, y_train)

# Persist the model together with its scaler: new inputs must be transformed
# with the same fitted scaler before prediction.
joblib.dump(svr_rbf, 'svr_rbf_model.joblib')
joblib.dump(scaler, 'svr_rbf_scaler.joblib')
# "\n" (not "\\n") so a blank line is printed instead of a literal backslash-n.
print("\nSVR Model and Scaler successfully saved to 'svr_rbf_model.joblib' and 'svr_rbf_scaler.joblib'.")

print("Training complete.")

# 5. Make Predictions
y_pred = svr_rbf.predict(X_test_scaled)

# 6. Evaluate the Model Performance
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

print("\n--- Model Evaluation ---")
print(f"R-squared (R2) Score: {r2:.4f}")
print(f"Mean Absolute Error (MAE): {mae:.4f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")

# Example of a single prediction:
# Get the features for the first test sample (unscaled, for display only)
sample_features = X_test[0]
# The model expects a 2-D array of scaled features: shape (1, n_features)
sample_features_scaled = X_test_scaled[0].reshape(1, -1)
sample_true_cwsi = y_test[0]

# Predict
sample_prediction = svr_rbf.predict(sample_features_scaled)[0]

print("\n--- Example Prediction ---")
print(f"Input Features (R, G, B, AmbientTemp, ObjectTemp): {sample_features}")
print(f"True CWSI: {sample_true_cwsi:.4f}")
print(f"Predicted CWSI: {sample_prediction:.4f}")

Training the SVR (RBF) model...
\nSVR Model and Scaler successfully saved to 'svr_rbf_model.joblib' and 'svr_rbf_scaler.joblib'.
Training complete.

--- Model Evaluation ---
R-squared (R2) Score: 0.9789
Mean Absolute Error (MAE): 0.0445
Root Mean Squared Error (RMSE): 0.0572

--- Example Prediction ---
Input Features (R, G, B, AmbientTemp, ObjectTemp): [ 53.   106.    53.    27.83  26.11]
True CWSI: 0.4660
Predicted CWSI: 0.4597
