Imports

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


Version 1: Multiple Linear Regression

In [None]:
# Version 1: baseline multiple linear regression on standardized features.
# Relies on pd / np / sklearn names imported in the notebook's first cell.
import os

# 1. Load the dataset
df = pd.read_csv("drying_time_dataset.csv")

# 2. Define features (X) and target (y)
X = df.drop(columns=["drying_time"], errors="ignore")
y = df["drying_time"]

# 3. Split into training (70%) and testing (30%) sets; fixed seed for repeatability
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# 4. Standardize the features (scaler statistics come from the training split only)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 5. Train the multiple linear regression model
model = LinearRegression()
model.fit(X_train_scaled, y_train)

# 6. Make predictions
y_pred = model.predict(X_test_scaled)

# 7. Calculate metrics
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
# MAPE divides by y_test; assumes drying_time is never zero -- TODO confirm.
mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100
accuracy = 100 - mape  # reported "accuracy" is simply 100 - MAPE

# 8. Save metrics
results = pd.DataFrame({
    "Version": ["Version 1: Multiple Linear Regression"],
    "MSE": [mse],
    "RMSE": [rmse],
    "MAE": [mae],
    "R2": [r2],
    "MAPE (%)": [mape],
    "Accuracy (%)": [accuracy]
})
# Append this run's row. The column header is written only when the file is
# new or empty, so the CSV is self-describing without repeating the header
# on later appends. NOTE: re-running this cell appends another row.
METRICS_CSV = "drying_time_model_comparison.csv"
write_header = not os.path.isfile(METRICS_CSV) or os.path.getsize(METRICS_CSV) == 0
results.to_csv(METRICS_CSV, mode="a", index=False, header=write_header)

# 9. Save model coefficients to 'model_v1_coefficients.csv'
#    (coefficients refer to the standardized features the model was fitted on)
coefficients = pd.DataFrame({
    "Feature": X.columns,
    "Coefficient": model.coef_
})
coefficients.to_csv("model_v1_coefficients.csv", index=False)

print("Version 1: Multiple Linear Regression completed. Metrics and coefficients saved.")
print(f"MSE: {mse:.5f}, RMSE: {rmse:.5f}, MAE: {mae:.5f}, "
      f"R2: {r2:.5f}, MAPE: {mape:.5f}%, Accuracy: {accuracy:.5f}%")

Version 1: Multiple Linear Regression completed. Metrics and coefficients saved.
MSE: 0.91492, RMSE: 0.95652, MAE: 0.75565, R2: 0.19822, MAPE: 14.57828%, Accuracy: 85.42172%


Version 2: MLR - Skewness Correction (Log Transform)


In [3]:
# Version 2: multiple linear regression after log1p-transforming skewed features.
import os

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# 1. Load the dataset
df = pd.read_csv("drying_time_dataset.csv")

# 2. Identify and transform skewed features
#    Compute skew and apply log1p for features with abs(skew) > threshold.
#    NOTE(review): skew is measured on the full dataset, before the train/test
#    split, so the test rows influence which columns get transformed.
threshold = 0.75
skew_vals = df.drop(columns=["drying_time"], errors="ignore").skew().sort_values(ascending=False)

# Find columns to transform
skewed_features = skew_vals[skew_vals.abs() > threshold].index.tolist()
print(f"Skewed Features (abs(skew) > {threshold}):", skewed_features)

# Apply log(1+x) transform for skewed features (only if the data is non-negative).
# Track the columns actually transformed so the final report is accurate even
# when some skewed columns are skipped.
transformed_features = []
for col in skewed_features:
    # Ensure no negative values before log transform.
    if (df[col] < 0).any():
        print(f"Warning: Column '{col}' has negative values; skipping log transform.")
    else:
        df[col] = np.log1p(df[col])
        transformed_features.append(col)
        print(f"Applied log1p transform to '{col}'.")

# 3. Split features and target
X = df.drop(columns=["drying_time"], errors="ignore")
y = df["drying_time"]

# 4. Train-Test Split (70/30, fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# 5. Standardize the features (scaler statistics come from the training split only)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 6. Train Multiple Linear Regression Model
model = LinearRegression()
model.fit(X_train_scaled, y_train)

# 7. Make Predictions
y_pred = model.predict(X_test_scaled)

# 8. Calculate Metrics
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
# MAPE divides by y_test; assumes drying_time is never zero -- TODO confirm.
mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100
accuracy = 100 - mape  # reported "accuracy" is simply 100 - MAPE

# 9. Save Metrics
results = pd.DataFrame({
    "Version": ["Version 2: MLR- Skewness Correction (Log Transform)"],
    "MSE": [mse],
    "RMSE": [rmse],
    "MAE": [mae],
    "R2": [r2],
    "MAPE (%)": [mape],
    "Accuracy (%)": [accuracy]
})
# Append this run's row; write the column header only when the file is new
# or empty. NOTE: re-running this cell appends another row.
METRICS_CSV = "drying_time_model_comparison.csv"
write_header = not os.path.isfile(METRICS_CSV) or os.path.getsize(METRICS_CSV) == 0
results.to_csv(METRICS_CSV, mode="a", index=False, header=write_header)

# 10. Save Coefficients (on the standardized, possibly log-transformed features)
coefficients = pd.DataFrame({
    "Feature": X.columns,
    "Coefficient": model.coef_
})
coefficients.to_csv("model_v2_coefficients.csv", index=False)

print("Version 2 (MLR-Skewness Correction) completed. Metrics and coefficients saved.")
print("Skewed features transformed:", transformed_features)
print(f"MSE: {mse:.5f}, RMSE: {rmse:.5f}, MAE: {mae:.5f}, "
      f"R2: {r2:.5f}, MAPE: {mape:.5f}%, Accuracy: {accuracy:.5f}%")

Skewed Features (abs(skew) > 0.75): ['mc_initial', 'temperature', 'humidity']
Applied log1p transform to 'mc_initial'.
Applied log1p transform to 'temperature'.
Applied log1p transform to 'humidity'.
Version 2 (MLR-Skewness Correction) completed. Metrics and coefficients saved.
Skewed features transformed: ['mc_initial', 'temperature', 'humidity']
MSE: 0.92390, RMSE: 0.96120, MAE: 0.76423, R2: 0.19036, MAPE: 14.74842%, Accuracy: 85.25158%


Version 3: Random Forest Regression

In [4]:
# Version 3: random forest regression; the fitted model and scaler are persisted.
import os

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import joblib

# 1. Load the dataset
df = pd.read_csv("drying_time_dataset.csv")

# 2. Define features (X) and target (y)
X = df.drop(columns=["drying_time"], errors="ignore")
y = df["drying_time"]

# 3. Train-test split (70/30, fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# 4. Scale features (scaler statistics come from the training split only)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 5. Train Random Forest Regressor
model = RandomForestRegressor(
    n_estimators=100,
    random_state=42
)
model.fit(X_train_scaled, y_train)

# 6. Make Predictions
y_pred = model.predict(X_test_scaled)

# 7. Calculate Metrics
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
# MAPE divides by y_test; assumes drying_time is never zero -- TODO confirm.
mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100
accuracy = 100 - mape  # reported "accuracy" is simply 100 - MAPE

# 8. Save Metrics
results = pd.DataFrame({
    "Version": ["Version 3: Random Forest Regression"],
    "MSE": [mse],
    "RMSE": [rmse],
    "MAE": [mae],
    "R2": [r2],
    "MAPE (%)": [mape],
    "Accuracy (%)": [accuracy]
})
# Append this run's row; write the column header only when the file is new
# or empty. NOTE: re-running this cell appends another row.
METRICS_CSV = "drying_time_model_comparison.csv"
write_header = not os.path.isfile(METRICS_CSV) or os.path.getsize(METRICS_CSV) == 0
results.to_csv(METRICS_CSV, mode="a", index=False, header=write_header)

# 9. Save Feature Importances (sorted, most important first)
importances = model.feature_importances_
importances_df = pd.DataFrame({
    "Feature": X.columns,
    "Importance": importances
}).sort_values("Importance", ascending=False)
importances_df.to_csv("model_v3_feature_importances.csv", index=False)

# 10. Persist the fitted model and its scaler. The model was trained on
#     scaler-transformed inputs, so the saved scaler must be applied to new
#     data before calling the saved model.
joblib.dump(model, 'drying_time_rf_model.joblib')
joblib.dump(scaler, 'drying_time_rf_scaler.joblib')

print("Version 3 (Random Forest Regression) completed. Metrics and importances saved.")
print(f"MSE: {mse:.5f}, RMSE: {rmse:.5f}, MAE: {mae:.5f}, "
      f"R2: {r2:.5f}, MAPE: {mape:.5f}%, Accuracy: {accuracy:.5f}%")




Version 3 (Random Forest Regression) completed. Metrics and importances saved.
MSE: 0.04052, RMSE: 0.20129, MAE: 0.09348, R2: 0.96449, MAPE: 1.90728%, Accuracy: 98.09272%


Version 4: Gradient Boosting Regression

In [5]:
# Version 4: gradient boosting regression.
import os

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# 1. Load the dataset
df = pd.read_csv("drying_time_dataset.csv")

# 2. Define features (X) and target (y)
X = df.drop(columns=["drying_time"], errors="ignore")
y = df["drying_time"]

# 3. Train-test split (70/30, fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# 4. Scale features (for consistency with previous versions)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 5. Train Gradient Boosting Regressor
model = GradientBoostingRegressor(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
    random_state=42
)
model.fit(X_train_scaled, y_train)

# 6. Make Predictions
y_pred = model.predict(X_test_scaled)

# 7. Calculate Metrics
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
# MAPE divides by y_test; assumes drying_time is never zero -- TODO confirm.
mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100
accuracy = 100 - mape  # reported "accuracy" is simply 100 - MAPE

# 8. Save Metrics
results = pd.DataFrame({
    "Version": ["Version 4: Gradient Boosting Regression"],
    "MSE": [mse],
    "RMSE": [rmse],
    "MAE": [mae],
    "R2": [r2],
    "MAPE (%)": [mape],
    "Accuracy (%)": [accuracy]
})
# Append this run's row; write the column header only when the file is new
# or empty. NOTE: re-running this cell appends another row.
METRICS_CSV = "drying_time_model_comparison.csv"
write_header = not os.path.isfile(METRICS_CSV) or os.path.getsize(METRICS_CSV) == 0
results.to_csv(METRICS_CSV, mode="a", index=False, header=write_header)

# 9. Feature Importances (sorted, most important first)
importances = model.feature_importances_
importances_df = pd.DataFrame({
    "Feature": X.columns,
    "Importance": importances
}).sort_values("Importance", ascending=False)
importances_df.to_csv("model_v4_feature_importances.csv", index=False)

print("Version 4 (Gradient Boosting Regression) completed. Metrics and importances saved.")
print(f"MSE: {mse:.5f}, RMSE: {rmse:.5f}, MAE: {mae:.5f}, "
      f"R2: {r2:.5f}, MAPE: {mape:.5f}%, Accuracy: {accuracy:.5f}%")


Version 4 (Gradient Boosting Regression) completed. Metrics and importances saved.
MSE: 0.04873, RMSE: 0.22075, MAE: 0.14996, R2: 0.95729, MAPE: 2.81998%, Accuracy: 97.18002%


Version 5: XGBoost Regression

In [6]:
# Version 5: XGBoost regression.
import os

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from xgboost import XGBRegressor

# 1. Load the dataset
df = pd.read_csv("drying_time_dataset.csv")

# 2. Define features (X) and target (y)
X = df.drop(columns=["drying_time"], errors="ignore")
y = df["drying_time"]

# 3. Train-test split (70/30, fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# 4. Scale features (scaler statistics come from the training split only)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 5. Train XGBoost Regressor
model = XGBRegressor(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
    random_state=42
)
model.fit(X_train_scaled, y_train)

# 6. Make Predictions
y_pred = model.predict(X_test_scaled)

# 7. Calculate Metrics
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
# MAPE divides by y_test; assumes drying_time is never zero -- TODO confirm.
mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100
accuracy = 100 - mape  # reported "accuracy" is simply 100 - MAPE

# 8. Save Metrics
results = pd.DataFrame({
    "Version": ["Version 5: XGBoost Regression"],
    "MSE": [mse],
    "RMSE": [rmse],
    "MAE": [mae],
    "R2": [r2],
    "MAPE (%)": [mape],
    "Accuracy (%)": [accuracy]
})
# Append this run's row; write the column header only when the file is new
# or empty. NOTE: re-running this cell appends another row.
METRICS_CSV = "drying_time_model_comparison.csv"
write_header = not os.path.isfile(METRICS_CSV) or os.path.getsize(METRICS_CSV) == 0
results.to_csv(METRICS_CSV, mode="a", index=False, header=write_header)

# 9. Feature Importances (sorted, most important first)
importances = model.feature_importances_
importances_df = pd.DataFrame({
    "Feature": X.columns,
    "Importance": importances
}).sort_values("Importance", ascending=False)
importances_df.to_csv("model_v5_feature_importances.csv", index=False)

print("Version 5 (XGBoost Regression) completed. Metrics and importances saved.")
print(f"MSE: {mse:.5f}, RMSE: {rmse:.5f}, MAE: {mae:.5f}, "
      f"R2: {r2:.5f}, MAPE: {mape:.5f}%, Accuracy: {accuracy:.5f}%")


Version 5 (XGBoost Regression) completed. Metrics and importances saved.
MSE: 0.06150, RMSE: 0.24799, MAE: 0.17329, R2: 0.94611, MAPE: 3.21697%, Accuracy: 96.78303%


Version 6: Multilayer Perceptron Regression

In [7]:
# Version 6: baseline multilayer perceptron regression (two hidden layers).
import os

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# 1. Load the dataset
df = pd.read_csv("drying_time_dataset.csv")

# 2. Define features (X) and target (y)
X = df.drop(columns=["drying_time"], errors="ignore")
y = df["drying_time"]

# 3. Train-test split (70/30, fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# 4. Scale features (scaler statistics come from the training split only)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 5. Configure and train MLP Regressor
#    - hidden layers (100, 50), ReLU activations, Adam optimiser
#    - alpha=0.0001 is the L2 penalty strength
#    - learning_rate='constant' is only consulted by solver='sgd'
model = MLPRegressor(
    hidden_layer_sizes=(100, 50),
    activation='relu',
    solver='adam',
    alpha=0.0001,
    learning_rate='constant',
    max_iter=1000,
    random_state=42
)
model.fit(X_train_scaled, y_train)

# 6. Make predictions
y_pred = model.predict(X_test_scaled)

# 7. Calculate metrics
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
# MAPE divides by y_test; assumes drying_time is never zero -- TODO confirm.
mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100
accuracy = 100 - mape  # reported "accuracy" is simply 100 - MAPE

# 8. Save Metrics
results = pd.DataFrame({
    "Version": ["Version 6: Multilayer Perceptron Regression"],
    "MSE": [mse],
    "RMSE": [rmse],
    "MAE": [mae],
    "R2": [r2],
    "MAPE (%)": [mape],
    "Accuracy (%)": [accuracy]
})
# Append this run's row; write the column header only when the file is new
# or empty. NOTE: re-running this cell appends another row.
METRICS_CSV = "drying_time_model_comparison.csv"
write_header = not os.path.isfile(METRICS_CSV) or os.path.getsize(METRICS_CSV) == 0
results.to_csv(METRICS_CSV, mode="a", index=False, header=write_header)

# 9. Display / Print
print("Version 6 (MLP Regression) completed. Metrics saved.")
print(f"MSE: {mse:.5f}, RMSE: {rmse:.5f}, MAE: {mae:.5f}, "
      f"R2: {r2:.5f}, MAPE: {mape:.5f}%, Accuracy: {accuracy:.5f}%")


Version 6 (MLP Regression) completed. Metrics saved.
MSE: 0.32014, RMSE: 0.56581, MAE: 0.34729, R2: 0.71945, MAPE: 6.18545%, Accuracy: 93.81455%




Version 7: MLP Regression (Modified)

In [8]:
# Version 7: deeper MLP with early stopping and a larger L2 penalty.
import os

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# 1. Load the dataset
df = pd.read_csv("drying_time_dataset.csv")

# 2. Define features (X) and target (y)
X = df.drop(columns=["drying_time"], errors="ignore")
y = df["drying_time"]

# 3. Train-test split (70/30, fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# 4. Scale features (scaler statistics come from the training split only)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 5. Configure and train MLP Regressor
#    - deeper hidden layers (200, 100, 50)
#    - early_stopping=True to stop when the validation score stops improving
#    - alpha=0.001 for a stronger L2 penalty than Version 6 (0.0001)
#    - learning_rate_init=0.001 (scikit-learn's default initial step size)
#    - max_iter=2000 as the upper bound on training epochs
#    - learning_rate='constant' is only consulted by solver='sgd'
model = MLPRegressor(
    hidden_layer_sizes=(200, 100, 50),
    activation='relu',
    solver='adam',
    alpha=0.001,
    learning_rate='constant',
    learning_rate_init=0.001,
    max_iter=2000,
    early_stopping=True,      # splits off a fraction of training data for validation
    validation_fraction=0.1,  # 10% of the training set is used as validation
    random_state=42
)
model.fit(X_train_scaled, y_train)

# 6. Make predictions
y_pred = model.predict(X_test_scaled)

# 7. Calculate metrics
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
# MAPE divides by y_test; assumes drying_time is never zero -- TODO confirm.
mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100
accuracy = 100 - mape  # reported "accuracy" is simply 100 - MAPE

# 8. Save Metrics
results = pd.DataFrame({
    "Version": ["Version 7: MLP (Modified)"],
    "MSE": [mse],
    "RMSE": [rmse],
    "MAE": [mae],
    "R2": [r2],
    "MAPE (%)": [mape],
    "Accuracy (%)": [accuracy]
})
# Append this run's row; write the column header only when the file is new
# or empty. NOTE: re-running this cell appends another row.
METRICS_CSV = "drying_time_model_comparison.csv"
write_header = not os.path.isfile(METRICS_CSV) or os.path.getsize(METRICS_CSV) == 0
results.to_csv(METRICS_CSV, mode="a", index=False, header=write_header)

# 9. Display / Print
print("Version 7 (MLP Modified) completed. Metrics saved.")
print(f"MSE: {mse:.5f}, RMSE: {rmse:.5f}, MAE: {mae:.5f}, "
      f"R2: {r2:.5f}, MAPE: {mape:.5f}%, Accuracy: {accuracy:.5f}%")


Version 7 (MLP Modified) completed. Metrics saved.
MSE: 0.43004, RMSE: 0.65578, MAE: 0.43425, R2: 0.62314, MAPE: 7.85214%, Accuracy: 92.14786%


Version 8: MLP (Adaptive LR + Deeper)

In [9]:
# Version 8: four-hidden-layer MLP with a smaller initial learning rate.
import os

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# 1. Load the dataset
df = pd.read_csv("drying_time_dataset.csv")

# 2. Define features (X) and target (y)
X = df.drop(columns=["drying_time"], errors="ignore")
y = df["drying_time"]

# 3. Train-test split (70/30, fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# 4. Scale features (scaler statistics come from the training split only)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 5. Configure and train MLP Regressor
#    - Hidden layers: (300, 200, 100, 50)
#    - smaller learning_rate_init=0.0005
#    - alpha=0.001 (L2 penalty), unchanged from Version 7
#    - max_iter=3000
#    - early_stopping=True
#    NOTE: scikit-learn only applies the `learning_rate` schedule when
#    solver='sgd'. With solver='adam' the 'adaptive' setting below has no
#    effect, so the "Adaptive LR" label reflects intent rather than behaviour.
model = MLPRegressor(
    hidden_layer_sizes=(300, 200, 100, 50),
    activation='relu',
    solver='adam',
    alpha=0.001,
    learning_rate='adaptive',
    learning_rate_init=0.0005,
    max_iter=3000,
    early_stopping=True,
    validation_fraction=0.1,
    random_state=42
)
model.fit(X_train_scaled, y_train)

# 6. Make predictions
y_pred = model.predict(X_test_scaled)

# 7. Calculate metrics
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
# MAPE divides by y_test; assumes drying_time is never zero -- TODO confirm.
mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100
accuracy = 100 - mape  # reported "accuracy" is simply 100 - MAPE

# 8. Save Metrics
results = pd.DataFrame({
    "Version": ["Version 8: MLP (Adaptive LR + Deeper)"],
    "MSE": [mse],
    "RMSE": [rmse],
    "MAE": [mae],
    "R2": [r2],
    "MAPE (%)": [mape],
    "Accuracy (%)": [accuracy]
})
# Append this run's row; write the column header only when the file is new
# or empty. NOTE: re-running this cell appends another row.
METRICS_CSV = "drying_time_model_comparison.csv"
write_header = not os.path.isfile(METRICS_CSV) or os.path.getsize(METRICS_CSV) == 0
results.to_csv(METRICS_CSV, mode="a", index=False, header=write_header)

# 9. Print out results
print("Version 8 (MLP - Adaptive LR + Deeper) completed. Metrics saved.")
print(f"MSE: {mse:.5f}, RMSE: {rmse:.5f}, MAE: {mae:.5f}, "
      f"R2: {r2:.5f}, MAPE: {mape:.5f}%, Accuracy: {accuracy:.5f}%")


Version 8 (MLP - Adaptive LR + Deeper) completed. Metrics saved.
MSE: 0.34348, RMSE: 0.58607, MAE: 0.39578, R2: 0.69900, MAPE: 7.01006%, Accuracy: 92.98994%


Version 9: MLP (Increased Alpha + Wider Layers)

In [10]:
# Version 9: wider four-hidden-layer MLP with a larger L2 penalty.
import os

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# 1. Load the dataset
df = pd.read_csv("drying_time_dataset.csv")

# 2. Define features (X) and target (y)
X = df.drop(columns=["drying_time"], errors="ignore")
y = df["drying_time"]

# 3. Train-test split (70/30, fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# 4. Scale features (scaler statistics come from the training split only)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 5. Configure and train MLP Regressor
#    - Increased alpha (0.01) for stronger L2 regularization
#    - Wider hidden layers (512, 256, 128, 64)
#    - max_iter=3000 as the upper bound on training epochs
#    - early_stopping=True
#    NOTE: learning_rate='adaptive' is only applied by solver='sgd'; it has
#    no effect with solver='adam'.
model = MLPRegressor(
    hidden_layer_sizes=(512, 256, 128, 64),
    activation='relu',
    solver='adam',
    alpha=0.01,  # Increased alpha (stronger regularization)
    learning_rate='adaptive',
    learning_rate_init=0.0005,
    max_iter=3000,
    early_stopping=True,
    validation_fraction=0.1,
    random_state=42
)
model.fit(X_train_scaled, y_train)

# 6. Make predictions
y_pred = model.predict(X_test_scaled)

# 7. Calculate metrics
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
# MAPE divides by y_test; assumes drying_time is never zero -- TODO confirm.
mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100
accuracy = 100 - mape  # reported "accuracy" is simply 100 - MAPE

# 8. Save Metrics (append) to 'drying_time_model_comparison.csv'
results = pd.DataFrame({
    "Version": ["Version 9: MLP (Increased Alpha + Wider Layers)"],
    "MSE": [mse],
    "RMSE": [rmse],
    "MAE": [mae],
    "R2": [r2],
    "MAPE (%)": [mape],
    "Accuracy (%)": [accuracy]
})
# Write the column header only when the file is new or empty.
# NOTE: re-running this cell appends another row.
METRICS_CSV = "drying_time_model_comparison.csv"
write_header = not os.path.isfile(METRICS_CSV) or os.path.getsize(METRICS_CSV) == 0
results.to_csv(METRICS_CSV, mode="a", index=False, header=write_header)

# 9. Print results
print("Version 9 (MLP - Increased Alpha + Wider Layers) completed. Metrics saved.")
print(f"MSE: {mse:.5f}, RMSE: {rmse:.5f}, MAE: {mae:.5f}, "
      f"R2: {r2:.5f}, MAPE: {mape:.5f}%, Accuracy: {accuracy:.5f}%")


Version 9 (MLP - Increased Alpha + Wider Layers) completed. Metrics saved.
MSE: 0.36348, RMSE: 0.60290, MAE: 0.39569, R2: 0.68147, MAPE: 7.07143%, Accuracy: 92.92857%


Version 10: MLP (Higher Alpha + Dropout Simulation)

In [11]:
# Version 10: MLP with a much larger L2 penalty (alpha=0.05).
import os

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# 1. Load the dataset
df = pd.read_csv("drying_time_dataset.csv")

# 2. Define features (X) and target (y)
X = df.drop(columns=["drying_time"], errors="ignore")
y = df["drying_time"]

# 3. Train-test split (70/30, fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# 4. Scale features (scaler statistics come from the training split only)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 5. Configure and train MLP Regressor
#    - alpha=0.05: strong L2 weight penalty. MLPRegressor exposes no dropout
#      option; the heavier L2 penalty is used here as the stand-in
#      regulariser. It is not equivalent to dropout.
#    - Medium-depth architecture (256, 128, 64, 32)
#    - max_iter=3000 as the upper bound on training epochs
#    - early_stopping=True
#    NOTE: learning_rate='adaptive' is only applied by solver='sgd'; it has
#    no effect with solver='adam'.
model = MLPRegressor(
    hidden_layer_sizes=(256, 128, 64, 32),
    activation='relu',
    solver='adam',
    alpha=0.05,  # Strong L2 regularization (intended as a dropout substitute)
    learning_rate='adaptive',
    learning_rate_init=0.0005,
    max_iter=3000,
    early_stopping=True,
    validation_fraction=0.1,
    random_state=42
)
model.fit(X_train_scaled, y_train)

# 6. Make predictions
y_pred = model.predict(X_test_scaled)

# 7. Calculate metrics
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
# MAPE divides by y_test; assumes drying_time is never zero -- TODO confirm.
mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100
accuracy = 100 - mape  # reported "accuracy" is simply 100 - MAPE

# 8. Save Metrics
results = pd.DataFrame({
    "Version": ["Version 10: MLP (Higher Alpha + Dropout Simulation)"],
    "MSE": [mse],
    "RMSE": [rmse],
    "MAE": [mae],
    "R2": [r2],
    "MAPE (%)": [mape],
    "Accuracy (%)": [accuracy]
})
# Append this run's row; write the column header only when the file is new
# or empty. NOTE: re-running this cell appends another row.
METRICS_CSV = "drying_time_model_comparison.csv"
write_header = not os.path.isfile(METRICS_CSV) or os.path.getsize(METRICS_CSV) == 0
results.to_csv(METRICS_CSV, mode="a", index=False, header=write_header)

# 9. Print results
print("Version 10 (MLP - Higher Alpha + Dropout Simulation) completed. Metrics saved.")
print(f"MSE: {mse:.5f}, RMSE: {rmse:.5f}, MAE: {mae:.5f}, "
      f"R2: {r2:.5f}, MAPE: {mape:.5f}%, Accuracy: {accuracy:.5f}%")


Version 10 (MLP - Higher Alpha + Dropout Simulation) completed. Metrics saved.
MSE: 0.39039, RMSE: 0.62481, MAE: 0.43238, R2: 0.65789, MAPE: 7.79063%, Accuracy: 92.20937%


Version 11: MLP (Batch Normalization + Increased Iterations)

In [12]:
# Version 11: five-hidden-layer MLP with a higher iteration cap.
import os

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# 1. Load the dataset
df = pd.read_csv("drying_time_dataset.csv")

# 2. Define features (X) and target (y)
X = df.drop(columns=["drying_time"], errors="ignore")
y = df["drying_time"]

# 3. Train-test split (70/30, fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# 4. Scale features (scaler statistics come from the training split only)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 5. Configure and train MLP Regressor
#    - Deeper architecture (512, 256, 128, 64, 32)
#    - Lower alpha (0.01) than Version 10 for better balance
#    - Increased max_iter=5000 as the upper bound on training epochs
#    - early_stopping=True
#    NOTE: despite the "Batch Normalization" label, no batch-normalization
#    layer is configured here (MLPRegressor does not offer one). The only
#    normalization is the StandardScaler applied to the inputs above.
#    NOTE: learning_rate='adaptive' is only applied by solver='sgd'; it has
#    no effect with solver='adam'.
model = MLPRegressor(
    hidden_layer_sizes=(512, 256, 128, 64, 32),
    activation='relu',
    solver='adam',
    alpha=0.01,  # Regularization strength
    learning_rate='adaptive',
    learning_rate_init=0.0005,
    max_iter=5000,  # Increased iterations
    early_stopping=True,
    validation_fraction=0.1,
    random_state=42
)
model.fit(X_train_scaled, y_train)

# 6. Make predictions
y_pred = model.predict(X_test_scaled)

# 7. Calculate metrics
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
# MAPE divides by y_test; assumes drying_time is never zero -- TODO confirm.
mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100
accuracy = 100 - mape  # reported "accuracy" is simply 100 - MAPE

# 8. Save Metrics
results = pd.DataFrame({
    "Version": ["Version 11: MLP (Batch Normalization + Increased Iterations)"],
    "MSE": [mse],
    "RMSE": [rmse],
    "MAE": [mae],
    "R2": [r2],
    "MAPE (%)": [mape],
    "Accuracy (%)": [accuracy]
})
# Append this run's row; write the column header only when the file is new
# or empty. NOTE: re-running this cell appends another row.
METRICS_CSV = "drying_time_model_comparison.csv"
write_header = not os.path.isfile(METRICS_CSV) or os.path.getsize(METRICS_CSV) == 0
results.to_csv(METRICS_CSV, mode="a", index=False, header=write_header)

# 9. Print results
print("Version 11 (MLP - Batch Normalization + Increased Iterations) completed. Metrics saved.")
print(f"MSE: {mse:.5f}, RMSE: {rmse:.5f}, MAE: {mae:.5f}, "
      f"R2: {r2:.5f}, MAPE: {mape:.5f}%, Accuracy: {accuracy:.5f}%")


Version 11 (MLP - Batch Normalization + Increased Iterations) completed. Metrics saved.
MSE: 0.38872, RMSE: 0.62347, MAE: 0.40831, R2: 0.65935, MAPE: 7.18643%, Accuracy: 92.81357%


Version 12: MLP (Smaller Learning Rate + More Layers + Momentum)

In [13]:
# Version 12: six-hidden-layer MLP trained with SGD + momentum.
import os

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# 1. Load the dataset
df = pd.read_csv("drying_time_dataset.csv")

# 2. Define features (X) and target (y)
X = df.drop(columns=["drying_time"], errors="ignore")
y = df["drying_time"]

# 3. Train-test split (70/30, fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# 4. Scale features (scaler statistics come from the training split only;
#    no batch normalization is involved in this model)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 5. Configure and train MLP Regressor
#    - More layers (1024, 512, 256, 128, 64, 32)
#    - Smaller learning rate (0.0001) for finer weight updates
#    - solver='sgd', so `momentum` and learning_rate='adaptive' take effect
#      here (scikit-learn only uses them with the SGD solver)
#    - Increased max_iter=7000 as the upper bound on training epochs
#    - early_stopping=True
model = MLPRegressor(
    hidden_layer_sizes=(1024, 512, 256, 128, 64, 32),
    activation='relu',
    solver='sgd',  # Switch to stochastic gradient descent for momentum
    alpha=0.01,  # Regularization strength
    learning_rate='adaptive',
    learning_rate_init=0.0001,  # Even smaller learning rate for finer updates
    max_iter=7000,  # Increased iterations for deep training
    early_stopping=True,
    validation_fraction=0.1,
    momentum=0.9,  # Momentum term to accelerate learning
    random_state=42
)
model.fit(X_train_scaled, y_train)

# 6. Make predictions
y_pred = model.predict(X_test_scaled)

# 7. Calculate metrics
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
# MAPE divides by y_test; assumes drying_time is never zero -- TODO confirm.
mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100
accuracy = 100 - mape  # reported "accuracy" is simply 100 - MAPE

# 8. Save Metrics
results = pd.DataFrame({
    "Version": ["Version 12: MLP (Smaller LR + More Layers + Momentum)"],
    "MSE": [mse],
    "RMSE": [rmse],
    "MAE": [mae],
    "R2": [r2],
    "MAPE (%)": [mape],
    "Accuracy (%)": [accuracy]
})
# Append this run's row; write the column header only when the file is new
# or empty. NOTE: re-running this cell appends another row.
METRICS_CSV = "drying_time_model_comparison.csv"
write_header = not os.path.isfile(METRICS_CSV) or os.path.getsize(METRICS_CSV) == 0
results.to_csv(METRICS_CSV, mode="a", index=False, header=write_header)

# 9. Print results
print("Version 12 (MLP - Smaller LR + More Layers + Momentum) completed. Metrics saved.")
print(f"MSE: {mse:.5f}, RMSE: {rmse:.5f}, MAE: {mae:.5f}, "
      f"R2: {r2:.5f}, MAPE: {mape:.5f}%, Accuracy: {accuracy:.5f}%")


Version 12 (MLP - Smaller LR + More Layers + Momentum) completed. Metrics saved.
MSE: 0.74768, RMSE: 0.86468, MAE: 0.63512, R2: 0.34478, MAPE: 12.26130%, Accuracy: 87.73870%


Version 13: MLP (LBFGS solver)

In [14]:
# Version 13: four-hidden-layer MLP optimised with the L-BFGS solver.
import os

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# 1. Load dataset and separate features (X) from the target (y)
df = pd.read_csv("drying_time_dataset.csv")
X = df.drop(columns=["drying_time"], errors="ignore")
y = df["drying_time"]

# 2. Train-test split (70/30, fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# 3. Scale features (scaler statistics come from the training split only)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 4. Configure MLP Regressor (LBFGS solver)
#    - LBFGS is a quasi-Newton method that can yield good results on modest data sizes
#    - alpha=0.002 for moderate L2 regularization
#    - hidden_layer_sizes=(300,200,100,50) for deeper capacity
model = MLPRegressor(
    hidden_layer_sizes=(300, 200, 100, 50),
    activation='relu',
    solver='lbfgs',      # LBFGS solver
    alpha=0.002,
    max_iter=2000,       # LBFGS can converge faster, but give it enough iterations
    random_state=42
)

# 5. Fit the model
model.fit(X_train_scaled, y_train)

# 6. Predictions
y_pred = model.predict(X_test_scaled)

# 7. Evaluate
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
# MAPE divides by y_test; assumes drying_time is never zero -- TODO confirm.
mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100
accuracy = 100 - mape  # reported "accuracy" is simply 100 - MAPE

# 8. Save Results
#    The label column is keyed "Version" (not "Model") so that it matches the
#    rows written by the other cells if this cell ends up writing the header.
results = pd.DataFrame({
    "Version": ["Version 13: MLP (LBFGS solver)"],
    "MSE": [mse],
    "RMSE": [rmse],
    "MAE": [mae],
    "R2": [r2],
    "MAPE (%)": [mape],
    "Accuracy (%)": [accuracy]
})
# Append this run's row; write the column header only when the file is new
# or empty. NOTE: re-running this cell appends another row.
METRICS_CSV = "drying_time_model_comparison.csv"
write_header = not os.path.isfile(METRICS_CSV) or os.path.getsize(METRICS_CSV) == 0
results.to_csv(METRICS_CSV, mode="a", index=False, header=write_header)

print("Version 13 MLP (LBFGS solver) completed. Metrics saved.")
print(f"MSE: {mse:.5f}, RMSE: {rmse:.5f}, MAE: {mae:.5f}, R2: {r2:.5f}, MAPE: {mape:.5f}%, Accuracy: {accuracy:.5f}%")


Version 13 MLP (LBFGS solver) completed. Metrics saved.
MSE: 0.09957, RMSE: 0.31554, MAE: 0.14322, R2: 0.91275, MAPE: 2.39092%, Accuracy: 97.60908%
