In [1]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import time
from tabulate import tabulate
from sklearn.metrics import r2_score, mean_squared_error, mean_squared_log_error, mean_absolute_error, make_scorer
from sklearn.model_selection import train_test_split

data = pd.read_csv('csv/final_dataset.csv')
print(data.columns)

# Separate features and response variables
X = data.iloc[:, 2:]                                # features
Y = data['temp_measured']                           # response variable: geothermal reservoir measured temperature
print(f'Features of dataset: {X.columns}')
print(f'Number of compenents in features: {X.shape[1]}')
print(Y.head(10))

Index(['well_sample', 'temp_measured', 'pH', 'Na ', 'K', 'Ca', 'Mg', 'Cl',
       'SO4'],
      dtype='object')
Features of dataset: Index(['pH', 'Na ', 'K', 'Ca', 'Mg', 'Cl', 'SO4'], dtype='object')
Number of compenents in features: 7
0    137
1    137
2    137
3    137
4    150
5    116
6    165
7    140
8    115
9    115
Name: temp_measured, dtype: int64


In [2]:
### Scikit-learn MLP Implementation ..... probar RandomizedSearchCV u Optuna

from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler, RobustScaler

start_time_mlp = time.time()

x_train_mlp, x_test_mlp, y_train_log_mlp, y_test_log_mlp = train_test_split(X, np.log(Y), test_size=0.2, random_state=42)

scaler = RobustScaler()
x_train_mlp = scaler.fit_transform(x_train_mlp)
x_test_mlp = scaler.transform(x_test_mlp)

mlp = MLPRegressor(
    hidden_layer_sizes=(1024, 512, 256),
    activation='relu',
    solver='adam',
    alpha=0.001,  # Regularización L2
    learning_rate='adaptive',
    learning_rate_init=0.0001,
    max_iter=1000,
    early_stopping=True,
    validation_fraction=0.2,
    n_iter_no_change=20,
    random_state=42
)

mlp.fit(x_train_mlp, y_train_log_mlp)

end_time_mlp = time.time()

y_pred_log_mlp = mlp.predict(x_test_mlp)
y_pred_test_mlp = np.exp(y_pred_log_mlp)
y_test_mlp_orig = np.exp(y_test_log_mlp)

def mean_relative_squared_error(Y_true, Y_pred):
    return np.mean(((Y_true - Y_pred) / Y_true) ** 2)


r2_mlp = r2_score(y_test_mlp_orig, y_pred_test_mlp)
mse_mlp = mean_squared_error(y_test_mlp_orig, y_pred_test_mlp)
mslr_mlp = mean_squared_log_error(y_test_mlp_orig, y_pred_test_mlp)
mae_mlp = mean_absolute_error(y_test_mlp_orig, y_pred_test_mlp)
mrse_mlp = mean_relative_squared_error(y_test_mlp_orig, y_pred_test_mlp)

training_time_mlp = end_time_mlp - start_time_mlp

mlp_metrics = {
    'Eval_metrics': ['R2 Score', 'MSE', 'MAE', 'MSLE', 'MRSE', 'Training time'],
    'MLP Regressor': [r2_mlp, mse_mlp, mslr_mlp, mae_mlp, mrse_mlp, training_time_mlp]
}

df_mlp = pd.DataFrame(mlp_metrics)
df_mlp.to_csv('metrics/metrics_mlp.csv', index=False)

print(tabulate(df_mlp.round(4), headers='keys', tablefmt='pretty', showindex=False))

+---------------+---------------+
| Eval_metrics  | MLP Regressor |
+---------------+---------------+
|   R2 Score    |    0.5649     |
|      MSE      |   3282.9729   |
|      MAE      |    0.1494     |
|     MSLE      |    37.1181    |
|     MRSE      |    0.2125     |
| Training time |     29.01     |
+---------------+---------------+


In [3]:
### Neural Network implementation (Keras)

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, LeakyReLU
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from sklearn.preprocessing import RobustScaler  # Better for handling outliers


start_time_k = time.time()

x_train_k, x_test_k, y_train_log_k, y_test_log_k = train_test_split(X, np.log(Y), test_size=0.2, random_state=36)

scaler = RobustScaler()
x_train_k = scaler.fit_transform(x_train_k)
x_test_k = scaler.transform(x_test_k)

# Define neural networks architecture with LeakyReLU activation
model = Sequential([
    # Input layer
    Dense(512, input_dim=x_train_k.shape[1], kernel_regularizer=l2(0.01)),
    LeakyReLU(alpha=0.1),
    BatchNormalization(),
    Dropout(0.3),
    
    # Hidden layers
    Dense(256, kernel_regularizer=l2(0.01)),
    LeakyReLU(alpha=0.1),
    BatchNormalization(),
    Dropout(0.3),
    
    Dense(128, kernel_regularizer=l2(0.01)),
    LeakyReLU(alpha=0.1),
    BatchNormalization(),
    Dropout(0.3),
    
    #Dense(64, kernel_regularizer=l2(0.01)),
    #LeakyReLU(alpha=0.1),
    #BatchNormalization(),
    #Dropout(0.3),
    
    # Output layer
    Dense(1, activation='linear')
])

model.compile(optimizer=Adam(learning_rate=0.001), 
              loss='mean_squared_error', 
              metrics=['mean_absolute_error'])

early_stop = EarlyStopping(monitor='val_loss',
                           patience=20, 
                           restore_best_weights=True)

reduce_lr = ReduceLROnPlateau(monitor='val_loss', 
                              actor=0.2, 
                              patience=10, 
                              min_lr=1e-4)

training = model.fit(x_train_k, y_train_log_k, epochs=2000, validation_split=0.2, batch_size=20,
                     verbose=0, callbacks=[early_stop, reduce_lr])

end_time_k = time.time()

#model.save('keras_nn_model.h5')
#print("Model saved to 'keras_nn_model.h5'.")

y_pred_test_log_k = model.predict(x_test_k)
y_pred_train_log_k = model.predict(x_train_k)

y_pred_test_k = np.exp(y_pred_test_log_k) 
y_pred_train_k = np.exp(y_pred_train_log_k)
y_train_k = np.exp(y_train_log_k)
y_test_k = np.exp(y_test_log_k)

Y_test_k = np.squeeze(y_test_k)
Y_pred_test_k = np.squeeze(y_pred_test_k)

training_time_k = end_time_k - start_time_k

def mean_relative_squared_error(y_true, y_pred_test):
    return np.mean(((y_true - y_pred_test)/y_true)**2)

r2_k = r2_score(y_test_k, y_pred_test_k)
mse_k = mean_squared_error(y_test_k, y_pred_test_k)
mae_k = mean_absolute_error(y_test_k, y_pred_test_k)
mslr_k = mean_squared_log_error(y_test_k, y_pred_test_k)
mrse_k = mean_relative_squared_error(Y_test_k, Y_pred_test_k)


eval_metrics_k = {
    'Eval_metrics': ['R2 Score', 'MSE', 'MAE', 'MSLR', 'MRSE', 'Training time'],
    'Elastic-Net Model': [r2_k, mse_k, mae_k, mslr_k, mrse_k, training_time_k]
}

df_metrics_k = pd.DataFrame(eval_metrics_k)
df_metrics_k.to_csv('metrics/metrics_nn.csv', index=False)

print(tabulate(df_metrics_k.round(4), headers='keys', tablefmt='pretty', showindex=False))

### reducir learning_rate de Adam de 0,01 a 0,001 mejor el r2, 
### de 0,001 a 0,0001 no lo mejoro y aumento mucho el tiempo.
### Agregar input layer de 1024 no mejoro nada y aumento el tiempo
### Con RobustScaler mejoro respecto de StandardScaler.
### Prueba l2 de 0.01 a 0.001, no mejora nada, queda 0.01

2025-08-25 11:27:21.826822: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-08-25 11:27:21.889732: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-08-25 11:27:21.908086: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-08-25 11:27:22.025914: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  super().__init__(activity_regularizer=activity_regu

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
+---------------+-------------------+
| Eval_metrics  | Elastic-Net Model |
+---------------+-------------------+
|   R2 Score    |      0.5374       |
|      MSE      |     3301.1821     |
|      MAE      |      40.4601      |
|     MSLR      |      0.1441       |
|     MRSE      |      0.2365       |
| Training time |      45.4058      |
+---------------+-------------------+


In [None]:
### Neural Network implementation (Keras) with Hyperparameter Tuning

import pandas as pd
import numpy as np
import time
import matplotlib.pyplot as plt
from tabulate import tabulate

# Import Keras and TensorFlow components
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, LeakyReLU
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import RobustScaler
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error, mean_squared_log_error
import keras_tuner as kt


x_train_k, x_test_k, y_train_log_k, y_test_log_k = train_test_split(X, np.log(Y), test_size=0.2, random_state=36)

scaler = RobustScaler()
x_train_k = scaler.fit_transform(x_train_k)
x_test_k = scaler.transform(x_test_k)

# 1. Define the model-building function for KerasTuner
def build_model(hp):
    """Builds a Keras model with tunable hyperparameters."""
    model = Sequential()
    
    # Tune the number of units in the first Dense layer
    hp_units_1 = hp.Int('units_1', min_value=256, max_value=1024, step=128)
    model.add(Dense(units=hp_units_1, input_dim=x_train_k.shape[1], kernel_regularizer=l2(0.001)))
    model.add(LeakyReLU(alpha=0.1))
    model.add(BatchNormalization())
    # Tune the dropout rate
    hp_dropout_1 = hp.Float('dropout_1', min_value=0.1, max_value=0.5, step=0.1)
    model.add(Dropout(rate=hp_dropout_1))

    # Tune the number of hidden layers and their units
    for i in range(hp.Int('num_layers', 1, 3)):
        hp_units = hp.Int(f'units_{i+2}', min_value=64, max_value=512, step=64)
        model.add(Dense(units=hp_units, kernel_regularizer=l2(0.001)))
        model.add(LeakyReLU(alpha=0.1))
        model.add(BatchNormalization())
        hp_dropout = hp.Float(f'dropout_{i+2}', min_value=0.1, max_value=0.5, step=0.1)
        model.add(Dropout(rate=hp_dropout))
        
    model.add(Dense(1, activation='linear'))

    # Tune the learning rate for the optimizer
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])

    model.compile(optimizer=Adam(learning_rate=hp_learning_rate),
                  loss='huber',
                  metrics=['mean_absolute_error'])
    
    return model

# 2. Instantiate the tuner
tuner = kt.Hyperband(
    build_model,
    objective='val_mean_absolute_error',
    max_epochs=100,  # Max epochs to train a model for
    factor=3,
    directory='keras_tuner_dir',
    project_name='temperature_prediction'
)

# Define an early stopping callback to prevent overfitting during the search
stop_early = EarlyStopping(monitor='val_loss', patience=15)

# 3. Run the hyperparameter search
print("Starting hyperparameter search...")
tuner.search(x_train_k, y_train_log_k, epochs=100, validation_split=0.2, callbacks=[stop_early], verbose=1)

# Get the optimal hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

print(f"""
The hyperparameter search is complete. 
Optimal number of units in the first layer: {best_hps.get('units_1')}
Optimal learning rate: {best_hps.get('learning_rate')}.
""")

# 4. Build and train the final model with the best hyperparameters
start_time_k = time.time()
model = tuner.hypermodel.build(best_hps)

# Define callbacks for the final training run
final_early_stop = EarlyStopping(monitor='val_loss', patience=40, restore_best_weights=True, verbose=1)
final_reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=20, min_lr=1e-6, verbose=1)
final_checkpoint = ModelCheckpoint('best_nn_model_tuned.keras', monitor='val_loss', save_best_only=True, verbose=1)

history = model.fit(
    x_train_k, 
    y_train_log_k, 
    epochs=1000,  # Train for a sufficient number of epochs
    validation_split=0.2,
    batch_size=32,
    callbacks=[final_early_stop, final_reduce_lr, final_checkpoint],
    verbose=2
)
end_time_k = time.time()

# Load the best model weights saved by the checkpoint
model.load_weights('best_nn_model_tuned.keras')
print("Best tuned model loaded.")

# 5. Evaluate the final model
y_pred_test_log_k = model.predict(x_test_k)
y_pred_test_k = np.exp(y_pred_test_log_k).ravel()
y_test_k_orig = np.exp(y_test_log_k).ravel()

training_time_k = end_time_k - start_time_k

def mean_relative_squared_error(y_true, y_pred):
    return np.mean(((y_true - y_pred) / y_true)**2)

r2_k = r2_score(y_test_k_orig, y_pred_test_k)
mse_k = mean_squared_error(y_test_k_orig, y_pred_test_k)
mae_k = mean_absolute_error(y_test_k_orig, y_pred_test_k)
mslr_k = mean_squared_log_error(y_test_k_orig, y_pred_test_k)
mrse_k = mean_relative_squared_error(y_test_k_orig, y_pred_test_k)

eval_metrics_k = {
    'Eval_metrics': ['R2 Score', 'MSE', 'MAE', 'MSLE', 'MRSE', 'Training time'],
    'Tuned Neural Network': [r2_k, mse_k, mae_k, mslr_k, mrse_k, training_time_k]
}

df_metrics_k = pd.DataFrame(eval_metrics_k)
print("\n--- Final Model Performance ---")
print(tabulate(df_metrics_k.round(4), headers='keys', tablefmt='pretty', showindex=False))

'''# Plot training history
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(history.history['mean_absolute_error'], label='Training MAE')
plt.plot(history.history['val_mean_absolute_error'], label='Validation MAE')
plt.title('Model MAE')
plt.xlabel('Epoch')
plt.ylabel('MAE')
plt.legend()

plt.tight_layout()
plt.savefig('nn_tuned_training_history.png')
plt.show()
'''

In [None]:
### PyTorch Neural Network Implementation
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np
import time
import matplotlib.pyplot as plt

# Set device for GPU acceleration if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Set random seeds for reproducibility
torch.manual_seed(42)
np.random.seed(42)

# Prepare data
data = pd.read_csv('csv/final_dataset.csv')
X = data.iloc[:, 2:]
Y = data['temp_measured']

x_train, x_test, y_train_log, y_test_log = train_test_split(X, np.log(Y), test_size=0.2, random_state=42)

# Scale features
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

# Convert to PyTorch tensors and move to device
x_train_tensor = torch.FloatTensor(x_train_scaled).to(device)
y_train_tensor = torch.FloatTensor(y_train_log.values).reshape(-1, 1).to(device)
x_test_tensor = torch.FloatTensor(x_test_scaled).to(device)
y_test_tensor = torch.FloatTensor(y_test_log.values).reshape(-1, 1).to(device)

# Improved neural network architecture
class ImprovedTemperatureMLP(nn.Module):
    def __init__(self, input_size, dropout_rate=0.3):
        super(ImprovedTemperatureMLP, self).__init__()
        
        self.input_layer = nn.Sequential(
            nn.Linear(input_size, 1024),
            nn.BatchNorm1d(1024),
            nn.GELU(),
            nn.Dropout(dropout_rate)
        )
        
        self.hidden_layers = nn.ModuleList([
            nn.Sequential(
                nn.Linear(1024, 512),
                nn.BatchNorm1d(512),
                nn.GELU(),
                nn.Dropout(dropout_rate)
            ),
            nn.Sequential(
                nn.Linear(512, 256),
                nn.BatchNorm1d(256),
                nn.GELU(),
                nn.Dropout(dropout_rate)
            ),
            nn.Sequential(
                nn.Linear(256, 128),
                nn.BatchNorm1d(128),
                nn.GELU(),
                nn.Dropout(dropout_rate * 0.5)  # Reduced dropout for final layers
            ),
            nn.Sequential(
                nn.Linear(128, 64),
                nn.BatchNorm1d(64),
                nn.GELU(),
                nn.Dropout(dropout_rate * 0.3)
            )
        ])
        
        self.output_layer = nn.Linear(64, 1)
        
        # Initialize weights
        self.apply(self._init_weights)
    
    def _init_weights(self, module):
        if isinstance(module, nn.Linear):
            nn.init.xavier_uniform_(module.weight)
            if module.bias is not None:
                nn.init.constant_(module.bias, 0)
    
    def forward(self, x):
        x = self.input_layer(x)
        
        for layer in self.hidden_layers:
            residual = x if x.size(1) == layer[0].in_features else None
            x = layer(x)
            # Add residual connection where dimensions match
            if residual is not None and x.size(1) == residual.size(1):
                x = x + residual
        
        return self.output_layer(x)

# Enhanced early stopping class
class EarlyStopping:
    def __init__(self, patience=25, min_delta=1e-6, restore_best_weights=True):
        self.patience = patience
        self.min_delta = min_delta
        self.restore_best_weights = restore_best_weights
        self.best_loss = None
        self.counter = 0
        self.best_weights = None
        
    def __call__(self, val_loss, model):
        if self.best_loss is None:
            self.best_loss = val_loss
            self.save_checkpoint(model)
        elif val_loss < self.best_loss - self.min_delta:
            self.best_loss = val_loss
            self.counter = 0
            self.save_checkpoint(model)
        else:
            self.counter += 1
            
        if self.counter >= self.patience:
            if self.restore_best_weights:
                model.load_state_dict(self.best_weights)
            return True
        return False
    
    def save_checkpoint(self, model):
        self.best_weights = model.state_dict().copy()

# Create validation split
x_train_val, x_val, y_train_val, y_val = train_test_split(
    x_train_tensor, y_train_tensor, test_size=0.2, random_state=42
)

# Initialize model
model = ImprovedTemperatureMLP(x_train_scaled.shape[1]).to(device)

# Loss function and optimizer with improved settings
criterion = nn.SmoothL1Loss()  # More robust than Huber for regression
optimizer = optim.AdamW(model.parameters(), lr=0.01, weight_decay=1e-4, eps=1e-8)

# Learning rate scheduler
scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(
    optimizer, T_0=50, T_mult=2, eta_min=1e-6
)

# Create DataLoaders
train_dataset = TensorDataset(x_train_val, y_train_val)
val_dataset = TensorDataset(x_val, y_val)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=0)
val_loader = DataLoader(val_dataset, batch_size=128, shuffle=False, num_workers=0)

# Training with validation
start_time = time.time()
train_losses = []
val_losses = []
learning_rates = []
early_stopping = EarlyStopping(patience=30, min_delta=1e-6)

print("Starting training...")

for epoch in range(1000):  # Reduced max epochs with better early stopping
    # Training phase
    model.train()
    train_loss = 0
    for batch_x, batch_y in train_loader:
        optimizer.zero_grad()
        outputs = model(batch_x)
        loss = criterion(outputs, batch_y)
        loss.backward()
        
        # Gradient clipping
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        
        optimizer.step()
        train_loss += loss.item()
    
    avg_train_loss = train_loss / len(train_loader)
    train_losses.append(avg_train_loss)
    
    # Validation phase
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for batch_x, batch_y in val_loader:
            outputs = model(batch_x)
            loss = criterion(outputs, batch_y)
            val_loss += loss.item()
    
    avg_val_loss = val_loss / len(val_loader)
    val_losses.append(avg_val_loss)
    
    # Step scheduler
    scheduler.step()
    current_lr = optimizer.param_groups[0]['lr']
    learning_rates.append(current_lr)
    
    # Print progress
    if epoch % 25 == 0:
        print(f'Epoch {epoch:4d} | Train Loss: {avg_train_loss:.6f} | Val Loss: {avg_val_loss:.6f} | LR: {current_lr:.2e}')
    
    # Early stopping check
    if early_stopping(avg_val_loss, model):
        print(f'Early stopping at epoch {epoch}')
        break

end_time = time.time()
training_time_pytorch = end_time - start_time

# Final evaluation
model.eval()
with torch.no_grad():
    # Test predictions
    y_pred_log = model(x_test_tensor).cpu().numpy()
    y_pred_test = np.exp(y_pred_log).ravel()
    y_test_orig = np.exp(y_test_log).ravel()
    
    # Training predictions for comparison
    y_pred_train_log = model(x_train_tensor).cpu().numpy()
    y_pred_train = np.exp(y_pred_train_log).ravel()
    y_train_orig = np.exp(y_train_log).ravel()

# Calculate comprehensive metrics
def mean_relative_squared_error(y_true, y_pred):
    return np.mean(((y_true - y_pred) / y_true) ** 2)

# Test metrics
r2_pytorch = r2_score(y_test_orig, y_pred_test)
mse_pytorch = mean_squared_error(y_test_orig, y_pred_test)
mae_pytorch = mean_absolute_error(y_test_orig, y_pred_test)
msle_pytorch = mean_squared_log_error(y_test_orig, y_pred_test)
mrse_pytorch = mean_relative_squared_error(y_test_orig, y_pred_test)

# Training metrics (to check for overfitting)
r2_train = r2_score(y_train_orig, y_pred_train)

# Store results
pytorch_metrics = {
    'Eval_metrics': ['R2 Score', 'MSE', 'MAE', 'MSLE', 'MRSE', 'Training time', 'R2 Train'],
    'PyTorch Model': [r2_pytorch, mse_pytorch, mae_pytorch, msle_pytorch, mrse_pytorch, training_time_pytorch, r2_train]
}

pytorch_df_metrics = pd.DataFrame(pytorch_metrics)
pytorch_df_metrics.to_csv('pytorch_improved_metrics.csv', index=False)

print("\n" + "="*60)
print("IMPROVED PYTORCH NEURAL NETWORK RESULTS")
print("="*60)
print(tabulate(pytorch_df_metrics.round(4), headers='keys', tablefmt='pretty', showindex=False))

# Plot training curves
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 10))

# Loss curves
ax1.plot(train_losses, label='Training Loss', color='blue', alpha=0.7)
ax1.plot(val_losses, label='Validation Loss', color='red', alpha=0.7)
ax1.set_xlabel('Epoch')
ax1.set_ylabel('Loss')
ax1.set_title('Training and Validation Loss')
ax1.legend()
ax1.grid(True, alpha=0.3)

# Learning rate
ax2.plot(learning_rates, color='green')
ax2.set_xlabel('Epoch')
ax2.set_ylabel('Learning Rate')
ax2.set_title('Learning Rate Schedule')
ax2.set_yscale('log')
ax2.grid(True, alpha=0.3)

# Predictions vs Actual (Test set)
ax3.scatter(y_test_orig, y_pred_test, alpha=0.6, color='blue')
ax3.plot([y_test_orig.min(), y_test_orig.max()], [y_test_orig.min(), y_test_orig.max()], 'r--', lw=2)
ax3.set_xlabel('Actual Temperature')
ax3.set_ylabel('Predicted Temperature')
ax3.set_title(f'Test Set: Actual vs Predicted (R² = {r2_pytorch:.4f})')
ax3.grid(True, alpha=0.3)

# Residuals plot
residuals = y_test_orig - y_pred_test
ax4.scatter(y_pred_test, residuals, alpha=0.6, color='purple')
ax4.axhline(y=0, color='red', linestyle='--', linewidth=2)
ax4.set_xlabel('Predicted Temperature')
ax4.set_ylabel('Residuals')
ax4.set_title('Residuals Plot')
ax4.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('pytorch_improved_training_analysis.png', dpi=300, bbox_inches='tight')
plt.show()

# Save model
torch.save({
    'model_state_dict': model.state_dict(),
    'scaler': scaler,
    'model_config': {
        'input_size': x_train_scaled.shape[1],
        'dropout_rate': 0.3
    }
}, 'pytorch_improved_model.pth')

print(f"\nModel saved as 'pytorch_improved_model.pth'")
print(f"Training completed in {training_time_pytorch:.2f} seconds")