In [1]:
# Clear all variables
%reset -f
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
import math
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import norm
from scipy.stats import truncnorm
from torch.distributions import Normal
import nnwosd as wosd
import importlib
importlib.reload(wosd)

#load function for fitting SFM by mle
from sfm_mle import estimate


n_samples=200

# True weights and bias
b_true = 2
w_true = torch.tensor([.2])

# Generate output with Gaussian noise
noise_std_v = .9
noise_std_u = .2

noise_v = torch.from_numpy(np.random.normal(0, noise_std_v, size=(n_samples, 1)).astype(np.float32))
noise_u = torch.from_numpy(abs(np.random.normal(0, noise_std_u, size=(n_samples, 1)).astype(np.float32)))
#case 1: linear

X = torch.from_numpy((np.random.uniform(0.01, 3, size=(n_samples, 1)).astype(np.float32)))
log_X = torch.log(X)

# y =  (X ** w_true) * b_true * np.exp( noise_v - noise_u)  # y = 5x + 10 + N(0, 2) - HN(0, 0.5)
log_y = w_true * log_X + np.log(b_true) + (noise_v - noise_u)


#fit linear SFM

# Import Data
y = log_y.numpy().flatten()  # Output
x1 = log_X.numpy().flatten()


true_vals = [b_true, w_true, noise_std_u, noise_std_v]
coefs, sterr, logMLE = estimate(y, x1, b_true, w_true, noise_std_u, noise_std_v)


# fit nn sfm with 4 different activation functions


# 2. Standardize input and output data
scaler_X = StandardScaler()
scaler_y = StandardScaler()

# Fit scalers on the data and transform
X_standardized = torch.tensor(scaler_X.fit_transform(log_X), dtype=torch.float32)
y_standardized = torch.tensor(scaler_y.fit_transform(log_y), dtype=torch.float32)

# Instantiate the model
input_size = 1  # Number of input features
hidden_sizes = [32,8]  # Concave-Relu Hidden layer sizes
output_size = 1 # 

# Variables to track the best loss and model parameters
best_loss = float('inf')  # Initialize best loss to infinity
best_model_state = None    # To save the model state

# List to store loss values for plotting
loss_values = []

# Train the deep learning model by minimizing the NLL loss
epochs = 1000

# activation_fun = nn.ReLU()
# activation_fun = nn.ELU()
# activation_fun = wosd.FlippedLeakRELU(alpha=0.8)
# activation_fun = wosd.FlippedELU(alpha=0.8)

activations = {
    "ReLU": nn.ReLU(),
    "ELU": nn.ELU(),
    "FlippedReLU": wosd.FlippedLeakRELU(alpha=0.8),
    "FlippedELU": wosd.FlippedELU(alpha=0.8)
}

# Which activations need weight clamping
clamp_activations = ["FlippedReLU", "FlippedELU"]

fitted_models = {}

for name, activation_fun in activations.items():
    print(f"\nTraining model with {name} activation...")

    # Instantiate model and loss
    model = wosd.MLP(input_size, hidden_sizes, output_size, activation_func=activation_fun)
    nll_loss = wosd.GaussianNLLLoss(sigma_v=noise_std_v, sigma_u=noise_std_u)
    optimizer = optim.Adam(list(model.parameters()) + [nll_loss.log_std_v, nll_loss.log_std_u], lr=0.01)

    best_loss = float('inf')
    best_model_state = None
    loss_values = []

    for epoch in range(epochs):
        model.train()
        y_pred = model(X_standardized)
        loss = nll_loss(y_pred, y_standardized)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Apply weight constraints for self-defined activations
        if name in clamp_activations:
            with torch.no_grad():
                for layer in model.layers:
                    layer.weight.data.clamp_(min=0)
                model.output.weight.clamp_(min=0)

        loss_values.append(loss.item())

        if loss.item() < best_loss:
            best_loss = loss.item()
            best_model_state = model.state_dict()

    # Load best model
    model.load_state_dict(best_model_state)

    # Save everything in dictionary
    fitted_models[name] = {
        "model": model,
        "loss_history": loss_values,
        "std_v_est": torch.exp(nll_loss.log_std_v).item() * scaler_y.scale_,
        "std_u_est": torch.exp(nll_loss.log_std_u).item() * scaler_y.scale_
    }

print("Training completed for all activations.")

# Get all results


Training model with ReLU activation...

Training model with ELU activation...

Training model with FlippedReLU activation...

Training model with FlippedELU activation...
Training completed for all activations.


In [34]:
# e_sfm = log_y - (coefs[0] + coefs[1] * log_X)

# e_sfm_np = e_sfm.detach().numpy()   # detach from graph, convert to NumPy
# E_sfm_correction = np.mean(np.exp(e_sfm_np))

# y_sfm_mean = ((X.numpy() ** coefs[1]) * np.exp(coefs[0])) * E_sfm_correction

In [3]:
res_tf = log_y - (b_true + w_true * log_X)
e_tf_np = res_tf.detach().numpy()
E_tf_correction=np.mean(np.exp(e_tf_np))

y_tf = ((X**w_true)*b_true).numpy()*E_tf_correction

e_sfm = log_y - (coefs[0] + coefs[1] * log_X)

e_sfm_np = e_sfm.detach().numpy()   # detach from graph, convert to NumPy
E_sfm_correction = np.mean(np.exp(e_sfm_np))

y_sfm_mean = ((X.numpy() ** coefs[1]) * np.exp(coefs[0])) * E_sfm_correction

RMSE_sfm = (np.mean(((y_sfm_mean  -y_tf)/y_tf)**2))
BIAS_sfm = np.mean(np.abs(y_sfm_mean -y_tf/y_tf))

sigma_v_sfm = np.sqrt(coefs[3])
sigma_u_sfm = np.sqrt(coefs[2])

bias_v_sfm = np.abs(sigma_v_sfm-noise_std_v)
bias_u_sfm = np.abs(sigma_u_sfm-noise_std_u)

#TE
vectorized_TE_fun = np.vectorize(wosd.TE_fun)

TE_true = vectorized_TE_fun(residuals=((np.log(y_tf)-(w_true * log_X + np.log(b_true)).numpy())),sig_v=noise_std_v,sig_u=noise_std_u)
TE_sfm = vectorized_TE_fun(residuals=(np.log(y_sfm_mean)-log_y.numpy()),sig_v=sigma_v_sfm,sig_u=sigma_u_sfm)

Bias_TE_sfm = np.mean(np.abs(TE_sfm-TE_true))

# RMSE for NN models
RMSE_nn = {}
BIAS_nn = {}
bias_v_nn = {}
bias_u_nn = {}
Bias_TE_nn = {}

for name, info in fitted_models.items():
    model = info['model']
    with torch.no_grad():
        y_pred_std = model(X_standardized)
        # Add inefficiency correction if needed
        std_u_est = info['std_u_est']
        y_pred_std = y_pred_std + np.sqrt(2/np.pi)*std_u_est
        # Inverse transform to original scale
        y_pred_original = scaler_y.inverse_transform(y_pred_std.numpy())
        
        y_original = scaler_y.inverse_transform(y_standardized.numpy())
        residuals_nn= y_original - y_pred_original
        E_nn_correction = np.mean(np.exp(residuals_nn))
        
    
    # Calculate RMSE relative to true values
    rmse = np.sqrt(np.mean(((np.exp(y_pred_original)*E_nn_correction - y_tf)/y_tf)**2))
    RMSE_nn[name] = rmse
    # Calculate absolute relative bias
    bias = np.mean(np.abs((np.exp(y_pred_original)*E_nn_correction - y_tf)/y_tf))
    BIAS_nn[name] = bias
    
    sigma_v_nn=fitted_models[name]["std_v_est"]
    sigma_u_nn=fitted_models[name]["std_u_est"]
    
    bias_v_nn[name] = np.abs(sigma_v_nn - noise_std_v).item()
    bias_u_nn[name] = np.abs(sigma_u_nn - noise_std_u).item()
    
    # Calculate the bias of TE
    TE_nn = vectorized_TE_fun(residuals=(y_pred_original-log_y.numpy()),sig_v=sigma_v_nn,sig_u=sigma_u_nn)
    Bias_TE_nn[name] = np.mean(np.abs(TE_nn-TE_true)) 
    
    print(f"NN ({name}): RMSE = {rmse:.4f}, BIAS = {bias:.4f}, bias_v_nn = {bias_v_nn[name]:.4f}, bias_u_nn = {bias_u_nn[name]:.4f},TE_bias_nn = {Bias_TE_nn[name]:.4f}")



RMSE_nn['sfm']   =float(RMSE_sfm) 
BIAS_nn['sfm']   =float(BIAS_sfm   ) 
bias_v_nn['sfm'] =float(bias_v_sfm ) 
bias_u_nn['sfm'] =float(bias_u_sfm )  
Bias_TE_nn['sfm']=float(Bias_TE_sfm) 

NN (ReLU): RMSE = 2.7517, BIAS = 2.6676, bias_v_nn = 0.0561, bias_u_nn = 0.0125,TE_bias_nn = 0.0270
NN (ELU): RMSE = 2.7862, BIAS = 2.6949, bias_v_nn = 0.0528, bias_u_nn = 0.0117,TE_bias_nn = 0.0267
NN (FlippedReLU): RMSE = 2.6901, BIAS = 2.6888, bias_v_nn = 0.0360, bias_u_nn = 0.0080,TE_bias_nn = 0.0243
NN (FlippedELU): RMSE = 2.7006, BIAS = 2.6901, bias_v_nn = 0.0375, bias_u_nn = 0.0083,TE_bias_nn = 0.0246
