In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from catboost import CatBoostRegressor
from xgboost import XGBRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
df=pd.read_csv("San Diego_featured_data.csv")
df.head()
y= df[['All sky irradiance']]
scaler_X = StandardScaler()
X_scaled = scaler_X.fit_transform(df)

scaler_y = StandardScaler()
y_scaled = scaler_y.fit_transform(y.values.reshape(-1, 1)).flatten() 
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size = 0.2,shuffle=False)
import time as time
start= time.time()
cat_model = CatBoostRegressor(learning_rate=0.1,depth=2,l2_leaf_reg=1,iterations=1500)
xgb_model = XGBRegressor(tree_method='approx',subsamples=0.7,n_estimators=300,max_depth=9,learning_rate=0.1,colsample_bytree=1)
rf_model = RandomForestRegressor(n_estimators=60,min_samples_split=2,min_samples_leaf=2,max_samples=0.75,max_features=8,max_depth=8)
cat_model.fit(X_train, y_train)
xgb_model.fit(X_train, y_train)
rf_model.fit(X_train, y_train)
cat_pred_train = cat_model.predict(X_train).reshape(-1, 1)
xgb_pred_train = xgb_model.predict(X_train).reshape(-1, 1)
rf_pred_train = rf_model.predict(X_train).reshape(-1, 1)
cat_pred_test = cat_model.predict(X_test).reshape(-1, 1)
xgb_pred_test = xgb_model.predict(X_test).reshape(-1, 1)
rf_pred_test = rf_model.predict(X_test).reshape(-1, 1)
# Stack the predictions as inputs to the gating model
train_preds = np.hstack([cat_pred_train, xgb_pred_train,rf_pred_train])
test_preds = np.hstack([cat_pred_test, xgb_pred_test,rf_pred_test])
# Define the Gating Network
class GatingNetwork(nn.Module):
    def __init__(self, input_size):
        super(GatingNetwork, self).__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 64)
        self.fc3 = nn.Linear(64, 3)  # 3 output weights, one for each model
        self.softmax = nn.Softmax(dim=1)  # Ensure weights sum to 1

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        x = self.softmax(x)
        return x
train_preds_tensor = torch.tensor(train_preds, dtype=torch.float32)
test_preds_tensor = torch.tensor(test_preds, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)
gating_model = GatingNetwork(input_size=train_preds.shape[1])
optimizer = optim.Adam(gating_model.parameters(), lr=0.1)
criterion = nn.L1Loss()


# Train the gating model
epochs = 500
for epoch in range(epochs):
    gating_model.train()
    optimizer.zero_grad()
    
    # Get the dynamic weights
    weights = gating_model(train_preds_tensor)
    
    # Compute the final prediction (weighted sum of base model predictions)
    final_pred = (weights[:, 0].unsqueeze(1) * train_preds_tensor[:, 0].unsqueeze(1)) + \
                 (weights[:, 1].unsqueeze(1) * train_preds_tensor[:, 1].unsqueeze(1)) + \
                 (weights[:, 2].unsqueeze(1) * train_preds_tensor[:, 2].unsqueeze(1))
    
    # Calculate loss
    loss = criterion(final_pred, y_train_tensor)
    loss.backward()
    optimizer.step()

    if epoch % 50 == 0:
        print(f'Epoch {epoch}/{epochs}, Loss: {loss.item()}')
end= time.time()
convergence_time= end-start
# Evaluate on test data
start=time.time()
gating_model.eval()
with torch.no_grad():
    test_weights = gating_model(test_preds_tensor)
    
    final_test_pred = (test_weights[:, 0].unsqueeze(1) * test_preds_tensor[:, 0].unsqueeze(1)) + \
                      (test_weights[:, 1].unsqueeze(1) * test_preds_tensor[:, 1].unsqueeze(1)) + \
                      (test_weights[:, 2].unsqueeze(1) * test_preds_tensor[:, 2].unsqueeze(1))
end=time.time()
inference_time= end-start

0:	learn: 0.9134141	total: 57.5ms	remaining: 1m 26s
1:	learn: 0.8416578	total: 58ms	remaining: 43.4s
2:	learn: 0.7745282	total: 58.2ms	remaining: 29s
3:	learn: 0.7165301	total: 58.5ms	remaining: 21.9s
4:	learn: 0.6605078	total: 58.7ms	remaining: 17.6s
5:	learn: 0.6102301	total: 58.9ms	remaining: 14.7s
6:	learn: 0.5664314	total: 59.2ms	remaining: 12.6s
7:	learn: 0.5243119	total: 59.4ms	remaining: 11.1s
8:	learn: 0.4865797	total: 59.6ms	remaining: 9.87s
9:	learn: 0.4549733	total: 59.8ms	remaining: 8.91s
10:	learn: 0.4214188	total: 60ms	remaining: 8.12s
11:	learn: 0.3929913	total: 60.2ms	remaining: 7.46s
12:	learn: 0.3647427	total: 60.3ms	remaining: 6.9s
13:	learn: 0.3406752	total: 60.5ms	remaining: 6.42s
14:	learn: 0.3191715	total: 60.7ms	remaining: 6.01s
15:	learn: 0.2986798	total: 61ms	remaining: 5.66s
16:	learn: 0.2798691	total: 61.2ms	remaining: 5.34s
17:	learn: 0.2624175	total: 61.4ms	remaining: 5.06s
18:	learn: 0.2482287	total: 61.6ms	remaining: 4.8s
19:	learn: 0.2353383	total: 61.

Parameters: { "subsamples" } are not used.

  return F.l1_loss(input, target, reduction=self.reduction)


Epoch 0/500, Loss: 1.1346564292907715
Epoch 50/500, Loss: 1.1346206665039062
Epoch 100/500, Loss: 1.1346206665039062
Epoch 150/500, Loss: 1.1346206665039062
Epoch 200/500, Loss: 1.1346206665039062
Epoch 250/500, Loss: 1.1346206665039062
Epoch 300/500, Loss: 1.1346206665039062
Epoch 350/500, Loss: 1.1346206665039062
Epoch 400/500, Loss: 1.1346206665039062
Epoch 450/500, Loss: 1.1346206665039062


In [2]:
print("convergence time:", convergence_time)
print("inference time:", inference_time)

convergence time: 3.3418798446655273
inference time: 0.0003650188446044922


In [2]:
final_test_pred.shape

torch.Size([384, 1])

In [3]:
predictions = scaler_y.inverse_transform(final_test_pred)

In [4]:
Actual = scaler_y.inverse_transform(y_test.reshape(-1, 1))

In [5]:
Calculated =pd.DataFrame(predictions, columns = ['Prediction'])
Actual=pd.DataFrame(Actual, columns=["Actual"])

In [6]:
import os
import pandas as pd

# Concatenate Calculated and Actual DataFrames along the columns
combined = pd.concat([Calculated, Actual], axis=1)

# Define the path to the existing folder
save_dir = "/Users/ubaidahmed/Desktop/Daily_SI_forecasting /San Diego/Models/Proposed approach"

# Define the filename within the specified folder
filename = os.path.join(save_dir, "Proposed_approach_output.csv")

# Save the combined DataFrame to CSV
combined.to_csv(filename, index=False)

print(f"File saved to {filename}")


File saved to /Users/ubaidahmed/Desktop/Daily_SI_forecasting /San Diego/Models/Proposed approach/Proposed_approach_output.csv


# Error Calculation

In [1]:
import pandas as pd
import numpy as np

In [2]:
df=pd.read_csv("Proposed_approach_output.csv")
features= list(df)
features

['Prediction', 'Actual']

In [3]:
df=df*1000

In [4]:
z= df["Actual"]
x= df["Prediction"]

In [5]:
import math
from sklearn.metrics import mean_squared_error
MSE=mean_squared_error(z,x)
RMSE= math.sqrt(MSE)
from sklearn.metrics import mean_absolute_error
MAE= mean_absolute_error(z,x)
from sklearn.metrics import mean_absolute_percentage_error
MAPE= (mean_absolute_percentage_error(z,x))*100
print("Mean Square Error is:",MSE)
print("Root Mean Square Error is:",RMSE)
print("Mean Absolute Error is:",MAE)
print("Mean Absolute Percentage Error is:",MAPE)

Mean Square Error is: 296.83899322541714
Root Mean Square Error is: 17.229016026036344
Mean Absolute Error is: 5.269963009707643
Mean Absolute Percentage Error is: 0.12236149659523472


In [6]:
max=x.max()
min=x.min()
mean=x.mean()

print("Max",max)
print("Min",min)
print('Mean',mean)

Max 8628.064040448982
Min 1145.4307382911454
Mean 5226.612199618157


In [7]:
NRMSE= (RMSE/(max-min))*100
print("Normalized Root Mean Square Error:",NRMSE)

Normalized Root Mean Square Error: 0.23025337912881355


In [8]:
NRMSE= (RMSE/(mean))*100
print("Normalized Root Mean Square Error:",NRMSE)

Normalized Root Mean Square Error: 0.32964022139035015


In [9]:
relative_errors = (z - x) / z
squared_relative_errors = relative_errors ** 2
abs_relative_errors = np.abs(relative_errors)

In [10]:
msre = np.mean(squared_relative_errors)
rmsre = np.sqrt(msre)
mare = np.mean(abs_relative_errors)
rmspe = np.sqrt(np.mean((abs_relative_errors * 100) ** 2))

In [11]:
print("Mean Square Relative Error is:",msre)
print("Root Mean Square Relative Error is:",rmsre)
print("Mean Absolute Relative Error is:",mare)
print("Root Mean Square Percentage Error is:",rmspe)

Mean Square Relative Error is: 2.398097379724497e-05
Root Mean Square Relative Error is: 0.004897037246871313
Mean Absolute Relative Error is: 0.0012236149659523472
Root Mean Square Percentage Error is: 0.4897037246871313
