# import the data

$$
f=x^2 + \sqrt {y} + z
$$

In [18]:
import pandas as pd

In [19]:
df = pd.read_csv("data/result_testdata.csv", delimiter=",")
df

Unnamed: 0,x,y,z,f
0,0.876300,0.087160,0.678294,1.741426
1,0.506543,0.427771,0.117943,1.028571
2,0.469181,0.208947,0.862431,1.539668
3,0.997632,0.038999,0.628393,1.821145
4,0.680144,0.432733,0.571025,1.691445
...,...,...,...,...
569,0.213704,0.183989,0.577722,1.052330
570,0.727660,0.021602,0.820556,1.497021
571,0.452470,0.848223,0.896740,2.022459
572,0.470553,0.443268,0.204643,1.091847


# build the dual network

https://machinelearningmastery.com/develop-your-first-neural-network-with-pytorch-step-by-step/

In [20]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch import autograd
import math
from sklearn.metrics import mean_squared_error

In [21]:
X = df.iloc[:,:3]
y = df.iloc[:,3]
# y = df.iloc[:,:3].sum(axis=1) 

In [22]:
X = torch.tensor(X.values.astype(np.float32), requires_grad=True)
y = torch.tensor(y, dtype=torch.float32).reshape(-1, 1)

In [23]:
X.shape

torch.Size([574, 3])

In [24]:
y.shape

torch.Size([574, 1])

In [25]:
class EstimateF(nn.Module):
    def __init__(self):
        super().__init__()
        self.hidden11 = nn.Linear(3, 160)
        self.act11 = nn.ReLU()
        self.hidden12 = nn.Linear(160, 80)
        self.act12 = nn.ReLU()
        self.output1 = nn.Linear(80, 16)
        self.act_output1 =  nn.Linear(16, 1)
        self.hidden21 = nn.Linear(3, 160)
        self.act21 = nn.ReLU()
        self.hidden22 = nn.Linear(160, 80)
        self.act22 = nn.ReLU()
        self.output2 = nn.Linear(80, 16)
        self.act_output2 =  nn.Linear(16, 3)
    def forward(self, x):
        x1 = self.act11(self.hidden11(x))
        x1 = self.act12(self.hidden12(x1))
        x1 = self.act_output1(self.output1(x1))
        x2 = self.act21(self.hidden21(x))
        x2 = self.act22(self.hidden22(x2))
        x2 = self.act_output2(self.output2(x2))
        return x1,x2
 
model = EstimateF()
print(model)

EstimateF(
  (hidden11): Linear(in_features=3, out_features=160, bias=True)
  (act11): ReLU()
  (hidden12): Linear(in_features=160, out_features=80, bias=True)
  (act12): ReLU()
  (output1): Linear(in_features=80, out_features=16, bias=True)
  (act_output1): Linear(in_features=16, out_features=1, bias=True)
  (hidden21): Linear(in_features=3, out_features=160, bias=True)
  (act21): ReLU()
  (hidden22): Linear(in_features=160, out_features=80, bias=True)
  (act22): ReLU()
  (output2): Linear(in_features=80, out_features=16, bias=True)
  (act_output2): Linear(in_features=16, out_features=3, bias=True)
)


In [26]:
loss_fn = nn.MSELoss()  
optimizer = optim.Adam(model.parameters(), lr=0.001)

n_epochs = 10000
batch_size = 10

for epoch in range(n_epochs):
    for i in range(0, len(X), batch_size):
        Xbatch = X[i:i+batch_size].clone()
        # Xbatch.requires_grad=True
        y_pred1, y_pred2 = model(Xbatch)
        ybatch1 = y[i:i+batch_size]
        ybatch2 = autograd.grad(y_pred1,Xbatch,torch.ones([Xbatch.shape[0], 1]), retain_graph=True, create_graph=True)[0] #first derivative
        loss = 10* loss_fn(y_pred1, ybatch1) +  loss_fn(torch.pow(Xbatch,y_pred2-torch.ones(y_pred2.size())).mul(y_pred2), ybatch2)
        # if epoch==0 and i==0:
        #     # print(y_pred1)
        #     print(Xbatch)
        #     print(y_pred2)
        #     # print(loss_fn(y_pred1, ybatch))
        #     print(y_pred2-torch.ones(y_pred2.size()))
        #     print(torch.pow(Xbatch,y_pred2-torch.ones(y_pred2.size())))
        #     print(torch.pow(Xbatch,y_pred2-torch.ones(y_pred2.size())).mul(y_pred2))                   
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f'Finished epoch {epoch}, latest loss {loss}')

Finished epoch 0, latest loss 0.4282228946685791
Finished epoch 1, latest loss 0.6196480393409729
Finished epoch 2, latest loss 0.5867209434509277
Finished epoch 3, latest loss 0.5328330993652344
Finished epoch 4, latest loss 0.5325113534927368
Finished epoch 5, latest loss 0.5482013821601868
Finished epoch 6, latest loss 0.505256175994873
Finished epoch 7, latest loss 0.5098952054977417
Finished epoch 8, latest loss 0.48541033267974854
Finished epoch 9, latest loss 0.5548424124717712
Finished epoch 10, latest loss 0.27047982811927795
Finished epoch 11, latest loss 0.3501126766204834
Finished epoch 12, latest loss 0.19827201962471008
Finished epoch 13, latest loss 0.31714001297950745
Finished epoch 14, latest loss 0.280417263507843
Finished epoch 15, latest loss 0.3274608850479126
Finished epoch 16, latest loss 0.32911449670791626
Finished epoch 17, latest loss 0.27322477102279663
Finished epoch 18, latest loss 0.22039178013801575
Finished epoch 19, latest loss 0.19895721971988678
Fini

In [27]:
# compute accuracy (no_grad is optional)
# with torch.no_grad():
#     y_pred = model(X)

X.requires_grad_()
X.retain_grad()
y_pred1,y_pred2 = model(X)

trainScore = math.sqrt(mean_squared_error(y_pred1.detach().numpy(),y.detach().numpy()))
print('Train Score: %.2f RMSE' % (trainScore))

Train Score: 0.02 RMSE


# approximate the function terms

$$
f=x^2 + \sqrt {y} + z
$$

$$
f_x'=2x
$$

$$
f_y'=\frac{1}{2} * y ^ {-\frac{1}{2}}
$$

$$
f_z'=1
$$

In [28]:
X[0]

tensor([0.8763, 0.0872, 0.6783], grad_fn=<SelectBackward0>)

In [29]:
y[0]

tensor([1.7414])

In [30]:
y1,y2=model(X[0])
y1

tensor([1.7358], grad_fn=<AddBackward0>)

## get the dirivative values of 3 terms

which is 2, 0.5, and 1

In [31]:
y_pred1,y_pred2=model(X)
y_pred2.mean(dim=0)

tensor([1.9966, 0.4676, 0.9951], grad_fn=<MeanBackward1>)

good match!