In [1]:
# What version of Python do you have?
import sys

import torch
import pandas as pd
import sklearn as sk

print(f"PyTorch Version: {torch.__version__}")
print()
print(f"Python {sys.version}")
print(f"Pandas {pd.__version__}")
print(f"Scikit-Learn {sk.__version__}")
print("GPU is", "available" if torch.cuda.is_available() else "NOT AVAILABLE")

PyTorch Version: 1.10.1

Python 3.7.11 (default, Jul 27 2021, 14:32:16) 
[GCC 7.5.0]
Pandas 1.3.5
Scikit-Learn 1.0.1
GPU is available


In [2]:
import torch 
import math

In [4]:
dtype = torch.float
device = torch.device("cpu")

In [5]:
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

In [6]:
# Randomly initialize weights
a = torch.randn((), device=device, dtype=dtype)
b = torch.randn((), device=device, dtype=dtype)
c = torch.randn((), device=device, dtype=dtype)
d = torch.randn((), device=device, dtype=dtype)

In [7]:
learning_rate = 1e-6

In [9]:
#forward pass
for t in range (2000):
    y_predict=a + b * x + c * x ** 2 + d * x ** 3
    #compute loss 
    loss = (y_predict - y).pow(2).sum().item()
    if t % 100 == 99:
        print(t, loss)
        
    #backward pass
    grad_y_predict = 2.0 * (y_predict - y)
    grad_a = grad_y_predict.sum()
    grad_b = (grad_y_predict * x).sum()
    grad_c = (grad_y_predict * x ** 2).sum()
    grad_d = (grad_y_predict * x ** 3).sum()
    
    #updating gradients using gradient descent
        
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d
    
print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')


99 260.4604797363281
199 175.35650634765625
299 119.04248046875
399 81.77507019042969
499 57.11119079589844
599 40.78739929199219
699 29.982818603515625
799 22.830890655517578
899 18.096446990966797
999 14.962103843688965
1099 12.886877059936523
1199 11.512773513793945
1299 10.602783203125
1399 10.000144958496094
1499 9.601006507873535
1599 9.3366060256958
1699 9.161437034606934
1799 9.04538631439209
1899 8.968481063842773
1999 8.917506217956543
Result: y = -0.0021989361848682165 + 0.8472151756286621 x + 0.00037935248110443354 x^2 + -0.09197544306516647 x^3


In [10]:
path = '/home/ahisham/Documents/pytorch_endtoend/simple_tensor'

In [12]:
#PyTorch: Defining new autograd functions

In [13]:
import torch
import math

In [14]:
#define model mathematical function 
#y=a+bP3(c+dx)
#where P3(x)=1/2(5x^3-3x)




In [15]:
dtype = torch.float

In [16]:
device=torch.device("cpu")

In [19]:
class LegendrePolynomial3(torch.autograd.Function):
    @staticmethod
    def forward(ctx, input):
        """
        In the forward pass we receive a Tensor containing the input and return
        a Tensor containing the output. ctx is a context object that can be used
        to stash information for backward computation. You can cache arbitrary
        objects for use in the backward pass using the ctx.save_for_backward method.
        """
        ctx.save_for_backward(input)
        return 0.5 * (5 * input ** 3 - 3 * input)
    
    @staticmethod
    def backward(ctx, grad_output):
        """
        In the backward pass we receive a Tensor containing the gradient of the loss
        with respect to the output, and we need to compute the gradient of the loss
        with respect to the input.
        """
        input, = ctx.saved_tensors
        return grad_output * 1.5 * (5 * input ** 2 - 1)

In [20]:
#create random tensors 
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

In [21]:
a = torch.full((), 0.0, device=device, dtype=dtype, requires_grad=True)
b = torch.full((), -1.0, device=device, dtype=dtype, requires_grad=True)
c = torch.full((), 0.0, device=device, dtype=dtype, requires_grad=True)
d = torch.full((), 0.3, device=device, dtype=dtype, requires_grad=True)

In [23]:
learning_rate = 5e-6
for t in range(2000):
    # To apply our Function, we use Function.apply method. We alias this as 'P3'.
    P3 = LegendrePolynomial3.apply

    # Forward pass: compute predicted y using operations; we compute
    # P3 using our custom autograd operation.
    y_pred = a + b * P3(c + d * x)

    # Compute and print loss
    loss = (y_pred - y).pow(2).sum()
    if t % 100 == 99:
        print(t, loss.item())

    # Use autograd to compute the backward pass.
    loss.backward()
     # Update weights using gradient descent
    with torch.no_grad():
        a -= learning_rate * a.grad
        b -= learning_rate * b.grad
        c -= learning_rate * c.grad
        d -= learning_rate * d.grad

        # Manually zero the gradients after updating weights
        a.grad = None
        b.grad = None
        c.grad = None
        d.grad = None

print(f'Result: y = {a.item()} + {b.item()} * P3({c.item()} + {d.item()} x)')

99 nan
199 nan
299 nan
399 nan
499 nan
599 nan
699 nan
799 nan
899 nan
999 nan
1099 nan
1199 nan
1299 nan
1399 nan
1499 nan
1599 nan
1699 nan
1799 nan
1899 nan
1999 nan
Result: y = nan + nan * P3(nan + nan x)
