# Import Torch 

In [2]:
import torch
import numpy as np
import pandas as pd

# Tensors

In its github page it says that Pytorch 2 important features first is tensors. A tensor is a number,vector,matrix or a n-dimensional array. So Tensor is like a numpy array and Pytorch is kind of Numpy on steroids and can work on GPUs. The second thing is the auto_grad which allows us to take gradients. Actually these two are the building blocks of any neural network. Data & Gradients.

In [5]:
#Create number 4 (integer) as a tensor, it can take floats etc. as well
t1 = torch.tensor(4)

In [6]:
type(t1)

torch.Tensor

In [7]:
t1.dtype

torch.int64

In [8]:
t1 = torch.tensor(12.0)

In [9]:
t1.dtype

torch.float32

In [10]:
#Check torch.tensor?
#It can take list,tuples,numpy arrays as argument

In [11]:
t1 = torch.tensor(np.zeros((12,2)))

In [12]:
t1

tensor([[0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.]], dtype=torch.float64)

In [13]:
t1 = torch.tensor(4)

In [14]:
#Vector
#A vector as in numpy and as in linear algebra. As in numpy, an vector/matric etc
#a numpy array or a pytorch tensor needs to be in one data type
#Thus once I give the first number as 1.0 (float) all the data converted to float 
t2 = torch.tensor([1.0,2,3,4])
t2

tensor([1., 2., 3., 4.])

In [15]:
t2.dtype

torch.float32

In [16]:
#Matrix
t3 = torch.tensor([[1,2,3],
                   [4,5,6]])
t3

tensor([[1, 2, 3],
        [4, 5, 6]])

In [17]:
#3 Dimensional Array
#Think it like a cube or -dikdörtgenler prizması-
#In space we gather around the matrices and they have like depth 
#We combine them in a new axis.
t4 = torch.tensor([
                    [[1,2,3],
                     [4,5,6]], # Matrix One
                     [[7,8,9],
                    [10,11,12]] # Matrix Two
                    ])

In [18]:
t4

tensor([[[ 1,  2,  3],
         [ 4,  5,  6]],

        [[ 7,  8,  9],
         [10, 11, 12]]])

In [19]:
#2 Matrices with shape 2 rows 3 columns
#Another perspective is the shape gives it from the outer most array
#In the outer most array there are 2 elements which are also arrays 
#Inside every one of those 2 elements (arrays) we have 2 more arrays with each have 3 elements
#Thus 3 dimensional array.
#This is useful since it is how we defined it in the first place. 
#Shape works just like in numpy
#3 Dimensional Array
t4.shape

torch.Size([2, 2, 3])

In [20]:
#Matrix
#2 rows 3 columns
t3.shape

torch.Size([2, 3])

In [21]:
#Vector
#4 rows (If think it as column vector) or 4 columns (for a row vector)
t2.shape

torch.Size([4])

In [22]:
#For scalar numbers it has no shape 0 dimensional (?) I guess.
t1.shape

torch.Size([])

# Tensor Operations and Gradients

Once we have tensors arrays or matrices as in linear algebra now we will combine them. Linear combination of the vectors and matrices really is the back bone of everything.

In [25]:
#Create tensors
x = torch.tensor(3.)
w = torch.tensor(4.,requires_grad = True)
b = torch.tensor(5.,requires_grad = True)

In [26]:
x,w,b

(tensor(3.), tensor(4., requires_grad=True), tensor(5., requires_grad=True))

In [27]:
y = w*x + b
y

tensor(17., grad_fn=<AddBackward0>)

In [28]:
#The special thing is here that when requires_grad is set to true
#Then pytorch go and calculates the derivative of result easily.
#y.backward() goes and searches for the elements it has been created which are x,w,b
#And for the ones that with the requires_grad is true it calculates dy/dw, dy/db etc.
#The partial derivatives
#Backward seems logical since it goes back and takes the derivatives wrt to its predecessors
y.backward()

In [29]:
#After this method is called then the derivative is recorded to the tensors.
#x's gradient is none since we did not set requires_grad to True above
print("dy/dw is: " + str(w.grad))
print("dy/dx is: " + str(x.grad))
print("dy/db is: " + str(b.grad))

dy/dw is: tensor(3.)
dy/dx is: None
dy/db is: tensor(1.)


In [30]:
#Try backward()
z = torch.tensor(12.,requires_grad = True)
q = torch.tensor(3.,requires_grad = True)
f = 2*z + 3*q
k = 2*f + 4

In [31]:
z

tensor(12., requires_grad=True)

In [32]:
q

tensor(3., requires_grad=True)

In [33]:
f

tensor(33., grad_fn=<AddBackward0>)

In [34]:
k

tensor(70., grad_fn=<AddBackward0>)

In [35]:
k.backward()

In [36]:
#It does go to the prior steps
#Why f not work ?
f.grad

  f.grad


In [37]:
z.grad

tensor(4.)

In [38]:
q.grad

tensor(6.)

# Interoperability

In [40]:
 #Pytorch has direct support for numpy.

In [41]:
np_arr = np.array([[1,2.0],[3,4]])

In [42]:
np_arr

array([[1., 2.],
       [3., 4.]])

In [43]:
np_arr.dtype

dtype('float64')

In [44]:
t_np = torch.from_numpy(np_arr)

In [45]:
t_np

tensor([[1., 2.],
        [3., 4.]], dtype=torch.float64)

In [46]:
#The dtypes are the same!
#Also we can convert from tensors to numpy arrays as well

In [47]:
np_from_tensor = t_np.numpy()

In [48]:
np_from_tensor

array([[1., 2.],
       [3., 4.]])

In [49]:
type(np_from_tensor)

numpy.ndarray

# Some Useful Attributes & Practice

In [51]:
#Create random tensors from a uniform distribution
rand_tensor = torch.rand((2,3))
rand_tensor

tensor([[0.4797, 0.3248, 0.2510],
        [0.5877, 0.4688, 0.4364]])

In [52]:
#Similar to Numpy one and 0 matrices
ones_tensor = torch.ones((2,3))
ones_tensor

tensor([[1., 1., 1.],
        [1., 1., 1.]])

In [53]:
zeros_tensor = torch.zeros((2,3))
zeros_tensor

tensor([[0., 0., 0.],
        [0., 0., 0.]])

In [54]:
#Very Numpy-Like
tensor = torch.ones(4, 4)
print(f"First row: {tensor[0]}")
print(f"First column: {tensor[:, 0]}")
print(f"Last column: {tensor[..., -1]}")
tensor[:,1] = 0
print(tensor)

First row: tensor([1., 1., 1., 1.])
First column: tensor([1., 1., 1., 1.])
Last column: tensor([1., 1., 1., 1.])
tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]])


In [55]:
# This computes the matrix multiplication between two tensors. y1, y2, y3 will have the same value
# ``tensor.T`` returns the transpose of a tensor

#Matrix Multiplication
y1 = ones_tensor @ rand_tensor.T
y1

tensor([[1.0555, 1.4930],
        [1.0555, 1.4930]])

In [56]:
#Matrix Multiplication
y2 = ones_tensor.matmul(rand_tensor.T)
y2

tensor([[1.0555, 1.4930],
        [1.0555, 1.4930]])

In [57]:
y3 = torch.rand_like(y1)
y3

tensor([[0.5493, 0.2210],
        [0.6969, 0.6684]])

In [58]:
torch.matmul(ones_tensor, rand_tensor.T, out=y3)
y3

tensor([[1.0555, 1.4930],
        [1.0555, 1.4930]])

In [59]:
# This computes the element-wise product. z1, z2, z3 will have the same value
z1 = tensor * tensor
z2 = tensor.mul(tensor)

z3 = torch.rand_like(tensor)
torch.mul(tensor, tensor, out=z3)

tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]])

In [60]:
z1

tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]])

In [61]:
z2

tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]])

# Creating Linear Regression with Torch's Tensors and Gradients 

## Create Fake Data

In [64]:
#First I will create Fake Data Using Numpy
#Create normal random numbers - feature 1
norm_rand = np.random.normal(loc=100.0,scale = 4.0,size = 1000).reshape(1000,1)
#feature 2
norm_rand_2 = np.random.normal(loc=12.0,scale = 10.0,size = 1000).reshape(1000,1)
#Bind those together
X = np.concatenate([norm_rand,norm_rand_2],axis=1)
X = X.T
W = np.array([12.0,-4.0]).reshape(2,1)
b = np.array([-23.0])

In [65]:
W.shape

(2, 1)

In [66]:
X.shape

(2, 1000)

In [67]:
#A Linearly Created Data
y = W.T @ X + b

In [68]:
y.shape

(1, 1000)

In [69]:
#Create the Pandas Data Frame to make it more visual
data = pd.DataFrame(X.T)
data.columns = ["Feature_1","Feature_2"]
data["Target"] = y.T
#data

## Build The Regression Model

Since we created the data itself we know the w1,w2 and b but let us forget that, they will be used to compare our model's quality later.
Say that in real life we had a data like this and want to create a regression model. Since it is created via numpy we dont need any preparation,cleaning etc. Only convert them to tensors.

In [72]:
data.head()

Unnamed: 0,Feature_1,Feature_2,Target
0,100.630166,20.503881,1102.546469
1,102.499316,11.845502,1159.609782
2,91.251646,1.638753,1065.464733
3,98.158853,11.406419,1109.280562
4,104.465894,9.791046,1191.426547


In [73]:
Input_t = torch.from_numpy(np.array(data[["Feature_1","Feature_2"]]))

In [74]:
Input_t.shape

torch.Size([1000, 2])

In [75]:
Input_t.dtype

torch.float64

In [76]:
Target_t = torch.from_numpy(np.array(data[["Target"]]))

In [77]:
Target_t.shape

torch.Size([1000, 1])

In [78]:
Target_t.dtype

torch.float64

In [79]:
Input_t

tensor([[100.6302,  20.5039],
        [102.4993,  11.8455],
        [ 91.2516,   1.6388],
        ...,
        [ 99.4448,  16.7060],
        [105.9178,   0.9631],
        [100.0590,   9.7229]], dtype=torch.float64)

### Linear Regression Functions

First we will need a function to execute linear regression given weigths and data.

#### Linear Model

In [296]:
#Takes data, weights and bias
#For simplicity it does not check dimensions we will provide it in the right directions
def lin_reg (X_,w_,b_):
    #Check shapes
    result = X_ @ w_
    result = result + b_

    return result

    

In [298]:
#Let us Define random weigths and bias
rand_w = torch.randn(2,1,requires_grad=True,dtype = torch.float64)
rand_b = torch.randn(1,requires_grad=True,dtype = torch.float64)

In [300]:
#For random weights and biases it calculates a linear regression approximation
pd.DataFrame(lin_reg(Input_t,rand_w,rand_b).detach().numpy()).head()

Unnamed: 0,0
0,-22.04533
1,-37.522691
2,-48.018274
3,-35.752294
4,-42.072639


In [304]:
asd = lin_reg(Input_t,rand_w,rand_b)

In [86]:
data["Target"].head()

0    1102.546469
1    1159.609782
2    1065.464733
3    1109.280562
4    1191.426547
Name: Target, dtype: float64

#### Loss Function 

As the loss function, I will use least squares methodology. Thus we will try to minimize mean squared error

In [89]:
def model_loss(preds,reals):
    squared_loss = (preds-reals)**2
    mean_sq_loss = torch.sum(squared_loss)/ squared_loss.numel()
    return mean_sq_loss
    

In [90]:
model_loss(lin_reg(Input_t,rand_w,rand_b),Target_t)

tensor(1205818.4543, dtype=torch.float64, grad_fn=<DivBackward0>)

#### Compute Gradients & Gradient Descent

Now since we have the model and the loss we will need to find the correct weights and bias by taking the gradient of the loss
w.r.t weights and bias. That way we will set the weights and bias in such as way that we will decrease the loss function.

Since loss is  a function of the linear model and target data.
And linear model is a function of weights & bias.The input features and the target is the data. So the changing part of the model are weights and bias. Thus we try to set them so that the error decreases.

In [266]:
loss = model_loss(lin_reg(Input_t,rand_w,rand_b),Target_t)

In [268]:
#This function calculates the derivative of the Loss w.r.t. to the tensors that has the requires_grad = True
#We set those true for w and b
loss.backward()

In [270]:
#Derivates of W and b w.r.t to the loss

In [272]:
rand_w.grad

tensor([[-219590.7200],
        [ -25671.3118]], dtype=torch.float64)

In [274]:
rand_b.grad

tensor([-2193.5898], dtype=torch.float64)

In [276]:
#A small but important detail is that pytorch accumulates the derivatives.
#So we need to make them 0 before calculating the gradient for the next step

In [278]:
rand_w.grad.zero_()

tensor([[0.],
        [0.]], dtype=torch.float64)

In [280]:
rand_w.grad

tensor([[0.],
        [0.]], dtype=torch.float64)

In [282]:
rand_b.grad.zero_()

tensor([0.], dtype=torch.float64)

In [188]:
#Now we can define the Gradient Descent function

In [316]:
def Grad_Desc(X_,w_,b_,Y_,iter =10,alpha=0.00001):

    #w_init = w_
    #b_init = b_

    i = 0
    while i<iter:
        print(i)
        #Calculate the current predictions with current w_,b_
        current_preds = lin_reg(X_,w_,b_)
        #print("Current predictions are : " +  str(current_preds))
        current_preds.retain_grad()
        #Calculate the loss with current w_,b_
        current_loss = model_loss(current_preds,Y_)
        print("Current loss is : " +  str(current_loss))
        #Now Calculate the gradients
        current_loss.backward()
        with torch.no_grad():
            
            w_ = w_ - alpha*w_.grad
            b_ = b_ - alpha*b_.grad


            #Reset the gradients
            #w_.grad.zero_()
            #b_.grad.zero_()
        i+=1


    return w_,b_


    

In [318]:
Grad_Desc(Input_t,rand_w,rand_b,Target_t)

0
Current loss is : tensor(1361263.7246, dtype=torch.float64, grad_fn=<DivBackward0>)
1


RuntimeError: can't retain_grad on Tensor that has requires_grad=False