In [1]:
import numpy as np
import pandas as pd
import torch
import os
import utils

### Data Manipulation

Get Started

In [2]:
x = torch.arange(12, dtype = float)
x

tensor([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11.],
       dtype=torch.float64)

In [3]:
x.shape

torch.Size([12])

In [4]:
x.numel()

12

In [5]:
x.reshape(3, 4)

tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.]], dtype=torch.float64)

In [6]:
torch.zeros((2, 3, 4))

tensor([[[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]],

        [[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]])

In [7]:
torch.randn((2, 3))

tensor([[-1.9293, -2.1463, -0.0823],
        [ 0.2771, -2.0305, -1.3379]])

Operations

In [8]:
x = torch.tensor([1.0, 2, 4, 8])
y = torch.tensor([2, 2, 2, 2])
x + y, x - y, x * y, x / y, x ** y

(tensor([ 3.,  4.,  6., 10.]),
 tensor([-1.,  0.,  2.,  6.]),
 tensor([ 2.,  4.,  8., 16.]),
 tensor([0.5000, 1.0000, 2.0000, 4.0000]),
 tensor([ 1.,  4., 16., 64.]))

In [9]:
torch.exp(x)

tensor([2.7183e+00, 7.3891e+00, 5.4598e+01, 2.9810e+03])

In [10]:
X = torch.arange(12).reshape(3, 4)
Y = -1 * X
torch.cat((X, Y), dim = 0), torch.cat((X, Y), dim = 1)

(tensor([[  0,   1,   2,   3],
         [  4,   5,   6,   7],
         [  8,   9,  10,  11],
         [  0,  -1,  -2,  -3],
         [ -4,  -5,  -6,  -7],
         [ -8,  -9, -10, -11]]),
 tensor([[  0,   1,   2,   3,   0,  -1,  -2,  -3],
         [  4,   5,   6,   7,  -4,  -5,  -6,  -7],
         [  8,   9,  10,  11,  -8,  -9, -10, -11]]))

In [11]:
X == Y

tensor([[ True, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])

In [12]:
X.sum()

tensor(66)

Broadcasting

Since a and b are  3×1  and  1×2  matrices respectively, their shapes do not match up if we want to add them. We broadcast the entries of both matrices into a larger  3×2  matrix as follows: for matrix a it replicates the columns and for matrix b it replicates the rows before adding up both elementwise.

In [13]:
a = torch.arange(3).reshape(-1, 1)
b = torch.arange(2)
a + b

tensor([[0, 1],
        [1, 2],
        [2, 3]])

Indexing and Slicing

In [14]:
X = torch.arange(12)
X[-1], X[1:3]

(tensor(11), tensor([1, 2]))

In [15]:
X[3] = 44444
X

tensor([    0,     1,     2, 44444,     4,     5,     6,     7,     8,     9,
           10,    11])

Saving Memory

In [16]:
Y = torch.ones((12,))
before = id(Y)
Y = Y + X
id(Y) == before

False

In [17]:
Z = torch.zeros((12,))
print(id(Z))
Z[:] = X + Y
print(id(Z))

140308597439600
140308597439600


### Data Preprocessing

In [18]:
os.makedirs(os.path.join('..', 'data'), exist_ok=True)
data_file = os.path.join('..', 'data', 'house_tiny.csv')
with open(data_file, 'w') as f:
    f.write('NumRooms,Alley,Price\n')  # Column names
    f.write('NA,Pave,127500\n')  # Each row represents a data example
    f.write('2,NA,106000\n')
    f.write('4,NA,178100\n')
    f.write('NA,NA,140000\n')
    
data = pd.read_csv(data_file)
print(data)

   NumRooms Alley   Price
0       NaN  Pave  127500
1       2.0   NaN  106000
2       4.0   NaN  178100
3       NaN   NaN  140000


In [19]:
inputs, outputs = data.iloc[:, 0:2], data.iloc[:, 2]
inputs = inputs.fillna(inputs.mean())
print(inputs)

   NumRooms Alley
0       3.0  Pave
1       2.0   NaN
2       4.0   NaN
3       3.0   NaN


  inputs = inputs.fillna(inputs.mean())


In [20]:
inputs = pd.get_dummies(inputs, dummy_na=True)
print(inputs)

   NumRooms  Alley_Pave  Alley_nan
0       3.0           1          0
1       2.0           0          1
2       4.0           0          1
3       3.0           0          1


### Automatic Differentiation

Simple Example

In [21]:
x = torch.arange(4.0)
x

tensor([0., 1., 2., 3.])

In [22]:
x.requires_grad_(True)
x.grad

In [23]:
y = 2 * torch.dot(x, x)
y

tensor(28., grad_fn=<MulBackward0>)

In [24]:
y.backward()
x.grad

tensor([ 0.,  4.,  8., 12.])

In [25]:
x.grad == 4 * x

tensor([True, True, True, True])

In [26]:
x.grad.zero_()
y = x.sum()
y.backward()
x.grad

tensor([1., 1., 1., 1.])

Non-Scalar Variables

In [27]:
x.grad.zero_
y = x * x 
y.sum().backward()
x.grad

tensor([1., 3., 5., 7.])

Detaching Computation

In [28]:
x.grad.zero_()
y = x * x
u = y.detach()
z = u * x 

z.sum().backward()
x.grad == u

tensor([True, True, True, True])