This code is mainly by Edward Raff - https://github.com/EdwardRaff/Inside-Deep-Learning

Modified by Arunesh Sinha

In [5]:
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from tqdm.autonotebook import tqdm
import pandas as pd

In [None]:
%matplotlib inline
from IPython.display import set_matplotlib_formats
# Ask the inline backend to render figures in both PNG and PDF formats.
# NOTE(review): the recorded output of this cell echoes this very call, which
# looks like a DeprecationWarning; newer IPython releases appear to expose the
# same function as matplotlib_inline.backend_inline.set_matplotlib_formats —
# confirm against the installed version before switching.
set_matplotlib_formats('png', 'pdf')

  set_matplotlib_formats('png', 'pdf')


In [None]:
import torch
# Print the installed PyTorch version so the recorded outputs can be tied to it.
print(torch.__version__)

2.1.0+cu121


In [None]:
!python --version

Python 3.10.12


In [6]:
# Tensors of increasing rank: 0-D scalar, 1-D vector, 2-D matrix, 3-D tensor.
torch_scalar = torch.tensor(3.14)
torch_vector = torch.tensor([1, 2, 3, 4])
torch_matrix = torch.tensor([[1, 2], [3, 4], [5, 6], [7, 8]])
# The layout below is only for readability: four 2x3 slices, one per line.
torch_tensor3d = torch.tensor([
    [[ 1,  2,  3], [ 4,  5,  6]],
    [[ 7,  8,  9], [10, 11, 12]],
    [[13, 14, 15], [16, 17, 18]],
    [[19, 20, 21], [22, 23, 24]],
])

In [7]:
# Show the shape of each tensor, from rank 0 up to rank 3.
for tensor in (torch_scalar, torch_vector, torch_matrix, torch_tensor3d):
    print(tensor.shape)

torch.Size([])
torch.Size([4])
torch.Size([4, 2])
torch.Size([4, 2, 3])


In [8]:
# Draw a 4x4 array of uniform samples from [0, 1). No seed is set, so the
# values differ on every run.
x_np = np.random.random((4,4))
print(x_np)

[[0.73190066 0.11149239 0.28763394 0.56862894]
 [0.39209059 0.01129645 0.6908148  0.37352965]
 [0.0271468  0.33238854 0.52720093 0.95715262]
 [0.78433522 0.27155264 0.28596557 0.77748357]]


In [9]:
# Build a tensor from the NumPy array; the printed result shows that it keeps
# the array's float64 dtype.
x_pt = torch.tensor(x_np)
print(x_pt)

tensor([[0.7319, 0.1115, 0.2876, 0.5686],
        [0.3921, 0.0113, 0.6908, 0.3735],
        [0.0271, 0.3324, 0.5272, 0.9572],
        [0.7843, 0.2716, 0.2860, 0.7775]], dtype=torch.float64)


In [10]:
# Compare element types: the tensor inherited float64 from the NumPy array.
print(x_np.dtype, x_pt.dtype)

float64 torch.float64


In [11]:
#Let's force both to be 32-bit floats by passing an explicit dtype
x_np = np.asarray(x_np, dtype=np.float32)
x_pt = torch.tensor(x_np, dtype=torch.float32)
print(x_np.dtype, x_pt.dtype)

float32 torch.float32


In [12]:
# Element-wise comparison: produces a boolean mask, True where the value exceeds 0.5
b_np = (x_np > 0.5)
print(b_np)
print(b_np.dtype)

[[ True False False  True]
 [False False  True False]
 [False False  True  True]
 [ True False False  True]]
bool


In [13]:
# Same element-wise comparison on the tensor; the result has dtype torch.bool
b_pt = (x_pt > 0.5)
print(b_pt)
print(b_pt.dtype)

tensor([[ True, False, False,  True],
        [False, False,  True, False],
        [False, False,  True,  True],
        [ True, False, False,  True]])
torch.bool


In [14]:
# Sum over every element of the NumPy array
np.sum(x_np)

7.1306133

In [15]:
# Sum over every element of the tensor; the result is a 0-D tensor
torch.sum(x_pt)

tensor(7.1306)

In [16]:
# With no axes given, np.transpose reverses the axis order (rows <-> columns here)
np.transpose(x_np)

array([[0.73190063, 0.3920906 , 0.0271468 , 0.7843352 ],
       [0.11149239, 0.01129645, 0.33238855, 0.27155265],
       [0.28763393, 0.6908148 , 0.52720094, 0.28596556],
       [0.56862897, 0.37352964, 0.9571526 , 0.7774836 ]], dtype=float32)

In [17]:
# torch.transpose is not the same as np.transpose: it swaps exactly the two
# dimensions you name. When in doubt, check https://pytorch.org/docs/stable/index.html
torch.transpose(x_pt, 0, 1)

tensor([[0.7319, 0.3921, 0.0271, 0.7843],
        [0.1115, 0.0113, 0.3324, 0.2716],
        [0.2876, 0.6908, 0.5272, 0.2860],
        [0.5686, 0.3735, 0.9572, 0.7775]])

In [18]:
# Swapping dims 0 and 2 turns the (4, 2, 3) tensor into shape (3, 2, 4)
print(torch.transpose(torch_tensor3d, 0, 2).shape)

torch.Size([3, 2, 4])


In [19]:
import timeit
# 2048 x 2048 random matrix (2**11 per side)
x = torch.rand(2**11, 2**11)
# Total seconds for 100 evaluations of x@x; globals() lets the timed
# statement see the notebook's x.
time_cpu = timeit.timeit("x@x", globals=globals(), number=100)

In [20]:
# Report whether a CUDA GPU is usable, then pick the compute device to match.
# Falling back to the CPU keeps every later `.to(device)` call working on
# machines without a GPU instead of failing on first use.
print("Is CUDA available? :", torch.cuda.is_available())
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

Is CUDA available? : True


In [21]:
x = x.to(device)
# CUDA kernels are launched asynchronously: without an explicit synchronize the
# timer would stop as soon as the work is queued, not when it has finished.
# Synchronize inside the timed statement so the measurement covers the actual
# matrix products. On a non-CUDA device there is nothing to wait for.
gpu_stmt = "x@x; torch.cuda.synchronize()" if x.is_cuda else "x@x"
if x.is_cuda:
    torch.cuda.synchronize()  # make sure the host-to-device copy is done before timing
time_gpu = timeit.timeit(gpu_stmt, globals=globals(), number=100)

In [None]:
# Demonstration: operands that live on different devices cannot be combined.
# x is moved to `device` while y stays where torch.rand created it.
x = torch.rand(128, 128).to(device)
y = torch.rand(128, 128)
try:
    product = x*y
except RuntimeError as err:
    # The failure is the point of this cell, so show the message instead of
    # aborting a "Run All" execution of the notebook.
    print("RuntimeError:", err)
else:
    # Only reached when both tensors ended up on the same device.
    print(product)

RuntimeError: ignored

In [None]:
# Bring x back to the CPU so both operands are on the same device, then multiply element-wise
x = x.cpu()
x*y

tensor([[0.5144, 0.0896, 0.4019,  ..., 0.3814, 0.6939, 0.0585],
        [0.2224, 0.0064, 0.2374,  ..., 0.1612, 0.0391, 0.0336],
        [0.2608, 0.0194, 0.1412,  ..., 0.1075, 0.3709, 0.6880],
        ...,
        [0.6719, 0.0172, 0.3221,  ..., 0.5748, 0.0060, 0.0114],
        [0.0341, 0.1724, 0.4793,  ..., 0.0041, 0.0944, 0.0432],
        [0.2677, 0.4993, 0.5069,  ..., 0.0169, 0.1544, 0.4584]])

In [None]:
# A helper function to move various types of objects to a device
def moveTo(obj, device):
    """
    Recursively move an object, or the contents of a container, to a device.

    obj: the python object to move to a device, or to move its contents to a device
    device: the compute device to move objects to

    Returns a new list / tuple / set / dict with every element (and, for dicts,
    every key) processed recursively. Named tuples keep their type and field
    names. Any other object that has a ``to`` attribute is moved with
    ``obj.to(device)``; everything else is returned unchanged.
    """
    if isinstance(obj, list):
        return [moveTo(x, device) for x in obj]
    elif isinstance(obj, tuple):
        moved = [moveTo(x, device) for x in obj]
        if hasattr(obj, "_fields"):
            # namedtuple: rebuild the same type so attribute access by field
            # name keeps working (its constructor takes positional fields).
            return type(obj)(*moved)
        return tuple(moved)
    elif isinstance(obj, set):
        return {moveTo(x, device) for x in obj}
    elif isinstance(obj, dict):
        return {moveTo(key, device): moveTo(value, device) for key, value in obj.items()}
    elif hasattr(obj, "to"):
        return obj.to(device)
    else:
        return obj

# Quick check: print a list of tensors before and after moving it with moveTo
some_tensors = [torch.tensor(1), torch.tensor(2)]
print(some_tensors)
print(moveTo(some_tensors, device))

In [24]:
# A trailing _ marks the in-place version of a function (do not use in-place operations, at least when you need gradients)
# In-place operations can help save memory, if used properly
op1 = torch.ones(3,3)
op2 = torch.rand(3,3)
print(op1)
print(op2)
print(op1.add(op2)) # out-of-place: returns a new tensor holding the sum
print(op1) # op1 is unchanged by add()
print(op1.add_(op2)) # inplace operation, not recommended

print(op1) # op1 itself now holds the sum
print(op1 + 1) # out-of-place again: builds a new tensor
print(op1)
op1 += 1 # inplace operation, not recommended
print(op1)

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])
tensor([[0.4313, 0.3838, 0.5842],
        [0.4525, 0.5164, 0.4564],
        [0.2383, 0.1894, 0.1602]])
tensor([[1.4313, 1.3838, 1.5842],
        [1.4525, 1.5164, 1.4564],
        [1.2383, 1.1894, 1.1602]])
tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])
tensor([[1.4313, 1.3838, 1.5842],
        [1.4525, 1.5164, 1.4564],
        [1.2383, 1.1894, 1.1602]])
tensor([[1.4313, 1.3838, 1.5842],
        [1.4525, 1.5164, 1.4564],
        [1.2383, 1.1894, 1.1602]])


In [25]:
# One random value; requires_grad=True asks autograd to track operations on x
x = torch.rand(1, requires_grad=True)
print(x)

tensor([0.5311], requires_grad=True)


In [28]:
# backward() ADDS into x.grad rather than overwriting it. Clear any gradient
# left over from an earlier run of this cell, otherwise re-running the cell
# prints a multiple of the true derivative.
x.grad = None

y = x + 2
print(y)
z = y*y
#z = y.mean()

#y.retain_grad()
z.backward()
print(x.grad) # dz/dx = 2*(x + 2)
#print(y.grad)

# No grad - 3 known ways
#x.requires_grad_(False)
#x.detach()
with torch.no_grad():
  # Operations inside this block are not tracked by autograd
  y = x*x
  #print(y)

tensor([2.5311], grad_fn=<AddBackward0>)
tensor([15.1864])


In [29]:
# Two random values this time, again tracked by autograd
x = torch.rand(2, requires_grad=True)
print(x)

tensor([0.6923, 0.2652], requires_grad=True)


In [30]:
# backward() ADDS into x.grad rather than overwriting it. Clear any gradient
# left over from an earlier run of this cell so the printed value is the
# derivative of this computation only.
x.grad = None

y = x + 2
print(y)
#z = y*y
z = y.mean()

# y is an intermediate (non-leaf) tensor; retain_grad() keeps its gradient
# available after backward() so it can be printed below.
y.retain_grad()
z.backward()
print(x.grad) # dz/dx
print(y.grad) # dz/dy

# No grad - 3 known ways
#x.requires_grad_(False)
#x.detach()
with torch.no_grad():
  # Operations inside this block are not tracked by autograd
  y = x*x
  #print(y)

tensor([2.6923, 2.2652], grad_fn=<AddBackward0>)
tensor([0.5000, 0.5000])
tensor([0.5000, 0.5000])


In [None]:
!python --version

Python 3.8.16


In [None]:
#@title
def f(x):
    """Evaluate the example objective f(x) = (x - 2)^2 element-wise."""
    shifted = x - 2.0
    return torch.pow(shifted, 2)

# 100 evenly spaced x values from -7 to 9
x_axis_vals = np.linspace(-7,9,100)
# Evaluate f through a tensor, then convert back to NumPy for plotting
y_axis_vals = f(torch.tensor(x_axis_vals)).numpy()

sns.lineplot(x=x_axis_vals, y=y_axis_vals, label='$f(x)=(x-2)^2$')
#@title
def fP(x):
    """Hand-derived derivative of f(x) = (x - 2)^2, namely f'(x) = 2x - 4."""
    doubled = 2*x
    return doubled - 4

# Evaluate the hand-written derivative on the same x grid
y_axis_vals_p = fP(torch.tensor(x_axis_vals)).numpy()

#First, let's draw a black line at 0, so that we can easily tell if something is positive or negative
sns.lineplot(x=x_axis_vals, y=[0.0]*len(x_axis_vals), label="0", color='black')
# Then overlay the function and its derivative on the same axes
sns.lineplot(x=x_axis_vals, y=y_axis_vals, label='$f(x) = (x-2)^2$')
sns.lineplot(x=x_axis_vals, y=y_axis_vals_p, label="$f'(x)=2 x - 4$")
#@title
# Starting point for the optimisation, tracked by autograd
x = torch.tensor([-3.5], requires_grad=True)
print(x.grad) # no backward pass has run for this fresh tensor yet
#@title
value = f(x)
print(value)
#@title
value.backward() # fills in x.grad with d f / d x at the current x
print(x.grad)
#@title
# Minimise f by plain gradient descent, starting from x = -3.5.
x = torch.tensor([-3.5], requires_grad=True)

epsilon = 1e-5 #Stop once a single update moves x by no more than this
eta = 0.1 #Step size (learning rate)

#Detached snapshot of the current solution, used only for the stopping test
x_cur = x.detach().clone()

#Always take at least one step, then repeat until the solution stops moving.
#(Seeding the "previous" solution as x*100 would skip the loop entirely for a
#starting point of 0.)
while True:
    x_prev = x_cur #x_cur is rebound to a fresh clone below, so this snapshot is never modified

    #Compute our function, gradient, and update
    value = f(x)
    value.backward()
    with torch.no_grad(): #The update itself must not be recorded by autograd
        x -= eta * x.grad
    x.grad.zero_() #We need to zero out the old gradient, as py-torch will not do that for us

    #What are we currently now?
    x_cur = x.detach().clone()

    #Written as "not >" so that a NaN step also terminates the loop
    if not (torch.linalg.norm(x_cur-x_prev) > epsilon):
        break

print(x_cur)
#@title
# Wrap the starting point in a Parameter so that it can be handed to an optimizer
x_param = torch.nn.Parameter(torch.tensor([-3.5]), requires_grad=True)
#@title
# Plain SGD over that single parameter, reusing the step size eta
optimizer = torch.optim.SGD([x_param], lr=eta)
#@title
# Fixed budget of 60 update steps; the trailing comments show the manual
# equivalent of each optimizer call
for epoch in range(60):
    optimizer.zero_grad() #x.grad.zero_()
    loss_incurred  = f(x_param)
    loss_incurred.backward()
    optimizer.step() #x.data -= eta * x.grad
print(x_param.data)
#@title
from torch.utils.data import Dataset
from sklearn.datasets import fetch_openml

# Load data from https://www.openml.org/d/554
# as_frame=False requests plain NumPy arrays. Recent scikit-learn versions
# return a pandas DataFrame/Series for this dataset by default, which breaks
# positional indexing such as X[index, :] further down.
X, y = fetch_openml('mnist_784', version=1, return_X_y=True, as_frame=False)
print(X.shape)
#@title
class SimpleDataset(Dataset):
    """
    Map-style dataset over a feature matrix and a label vector.

    X: 2-D array-like with one example per row (e.g. NumPy array or pandas DataFrame)
    y: 1-D array-like with one label per row of X; each label must be convertible with int()
    """

    def __init__(self, X, y):
        super(SimpleDataset, self).__init__()
        #Normalise to NumPy arrays so the positional indexing in __getitem__
        #also works for inputs such as a pandas DataFrame / Series.
        #np.asarray hands back an existing ndarray as-is (no copy).
        self.X = np.asarray(X)
        self.y = np.asarray(y)

    def __getitem__(self, index):
        """Return the (inputs, targets) pair for row `index` as float32 / int64 tensors."""
        #The tensor conversion could have been done once in the constructor;
        #doing it here converts only the rows that are actually requested.
        inputs = torch.tensor(self.X[index,:], dtype=torch.float32)
        targets = torch.tensor(int(self.y[index]), dtype=torch.int64)
        return inputs, targets

    def __len__(self):
        """Number of examples, i.e. the number of rows of X."""
        return self.X.shape[0]
#Now we can make a PyTorch dataset
dataset = SimpleDataset(X, y)
#@title
print("Length: ", len(dataset))
example, label = dataset[0] #Indexing goes through __getitem__, giving an (inputs, targets) pair
print("Features: ", example.shape) #Expect 784 features per example (28 x 28 pixels)
print("Label of index 0: ", label)
#@title
# View the flat 784-value feature vector as a 28x28 image
plt.imshow(example.reshape((28,28)))
#@title
# Use 80% of the examples for training...
train_size = int(len(dataset)*0.8)
# ...and the remainder for testing, so the two sizes always add up to len(dataset)
test_size = len(dataset)-train_size

# Randomly partition the dataset into two subsets of the requested sizes
train_dataset, test_dataset = torch.utils.data.random_split(dataset, (train_size, test_size))
print("{} examples for training and {} for testing".format(len(train_dataset), len(test_dataset)))

NameError: ignored