In [19]:
import torch
import numpy as np

## PyTorch exercises

### Tensors

1. Make a tensor of size (2, 17)
2. Make a torch.FloatTensor of size (3, 1)
3. Make a torch.LongTensor of size (5, 2, 1)
  - fill the entire tensor with 7s
4. Make a torch.ByteTensor of size (5,)
  - fill the middle 3 indices with ones such that it records [0, 1, 1, 1, 0]
5. Perform a matrix multiplication of two tensors of size (2, 4) and (4, 2). Then do it in-place.
6. Do element-wise multiplication of two randomly filled $(n_1,n_2,n_3)$ tensors. Then store the result in an Numpy array.

### Forward-prop/backward-prop
1. Create a Tensor that `requires_grad` of size (5, 5).
2. Sum the values in the Tensor.
3. Multiply the tensor by 2 and assign the result to a new python variable (i.e. `x = result`)
4. Sum the variable's elements and assign to a new python variable
5. Print the gradients of all the variables
6. Now perform a backward pass on the last variable (NOTE: for each new python variable that you define, call `.retain_grad()`)
7. Print all gradients again

### Deep-forward NNs
1. Look at Lab 3. In Exercise 12 there, you had to build an $L$-layer neural network with the following structure: *[LINEAR -> RELU]$\times$(L-1) -> LINEAR -> SIGMOID*. Reimplement the manual code in PyTorch.
2. Compare test accuracy using different optimizers: SGD, Adam, Momentum.

#### Tensors



In [20]:
# 1. Make a tensor of size (2, 17)
tensor_1 = torch.empty(2, 17)
print("1. Tensor of size (2, 17):\n", tensor_1)
print("Shape:", tensor_1.shape)

1. Tensor of size (2, 17):
 tensor([[-3.6371e-38,  1.9408e-42,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Shape: torch.Size([2, 17])


In [21]:
# 2. Make a torch.FloatTensor of size (3, 1)
tensor_2 = torch.FloatTensor(3, 1)
print("\n2. FloatTensor of size (3, 1):\n", tensor_2)
print("Shape:", tensor_2.shape)
print("Type:", tensor_2.dtype)


2. FloatTensor of size (3, 1):
 tensor([[-3.5455e-38],
        [ 1.9408e-42],
        [ 1.3090e+00]])
Shape: torch.Size([3, 1])
Type: torch.float32


In [22]:
# 3. Make a torch.LongTensor of size (5, 2, 1)
#    fill the entire tensor with 7s
tensor_3 = torch.LongTensor(5, 2, 1)
tensor_3.fill_(7)
print("\n3. LongTensor of size (5, 2, 1) filled with 7s:\n", tensor_3)
print("Shape:", tensor_3.shape)
print("Type:", tensor_3.dtype)


3. LongTensor of size (5, 2, 1) filled with 7s:
 tensor([[[7],
         [7]],

        [[7],
         [7]],

        [[7],
         [7]],

        [[7],
         [7]],

        [[7],
         [7]]])
Shape: torch.Size([5, 2, 1])
Type: torch.int64


In [23]:
# 4. Make a torch.ByteTensor of size (5,)
#    fill the middle 3 indices with ones such that it records [0, 1, 1, 1, 0]
tensor_4 = torch.ByteTensor(5).zero_()
tensor_4[1:4] = 1
print("\n4. ByteTensor [0, 1, 1, 1, 0]:\n", tensor_4)
print("Shape:", tensor_4.shape)
print("Type:", tensor_4.dtype)


4. ByteTensor [0, 1, 1, 1, 0]:
 tensor([0, 1, 1, 1, 0], dtype=torch.uint8)
Shape: torch.Size([5])
Type: torch.uint8


In [24]:
# 5. Perform a matrix multiplication of two tensors of size (2, 4) and (4, 2). 
#    Then do it in-place.
tensor_5a = torch.randn(2, 4)
tensor_5b = torch.randn(4, 2)
result_5 = torch.matmul(tensor_5a, tensor_5b)
print("\n5. Matrix multiplication result (2, 2):\n", result_5)
print("Shape:", result_5.shape)

output_tensor = torch.empty(2,2)
torch.matmul(tensor_5a, tensor_5b, out=output_tensor)
print("5. Matrix multiplication result (using out=, effectively in-place for output_tensor):\n", output_tensor)


5. Matrix multiplication result (2, 2):
 tensor([[ 0.7433,  0.2292],
        [-3.1451,  5.0505]])
Shape: torch.Size([2, 2])
5. Matrix multiplication result (using out=, effectively in-place for output_tensor):
 tensor([[ 0.7433,  0.2292],
        [-3.1451,  5.0505]])


In [25]:
# 6. Do element-wise multiplication of two randomly filled (n1,n2,n3) tensors.
#    Then store the result in an Numpy array.
n1, n2, n3 = 2, 3, 4
tensor_6a = torch.randn(n1, n2, n3)
tensor_6b = torch.randn(n1, n2, n3)
result_6_torch = tensor_6a * tensor_6b 
print(f"\n6. Element-wise multiplication of two ({n1},{n2},{n3}) tensors (first few elements):\n", result_6_torch[0,0,:])

result_6_numpy = result_6_torch.numpy()
print("Result stored in NumPy array (first few elements):\n", result_6_numpy[0,0,:])
print("Type of result_6_numpy:", type(result_6_numpy))


6. Element-wise multiplication of two (2,3,4) tensors (first few elements):
 tensor([ 0.2107, -1.2684,  0.1010,  0.0406])
Result stored in NumPy array (first few elements):
 [ 0.21072632 -1.268359    0.10100806  0.0405582 ]
Type of result_6_numpy: <class 'numpy.ndarray'>


#### Forward-prop/backward-prop

In [26]:
# 1. Create a Tensor that `requires_grad` of size (5, 5).
tensor_fp_1 = torch.randn(5, 5, requires_grad=True)
print("1. Tensor with requires_grad=True:\n", tensor_fp_1)

1. Tensor with requires_grad=True:
 tensor([[-1.2291,  1.7068,  1.3085,  0.6111, -0.0413],
        [ 0.7736,  0.1475,  0.4648,  2.0190,  1.4789],
        [-0.4838, -0.7430,  0.6678,  1.4539, -1.2124],
        [-1.7750, -1.1107, -1.0405, -0.8410,  2.0359],
        [ 0.5419, -0.1954, -0.2859, -1.3401,  0.1933]], requires_grad=True)


In [27]:
# 2. Sum the values in the Tensor.
sum_fp_2 = torch.sum(tensor_fp_1)
print("\n2. Sum of values:\n", sum_fp_2)


2. Sum of values:
 tensor(3.1049, grad_fn=<SumBackward0>)


In [28]:
# 3. Multiply the tensor by 2 and assign the result to a new python variable (i.e. `x = result`)
x_fp_3 = tensor_fp_1 * 2
x_fp_3.retain_grad() 
print("\n3. Tensor multiplied by 2 (x):\n", x_fp_3)


3. Tensor multiplied by 2 (x):
 tensor([[-2.4582,  3.4136,  2.6169,  1.2222, -0.0826],
        [ 1.5472,  0.2951,  0.9295,  4.0380,  2.9577],
        [-0.9677, -1.4860,  1.3356,  2.9078, -2.4247],
        [-3.5500, -2.2213, -2.0810, -1.6820,  4.0719],
        [ 1.0838, -0.3908, -0.5718, -2.6802,  0.3867]], grad_fn=<MulBackward0>)


In [29]:
# 4. Sum the variable's elements and assign to a new python variable
sum_x_fp_4 = x_fp_3.sum()
sum_x_fp_4.retain_grad()
print("\n4. Sum of x's elements:\n", sum_x_fp_4)


4. Sum of x's elements:
 tensor(6.2098, grad_fn=<SumBackward0>)


In [30]:
# 5. Print the gradients of all the variables
print("\n5. Gradients before backward pass:")
print("   Gradient of tensor_fp_1:", tensor_fp_1.grad)
print("   Gradient of sum_fp_2 (leaf w.r.t this operation, but not in the new graph):", sum_fp_2.grad) 
print("   Gradient of x_fp_3:", x_fp_3.grad)
print("   Gradient of sum_x_fp_4:", sum_x_fp_4.grad)


5. Gradients before backward pass:
   Gradient of tensor_fp_1: None
   Gradient of sum_fp_2 (leaf w.r.t this operation, but not in the new graph): None
   Gradient of x_fp_3: None
   Gradient of sum_x_fp_4: None


  print("   Gradient of sum_fp_2 (leaf w.r.t this operation, but not in the new graph):", sum_fp_2.grad)


In [31]:
# 6. Now perform a backward pass on the last variable
sum_x_fp_4.backward()


In [None]:
# 7. Print all gradients again
print("\n7. Gradients after backward pass:")
print("   Gradient of tensor_fp_1:\n", tensor_fp_1.grad)
print("   Gradient of sum_fp_2 (still None as it's not in the current graph path):", sum_fp_2.grad)
print("   Gradient of x_fp_3:\n", x_fp_3.grad) 
print("   Gradient of sum_x_fp_4:\n", sum_x_fp_4.grad) 


7. Gradients after backward pass:
   Gradient of tensor_fp_1:
 tensor([[2., 2., 2., 2., 2.],
        [2., 2., 2., 2., 2.],
        [2., 2., 2., 2., 2.],
        [2., 2., 2., 2., 2.],
        [2., 2., 2., 2., 2.]])
   Gradient of sum_fp_2 (still None as it's not in the current graph path): None
   Gradient of x_fp_3:
 tensor([[1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.]])
   Gradient of sum_x_fp_4:
 tensor(1.)


  print("   Gradient of sum_fp_2 (still None as it's not in the current graph path):", sum_fp_2.grad)


#### Deep-forward NNs

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F 

class LLayerNetwork(nn.Module):
    def __init__(self, layer_dims):
        super(LLayerNetwork, self).__init__()
        self.layers = nn.ModuleList()
        self.layer_dims = layer_dims
        num_layers = len(layer_dims)

        for i in range(num_layers - 2): 
            self.layers.append(nn.Linear(layer_dims[i], layer_dims[i+1]))
            self.layers.append(nn.ReLU())
        
        self.layers.append(nn.Linear(layer_dims[num_layers - 2], layer_dims[num_layers - 1]))
        self.layers.append(nn.Sigmoid())

    def forward(self, x):
        for layer in self.layers:
            x = layer(x)
        return x



In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset 

class LLayerNetwork(nn.Module):
    def __init__(self, layer_dims):
        super(LLayerNetwork, self).__init__()
        self.layers = nn.ModuleList()
        num_layers = len(layer_dims)
        for i in range(num_layers - 2):
            self.layers.append(nn.Linear(layer_dims[i], layer_dims[i+1]))
            self.layers.append(nn.ReLU())
        self.layers.append(nn.Linear(layer_dims[num_layers - 2], layer_dims[num_layers - 1]))
        self.layers.append(nn.Sigmoid())

    def forward(self, x):
        for layer in self.layers:
            x = layer(x)
        return x


input_size = 20
hidden1_size = 15
hidden2_size = 10
output_size = 1 
layer_dimensions = [input_size, hidden1_size, hidden2_size, output_size]

X_train_np = np.random.rand(100, input_size).astype(np.float32)
y_train_np = np.random.randint(0, 2, (100, 1)).astype(np.float32)
X_test_np = np.random.rand(50, input_size).astype(np.float32)
y_test_np = np.random.randint(0, 2, (50, 1)).astype(np.float32)

X_train = torch.from_numpy(X_train_np)
y_train = torch.from_numpy(y_train_np)
X_test = torch.from_numpy(X_test_np)
y_test = torch.from_numpy(y_test_np)

train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=10, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=10)

def train_model(model, train_loader, optimizer, criterion, epochs=10):
    model.train()
    for epoch in range(epochs):
        for batch_idx, (data, target) in enumerate(train_loader):
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

def test_model(model, test_loader, criterion):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            output = model(data)
            test_loss += criterion(output, target).item()
            pred = (output > 0.5).float() 
            correct += pred.eq(target.view_as(pred)).sum().item()
    
    test_loss /= len(test_loader.dataset)
    accuracy = 100. * correct / len(test_loader.dataset)
    return accuracy

criterion = nn.BCELoss() 

optimizers_to_compare = {
    "SGD": lambda params: optim.SGD(params, lr=0.01),
    "Adam": lambda params: optim.Adam(params, lr=0.001),
    "SGD_Momentum": lambda params: optim.SGD(params, lr=0.01, momentum=0.9)
}

results = {}
num_epochs = 20 

print("Comparing Optimizers:")
for opt_name, opt_func in optimizers_to_compare.items():
    print(f"\nTraining with {opt_name}...")
    model_pytorch = LLayerNetwork(layer_dimensions)
    optimizer = opt_func(model_pytorch.parameters())
    
    train_model(model_pytorch, train_loader, optimizer, criterion, epochs=num_epochs)
    
    accuracy = test_model(model_pytorch, test_loader, criterion)
    results[opt_name] = accuracy
    print(f"Test Accuracy with {opt_name}: {accuracy:.2f}%")

print("\n--- Optimizer Comparison Results ---")
for opt_name, acc in results.items():
    print(f"{opt_name}: {acc:.2f}% Test Accuracy")

Comparing Optimizers:

Training with SGD...
Test Accuracy with SGD: 48.00%

Training with Adam...
Test Accuracy with Adam: 46.00%

Training with SGD_Momentum...
Test Accuracy with SGD_Momentum: 58.00%

--- Optimizer Comparison Results ---
SGD: 48.00% Test Accuracy
Adam: 46.00% Test Accuracy
SGD_Momentum: 58.00% Test Accuracy
