Exercise 2

- Compute gradient manually

See Assignment 2 directory for the code.

Exercise 3

- Two-layer neural network
- Gradient implementation
- Iterative gradient descent

See Assignment 3 directory for the code.

Exercise 4

- Stochastic gradient descent

See Assignment 4 directory for the code.

In [None]:
def min_max_scaler(train_data, test_data):
    """Min-max scale both splits with statistics taken from the training data.

    The minimum and maximum are computed over all elements of ``train_data``
    (a single global pair, not per feature) and applied to both splits, so
    test values outside the training range map outside ``[0, 1]``.

    Args:
        train_data: tensor used to compute the statistics.
        test_data: tensor scaled with the training statistics.

    Returns:
        Tuple ``(train_scaled, test_scaled, min_val, max_val)``; the last two
        are what ``inverse_min_max_scaler`` needs to undo the scaling.
    """
    # statistics come from the training split only
    min_val, max_val = torch.min(train_data), torch.max(train_data)

    def _rescale(data):
        # map [min_val, max_val] linearly onto [0, 1]
        return (data - min_val) / (max_val - min_val)

    return _rescale(train_data), _rescale(test_data), min_val, max_val


def inverse_min_max_scaler(scaled_data, min_val, max_val):
    """Undo min-max scaling.

    Args:
        scaled_data: values previously scaled as ``(x - min_val) / (max_val - min_val)``.
        min_val: minimum used for the forward scaling.
        max_val: maximum used for the forward scaling.

    Returns:
        The values mapped back to the original range.
    """
    value_range = max_val - min_val
    return scaled_data * value_range + min_val

def standardize(X_train, X_val):
    """Standardize both splits with per-feature statistics of the training set.

    Mean and standard deviation are computed along dimension 0 of ``X_train``
    and applied to ``X_train`` and ``X_val`` alike.

    Args:
        X_train: training tensor; statistics are taken over its first dimension.
        X_val: validation tensor, transformed with the training statistics.

    Returns:
        Tuple ``(X_train_standardized, X_val_standardized)``.
    """
    # compute statistics on the training data only
    mean = torch.mean(X_train, dim=0)
    std = torch.std(X_train, dim=0)

    # A constant feature has std 0 (and torch.std yields NaN for a single
    # sample); dividing by it would produce NaN/inf. Use 1 for such features
    # so they are merely centered. ``NaN > 0`` is False, so NaN is replaced too.
    std = torch.where(std > 0, std, torch.ones_like(std))

    # standardize both X_train and X_val
    X_train = (X_train - mean) / std
    X_val = (X_val - mean) / std

    return X_train, X_val

# Normalize label_emb along axis 1: each row is divided by its own norm
# (keepdims=True keeps the norms as a column so the division broadcasts per row).
# NOTE(review): assumes label_emb is a 2-D NumPy array with no all-zero row
# (a zero row would divide by zero); `np` and `label_emb` are not defined in
# the visible part of this file — confirm they are set up in an earlier cell.
label_emb = label_emb / np.linalg.norm(label_emb, axis=1, keepdims=True)



Exercise 5

- Binary cross-entropy loss
- Binary classification network
- Categorical cross-entropy loss
- Categorical classification network

In [None]:
import torch
 
# Two alternative loss criteria for this exercise: binary cross-entropy and
# categorical cross-entropy.
# NOTE: both are bound to the same name, so the second assignment overwrites
# the first and only CrossEntropyLoss remains available as `loss`.
loss = torch.nn.BCELoss()
loss = torch.nn.CrossEntropyLoss()

def Network(D, K, O):
    """Build a two-layer fully-connected network with a tanh hidden activation.

    Args:
        D: number of input features.
        K: number of hidden units.
        O: number of outputs (the last layer has no activation).

    Returns:
        A ``torch.nn.Sequential`` of Linear(D, K) -> Tanh -> Linear(K, O).
    """
    hidden_layer = torch.nn.Linear(D, K)
    activation = torch.nn.Tanh()
    output_layer = torch.nn.Linear(K, O)
    return torch.nn.Sequential(hidden_layer, activation, output_layer)

# Stochastic gradient descent with learning rate 0.01 over the network parameters.
# NOTE(review): `Network` as defined in this cell requires the arguments
# (D, K, O), so `Network()` raises a TypeError as written. The instance is
# also not bound to a name, so the optimized network cannot be used for a
# forward pass afterwards. Build the network first with the sizes of the task
# at hand and pass `network.parameters()` here.
optimizer = torch.optim.SGD(Network().parameters(), lr=0.01)

Exercise 6

- Sequential networks
- Convolutional neural networks
- Pooling layers and stride
- Flattening layers
- Device

In [None]:
import torchvision

# use the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# MNIST dataset stored under data/ (download requested), with every image
# converted to a tensor by the ToTensor transform
dataset = torchvision.datasets.MNIST(
    root='data/',
    download=True,
    transform=torchvision.transforms.ToTensor()
)

# data loader yielding shuffled batches of 32 samples
# NOTE(review): `dataloder` looks like a typo of `dataloader`; the name is kept
# because other cells may refer to it — confirm before renaming.
dataloder = torch.utils.data.DataLoader(
    dataset,
    batch_size=32,
    shuffle=True
)

def fully_connected(D, K1, K2, O):
    """Build a three-layer fully-connected network with sigmoid activations.

    The input is flattened first, so any input whose non-batch dimensions
    multiply to ``D`` is accepted.

    Args:
        D: number of input features after flattening.
        K1: number of units in the first hidden layer.
        K2: number of units in the second hidden layer.
        O: number of outputs (no activation after the last layer).

    Returns:
        A ``torch.nn.Sequential`` network.
    """
    layers = [
        torch.nn.Flatten(),
        torch.nn.Linear(in_features=D, out_features=K1),
        torch.nn.Sigmoid(),
        torch.nn.Linear(in_features=K1, out_features=K2),
        torch.nn.Sigmoid(),
        torch.nn.Linear(in_features=K2, out_features=O),
    ]
    return torch.nn.Sequential(*layers)

def convolutional(Q1, Q2, O):
    """Build a small CNN: two conv/max-pool/sigmoid stages and a linear classifier.

    The final linear layer expects ``Q2 * 5 * 5`` features; with the kernel,
    padding and pooling sizes used here that matches single-channel 28x28
    inputs (28 -> 22 -> 11 -> 11 -> 5).

    Args:
        Q1: number of output channels of the first convolution.
        Q2: number of output channels of the second convolution.
        O: number of outputs of the final linear layer.

    Returns:
        A ``torch.nn.Sequential`` network.
    """
    first_stage = [
        torch.nn.Conv2d(in_channels=1, out_channels=Q1, kernel_size=7, stride=1, padding=0),
        torch.nn.MaxPool2d(2, stride=2),
        torch.nn.Sigmoid(),
    ]
    second_stage = [
        torch.nn.Conv2d(in_channels=Q1, out_channels=Q2, kernel_size=5, stride=1, padding=2),
        torch.nn.MaxPool2d(2, stride=2),
        torch.nn.Sigmoid(),
    ]
    classifier = [
        torch.nn.Flatten(),
        torch.nn.Linear(Q2 * 5 * 5, O),
    ]
    return torch.nn.Sequential(*first_stage, *second_stage, *classifier)


Exercise 7

- Pre-trained networks
- Extract deep features

In [None]:
# Preprocessing pipeline applied to every image before it is fed to the
# pre-trained network: resize, center-crop to 224x224, convert to a tensor,
# then normalize each of the three channels with a fixed mean and std.
# NOTE(review): the mean/std values are presumably the channel statistics the
# pre-trained weights expect — confirm against the weights' documentation.
imagenet_transform = torchvision.transforms.Compose([
    torchvision.transforms.Resize(256),                                    # Resize the image such that the shorter side has size 256
    torchvision.transforms.CenterCrop(224),                                # Take the center crop of size 224x224
    torchvision.transforms.ToTensor(),                                     # Convert the image into a tensor
    torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],           # Normalize pixel values with mean
                                     std=[0.229, 0.224, 0.225])            # Normalize pixel values with standard deviation
])

from torchvision.datasets import ImageFolder

# image dataset read from the data/ directory, preprocessed with imagenet_transform
dataset = ImageFolder('data/', transform=imagenet_transform)

# ResNet-50 initialized with torchvision's default pre-trained weights
network = torchvision.models.resnet50(weights=torchvision.models.ResNet50_Weights.DEFAULT)
# Make sure to freeze all the layers of the network.
for param in network.parameters():
    param.requires_grad = False  # Freeze all the layers of the network

# number of inputs of the network's final fully-connected layer `fc`
# (presumably the dimensionality of the deep features to extract)
features = network.fc.in_features



Exercise 8

- Network Module definition
- Autograd Function

In [None]:
class AdaptedSoftMax(torch.autograd.Function):
    """Cross-entropy between soft targets and softmax(logits), with a manual gradient.

    Forward computes ``J = mean_n( -sum_o targets[n, o] * log_softmax(logits)[n, o] )``.
    Backward returns the matching derivative w.r.t. the logits; the targets
    receive no gradient.
    """

    # implement the forward propagation
    @staticmethod
    def forward(ctx, logits, targets):
        """Return the batch-averaged loss.

        Args:
            ctx: autograd context used to pass tensors to ``backward``.
            logits: tensor whose dimension 1 indexes the classes.
            targets: tensor of the same shape as ``logits`` with the target
                weight of every class.

        Returns:
            Scalar tensor holding the mean loss over dimension 0.
        """
        # compute the log probabilities via log_softmax
        log_probs = torch.nn.functional.log_softmax(logits, dim=1)
        # save required values for backward pass
        ctx.save_for_backward(logits, targets)
        # compute loss
        loss = -torch.sum(log_probs * targets, dim=1).mean()
        return loss

    # implement Jacobian
    @staticmethod
    def backward(ctx, result):
        """Return the gradient of the loss w.r.t. the logits (None for the targets).

        Args:
            ctx: autograd context holding the tensors saved in ``forward``.
            result: upstream gradient of the scalar loss.
        """
        # get results stored from forward pass
        logits, targets = ctx.saved_tensors
        # compute derivative of loss w.r.t. the logits
        y = torch.nn.functional.softmax(logits, dim=1)
        # d/dz of -sum_o t_o * log y_o is y * sum_o(t_o) - t; this reduces to
        # the familiar (y - t) whenever the target rows sum to one
        target_mass = targets.sum(dim=1, keepdim=True)
        # forward averages over the batch, so every sample contributes 1/N
        dJ_dz = result * (y * target_mass - targets) / logits.shape[0]
        # return the derivatives; none for derivative for the targets
        return dJ_dz, None

# DO NOT REMOVE!
# here we set the adapted softmax function to be used later
adapted_softmax = AdaptedSoftMax.apply


class Network(torch.nn.Module):
    """Two-stage CNN that returns both logits and deep features.

    Each stage is convolution -> max-pooling -> PReLU; a single pooling layer
    and a single PReLU instance (hence one shared PReLU parameter) serve both
    stages. The flattened result goes through two fully-connected layers; the
    output of the first one is exposed as the deep features.
    """

    def __init__(self, Q1, Q2, K, O):
        """Create the layers.

        Args:
            Q1: output channels of the first convolution.
            Q2: output channels of the second convolution.
            K: size of the deep-feature layer.
            O: number of logits.
        """
        super().__init__()
        # convolutional stages: 7x7 without padding, then 5x5 with padding 2
        self.conv1 = torch.nn.Conv2d(in_channels=1, out_channels=Q1, kernel_size=7, stride=1, padding=0)
        self.conv2 = torch.nn.Conv2d(in_channels=Q1, out_channels=Q2, kernel_size=5, stride=1, padding=2)
        # pooling and activation are shared between the two stages
        self.pool = torch.nn.MaxPool2d(kernel_size=2, stride=2)
        self.act = torch.nn.PReLU()
        # fully-connected head; expects Q2 feature maps of size 5x5
        self.flatten = torch.nn.Flatten()
        self.fc1 = torch.nn.Linear(Q2 * 5 * 5, K)
        self.fc2 = torch.nn.Linear(K, O)

    def forward(self, x):
        """Return ``(logits, deep_features)`` for the input batch ``x``."""
        # first stage: convolution, pooling, activation
        stage = self.conv1(x)
        stage = self.pool(stage)
        stage = self.act(stage)
        # second stage: convolution, pooling, activation
        stage = self.conv2(stage)
        stage = self.pool(stage)
        stage = self.act(stage)
        # deep features come from the first fully-connected layer
        flat = self.flatten(stage)
        deep_features = self.fc1(flat)
        # logits come from the second fully-connected layer
        logits = self.fc2(deep_features)
        return logits, deep_features

Exercise 9

- Combining submodules
- MSE loss



In [None]:
class MixedDataset(torch.utils.data.Dataset):
    """MNIST dataset, optionally mixed with FashionMNIST samples as anomalies.

    For ``purpose == "anomaly_detection"`` the dataset is the MNIST split
    followed by ``anomaly_size`` randomly chosen FashionMNIST test samples;
    for every other purpose it is the MNIST split alone.
    """

    def __init__(self, root='./data', purpose="train", transform=None, anomaly_size=2000):
        """Load the underlying dataset(s).

        Args:
            root: directory the datasets are stored in / downloaded to.
            purpose: ``"train"`` selects the MNIST training split, anything
                else the MNIST test split; ``"anomaly_detection"`` additionally
                appends the FashionMNIST anomalies.
            transform: transform applied to the images of both datasets.
            anomaly_size: number of FashionMNIST samples drawn without
                replacement (only used for ``"anomaly_detection"``).
        """
        # load MNIST dataset based on "purpose"
        self.mnist_dataset = torchvision.datasets.MNIST(root=root, train=(purpose=="train"), download=True, transform=transform)

        if purpose == "anomaly_detection":
            # load FashionMNIST only when it is needed and randomly select
            # "anomaly_size" samples from it
            fashion_mnist_dataset = torchvision.datasets.FashionMNIST(root=root, train=False, download=True, transform=transform)
            idx = np.random.choice(len(fashion_mnist_dataset), anomaly_size, replace=False)
            self.fashion_mnist_dataset = torch.utils.data.Subset(fashion_mnist_dataset, idx)
            self.dataset = torch.utils.data.ConcatDataset([self.mnist_dataset, self.fashion_mnist_dataset])
        else:
            # no anomalies requested: nothing to download or sample
            self.fashion_mnist_dataset = None
            self.dataset = self.mnist_dataset

    def __len__(self):
        # return length of the desired dataset based on its purpose
        return len(self.dataset)

    def __getitem__(self, idx):
        """Return ``(image, target, data_type)``; data_type is 1 for regular and -1 for anomalous samples."""
        # map a negative index to its position from the front, so that the
        # regular/anomalous decision below looks at the sample actually returned
        if idx < 0:
            idx += len(self.dataset)
        image, target = self.dataset[idx]
        # the MNIST samples come first; everything after them is anomalous
        data_type = 1 if idx < len(self.mnist_dataset) else -1

        return image, target, data_type
    
# mean squared error criterion; note that this rebinds the module-level name `loss`
loss = torch.nn.MSELoss()

Exercise 10

- Custom dataset
- Sequence item prediction


In [None]:
def create_sequences_targets(data: torch.Tensor, S):
    """Cut ``data`` into sliding windows of length ``S`` and their next items.

    Window ``i`` is ``data[i:i+S]`` and its target is ``data[i+S]``, for
    ``i = 0 .. len(data) - S - 1``.

    Args:
        data: tensor indexed along its first dimension.
        S: length of each input sequence.

    Returns:
        Tuple ``(X, T)`` of stacked sequences and stacked targets.
    """
    # one window per start position that still leaves a target behind it
    window_starts = range(len(data) - S)

    sequences = [data[start:start + S] for start in window_starts]
    targets = [data[start + S] for start in window_starts]

    # turn the lists into tensors
    return torch.stack(sequences), torch.stack(targets)

class Dataset(torch.utils.data.Dataset):
    """Dataset of (input sequence, next item) pairs built from one data tensor."""

    def __init__(self, data, S):
        """Precompute all sequences of length ``S`` and their targets from ``data``."""
        sequences, targets = create_sequences_targets(data, S)
        self.X = sequences
        self.T = targets

    def __len__(self):
        """Return the number of stored sequences."""
        return len(self.X)

    def __getitem__(self, index):
        """Return the input sequence and its target at ``index``."""
        sequence = self.X[index]
        target = self.T[index]
        return sequence, target

Exercise 11

- Backpropagation to input

In [None]:
# Backpropagation to the input: obtain the gradient of the loss w.r.t. x.
# Relies on x, t, network and loss being defined in earlier cells.

# tell autograd that we need the gradient for the input
x.requires_grad = True
# forward input through the network
z = network(x)
# compute loss and backpropagate it (this fills x.grad)
J = loss(z, t)
J.backward()

# get the gradient of the loss w.r.t. the input
gradient = x.grad

def FGS(x, t, network, loss, alpha=0.3):
    """Create Fast Gradient Sign adversarial samples for the inputs ``x``.

    The sample is ``clamp(x + alpha * sign(dJ/dx), 0, 1)`` where ``J`` is the
    loss of the network output for ``x`` with respect to the targets ``t``.

    The computation runs on a detached copy of ``x`` and differentiates with
    respect to that copy only, so that
    * the caller's tensor is left untouched (no ``requires_grad`` switch, no
      ``.grad`` that would accumulate over repeated calls and distort the sign),
    * no gradients are accumulated into the network parameters, and
    * the returned sample is not attached to the autograd graph.

    Args:
        x: input batch; values are expected in ``[0, 1]`` (the result is
            clamped to that range).
        t: targets passed to ``loss``.
        network: callable mapping ``x`` to the network output.
        loss: callable ``loss(output, t)`` returning a scalar.
        alpha: step size along the gradient sign.

    Returns:
        Tensor of the same shape as ``x`` holding the adversarial samples.
    """
    # fresh leaf tensor that autograd can differentiate with respect to
    x_adv = x.detach().clone().requires_grad_(True)
    # forward input
    z = network(x_adv)
    # compute loss and its gradient w.r.t. the input only
    J = loss(z, t)
    gradient, = torch.autograd.grad(J, x_adv)

    # create FGS adversarial sample
    adversarial_sample = torch.clamp(x_adv.detach() + alpha * torch.sign(gradient), 0, 1)

    return adversarial_sample

def FGV(x, t, network, loss, alpha=0.6):
    """Create Fast Gradient Value adversarial samples for the inputs ``x``.

    The sample is ``clamp(x + alpha * g / max|g|, 0, 1)`` where ``g = dJ/dx``
    and the maximum absolute value is taken over the whole gradient tensor
    (i.e. one scale for the entire batch).

    The computation runs on a detached copy of ``x`` and differentiates with
    respect to that copy only, so the caller's tensor and the network
    parameter gradients are left untouched and the result is detached from
    the autograd graph. An all-zero gradient returns the clamped input
    instead of NaN.

    Args:
        x: input batch; values are expected in ``[0, 1]`` (the result is
            clamped to that range).
        t: targets passed to ``loss``.
        network: callable mapping ``x`` to the network output.
        loss: callable ``loss(output, t)`` returning a scalar.
        alpha: step size applied to the max-normalized gradient.

    Returns:
        Tensor of the same shape as ``x`` holding the adversarial samples.
    """
    # fresh leaf tensor that autograd can differentiate with respect to
    x_adv = x.detach().clone().requires_grad_(True)
    # forward input
    z = network(x_adv)
    # compute loss and its gradient w.r.t. the input only
    J = loss(z, t)
    gradient, = torch.autograd.grad(J, x_adv)

    # scale the gradient by its largest absolute entry; the lower bound keeps
    # an all-zero gradient from turning into 0/0 = NaN
    max_abs_gradient = torch.max(torch.abs(gradient))
    max_abs_gradient = torch.clamp(max_abs_gradient, min=torch.finfo(gradient.dtype).tiny)
    # create FGV adversarial sample
    adversarial_sample = torch.clamp(x_adv.detach() + alpha * gradient / max_abs_gradient, 0, 1)

    return adversarial_sample

# Full-batch gradient descent for `epochs` epochs, with a validation accuracy
# computed after every parameter update. Relies on epochs, optimizer, network,
# loss, accuracy, X_train, T_train, X_val and T_val being defined in earlier cells.
for epoch in range(epochs):
    # DO NOT FORGET: clear the gradients left over from the previous step
    optimizer.zero_grad()
    # forward the training data and compute the loss
    Z = network(X_train)
    J = loss(Z, T_train)
    # compute the gradient
    J.backward()
    # perform parameter update
    optimizer.step()
    # compute validation accuracy; no gradients are needed for evaluation
    with torch.no_grad():
        Z = network(X_val)
        acc = accuracy(Z, T_val)

# One pass of mini-batch training over train_loader. Relies on network,
# train_loader, device, loss and optimizer being defined in earlier cells.

# switch the network to training mode
network.train()
for x,t in train_loader:
    # move the batch to the device used for the computation
    x = x.to(device)
    t = t.to(device)
    # compute output for current batch
    z = network(x)
    # compute loss
    J = loss(z,t)
    # clear old gradients, compute the new gradient and update the parameters
    optimizer.zero_grad()
    J.backward()
    optimizer.step()

# Accumulate the validation loss, weighted by batch size. Relies on network,
# validation_loader, device and loss being defined in earlier cells.

# switch the network to evaluation mode
network.eval()
# iterate over validation set samples
# NOTE: the running sum gets its own name; binding it to `loss` would replace
# the loss function and make the call `loss(z,t)` below fail
total_loss = 0
for x,t in validation_loader:
    x = x.to(device)
    t = t.to(device)
    with torch.no_grad():
        # classify original samples
        z = network(x)
        # weight the batch loss by the batch size x.size(0)
        total_loss += loss(z,t).item() * x.size(0)
        
        