# Regression with PyTorch on Alvis
This will introduce the very basics of using PyTorch on Alvis.

As usual with Python, the first step is to load the necessary packages.

In [None]:
import torch
from torch import nn

# For performance set precision,
# see https://www.c3se.chalmers.se/documentation/applications/pytorch/#performance-and-precision
torch.set_float32_matmul_precision("high")

## Constructing the data
In this step we generate a very simple dataset.

In [None]:
def f_true(x, slope=0.5, bias=0.3):
    '''Evaluate the noiseless linear relation ``slope * x + bias``.

    Arguments:
        x: Input value(s), anything that supports ``*`` and ``+`` with numbers
        slope (float): Slope of the line
        bias (float): Value of the line at ``x = 0``
    '''
    scaled = slope * x
    return scaled + bias

def get_data(n_points, noise_level=0.1, true_function=f_true, **tf_kwargs):
    '''Sample inputs and noisy targets from true_function.

    Arguments:
        n_points (int): Number of datapoints to generate
        noise_level (float): Std of the gaussian noise added to the targets
        true_function (callable): The noiseless underlying function
        **tf_kwargs: Optional key-word arguments passed on to true_function

    Returns:
        Tuple ``(x, y)`` where x is a tensor of shape (n_points, 1) and y is
        the output of true_function for x with noise added.
    '''
    sample_shape = (n_points, 1)
    # Inputs are spread uniformly over the interval [-1, 1)
    x = 2 * torch.rand(sample_shape) - 1
    noiseless = true_function(x, **tf_kwargs)
    noise = noise_level * torch.randn(sample_shape)
    return x, noiseless + noise

In [None]:
# Generate 300 noisy points, kept in the notebook-level names x and y
x, y = get_data(300)

### Take a look at the data
Since this is a notebook, plots and other graphical objects are displayed inline, which makes it easy to take a look at the data.

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

# Start a new figure for this plot
plt.figure()
# The noisy samples, drawn as individual points
plt.plot(x, y, '.', label="Data")
# Evenly spaced inputs over [-1, 1] used to draw the underlying line
x_plot = torch.linspace(-1, 1, 20)
plt.plot(x_plot, f_true(x_plot), label="Noiseless relation")
plt.xlabel("X")
plt.ylabel("Y")

# The trailing semicolon keeps the notebook from echoing the returned object
plt.legend();

## Constructing the model

In [None]:
class LinearModel(nn.Module):
    '''Linear regression expressed as a PyTorch module.

    Arguments:
        in_features (int): Size of each input sample
        out_features (int): Size of each output sample
    '''

    def __init__(self, in_features, out_features):
        super().__init__()
        # Objects needed in the forward pass are created once, here
        self.linear = nn.Linear(
            in_features=in_features,
            out_features=out_features,
        )

    def forward(self, x):
        '''Apply the linear layer to x and return the prediction.'''
        # Only the forward pass is written out; PyTorch tracks the
        # computational graph, so no backward pass has to be implemented
        prediction = self.linear(x)
        return prediction

# Instantiate the model with one input feature and one output,
# matching the single-column x and y tensors returned by get_data
model = LinearModel(in_features=1, out_features=1)


Note that in this simple case, we could have simply done
```python
model = nn.Linear(in_features=1, out_features=1)
```
directly, but we will build on this simple model later.


## Training the model
Here we will use gradient descent to train our regression model on the data we have generated.

In [None]:
def train(model, loss_function, optimizer, n_epochs=20):
    '''Fit the model with gradient descent, printing the loss every epoch.

    The training data is read from the notebook-level tensors ``x`` and ``y``
    and all of it is used in every epoch.

    Arguments:
        model (nn.Module): Model to train, updated in place
        loss_function (callable): Called as loss_function(prediction, y)
        optimizer (torch.optim.Optimizer): Optimizer linked to the model parameters
        n_epochs (int): Number of update steps to take
    '''
    # Training settings matter for layers such as dropout
    model.train()
    for epoch_number in range(1, n_epochs + 1):
        print(f"Epoch {epoch_number:2d}/{n_epochs}", end="")

        # Clear the gradients left over from the previous step
        optimizer.zero_grad()

        # Forward pass
        loss = loss_function(model(x), y)

        print(f"\tLoss {loss:.4g}")

        # Backward pass followed by the parameter update
        loss.backward()
        optimizer.step()

# Specify loss function and link optimizer with model parameters
# Mean squared error between prediction and target is what gets minimised
loss_function = nn.MSELoss()
# Passing model.parameters() tells the optimizer which tensors it should update
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.3)

# Start the training (runs the default of 20 epochs)
train(model, loss_function, optimizer)

## Evaluating the model
Here we generate new data that we can use to evaluate the model performance.

In [None]:
def eval(x_val, y_val, model, metric):
    '''Score the model on validation data.

    Arguments:
        x_val: Inputs passed to the model
        y_val: Targets that the predictions are compared against
        model (nn.Module): Model to evaluate, it is put in evaluation mode
        metric (callable): Called as metric(prediction, y_val)

    Returns:
        The value returned by metric, computed without gradient tracking
    '''
    # Evaluation settings, the counterpart of model.train()
    model.eval()
    # Nothing is differentiated here, so gradient tracking is switched off
    with torch.no_grad():
        prediction = model(x_val)
        score = metric(prediction, y_val)
    return score


# Evaluate on 100 newly generated points, i.e. data that was not used for training
loss = eval(*get_data(100), model, loss_function)
print(f"Test loss: {loss:.4g}")

As a side note, if you are doing computations with tensors that you do not plan
to backpropagate or differentiate through, you can detach them from
the current graph with
```python
my_free_tensor = my_tensor.detach()
```
or simply specify that they do not require gradients directly
```python
# For specific tensor
my_tensor.requires_grad = False

# For an entire context
with torch.no_grad():
    validation_accuracy = (validation_labels == predicted_labels).float().mean()
```
This reduces the memory use and computational cost of these computations, since no computational graph has to be recorded for them.

### Visualising model predictions

In [None]:
# Same plot as before, with the fitted line added on top
plt.figure()
plt.plot(x, y, '.', label="Data")
# unsqueeze(1) gives shape (20, 1): one input feature per row, as the model expects
x_plot = torch.linspace(-1, 1, 20).unsqueeze(1)
plt.plot(x_plot, f_true(x_plot), label="Noiseless relation")
plt.xlabel("X")
plt.ylabel("Y")

# Add model prediction
# Evaluation mode and no gradient tracking, the output is only used for plotting
model.eval()
with torch.no_grad():
    plt.plot(x_plot, model(x_plot), label="Predicted relation")

# The trailing semicolon keeps the notebook from echoing the returned object
plt.legend();


## Your own model
In PyTorch, the main way to construct a neural network model is by inheriting
from PyTorch's
[Module](https://pytorch.org/docs/stable/generated/torch.nn.Module.html#torch.nn.Module).
In many cases it is enough to implement a forward method, which is what you will
do now.

### Exercises
1. (Optional) Modify `MyModel` to be a linear regression model with a fixed bias of 0.3. This can be done in several
different ways. Depending on your approach, you might want to take a look at the
options for the
[Linear](https://pytorch.org/docs/stable/generated/torch.nn.Linear.html#torch.nn.Linear)
layer.

In [None]:
class MyModel(nn.Module):
    '''Exercise skeleton, to be completed into a linear regression model
    with a fixed bias of 0.3.'''
    
    def __init__(self):
        super().__init__()
        # Your code here
        # (create the layers or parameters that forward will use)
    
    
    def forward(self):
        '''Forward method of the module.'''
        # Your code here
        # NOTE: check_model calls the model as model(x), so this method
        # will need to accept the input tensor as an argument

2. Train your model.

In [None]:
# Your code here
my_model = MyModel()


In [None]:
def check_model(model):
    '''Verify model performance

    This is a help function to see if your model does what it is supposed to do.
    A message is printed for every check that fails:

    1. The model must accept input of the shape returned by get_data.
    2. The model output for x = 0 must be 0.3, i.e. the bias is 0.3.
    3. One gradient descent step on a copy of the model must change its output.
    4. The output for x = 0 must still be 0.3 after that step.

    Arguments:
        model (nn.Module): The model to check, training is only done on a copy

    Raises:
        Exception: Whatever the model raises when called on the test input
    '''
    from copy import deepcopy

    x, y = get_data(1000)
    try:
        model(x)
    except Exception:
        print("Your model doesn't seem to handle input tensors of shape", x.size())
        # Re-raise the active exception as it is
        raise

    def bias_is_correct(candidate):
        '''Return True if candidate maps 0 to 0.3, otherwise print a failure.'''
        # Only the value is of interest here, no gradients are needed
        with torch.no_grad():
            model_bias = candidate(torch.zeros(1, 1))
        if torch.isclose(model_bias, torch.tensor([[0.3]])):
            return True
        print(f"Failure: The bias is {model_bias.item()}, not 0.3")
        return False

    # Check bias
    bias_ok = bias_is_correct(model)

    # Check that training changes the model performance
    model_copy = deepcopy(model)
    trainable = [p for p in model_copy.parameters() if p.requires_grad]
    out1 = model_copy(x)
    if not trainable or not out1.requires_grad:
        # Without this guard the optimizer (empty parameter list) or the
        # backward pass would fail with an error that is hard to interpret
        print("Failure: The model output doesn't depend on any trainable parameters")
        return
    # NOTE(review): the large learning rate and the doubled targets presumably
    # make sure that even a well-trained model changes noticeably in one step
    optimizer = torch.optim.SGD(trainable, lr=10)
    nn.MSELoss()(out1, 2 * y).backward()
    optimizer.step()
    out2 = model_copy(x)
    if torch.allclose(out1, out2):
        print("Failure: The training doesn't seem to affect model performance")

    # Check bias after training, only informative if it was correct before
    if bias_ok:
        print("Checking model performance after training...")
        bias_is_correct(model_copy)

In [None]:
check_model(my_model)

## Running on a single GPU
For this example you will need access to a GPU. On Alvis there are four T4 GPUs
available on the login node; to see their status you can use the command
`nvidia-smi`. If they seem to be available, you can go ahead and use one of
them for the following exercises; otherwise you will have to submit a job.

You can use the Alvis OnDemand portal or submit a job manually with sbatch.

If you are going to submit a job, you can modify the `jobscript.sh` file. If you
have forgotten what to think about when constructing a job script, you can take a
look at part 1 and/or the introduction slides.

Now for the actual coding. In PyTorch the way to move computations to the GPU is
to move the objects that are part of the computation to the GPU. First create a
variable for the device you want to use
```python
dev = torch.device("cuda:0") 
```
you can change the zero to any other GPU that is available. Note that even if
you only have access to a part of a node the GPUs you have access to will still
always start from 0.

The second step is to move the data and model to the GPU. This can be done by
calling
```python
x_gpu = x.to(dev)
y_gpu = y.to(dev)
model = model.to(dev)
```
Note that you can't plot tensors that are on the GPU directly; you first have to move them back to the CPU, for example with `x_gpu.cpu()`.

### Exercises
1. Use `nvidia-smi` to find out about current GPU usage

In [None]:
%%bash
nvidia-smi

2. Decide if you will do the following exercises on the login node or if you
will submit a job
3. Modify `train_gpu()`

In [None]:
# You should modify this block so that it runs on a GPU
# and later also change the amount of data to train on

x, y = get_data(300)

def train_gpu(model, loss_function, optimizer, n_epochs=20):
    '''Training the model, this is the function to adapt for the GPU exercise.

    The training data is read from the notebook-level tensors ``x`` and ``y``
    and the loss is printed once per epoch.

    Arguments:
        model (nn.Module): Model to train, updated in place
        loss_function (callable): Called as loss_function(prediction, y)
        optimizer (torch.optim.Optimizer): Optimizer linked to the model parameters
        n_epochs (int): Number of update steps to take
    '''
    # Training settings matter for layers such as dropout
    model.train()
    for epoch_number in range(1, n_epochs + 1):
        print(f"Epoch {epoch_number:2d}/{n_epochs}", end="")

        # Clear the gradients left over from the previous step
        optimizer.zero_grad()

        # Forward pass
        loss = loss_function(model(x), y)

        print(f"\tLoss {loss:.4g}")

        # Backward pass followed by the parameter update
        loss.backward()
        optimizer.step()

# Specify loss function and link optimizer with model parameters
loss_function = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.3)

# Start the training with train_gpu defined in this cell, so that the
# changes made for the GPU exercise are the ones that actually run
train_gpu(model, loss_function, optimizer)

4. When you think you've succeeded, submit it with `jobscript.sh`.
5. Redo the GPU training, but now with 1 billion data points. Compare the Grafana plots (that is, the page generated by `job_stats.py`).
6. Use `sacct` in a terminal to find the job ID and then run `job_stats.py JOB_ID`
after substituting in the job ID. Look at the generated link. Are you using the GPU well?