<a href="https://colab.research.google.com/github/ZahraDehghani99/Pytorch-utils/blob/main/Pytorch_utils_new_version.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Train, validation and test functions in PyTorch


In [None]:
from tqdm.auto import tqdm

def train(model, train_dataloader):
  """Run one training pass over ``train_dataloader`` and return the summed loss.

  Relies on the notebook-level names ``tqdm``, ``device`` and ``optimizer``.

  Args:
    model: Module called as ``model(**batch)``; its output must expose ``.loss``.
    train_dataloader: Iterable of dict-like batches whose values support
      ``.to(device)``.

  Returns:
    The sum of ``loss.item()`` over all batches (``0`` if there were none).
  """
  model.train()
  total_loss = 0

  # Wrap the loader itself so the bar is sized for this pass and is closed when
  # the pass ends. This function is called once per epoch, so a bar sized for
  # the whole run would be re-created and abandoned mostly empty on every call.
  with tqdm(train_dataloader) as progress_bar:
    for batch in progress_bar:
      batch = {k: v.to(device) for k, v in batch.items()}
      outputs = model(**batch)
      loss = outputs.loss
      loss.backward()

      optimizer.step()
      optimizer.zero_grad()
      total_loss += loss.item()
  return total_loss


def evaluate(model, valid_dataloader):
  """Score ``model`` on ``valid_dataloader`` with gradient tracking disabled.

  Relies on the notebook-level name ``device``. Each batch is moved onto
  ``device`` in place before the forward pass.

  Args:
    model: Module called as ``model(**batch)``; its output must expose
      ``.loss`` and ``.logits``.
    valid_dataloader: Iterable of dict-like batches containing a ``"labels"``
      entry.

  Returns:
    ``(total_val_loss, total_correct, total_samples)``: the summed batch loss,
    the number of predictions equal to their label, and the number of labels
    seen.
  """
  model.eval()
  loss_sum, hits, seen = 0, 0, 0

  with torch.no_grad():
    for batch in valid_dataloader:
      # Transfer every entry of the batch to the target device.
      for key in batch:
        batch[key] = batch[key].to(device)

      outputs = model(**batch)
      loss_sum += outputs.loss.item()

      # Predicted class is the index of the largest logit along dim 1.
      predictions = torch.max(outputs.logits, dim=1).indices.to(device)
      labels = batch["labels"]
      hits += (predictions == labels).sum().item()
      seen += labels.size(0)

  return loss_sum, hits, seen

def test_model(model, test_dataloader):
  y_pred_test = []
  y_actual_test = []
  total_correct, total_samples = 0, 0

  model.eval()
  with torch.no_grad():
    n_true = 0
    n_total = 0

    for batch in test_dataloader:
      for i in batch:
        batch[i] = batch[i].to(device)
      outputs = model(**batch)
      _, y_predtest = torch.max(outputs.logits,1)

      y_predtest = y_predtest.to(device)
      correct = (y_predtest == batch["labels"]).sum().item()
      total_correct += correct
      total_samples += batch["labels"].size(0)

      for i in range(len(y_predtest)):
        y_pred_test.append(y_predtest[i])
        y_actual_test.append(batch["labels"][i])

    print(f'Test accuracy : {(total_correct/total_samples) *100 :.2f}%')
    return y_pred_test, y_actual_test

## Training loop

In [None]:
for epoch in range(num_epochs):

  # Training pass: train() returns the summed loss, averaged here per batch.
  total_loss_train = train(model, train_dataloader)
  avg_loss_train = total_loss_train / len(train_dataloader)

  # Validation pass: mean loss per batch and fraction of correct predictions.
  total_val_loss, total_correct, total_samples = evaluate(model, valid_dataloader)
  avg_loss_valid = total_val_loss / len(valid_dataloader)
  accuracy = total_correct / total_samples

  print(f'Epoch {epoch+1} : Train loss : {avg_loss_train} | Valid loss : {avg_loss_valid} | Valid accuracy : {accuracy}')

  # Send this epoch's metrics to wandb ("Epoch" is the zero-based loop index).
  epoch_metrics = {
      "Epoch": epoch,
      "Train Loss": avg_loss_train,
      "Valid Loss": avg_loss_valid,
      "Valid Acc": accuracy,
  }
  wandb.log(epoch_metrics)

# Train, validation and test functions in PyTorch with TensorBoard

In [None]:
# Define a function to calculate accuracy for a multi-class classification model
def binary_accuracy(y_pred, y_act):
  """Return the share of correct predictions as a whole-number percentage.

  Despite its name, the function works for any number of classes: the
  predicted class is the index of the largest score along dim 1.

  Args:
    y_pred: Tensor of per-class scores with the classes along dim 1.
    y_act: Tensor of class indices, compared element-wise with the predictions.

  Returns:
    A 0-d tensor holding ``100 * correct / total`` rounded by ``torch.round``.
  """
  # Softmax is monotonic, so the arg-max of the raw scores is the predicted
  # class. Skipping it also avoids ties when softmax rounds nearly-equal scores
  # to the same probability.
  y_pred_tag = torch.argmax(y_pred, dim=1)

  correct = (y_pred_tag == y_act).float()  # 1.0 where prediction equals label
  acc = correct.sum() / len(correct)
  return torch.round(acc * 100)

In [None]:
# Train the model for one epoch
def train(model1, train_loader):
  """Run one training epoch and record its mean loss and accuracy.

  Relies on the notebook-level names ``tqdm``, ``n_total_step``, ``optimizer``,
  ``criterion``, ``binary_accuracy``, ``epoch``, ``num_epoch``, ``writer`` and
  the lists ``train_loss`` / ``train_acc``, which are appended to in place.

  Args:
    model1: Module called as ``model1(text)``.
    train_loader: Iterable of batches exposing ``preprocessing_text`` (a
      two-item pair whose first item is fed to the model) and ``label``.

  Returns:
    The global ``(train_loss, train_acc)`` lists after this epoch's means have
    been appended.
  """
  model1.train()

  loss_sum = 0
  acc_sum = 0

  loop = tqdm(enumerate(train_loader), total = n_total_step, leave = False)
  for _, batch in loop:
    optimizer.zero_grad()

    # preprocessing_text unpacks into (text, number of words); only the text
    # is passed to the model here.
    text, _ = batch.preprocessing_text
    y_pred = model1(text).squeeze()

    loss_train = criterion(y_pred, batch.label)
    acc_train = binary_accuracy(y_pred, batch.label)

    loss_train.backward()
    optimizer.step()

    batch_loss = loss_train.item()
    batch_acc = acc_train.item()
    loss_sum += batch_loss
    acc_sum += batch_acc

    loop.set_description(f'Epoch : [{epoch}/{num_epoch}]')
    loop.set_postfix(loss = batch_loss, accuracy = batch_acc)

  # Per-batch means for the epoch, stored and mirrored to TensorBoard.
  n_batches = len(train_loader)
  epoch_loss = loss_sum / n_batches
  epoch_acc = acc_sum / n_batches
  train_loss.append(epoch_loss)
  train_acc.append(epoch_acc)

  writer.add_scalar('training loss', epoch_loss, epoch)
  writer.add_scalar('training accuracy', epoch_acc, epoch)

  return train_loss, train_acc


# Evaluate the model on the validation set
def evaluate(model1, valid_loader):
  """Run one validation pass and record its mean loss and accuracy.

  Relies on the notebook-level names ``criterion``, ``binary_accuracy`` and the
  lists ``valid_loss`` / ``valid_acc``, which are appended to in place.

  Args:
    model1: Module called as ``model1(text)``.
    valid_loader: Iterable of batches exposing ``preprocessing_text`` (a
      two-item pair whose first item is fed to the model) and ``label``.

  Returns:
    The global ``(valid_loss, valid_acc)`` lists after this pass's means have
    been appended.
  """
  model1.eval()

  loss_sum = 0
  acc_sum = 0

  # No gradients are needed while scoring.
  with torch.no_grad():
    for batch in valid_loader:
      text, _ = batch.preprocessing_text

      y_pred_val = model1(text).squeeze()
      loss_sum += criterion(y_pred_val, batch.label).item()
      acc_sum += binary_accuracy(y_pred_val, batch.label).item()

  n_batches = len(valid_loader)
  valid_loss.append(loss_sum / n_batches)
  valid_acc.append(acc_sum / n_batches)

  return valid_loss, valid_acc


In [None]:
from torch.utils.tensorboard import SummaryWriter
# TensorBoard writer used by the training functions (writer.add_scalar);
# 'model_Ham' is the same directory the %tensorboard --logdir command points at.
writer = SummaryWriter('model_Ham')

In [None]:
# %reload_ext tensorboard
# %tensorboard --logdir 'model_Ham'

In [None]:
# define a function for training a model
def train_fun(model1, train_loader):
  """Run one training epoch and record its mean loss and accuracy.

  Relies on the notebook-level names ``tqdm``, ``n_total_step``, ``optimizer``,
  ``criterion``, ``multi_class_acc``, ``device``, ``epoch``, ``num_epoch``,
  ``writer`` and the lists ``train_loss`` / ``train_acc``, which are appended
  to in place.

  Args:
    model1: Module called as ``model1(text, text_lengths)``.
    train_loader: Iterable of batches exposing ``preprocessing_text`` (a
      ``(text, text_lengths)`` pair) and ``label``.

  Returns:
    The global ``(train_loss, train_acc)`` lists after this epoch's means have
    been appended.
  """
  train_epoch_loss = 0
  train_epoch_acc = 0

  # train the model
  model1.train()

  loop = tqdm(enumerate(train_loader), total = n_total_step, leave = False)

  for i, batch in loop:

    optimizer.zero_grad()

    # retrieve text and number of words
    text, text_lengths = batch.preprocessing_text

    text = text.to(device)
    text_lengths = text_lengths.to(device)

    # convert to 1D tensor
    y_pred = model1(text, text_lengths).squeeze()

    # Labels arrive as a float tensor; cast them to long for the criterion and
    # move them to the same device as the inputs, so a loader that yields CPU
    # batches does not cause a device mismatch.
    label = batch.label.to(device=device, dtype=torch.long)

    loss_train = criterion(y_pred, label)

    acc_train = multi_class_acc(y_pred, label)

    loss_train.backward()

    optimizer.step()

    train_epoch_loss += loss_train.item()
    train_epoch_acc += acc_train.item()

    loop.set_description(f'Epoch : [{epoch}/{num_epoch}]')
    loop.set_postfix(loss = loss_train.item(), accuracy = acc_train.item())

  # Per-batch means for the epoch, computed once and reused below.
  mean_loss = train_epoch_loss/len(train_loader)
  mean_acc = train_epoch_acc/len(train_loader)
  train_loss.append(mean_loss)
  train_acc.append(mean_acc)

  # if you want to monitor your model in tensorboard
  writer.add_scalar('training loss', mean_loss, epoch)
  writer.add_scalar('training accuracy', mean_acc, epoch)

  return train_loss, train_acc


# define a function for evaluating model
def evaluate(model1, valid_loader):
  """Run one validation pass and record its mean loss and accuracy.

  Relies on the notebook-level names ``criterion``, ``multi_class_acc``,
  ``device`` and the lists ``valid_loss`` / ``valid_acc``, which are appended
  to in place.

  Args:
    model1: Module called as ``model1(text, text_length)``.
    valid_loader: Iterable of batches exposing ``preprocessing_text`` (a
      ``(text, text_length)`` pair) and ``label``.

  Returns:
    The global ``(valid_loss, valid_acc)`` lists after this pass's means have
    been appended.
  """
  # validation
  with torch.no_grad():
    model1.eval()

    val_epoch_loss = 0
    val_epoch_acc = 0

    for batch in valid_loader:

      text, text_length = batch.preprocessing_text

      text = text.to(device)
      text_length = text_length.to(device)

      y_pred_val = model1(text, text_length).squeeze()
      # Cast the labels to long and move them to the same device as the
      # inputs, so a loader that yields CPU batches does not cause a device
      # mismatch.
      label = batch.label.to(device=device, dtype=torch.long)
      loss_valid = criterion(y_pred_val, label)
      acc_valid = multi_class_acc(y_pred_val, label)

      val_epoch_loss += loss_valid.item()
      val_epoch_acc += acc_valid.item()

    valid_loss.append(val_epoch_loss/len(valid_loader))
    valid_acc.append(val_epoch_acc/len(valid_loader))

  return valid_loss, valid_acc

In [None]:
!nvidia-smi

## Training loop

In [None]:
from tqdm import tqdm

# Per-epoch history; the training and evaluation functions append to these.
train_loss, valid_loss = [], []
train_acc, valid_acc = [], []

for epoch in range(num_epoch):

  # train_fun is the trainer that matches the evaluate() defined in the same
  # cell: both call model1(text, lengths), move inputs to `device` and use
  # multi_class_acc. The older train() calls model1(text) with binary_accuracy.
  train_loss, train_acc = train_fun(model1, train_loader)
  writer.flush()
  valid_loss, valid_acc = evaluate(model1, valid_loader)

  scheduler.step()

In [None]:
# Persist the trained model1 object to the path below.
# NOTE(review): this saves the whole model object rather than
# model1.state_dict() — confirm that is intended.
torch.save(model1, '/content/drive/MyDrive/AI-Internship/model1LSTM_hamshahri') # model1 with 10 epochs

In [None]:
# Training and validation loss, one curve each, plotted per epoch
for _curve, _label in ((train_loss, 'Training loss'), (valid_loss, 'Validation loss')):
  plt.plot(_curve, label=_label)
plt.xlabel('epoch')
plt.ylabel('loss value')
plt.title("Loss function for each epoch")
plt.legend(frameon=False)
plt.show()

In [None]:
# Training and validation accuracy, one curve each, plotted per epoch
for _curve, _label in ((train_acc, 'Training accuracy'), (valid_acc, 'Validation accuracy')):
  plt.plot(_curve, label=_label)
plt.xlabel('epoch')
plt.ylabel('accuracy value')
plt.title("Accuracy for each epoch")
plt.legend(frameon=False)
plt.show()

In [None]:
# Report the most recently recorded metrics: [-1] is the last value appended
# to each history list.
print(f'training loss on last epoch : {train_loss[-1]}')
print(f'validation loss on last epoch : {valid_loss[-1]}')
print('-'*20)
print(f'training accuracy on last epoch : {train_acc[-1]}')
print(f'validation accuracy on last epoch : {valid_acc[-1]}')

In [None]:
def test(model1, test_loader):
  """Evaluate ``model1`` on ``test_loader``; print overall and per-class accuracy.

  Relies on the notebook-level names ``num_class`` and ``classes``.

  Args:
    model1: Module called as ``model1(text, text_length)``; after ``squeeze()``
      its output must carry the class scores along dim 1.
    test_loader: Iterable of batches exposing ``preprocessing_text`` (a
      ``(text, text_length)`` pair) and ``label``.

  Returns:
    ``(y_pred_test, y_actual_test)``: per sample and in iteration order, the
    predicted class as a 0-d tensor and the label as a Python number.
  """
  y_pred_test = []
  y_actual_test = []

  n_true = 0
  n_total = 0
  n_class_correct = [0] * num_class
  n_class_sample = [0] * num_class

  model1.eval()
  with torch.no_grad():
    for batch in test_loader:

      text, text_length = batch.preprocessing_text

      output = model1(text, text_length).squeeze()

      _, y_predtest = torch.max(output, 1)

      # .item() keeps the running count a plain int, so the accuracy below
      # prints as a number rather than as "tensor(...)".
      n_true += (y_predtest == batch.label).sum().item()
      n_total += batch.label.size(0)

      for pred, actual in zip(y_predtest, batch.label):
        actual = actual.item()
        y_pred_test.append(pred)
        y_actual_test.append(actual)

        # Per-class tallies are indexed by the integer label.
        lb = int(actual)
        if pred.item() == lb:
          n_class_correct[lb] += 1
        n_class_sample[lb] += 1

  # Report the number of samples seen; len(test_loader) counts batches.
  if n_total:
    print(f'accuracy of network on the {n_total} texts is : {(n_true/n_total) *100}')
  else:
    print('accuracy of network : n/a (no test samples)')

  for i in range(len(classes)):
    # A class with no test samples has no defined accuracy; skip the division.
    if n_class_sample[i] == 0:
      print(f'Accuracy of {classes[i]}: n/a (no samples)')
      continue
    acc = 100.0 * n_class_correct[i] / n_class_sample[i]
    print(f'Accuracy of {classes[i]}: {acc} %')

  return y_pred_test, y_actual_test



In [None]:
# Evaluate model1 on test_loader and keep the returned predictions and labels.
y_pred_test, y_actual_test = test(model1, test_loader)