<a href="https://colab.research.google.com/github/avikumart/LLM-GenAI-Transformers-Notebooks/blob/main/LLMs_from_Scratch/pytorch_for_llms.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch

# Show which PyTorch build is installed in this runtime.
print(torch.__version__)

2.8.0+cu126


In [2]:
# Report whether PyTorch can use a CUDA device in this runtime.
print(torch.cuda.is_available())

True


In [3]:
# Two equal-length 1-D integer tensors used for the arithmetic demo below.
ten1, ten2 = torch.tensor([1, 23, 5]), torch.tensor([1, 3, 5])

In [4]:
# Element-wise sum of the two tensors.
print(ten1 + ten2)

tensor([ 2, 26, 10])


In [5]:
# Move both tensors to the GPU when one is available; otherwise leave them on
# the CPU so the notebook still runs end to end on a CPU-only runtime
# (a hard-coded "cuda" target raises there).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
ten1 = ten1.to(device)
ten2 = ten2.to(device)

In [6]:
# Same element-wise sum, computed on the tensors after the device move above.
print(ten1 + ten2)

tensor([ 2, 26, 10], device='cuda:0')


In [7]:
# Toy training set: five examples with three features each, plus one
# integer class label (0 or 1) per example.
X_train = torch.tensor([
    [12.0, 3.4, 1.0],
    [2.4, -23.0, 4.0],
    [3.0, 5.0, 8.0],
    [3.4, 5.6, 5.7],
    [2.7, 4.5, 3.4],
])
y_train = torch.tensor([0, 0, 0, 1, 1])

# Toy test set: two examples laid out the same way as the training data.
X_test = torch.tensor([
    [-0.8, 2.8, 3.4],
    [2.6, -1.6, 2.7],
])
y_test = torch.tensor([0, 1])

## PyTorch ML modeling on a dummy dataset

In [8]:
from torch.utils.data import Dataset

# Create a toy dataset from the tensors defined above
class ToyDataset(Dataset):
  """Map-style dataset that pairs each feature row with its label."""

  def __init__(self, X, y):
    """Store the features and labels after checking they line up.

    Args:
      X: Indexable collection of feature rows (one row per example).
      y: Indexable collection of labels exposing `.shape`; its first
        dimension defines the dataset length.

    Raises:
      ValueError: If `X` and `y` do not contain the same number of examples.
    """
    # A length mismatch would otherwise only surface later, as an IndexError
    # or as silently ignored feature rows during iteration.
    if len(X) != len(y):
      raise ValueError(
          "X and y must contain the same number of examples, got %d and %d"
          % (len(X), len(y))
      )
    self.features = X
    self.labels = y

  def __len__(self):
    """Return the number of examples (the size of the labels' first dimension)."""
    return self.labels.shape[0]

  def __getitem__(self, idx):
    """Return the `(features, label)` pair stored at position `idx`."""
    return self.features[idx], self.labels[idx]

# Wrap the train and test tensors so a DataLoader can index them.
train_ds, test_ds = ToyDataset(X_train, y_train), ToyDataset(X_test, y_test)

In [9]:
from torch.utils.data import DataLoader

# Fix the global seed; intended to make the shuffled batch order repeatable.
torch.manual_seed(123)

# Training batches are shuffled; test batches keep their original order.
train_loader = DataLoader(
    dataset=train_ds,
    batch_size=2,
    shuffle=True,
)
test_loader = DataLoader(
    dataset=test_ds,
    batch_size=2,
    shuffle=False,
)

In [17]:
# Develop a sample neural network for the toy dataset
import torch.nn as nn

class NeuralNetwork(nn.Module):
  """Fully connected network: two hidden ReLU layers followed by a linear output layer."""

  def __init__(self, num_inputs, num_outputs):
    """Build the layer stack.

    Args:
      num_inputs: Number of input features per example.
      num_outputs: Number of output units.
    """
    super().__init__()

    # Hidden widths are fixed; only the input and output sizes are configurable.
    hidden_1, hidden_2 = 15, 8
    stack = [
        nn.Linear(num_inputs, hidden_1),
        nn.ReLU(),
        nn.Linear(hidden_1, hidden_2),
        nn.ReLU(),
        nn.Linear(hidden_2, num_outputs),
    ]
    self.layers = nn.Sequential(*stack)

  def forward(self, x):
    """Run `x` through the layer stack and return the final layer's raw outputs."""
    outputs = self.layers(x)
    return outputs

In [19]:
# Train the neural network on the dummy data for a fixed number of epochs
import torch.nn.functional as F

# Fix the global seed right before the model is built.
torch.manual_seed(123)
model = NeuralNetwork(num_inputs=3, num_outputs=2)

# Prefer the GPU when one is visible, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Plain SGD over all model parameters.
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

num_epochs = 5
for epoch in range(num_epochs):
  model.train()

  for batch_idx, (features, labels) in enumerate(train_loader):
    # Each batch must live on the same device as the model.
    features = features.to(device)
    labels = labels.to(device)

    # Clear stale gradients, then forward pass, backward pass, weight update.
    optimizer.zero_grad()
    logits = model(features)
    loss = F.cross_entropy(logits, labels)
    loss.backward()
    optimizer.step()

    print('Epoch: %d | Batch: %d | Loss: %.4f' % (epoch, batch_idx, loss.item()))

  # Leave the model in eval mode once the epoch's updates are done.
  model.eval()

Epoch: 0 | Batch: 0 | Loss: 3.0720
Epoch: 0 | Batch: 1 | Loss: 1.2976
Epoch: 0 | Batch: 2 | Loss: 1.5625
Epoch: 1 | Batch: 0 | Loss: 1.0317
Epoch: 1 | Batch: 1 | Loss: 0.6467
Epoch: 1 | Batch: 2 | Loss: 2.3318
Epoch: 2 | Batch: 0 | Loss: 0.8345
Epoch: 2 | Batch: 1 | Loss: 0.3237
Epoch: 2 | Batch: 2 | Loss: 0.6689
Epoch: 3 | Batch: 0 | Loss: 0.7645
Epoch: 3 | Batch: 1 | Loss: 0.5330
Epoch: 3 | Batch: 2 | Loss: 0.1146
Epoch: 4 | Batch: 0 | Loss: 0.5035
Epoch: 4 | Batch: 1 | Loss: 0.5272
Epoch: 4 | Batch: 2 | Loss: 0.6266


In [20]:
# Calculate the accuracy of the model's predictions
def compute_accuracy(model, data_loader, device):
  """Return the classification accuracy of `model` over `data_loader`, in percent.

  Args:
    model: Module that maps a feature batch to per-class scores with the
      class dimension at index 1. It is switched to eval mode and left there.
    data_loader: Iterable yielding `(features, targets)` batches.
    device: Device each batch is moved to before the forward pass.

  Returns:
    A scalar float tensor holding the percentage of correct predictions.

  Raises:
    ValueError: If `data_loader` yields no examples, since accuracy is then
      undefined.
  """
  model = model.eval()
  correct_pred, num_examples = 0, 0

  # Evaluation only: disable autograd so no graph is built for the forward passes.
  with torch.no_grad():
    for features, targets in data_loader:
      features, targets = features.to(device), targets.to(device)
      logits = model(features)
      # The predicted class is the index of the largest score in each row.
      _, predicted_labels = torch.max(logits, 1)
      num_examples += targets.size(0)
      correct_pred += (predicted_labels == targets).sum()

  # Without this guard, `correct_pred` would still be the int 0 and the
  # `.float()` call below would fail with an unrelated AttributeError.
  if num_examples == 0:
    raise ValueError("data_loader yielded no examples; accuracy is undefined")

  return correct_pred.float() / num_examples * 100

In [21]:
# Accuracy (in percent) of the trained model on the training batches.
compute_accuracy(model, train_loader, device)

tensor(80., device='cuda:0')

In [22]:
# Accuracy (in percent) of the trained model on the test batches.
compute_accuracy(model, test_loader, device)

tensor(50., device='cuda:0')