Test GPU Capabilities

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# Get CUDA if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# get pytorch version
version = torch.__version__
print("Pytorch version: {}".format(version))
# check fp16 availability
fp16_available = torch.backends.cudnn.enabled
print("fp16 available: {}".format(fp16_available))
# check if GPU is available
gpu_available = torch.cuda.is_available()
print("GPU available: {}".format(gpu_available))





Pytorch version: 2.0.1
fp16 available: True
GPU available: True


In [4]:
try:
    t = torch.tensor([1,2,3], dtype=torch.fp8)
    print(t.dtype)
    print(t)
except Exception as e:
    print(e)

t = torch.tensor([1,2,3], dtype=torch.int8)
print(t.dtype)
print(t)



module 'torch' has no attribute 'fp8'
torch.int8
tensor([1, 2, 3], dtype=torch.int8)


Float16 testing

In [5]:
# Create a model with 2 layers
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        #self.fc1 = nn.Linear(10, 10)
        self.fc2 = nn.Linear(10, 1,dtype=torch.float16)

    def forward(self, x):
        #x = self.fc1(x)
        x = self.fc2(x)
        return x


x = torch.randn(10,dtype=torch.float16)
# evaluate the model
model = Model()
model.eval()
model.to(device)
x = x.to(device)
y = model(x)
print(y)



# train with this model on GPU with 10 inputs and output should learn to be sum of inputs

epochs = 1000
lr = 0.01
batch_size = 10
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
criteria = nn.MSELoss()
for i in range(epochs):
    optimizer.zero_grad()
    x = torch.randn(batch_size,10,dtype=torch.float16).to(device)
    y = model(x)
    y_true = x.sum(dim=1, keepdim=True)
    loss = criteria(y, y_true)    
    loss.backward()    
    optimizer.step()
    loss_val = loss.item()
    if i % 10 == 0: 
        print(f"Epoch: {i}, Loss: {loss_val:.5f}")
    #print("y: {}, x.sum(): {}".format(y.detach().cpu().item(), x.sum().detach().cpu().item()))
print(f"Epoch: {i}, Loss: {loss_val:.5f}")
print(model.fc2.weight)

tensor([-0.2947], device='cuda:0', dtype=torch.float16, grad_fn=<AddBackward0>)
Epoch: 0, Loss: 8.66406
Epoch: 10, Loss: 3.67188
Epoch: 20, Loss: 4.82422
Epoch: 30, Loss: 6.45312
Epoch: 40, Loss: 3.03711
Epoch: 50, Loss: 3.86719
Epoch: 60, Loss: 2.49609
Epoch: 70, Loss: 3.06055
Epoch: 80, Loss: 2.29492
Epoch: 90, Loss: 3.44336
Epoch: 100, Loss: 0.85889
Epoch: 110, Loss: 2.58984
Epoch: 120, Loss: 1.19043
Epoch: 130, Loss: 0.81494
Epoch: 140, Loss: 0.78564
Epoch: 150, Loss: 0.33667
Epoch: 160, Loss: 0.55273
Epoch: 170, Loss: 0.39258
Epoch: 180, Loss: 0.37085
Epoch: 190, Loss: 0.10693
Epoch: 200, Loss: 0.13696
Epoch: 210, Loss: 0.05936
Epoch: 220, Loss: 0.04858
Epoch: 230, Loss: 0.02237
Epoch: 240, Loss: 0.01358
Epoch: 250, Loss: 0.02466
Epoch: 260, Loss: 0.01848
Epoch: 270, Loss: 0.00971
Epoch: 280, Loss: 0.00508
Epoch: 290, Loss: 0.00134
Epoch: 300, Loss: 0.00487
Epoch: 310, Loss: 0.00275
Epoch: 320, Loss: 0.00037
Epoch: 330, Loss: 0.00096
Epoch: 340, Loss: 0.00061
Epoch: 350, Loss: 0.0