In [1]:
import torch

In [2]:
torch.cuda.is_available()

True

In [3]:
device = torch.cuda.current_device()
print(device)

0


In [4]:
torch.cuda.get_device_name(device)

'GeForce RTX 3080'

In [5]:
torch.cuda.memory_allocated()

0

In [6]:
torch.cuda.memory_reserved()

0

In [7]:
# Build a float32 CPU tensor with torch.tensor; the legacy torch.FloatTensor
# constructor is discouraged in favour of the dtype-explicit factory.
a = torch.tensor([1.0, 2.0], dtype=torch.float32)

In [8]:
a

tensor([1., 2.])

In [9]:
a.device

device(type='cpu')

In [10]:
# Re-create the tensor and copy it to the GPU; the following cells show the
# device and the CUDA memory counters change as a result.
a = torch.FloatTensor([1.0,2.0]).cuda()

In [11]:
a.device

device(type='cuda', index=0)

In [12]:
torch.cuda.memory_allocated()

512

In [13]:
torch.cuda.memory_reserved()

2097152

In [14]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [15]:
class Model(nn.Module):
    """Fully connected classifier: two ReLU hidden layers and a linear output layer.

    Args:
        in_features: size of each input sample.
        h1: width of the first hidden layer.
        h2: width of the second hidden layer.
        out_features: number of output scores produced per sample.
    """

    def __init__(self, in_features=4, h1=8, h2=9, out_features=3):
        super().__init__()
        # Layers are registered in input -> output order.
        self.fc1 = nn.Linear(in_features=in_features, out_features=h1)
        self.fc2 = nn.Linear(in_features=h1, out_features=h2)
        self.out = nn.Linear(in_features=h2, out_features=out_features)

    def forward(self, X):
        """Return the raw (un-normalised) output scores for ``X``."""
        hidden = X
        for layer in (self.fc1, self.fc2):
            hidden = F.relu(layer(hidden))
        return self.out(hidden)

In [16]:
torch.manual_seed(32)
model = Model()

In [17]:
next(model.parameters()).is_cuda

False

In [18]:
# nn.Module.cuda() moves the parameters in place and returns the same module,
# so `gpumodel` and `model` are two names for one object that now lives on the GPU.
gpumodel = model.cuda()

In [19]:
next(gpumodel.parameters()).is_cuda

True

In [20]:
df = pd.read_csv("../Data/iris.csv")

In [21]:
X = df.drop('target', axis=1).values

In [22]:
y = df["target"].values

In [23]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2, random_state=33)

In [24]:
# torch.as_tensor accepts both the NumPy split and an already-converted tensor,
# so this cell can be re-run safely (torch.FloatTensor(...) rejects CUDA tensors).
X_train = torch.as_tensor(X_train, dtype=torch.float32, device="cuda")

In [25]:
# Features become float32 and labels int64, created on the GPU. torch.as_tensor
# also accepts tensors that were already converted, so re-running the cell is safe.
X_test = torch.as_tensor(X_test, dtype=torch.float32, device="cuda")
y_train = torch.as_tensor(y_train, dtype=torch.long, device="cuda")
y_test = torch.as_tensor(y_test, dtype=torch.long, device="cuda")

In [26]:
X_train.device

device(type='cuda', index=0)

In [27]:
# The tensors already live on the GPU, so pinned (page-locked) host memory does
# not apply: with pin_memory=True the loader tries to pin every batch and fails
# on CUDA tensors as soon as it is iterated.
# NOTE: these loaders yield feature batches only (no labels); the training loop
# further down feeds X_train / y_train to the model directly.
train_loader = DataLoader(X_train, batch_size=64, shuffle=True, pin_memory=False)
test_loader = DataLoader(X_test, batch_size=64, shuffle=False, pin_memory=False)

In [28]:
# Cross-entropy is applied to the raw class scores from the model and the integer labels.
criterion = nn.CrossEntropyLoss()
# Built after the model was moved to the GPU, so Adam tracks the CUDA parameters.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)

In [30]:
import time

EPOCHS = 100
losses = []  # one Python float per epoch

start_time = time.perf_counter()

for epoch in range(EPOCHS):
    # Call the module itself (not .forward) so nn.Module hooks are honoured.
    y_pred = gpumodel(X_train)
    loss = criterion(y_pred, y_train)
    # Store a plain float: keeping the tensor would hold on to GPU memory and
    # its autograd history, and CUDA tensors cannot be plotted directly.
    losses.append(loss.item())

    if epoch % 10 == 0:
        print(f"Epoch: {epoch+1} Loss: {losses[-1]}")

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# CUDA kernels run asynchronously; wait for them to finish before stopping the clock.
if torch.cuda.is_available():
    torch.cuda.synchronize()

total_time = time.perf_counter() - start_time
print(f"Total time: {total_time/60}min")

Epoch: 1 Loss: 0.06728433072566986
Epoch: 11 Loss: 0.06374862045049667
Epoch: 21 Loss: 0.060973454266786575
Epoch: 31 Loss: 0.058818697929382324
Epoch: 41 Loss: 0.05712449550628662
Epoch: 51 Loss: 0.05564584583044052
Epoch: 61 Loss: 0.05439453199505806
Epoch: 71 Loss: 0.05329487845301628
Epoch: 81 Loss: 0.0523374117910862
Epoch: 91 Loss: 0.051444489508867264
Total time: 0.0023996829986572266min


In [31]:
with torch.no_grad():
    # Score the whole test set in one forward pass; comparing row by row forces
    # a host/device synchronisation for every sample.
    predictions = gpumodel(X_test).argmax(dim=1)
    correct = int((predictions == y_test).sum().item())
print(f"{correct} out of {len(y_test)} = {100*correct/len(y_test):.2f} % correct")

30 out of 30 = 100.00 % correct


## What to remember about running training on GPU

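1. Move every tensor (inputs and labels) to the GPU with `.cuda()`
2. Move the model to the GPU with `.cuda()` right after creating it, before building the optimizer
3. Pass `pin_memory=True` to `DataLoader` only when its data is still on the CPU; tensors that are already on the GPU cannot be pinned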