In [None]:
import torch
# Check whether PyTorch reports CUDA as available on this machine
torch.cuda.is_available()

In [None]:
# Get the index (id) of the currently selected, i.e. default, CUDA device
torch.cuda.current_device()

In [None]:
# 0 is used as the device index here (presumably the value shown by the previous cell — confirm)
torch.cuda.get_device_name(0) # Get the name of the device with index 0

In [None]:
# Returns the current GPU memory occupied by tensors, in bytes.
# No device argument is passed here, so the call relies on its default device.
torch.cuda.memory_allocated()

In [None]:
# Returns the current GPU memory managed by the caching allocator, in bytes.
# memory_reserved() is the supported name for this query; memory_cached()
# is a deprecated alias of it and emits a deprecation warning.
torch.cuda.memory_reserved()

In [None]:
# CPU: no device is requested, so the tensor is created in host memory
a = torch.tensor([1.0, 2.0], dtype=torch.float32)

In [None]:
# Display the tensor
a

In [None]:
a.device # Find which device (CPU or GPU) holds this tensor

In [None]:
# GPU
# NOTE: this rebinds `a`, replacing the CPU tensor assigned in the earlier cell.
a = torch.FloatTensor([1., 2.]).cuda() # .cuda() puts the tensor on the CUDA device instead of the CPU

In [None]:
a.device # Device of the tensor after the .cuda() call

In [None]:
torch.cuda.memory_allocated() # The value is in bytes, so a reading of 512 means 512 bytes (not 512 MB) are allocated now

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
class Model(nn.Module):
    """Small fully connected network: in_features -> h1 -> h2 -> out_features.

    Args:
        in_features: Width of each input sample.
        h1: Width of the first hidden layer.
        h2: Width of the second hidden layer.
        out_features: Number of output values per sample (raw scores, no softmax).
    """

    def __init__(self, in_features=4, h1=8, h2=9, out_features=3):
        super().__init__()
        # The layers are created in this order so that, under a fixed seed,
        # each one draws the same initial weights from the RNG.
        self.fc1 = nn.Linear(in_features=in_features, out_features=h1)   # input -> hidden 1
        self.fc2 = nn.Linear(in_features=h1, out_features=h2)            # hidden 1 -> hidden 2
        self.out = nn.Linear(in_features=h2, out_features=out_features)  # hidden 2 -> output

    def forward(self, x):
        """Apply ReLU after each hidden layer and return the output layer's raw scores."""
        for hidden_layer in (self.fc1, self.fc2):
            x = F.relu(hidden_layer(x))
        return self.out(x)

In [None]:
# Seed the RNG before building the model so the initial weights are the same on every run
torch.manual_seed(32)
model = Model()

In [None]:
# From the discussions here: discuss.pytorch.org/t/how-to-check-if-model-is-on-cuda
# Look at the first parameter to see whether the model's weights live on a CUDA device
next(model.parameters()).is_cuda

In [None]:
# Move the model to the GPU.
# NOTE(review): per the PyTorch docs, Module.cuda() returns the module itself, so
# `gpumodel` and `model` should be the same object rather than two copies — confirm.
gpumodel = model.cuda()

In [None]:
# Same first-parameter check, now on the object returned by .cuda()
next(gpumodel.parameters()).is_cuda

In [None]:
# Path to the Iris CSV. It is machine-specific: change this single constant to
# point at your own copy of the file.
IRIS_CSV_PATH = 'C:/Users/Owner/Downloads/Udemy/Pytorch/iris.csv'
df = pd.read_csv(IRIS_CSV_PATH)

# Split the data into features and target
X = df.drop(columns='variety').values
y = df['variety'].values

# Split the data into training and test sets (80/20; the fixed random_state
# keeps the split identical between runs)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=33)

# Show only the shapes: echoing the four full arrays floods the notebook output
X_train.shape, X_test.shape, y_train.shape, y_test.shape

In [50]:
# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Encode the raw class values from the split as integer ids 0..n_classes-1, the
# target format nn.CrossEntropyLoss takes for class indices. Sorting the class
# values keeps the mapping deterministic between runs.
class_names = sorted(set(y_train) | set(y_test))
class_to_index = {name: index for index, name in enumerate(class_names)}
y_train_le = [class_to_index[label] for label in y_train]
y_test_le = [class_to_index[label] for label in y_test]

# X_train / X_test come out of train_test_split as NumPy arrays, which have no
# .to() method, so convert them to float32 tensors before moving them.
X_train_torch = torch.as_tensor(X_train, dtype=torch.float32).to(device)
X_test_torch = torch.as_tensor(X_test, dtype=torch.float32).to(device)
y_train_torch = torch.tensor(y_train_le, dtype=torch.long).to(device)
y_test_torch = torch.tensor(y_test_le, dtype=torch.long).to(device)

In [51]:
# Batch the feature arrays. Pinned host memory is only useful for host-to-GPU
# copies, and requesting it without CUDA triggers a warning, so enable it only
# when CUDA is available.
# NOTE: these loaders yield features only (no labels), and the training loop
# further down feeds the full tensors directly instead of iterating over them.
trainloader = DataLoader(X_train, batch_size=60, shuffle=True, pin_memory=torch.cuda.is_available())
testloader = DataLoader(X_test, batch_size=60, shuffle=False, pin_memory=torch.cuda.is_available())

In [52]:
# Adam over the model's parameters (learning rate 0.01), paired with
# cross-entropy as the loss for the multi-class output
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()

In [56]:
import time

epochs = 100
losses = []  # one plain Python float per epoch

# Put the training data on whatever device the model's parameters live on.
# This is done once, outside the loop, and is a no-op if they are already there.
model_device = next(gpumodel.parameters()).device
train_inputs = X_train_torch.to(model_device)
train_targets = y_train_torch.to(model_device)

start = time.time()
for i in range(1, epochs + 1):
    # Call the module itself rather than .forward() so nn.Module's hooks run
    y_pred = gpumodel(train_inputs)
    loss = criterion(y_pred, train_targets)

    # Store a float, not the tensor: keeping the tensor would hold on to each
    # epoch's autograd graph and leave CUDA tensors in the list
    loss_value = loss.item()
    losses.append(loss_value)

    # a neat trick to save screen space:
    if i % 10 == 1:
        print(f'epoch: {i:2}  loss: {loss_value:10.8f}')

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# CUDA work is queued asynchronously; wait for it so the timing covers all of it
if model_device.type == 'cuda':
    torch.cuda.synchronize()
print(f'TOTAL TRAINING TIME: {time.time()-start}')

# The model and every tensor it consumes must be on the same device for PyTorch
# to run the operations. The inputs are therefore moved to the model's device
# once, before the loop, instead of calling .cuda() on every epoch; this also
# keeps the loop working when the model lives on the CPU.


epoch:  1  loss: 0.04595106
epoch: 11  loss: 0.04571902
epoch: 21  loss: 0.04548763
epoch: 31  loss: 0.04525835
epoch: 41  loss: 0.04503223
epoch: 51  loss: 0.04481002
epoch: 61  loss: 0.04459254
epoch: 71  loss: 0.04438039
epoch: 81  loss: 0.04417421
epoch: 91  loss: 0.04397450
TOTAL TRAINING TIME: 0.13001656532287598


In [58]:
# Score the test set one sample at a time and count exact class matches
correct = 0
with torch.no_grad():  # inference only, so skip gradient tracking
    for i, data in enumerate(X_test_torch):
        # Call the module itself rather than .forward() so nn.Module's hooks run
        y_val = gpumodel(data)
        print(f'{i+1:2}. {str(y_val):38} {y_test_torch[i]}')
        # Compare plain Python ints on both sides instead of an int against a 0-d tensor
        if y_val.argmax().item() == y_test_torch[i].item():
            correct += 1
print(f'\n{correct} out of {len(y_test_torch)} = {100*correct/len(y_test_torch):.2f}% correct')

 1. tensor([-13.2271,   7.0168,  -2.1098], device='cuda:0') 1
 2. tensor([-13.3811,   7.8008,  -2.7396], device='cuda:0') 1
 3. tensor([ 16.5113,   3.0487, -13.1177], device='cuda:0') 0
 4. tensor([-19.1503,   6.2611,   0.7067], device='cuda:0') 1
 5. tensor([-30.2820,   3.7052,   6.9888], device='cuda:0') 2
 6. tensor([-40.0698,   0.4675,  13.0620], device='cuda:0') 2
 7. tensor([ 15.9038,   3.2489, -13.0625], device='cuda:0') 0
 8. tensor([ 18.2580,   2.9889, -13.9614], device='cuda:0') 0
 9. tensor([-30.5461,   3.4989,   7.2078], device='cuda:0') 2
10. tensor([-37.4076,   1.9264,  11.0303], device='cuda:0') 2
11. tensor([-38.5687,   1.5563,  11.7180], device='cuda:0') 2
12. tensor([ 16.8079,   2.7965, -12.9399], device='cuda:0') 0
13. tensor([-36.5900,   1.4215,  11.0986], device='cuda:0') 2
14. tensor([-18.9548,   6.0786,   0.7875], device='cuda:0') 1
15. tensor([-32.0045,   3.0253,   8.1244], device='cuda:0') 2
16. tensor([-12.9605,   7.6847,  -2.7830], device='cuda:0') 1
17. tens