In [2]:
import numpy as np
from sklearn.linear_model import LinearRegression

import torch
import torch.optim as optim
import torch.nn as nn
#from torchviz import make_dot

In [3]:
n_cudas = torch.cuda.device_count() 

for i in range(n_cudas):
  print(torch.cuda.get_device_name(i))


Tesla T4


In [12]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

Synthetic data

- 

In [8]:
true_b = 1 # parameter setting
true_w = 2 #true parameter
N = 100 #n_size

# Data Generation
np.random.seed(42)
x = np.random.rand(N, 1) #random x number n size
epsilon = (.1 * np.random.randn(N, 1)) # error term 1/10 of the true number..?
y = true_b + true_w * x + epsilon 

In [9]:
# Shuffles the indices
idx = np.arange(N) 
np.random.shuffle(idx) #shuffled indices

# Uses first 80 random indices for train
train_idx = idx[:int(N*.8)] #select first 80% as training
# Uses the remaining indices for validation
val_idx = idx[int(N*.8):] #select the last 20% as validation

#print(train_idx, val_idx)

# Generates train and validation sets
x_train, y_train = x[train_idx], y[train_idx] # training index are used to retrieve datapoint
x_val, y_val = x[val_idx], y[val_idx] #training index are used to retrieve datapoints.

In [10]:

x_train_tensor = torch.as_tensor(x_train)
x_train_tensor = torch.as_tensor(x_train)
x_train.dtype, x_train_tensor.dtype

(dtype('float64'), torch.float64)

In [11]:
float_tensor = x_train_tensor.float()
float_tensor.dtype

torch.float32

[as_tensor](https://pytorch.org/docs/stable/generated/torch.as_tensor.html?highlight=as_tensor#torch.as_tensor)

- This will share the underlying data
- Overwriting one differs another.
- These are CPU tensor.

In [4]:
dummy_array = np.array([1, 2, 3])
dummy_tensor = torch.as_tensor(dummy_array)
dummy_array[1] = 0
dummy_tensor

tensor([1, 0, 3])

In [5]:
dm_tensor = torch.from_numpy(dummy_array)
print(dm_tensor)

tensor([1, 0, 3])


In [6]:
dummy_tensor.numpy()

array([1, 0, 3])

In [7]:
torch.cuda.is_available()

True

## GPU tensor

- GPU tensor can be created by `to()`
- But this is not only for GPU, but also can handle cpus. 
- When using to() to cpu, nothing will change, but this is recommended.

- when we convert the GPU tensor to numpy tensor, we need to convert back to cpu() then np tensor

In [13]:
gpu_tensor = torch.as_tensor(x_train).to(device)

In [14]:
gpu_tensor[0]

tensor([0.7713], device='cuda:0', dtype=torch.float64)

In [16]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'


x_train_tensor = torch.as_tensor(x_train).float().to(device)
y_train_tensor = torch.as_tensor(y_train).float().to(device)

y_train_tensor.type()

'torch.cuda.FloatTensor'

In [18]:
numpy_tens = x_train_tensor.cpu().numpy()
print(numpy_tens)

[[0.77127033]
 [0.06355835]
 [0.86310345]
 [0.02541913]
 [0.7319939 ]
 [0.07404465]
 [0.19871569]
 [0.31098232]
 [0.47221494]
 [0.96958464]
 [0.12203824]
 [0.77513283]
 [0.802197  ]
 [0.72960615]
 [0.09767211]
 [0.18485446]
 [0.15601864]
 [0.02058449]
 [0.9868869 ]
 [0.6232981 ]
 [0.7080726 ]
 [0.5979    ]
 [0.9218742 ]
 [0.63755745]
 [0.2809345 ]
 [0.25877997]
 [0.11959425]
 [0.7290072 ]
 [0.94888556]
 [0.60754484]
 [0.5612772 ]
 [0.4937956 ]
 [0.18182497]
 [0.27134904]
 [0.96990985]
 [0.21233912]
 [0.1834045 ]
 [0.8661761 ]
 [0.37454012]
 [0.29122913]
 [0.80839735]
 [0.05808361]
 [0.83244264]
 [0.54269606]
 [0.77224475]
 [0.88721275]
 [0.08849251]
 [0.04522729]
 [0.59241456]
 [0.684233  ]
 [0.7132448 ]
 [0.03438852]
 [0.601115  ]
 [0.81546146]
 [0.4401525 ]
 [0.32518333]
 [0.785176  ]
 [0.76078504]
 [0.4951769 ]
 [0.19967379]
 [0.9507143 ]
 [0.29214466]
 [0.13949387]
 [0.31171107]
 [0.7068573 ]
 [0.11586906]
 [0.35846573]
 [0.00552212]
 [0.19598286]
 [0.89482737]
 [0.45606998]
 [0.52

## Creating tensor for Parameters.
We need to allow gradient for these tensors.
By setting device inside the torch.randn(), we can avoid unnecessary errors.

In [19]:
torch.manual_seed(42)
b = torch.randn(1, requires_grad=True, dtype = torch.float, device = device)
w = torch.randn(1, requires_grad=True, dtype = torch.float, device = device)
print(b, w)

tensor([0.1940], device='cuda:0', requires_grad=True) tensor([0.1391], device='cuda:0', requires_grad=True)


In [22]:
# Step 1 - Computes our model's predicted output - forward pass 
yhat = b + w * x_train_tensor

# Step 2 - Computes the loss # We are using ALL data points, so this is BATCH gradient # descent. How wrong is our model? That's the error!

error = (yhat - y_train_tensor) # It is a regression, so it computes mean squared error (MSE) 
loss = (error ** 2).mean()

# Step 3 - Computes gradients for both "b" and "w" parameters # No more manual computation of gradients!

# b_grad = 2 * error.mean() # w_grad = 2 * (x_tensor * error).mean() l
loss.backward()

In [23]:
print(b.grad, w.grad)

tensor([-6.7762], device='cuda:0') tensor([-3.8878], device='cuda:0')
