In [1]:
import torch

In [2]:
from torch import nn

In [3]:
import sklearn
from sklearn.datasets import make_circles
from sklearn.model_selection import train_test_split

In [4]:
X, y = make_circles(1000, random_state=42, noise=0.05)
X = torch.from_numpy(X).type(torch.float32)
y = torch.from_numpy(y).type(torch.float32)

In [5]:
len(X) == len(y), y.shape, y.device, y.dtype

(True, torch.Size([1000]), device(type='cpu'), torch.float32)

In [6]:
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [7]:
X = X.to(device)
y = y.to(device)
X_train, X_test,y_train, y_test = train_test_split(X, y,
                                                    test_size=0.3 ,
                                                    random_state=43)

y_test = y_test.squeeze(dim=0)
X_test = X_test.squeeze(dim=0)

In [8]:
X_train.shape, X_train.device

(torch.Size([700, 2]), device(type='cuda', index=0))

In [9]:
torch.manual_seed(43)
class CircleClassifier(nn.Module):
  def __init__(self):
    super().__init__()
    self.layer_1 = nn.Linear(in_features=2, out_features=8)
    self.layer_2 = nn.Linear(in_features=8, out_features=1)


  def forward(self, x):
    return self.layer_2(self.layer_1(x))


model0 = CircleClassifier().to(device)
model0

CircleClassifier(
  (layer_1): Linear(in_features=2, out_features=8, bias=True)
  (layer_2): Linear(in_features=8, out_features=1, bias=True)
)

In [10]:
# same model implemented using torch module, defining forward class is better for more complex models
torch.manual_seed(43)
model0 = nn.Sequential(nn.Linear(in_features=2, out_features=128),
                       nn.ReLU(),
                      nn.Linear(in_features=128, out_features=256),
                       nn.ReLU(),
                      nn.Linear(in_features=256, out_features=128),
                       nn.ReLU(),
                      nn.Linear(in_features=128, out_features=1)).to(device)
# model0.state_dict()

# same model implemented using torch module, defining forward class is better for more complex models
model1 = nn.Sequential(nn.Linear(in_features=2, out_features=128),
                       nn.ReLU(),
                      nn.Linear(in_features=128, out_features=256),
                       nn.ReLU(),
                      nn.Linear(in_features=256, out_features=128),
                       nn.ReLU(),
                      nn.Linear(in_features=128, out_features=1)).to(device)
# model0.state_dict()# model1.state_dict()

In [11]:
next(model0.parameters()).device

device(type='cuda', index=0)

In [12]:
loss_func1 = nn.BCELoss()         ## sigmoid implemented in this
loss_func2 = nn.BCEWithLogitsLoss()       ## not in this, more stable apparently??check docs

In [13]:
untrained_preds = model0(X_test).to(device).squeeze(dim=1)
# loss_func1(untrained_preds, y_test), loss_func2(untrained_preds, y_test)
loss_func2(untrained_preds, y_test)

tensor(0.6938, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)

In [14]:
optimiser0 = torch.optim.SGD(params=model0.parameters(), lr=0.001)
optimiser1 = torch.optim.SGD(params=model1.parameters(), lr=0.001)


In [15]:
def accuracy_func(y_true, y_pred):
  correct = torch.eq(y_true, y_pred).sum().item()
  accuracy = correct/len(y_pred) * 100
  return accuracy

In [16]:
X_train.device, X_test.device, y_train.device, y_test.device

(device(type='cuda', index=0),
 device(type='cuda', index=0),
 device(type='cuda', index=0),
 device(type='cuda', index=0))

In [17]:
model0.eval()
with torch.inference_mode():
  y_logits = model0(X_test.to(device))
y_logits[:5]

tensor([[-0.0693],
        [-0.0714],
        [-0.0622],
        [-0.0705],
        [-0.0664]], device='cuda:0')

In [18]:
y_logits.device

device(type='cuda', index=0)

In [19]:
untrained_labels = torch.round(torch.sigmoid(y_logits))
untrained_labels[:5]

tensor([[0.],
        [0.],
        [0.],
        [0.],
        [0.]], device='cuda:0')

In [20]:
## torch.eq() ## takes 2 tensors and returns a tensor with true when two elemts are same

In [21]:
loss_func1(y_test, untrained_labels.squeeze())

tensor(50.6667, device='cuda:0')

In [29]:
## using bce loss
epochs = 10000

for epoch in range(epochs):
  model0.train()
  y_logits = model0(X_train)
  y_pred = torch.sigmoid(y_logits)  ## forward pass
  loss = loss_func1(y_pred.squeeze(), y_train)
  optimiser0.zero_grad()
  loss.backward()
  optimiser0.step()

  model0.eval()
  with torch.inference_mode():
    test_logits = model0(X_test).squeeze()
    y_pred_test = torch.sigmoid(test_logits)

    test_loss = loss_func1(y_pred_test, y_test)
    test_acc = accuracy_func(y_true=y_test, y_pred=torch.round(y_pred_test))

  if epoch % 1000 == 0:
    print(epoch, test_loss, test_acc)

0 tensor(0.2001, device='cuda:0') 97.0
1000 tensor(0.1802, device='cuda:0') 97.33333333333334
2000 tensor(0.1639, device='cuda:0') 97.33333333333334
3000 tensor(0.1503, device='cuda:0') 97.33333333333334
4000 tensor(0.1390, device='cuda:0') 97.33333333333334
5000 tensor(0.1295, device='cuda:0') 97.33333333333334
6000 tensor(0.1216, device='cuda:0') 97.33333333333334
7000 tensor(0.1148, device='cuda:0') 97.33333333333334
8000 tensor(0.1090, device='cuda:0') 97.33333333333334
9000 tensor(0.1040, device='cuda:0') 97.33333333333334


In [30]:
## using bcewithouutsigmoid loss
epochs = 10000

for epoch in range(epochs):
  model1.train()
  y_logits = model1(X_train)
  y_pred= torch.round(torch.sigmoid(y_logits))  ## forward pass
  loss = loss_func2(y_logits.squeeze(), y_train)
  acc = accuracy_func(y_true=y_train,
                      y_pred=y_pred)
  optimiser1.zero_grad()
  loss.backward()
  optimiser1.step()

  model1.eval()
  with torch.inference_mode():
    test_logits = model1(X_test).squeeze()
    y_pred_test = torch.round(torch.sigmoid(test_logits))

    test_loss = loss_func2(test_logits, y_test)
    test_acc = accuracy_func(y_true=y_test, y_pred=y_pred_test)

  if epoch % 1000 == 0:
    print(epoch, test_loss, test_acc)

0 tensor(0.4096, device='cuda:0') 95.0
1000 tensor(0.3674, device='cuda:0') 95.66666666666667
2000 tensor(0.3263, device='cuda:0') 95.66666666666667
3000 tensor(0.2884, device='cuda:0') 96.66666666666667
4000 tensor(0.2550, device='cuda:0') 97.0
5000 tensor(0.2265, device='cuda:0') 97.0
6000 tensor(0.2025, device='cuda:0') 97.0
7000 tensor(0.1823, device='cuda:0') 97.33333333333334
8000 tensor(0.1656, device='cuda:0') 97.33333333333334
9000 tensor(0.1517, device='cuda:0') 97.0
