In [29]:
import torch
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from sklearn import metrics

In [30]:
class CustomDataset(torch.utils.data.Dataset):
  """Map-style dataset pairing each row of a 2-D feature array with its target.

  Args:
    data: 2-D array exposing ``.shape`` and ``[idx, :]`` indexing (for
      example a NumPy array); one sample per row.
    targets: sequence indexable by ``idx`` that holds one integer label per
      row of ``data``.

  Raises:
    ValueError: if ``data`` and ``targets`` hold a different number of samples.
  """

  def __init__(self, data, targets):
    # Fail early: a mismatch would otherwise surface as an IndexError deep
    # inside a DataLoader worker, or silently ignore the surplus targets.
    if data.shape[0] != len(targets):
      raise ValueError(
          "data and targets must contain the same number of samples, "
          f"got {data.shape[0]} and {len(targets)}"
      )
    self.data = data
    self.targets = targets

  def __len__(self):
    """Return the number of samples, i.e. the number of rows in ``data``."""
    return self.data.shape[0]

  def __getitem__(self, idx):
    """Return sample ``idx`` as a dict of tensors.

    Returns:
      ``{"x": float32 tensor with the feature row, "y": int64 tensor with the label}``
    """
    current_sample = self.data[idx, :]
    current_target = self.targets[idx]
    return {
        "x": torch.tensor(current_sample, dtype=torch.float),
        "y": torch.tensor(current_target, dtype=torch.long),
    }

In [31]:
# Synthetic classification problem. n_features is spelled out because the weight
# matrix created further down is sized for 20 inputs; random_state pins the draw
# so a Restart & Run All reproduces the same samples.
data, targets = make_classification(n_samples=1000, n_features=20, random_state=42)

In [32]:
# Hold out a test split whose class balance matches the full set (stratify);
# random_state makes the partition repeatable across kernel restarts.
train_data, test_data, train_targets, test_targets = train_test_split(
    data,
    targets,
    stratify=targets,
    random_state=42,
)

In [33]:
# Wrap each split so that a DataLoader can index it sample by sample.
train_dataset = CustomDataset(data=train_data, targets=train_targets)
test_dataset = CustomDataset(data=test_data, targets=test_targets)

In [34]:
# Both splits are served in mini-batches of four, loaded by two worker processes.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=4,
    num_workers=2,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=4,
    num_workers=2,
)


In [35]:
def model(x, w, b):
  """Linear model: return ``x @ w + b``.

  Args:
    x: input tensor whose last dimension matches the first dimension of ``w``.
    w: weight tensor.
    b: bias tensor, broadcast onto the matrix product.

  Returns:
    The tensor ``torch.matmul(x, w) + b``.
  """
  return torch.matmul(x, w) + b

In [36]:
# Show the feature array returned by make_classification.
print(data)

[[ 1.07760765 -0.85977048 -0.5456858  ...  0.46374506 -0.28675672
  -1.30012586]
 [ 1.0009462   0.90812057  1.13846463 ... -0.5354652   1.30632925
  -0.2787561 ]
 [-0.80521755  0.03069552  0.14309203 ...  0.14730349 -0.26143432
  -0.30191926]
 ...
 [ 0.49782843 -2.57787893 -2.30217292 ... -0.6012587   1.77477162
  -0.30358894]
 [ 1.56379675  0.65614816  0.21681264 ...  1.29950409  0.32248289
   2.07304427]
 [-1.27866961  1.4907399  -0.29966175 ...  0.6921353   0.51563809
   0.45543512]]


In [37]:
# Seed torch's RNG so the random initial weights, and therefore every metric
# computed from them, are identical on each Restart & Run All.
torch.manual_seed(0)
# One weight per input feature: 20 must equal the number of columns in the
# feature matrix (make_classification generates 20 features by default).
W = torch.randn(20,1,requires_grad=True)
b = torch.randn(1,requires_grad=True)
# Step size used by the manual gradient-descent update.
learning_rate = 0.001

In [38]:
# Score every training batch with the current parameters and collect the raw
# model outputs next to their labels for the metric computed in the next cell.
outputs = []
labels = []
# no_grad: these forward passes are for evaluation only, so skip graph building.
with torch.no_grad():
  # The loop variable is `batch`, not `data`, so the feature array named `data`
  # is not overwritten by a batch dict.
  for batch in train_loader:
    xtrain = batch['x']
    ytrain = batch['y']

    output = model(xtrain,W,b)
    labels.append(ytrain)
    outputs.append(output)

In [39]:
# ROC-AUC of the collected raw scores against their labels, both flattened to 1-D.
metrics.roc_auc_score(
    y_true=torch.cat(labels).reshape(-1),
    y_score=torch.cat(outputs).reshape(-1),
)

0.42717866666666665

In [40]:
# Manual mini-batch gradient descent on the mean-squared error between the
# linear model's output and the label; prints the mean batch loss per epoch.
for epoch in range(10):
  epoch_loss = 0
  counter = 0
  # The loop variable is `batch`, not `data`, so the feature array named `data`
  # is not overwritten by a batch dict.
  for batch in train_loader:
    xtrain = batch['x']
    ytrain = batch['y']

    output = model(xtrain,W,b)
    # Labels arrive as integer tensors; cast explicitly so the residual is float.
    loss = torch.mean((ytrain.view(-1).float() - output.view(-1))**2)
    epoch_loss = epoch_loss + loss.item()
    loss.backward()

    # Update in place under no_grad so W and b remain the same leaf tensors
    # (still requiring grad) and the step itself is not recorded by autograd.
    with torch.no_grad():
      W -= learning_rate * W.grad
      b -= learning_rate * b.grad

    # backward() accumulates into .grad, so clear both gradients before the
    # next batch. (The previous `W.grad_zero_()` is not a Tensor method and
    # never cleared anything.)
    W.grad.zero_()
    b.grad.zero_()
    counter +=1
  print(epoch, epoch_loss/counter)

0 13.935946403507222
1 5.692830107630567
2 2.5748874310483325
3 1.260215735845664
4 0.666438023024734
5 0.3863255733554747
6 0.2503116335620747
7 0.18287946365060323
8 0.148901782117467
9 0.131550793833238


In [41]:
# Re-score every training batch with the trained parameters and collect the raw
# model outputs next to their labels for the metric computed in the next cell.
# NOTE(review): this still scores the training split; `test_loader` is built
# earlier but never iterated in the visible cells. Confirm whether a held-out
# score was intended here.
outputs = []
labels = []
# no_grad: these forward passes are for evaluation only, so skip graph building.
with torch.no_grad():
  # The loop variable is `batch`, not `data`, so the feature array named `data`
  # is not overwritten by a batch dict.
  for batch in train_loader:
    xtrain = batch['x']
    ytrain = batch['y']

    output = model(xtrain,W,b)
    labels.append(ytrain)
    outputs.append(output)

In [42]:
# ROC-AUC of the collected raw scores against their labels, both flattened to 1-D.
metrics.roc_auc_score(
    y_true=torch.cat(labels).reshape(-1),
    y_score=torch.cat(outputs).reshape(-1),
)

0.9268337777777778