In [30]:
import torch
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from sklearn import metrics

In [2]:
class CustomDataset:
    """Indexable dataset pairing a 2-D feature array with per-row targets.

    Each item is a dict holding the row's features under ``"x"`` (float
    tensor) and its target under ``"y"`` (long tensor).
    """

    def __init__(self, data, targets):
        """Keep references to the feature array and its targets."""
        self.data, self.targets = data, targets

    def __len__(self):
        """Return the size of the first dimension of ``data``."""
        return self.data.shape[0]

    def __getitem__(self, idx):
        """Return sample ``idx`` as a dict of freshly built tensors."""
        # Index both containers before building tensors.
        features, label = self.data[idx, :], self.targets[idx]
        x_tensor = torch.tensor(features, dtype=torch.float)
        y_tensor = torch.tensor(label, dtype=torch.long)
        return {"x": x_tensor, "y": y_tensor}

In [6]:
# Fixed seed so the synthetic data set and the train/test split are the same
# on every run; without it every downstream number changes per execution.
SEED = 42

data, targets = make_classification(n_samples=1000, random_state=SEED)
# Stratify on the labels so both splits keep the same class proportions.
X_train, X_test, y_train, y_test = train_test_split(
    data, targets, stratify=targets, random_state=SEED
)
len(X_train), len(X_test)

(750, 250)

In [7]:
# Pair each split's features with its labels so samples can be fetched by index.
train_dataset = CustomDataset(data=X_train, targets=y_train)
test_dataset = CustomDataset(data=X_test, targets=y_test)

In [8]:
# Serve both splits in mini-batches of four samples; no shuffle argument is passed.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=4)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=4)

In [9]:
def model(x, w, b):
    """Linear model: return ``torch.matmul(x, w) + b``.

    Args:
        x: Input tensor whose last dimension matches the first dimension of ``w``.
        w: Weight tensor.
        b: Bias tensor, broadcast onto the matrix product.

    Returns:
        The tensor ``x @ w + b``.
    """
    return torch.matmul(x, w) + b

In [10]:
# Inspect the training matrix shape; its second entry is the per-sample feature count.
X_train.shape

(750, 20)

In [22]:
# Fit the linear model to the 0/1 labels by least squares, using hand-written
# mini-batch gradient descent on the parameters w and b.
torch.manual_seed(0)  # make the random initial parameters reproducible

n_features = X_train.shape[1]  # derive from the data instead of hard-coding 20
w = torch.randn(n_features, 1, requires_grad=True)
b = torch.randn(1, requires_grad=True)
learning_rate = 0.001
n_epochs = 10

for epoch in range(n_epochs):
    epoch_loss = 0.0
    counter = 0
    # `batch`, not `data`: avoids overwriting the notebook-level feature matrix.
    for batch in train_loader:
        xtrain = batch["x"]
        ytrain = batch["y"]

        output = model(xtrain, w, b)
        # Flatten both sides so targets and predictions subtract element-wise
        # rather than broadcasting against the trailing dimension of `output`.
        loss = torch.mean((ytrain.view(-1) - output.view(-1)) ** 2)

        epoch_loss += loss.item()
        loss.backward()

        # Update in place so w and b remain the same leaf tensors (no need to
        # re-enable requires_grad), then reset the gradients, which would
        # otherwise accumulate across backward() calls.
        with torch.no_grad():
            w -= learning_rate * w.grad
            b -= learning_rate * b.grad
            w.grad.zero_()
            b.grad.zero_()

        counter += 1

    # Mean per-batch loss for this epoch.
    print(epoch, epoch_loss / counter)

0 15.280452405518673
1 7.166542746602221
2 3.5206563089122165
3 1.7843884028732142
4 0.9393793940147821
5 0.522270168633537
6 0.31385635403034456
7 0.20851279670085282
8 0.1546682504233607
9 0.1268468733995836


In [23]:
# Run the trained parameters over the test split, keeping the per-batch
# predictions and labels as lists of tensors for later concatenation.
outputs = []
labels = []

with torch.no_grad():  # inference only: no autograd graph is recorded
    # `batch`, not `data`: avoids overwriting the notebook-level feature matrix.
    for batch in test_loader:
        xtest = batch["x"]
        ytest = batch["y"]

        output = model(xtest, w, b)
        labels.append(ytest)
        outputs.append(output)

In [27]:
# Concatenate the per-batch predictions and inspect the combined shape.
torch.cat(outputs).shape

torch.Size([250, 1])

In [28]:
# view(-1) flattens the concatenated predictions into a 1-D tensor.
torch.cat(outputs).view(-1).shape

torch.Size([250])

In [31]:
# ROC AUC on the test split: flattened labels first, then the flattened raw
# (unthresholded) model outputs as the scores.
metrics.roc_auc_score(torch.cat(labels).view(-1), torch.cat(outputs).view(-1))

np.float64(0.9186559999999999)