In [9]:
import pandas as pd

In [10]:
df = pd.read_csv("data_banknote_authentication.txt", header=None)
df

Unnamed: 0,0,1,2,3,4
0,3.62160,8.66610,-2.8073,-0.44699,0
1,4.54590,8.16740,-2.4586,-1.46210,0
2,3.86600,-2.63830,1.9242,0.10645,0
3,3.45660,9.52280,-4.0112,-3.59440,0
4,0.32924,-4.45520,4.5718,-0.98880,0
...,...,...,...,...,...
1367,0.40614,1.34920,-1.4501,-0.55949,1
1368,-1.38870,-4.87730,6.4774,0.34179,1
1369,-3.75030,-13.45860,17.5932,-2.77710,1
1370,-3.56370,-8.38270,12.3930,-1.28230,1


In [11]:
X_features = df[[0, 1, 2, 3]].values
y_labels = df[4].values

In [12]:
X_features.shape

(1372, 4)

In [13]:
import numpy as np
# 762 0;  610 1
np.bincount(y_labels)

array([762, 610])

In [14]:
from torch.utils.data import Dataset, DataLoader
class MyDataSet(Dataset):
    def __init__(self, X, y):
        self.features = torch.tensor(X, dtype=torch.float32)
        self.labels = torch.tensor(y, dtype=torch.float32)

    def __getitem__(self, index):
        x = self.features[index]
        y = self.labels[index]
        return x, y

    def __len__(self):
        return self.labels.shape[0]

In [15]:
train_size = int(X_features.shape[0] * 0.8)
train_size

1097

In [16]:
val_size = X_features.shape[0] - train_size
val_size

275

In [19]:
import torch

dataset = MyDataSet(X_features, y_labels)

torch.manual_seed(1)
train_set, val_set = torch.utils.data.random_split(dataset, [train_size, val_size])

train_loader = DataLoader(
    dataset = train_set,
    batch_size = 10,
    shuffle = True
)

val_loader = DataLoader(
    dataset = val_set,
    batch_size = 10,
    shuffle = False
)

In [22]:
import torch

class LogisticRegression(torch.nn.Module):
    def __init__(self, num_features):
        super().__init__()
        self.linear = torch.nn.Linear(num_features, 1)

    def forward(self, x):
        logits = self.linear(x)
        probas = torch.sigmoid(logits)
        return probas

In [23]:
import torch.nn.functional as F


torch.manual_seed(1)
model = LogisticRegression(num_features=4)
optimizer = torch.optim.SGD(model.parameters(), lr=0.2) ## FILL IN VALUE

num_epochs = 20  ## FILL IN VALUE

for epoch in range(num_epochs):
    
    model = model.train()
    for batch_idx, (features, class_labels) in enumerate(train_loader):

        probas = model(features)
        
        loss = F.binary_cross_entropy(probas, class_labels.view(probas.shape))
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        ### LOGGING
        if not batch_idx % 20: # log every 20th batch
            print(f'Epoch: {epoch+1:03d}/{num_epochs:03d}'
                   f' | Batch {batch_idx:03d}/{len(train_loader):03d}'
                   f' | Loss: {loss:.2f}')

Epoch: 001/020 | Batch 000/110 | Loss: 1.30
Epoch: 001/020 | Batch 020/110 | Loss: 0.12
Epoch: 001/020 | Batch 040/110 | Loss: 0.23
Epoch: 001/020 | Batch 060/110 | Loss: 0.03
Epoch: 001/020 | Batch 080/110 | Loss: 0.02
Epoch: 001/020 | Batch 100/110 | Loss: 0.03
Epoch: 002/020 | Batch 000/110 | Loss: 0.08
Epoch: 002/020 | Batch 020/110 | Loss: 0.02
Epoch: 002/020 | Batch 040/110 | Loss: 0.12
Epoch: 002/020 | Batch 060/110 | Loss: 0.05
Epoch: 002/020 | Batch 080/110 | Loss: 0.04
Epoch: 002/020 | Batch 100/110 | Loss: 0.06
Epoch: 003/020 | Batch 000/110 | Loss: 0.02
Epoch: 003/020 | Batch 020/110 | Loss: 0.04
Epoch: 003/020 | Batch 040/110 | Loss: 0.02
Epoch: 003/020 | Batch 060/110 | Loss: 0.14
Epoch: 003/020 | Batch 080/110 | Loss: 0.04
Epoch: 003/020 | Batch 100/110 | Loss: 0.01
Epoch: 004/020 | Batch 000/110 | Loss: 0.01
Epoch: 004/020 | Batch 020/110 | Loss: 0.01
Epoch: 004/020 | Batch 040/110 | Loss: 0.11
Epoch: 004/020 | Batch 060/110 | Loss: 0.05
Epoch: 004/020 | Batch 080/110 |

In [25]:
def compute_accuracy(model, dataloader):

    model = model.eval()
    
    correct = 0.0
    total_examples = 0
    
    for idx, (features, class_labels) in enumerate(dataloader):
        
        with torch.no_grad():
            probas = model(features)
        
        pred = torch.where(probas > 0.5, 1, 0)
        lab = class_labels.view(pred.shape).to(pred.dtype)

        compare = lab == pred
        correct += torch.sum(compare)
        total_examples += len(compare)

    return correct / total_examples

In [26]:
train_acc = compute_accuracy(model, train_loader)
print(f"Accuracy: {train_acc*100:.2f}%")

Accuracy: 98.18%


In [27]:
X_features.shape

(1372, 4)

In [28]:
train_mean = torch.zeros(X_features.shape[1])

for x, y in train_loader:
    train_mean += x.sum(dim = 0)

train_mean = train_mean / len(train_loader)

In [29]:
train_std = torch.zeros(X_features.shape[1])
for x, y in train_loader:
    train_std += ((x - train_mean)**2).sum(dim=0)

train_std = torch.sqrt(train_std / (len(train_set)-1))

In [30]:
print("Feature means:", train_mean)
print("Feature std. devs:", train_std)

Feature means: tensor([  3.8440,  18.6294,  14.8823, -11.9659])
Feature std. devs: tensor([ 4.4875, 17.7838, 14.0961, 10.9746])
