In [1]:
from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms
from sklearn.metrics import accuracy_score, roc_auc_score
from torch.utils.data import Dataset, DataLoader
import numpy as np
import matplotlib.pyplot as plt
from torch.autograd import Variable

SAVE_PATH = './path/to/data/'

class Net(nn.Module):
    def __init__(self, n_input, n_output):
        super(Net, self).__init__()
        self.fc = nn.Linear(n_input, n_output)

    def forward(self, x):
        x = self.fc(x)
        return x.squeeze()

In [2]:
from sklearn import datasets
breast_cancer = datasets.load_breast_cancer()
data, labels = breast_cancer['data'], breast_cancer['target']

mu = np.mean(data, axis = 0)
std = np.std(data, axis = 0)
data = (data - mu) / std
num_train = data.shape[0] - 100
num_noisy = 100
num_val = 50
num_test = 50

x_train, y_train = data[:num_train, :], labels[:num_train]

noisy_index = np.random.randint(0, num_train, num_noisy)
print(noisy_index)
y_train[noisy_index] = 1 - y_train[noisy_index]

x_val, y_val = data[num_train:num_train+num_val,:], labels[num_train:num_train+num_val]
x_test, y_test = data[num_train + num_val:,:], labels[num_train + num_val:]

[450 153 369 223 180 447 105 328 149 460 124 417  44 269 315 184  33 342
 212  97 376 249 294 344 456 101 352 322 279 383 456 299 296 412  12 324
 223 372 300 303  46  13  69  43 171 385 427   8 374 319 354 153 221 189
 258  72  17  68 303 320 361  18 207 275 225 236 399 346 431 234 191 332
 440 109  50 268 441 289 224 283 457 293 211   9  50 448   9 449  47 216
 373  90 165 233 432 189 105 398 215 163]


In [5]:
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression()
lr.fit(x_train, y_train)
predition = lr.predict(x_test)
print(accuracy_score(predition, y_test), roc_auc_score(predition, y_test))

0.96 0.9375


In [3]:
LR = Net(x_train.shape[1], 1)
optimizer = torch.optim.SGD(LR.parameters(), lr = 0.01)
LR.train()

x_var = Variable(torch.FloatTensor(x_train))
y_var = Variable(torch.FloatTensor(y_train))
x_test_var = Variable(torch.FloatTensor(x_test))
y_test_var = Variable(torch.FloatTensor(y_test))

for i in range(100):
    y_hat = LR(x_var)
    cost = F.binary_cross_entropy_with_logits(y_hat, y_var)
    
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
    
    result = LR(x_test_var)
    pred_proba = torch.sigmoid(result)
    predicted = (pred_proba > 0.5).int()
    acc = accuracy_score(y_test, predicted.int())
    auc = roc_auc_score(y_test, pred_proba.data.numpy())
    print(auc, acc)

0.26999999999999996 0.26
0.31499999999999995 0.28
0.36 0.34
0.41 0.4
0.46499999999999997 0.42
0.53 0.44
0.5974999999999999 0.48
0.64 0.52
0.6599999999999999 0.54
0.675 0.58
0.7075 0.58
0.735 0.6
0.7625 0.62
0.78 0.64
0.8025 0.68
0.8225 0.72
0.835 0.76
0.8574999999999999 0.78
0.865 0.78
0.8725 0.78
0.8875 0.8
0.8975 0.8
0.9124999999999999 0.8
0.9249999999999999 0.82
0.9325 0.82
0.9375 0.82
0.94 0.82
0.9475 0.82
0.9524999999999999 0.82
0.955 0.82
0.9574999999999999 0.86
0.96 0.86
0.9624999999999999 0.88
0.9675 0.88
0.9675 0.88
0.9749999999999999 0.88
0.9749999999999999 0.88
0.9774999999999998 0.88
0.9799999999999999 0.88
0.9799999999999999 0.88
0.9799999999999999 0.88
0.9824999999999999 0.88
0.9849999999999999 0.88
0.9849999999999999 0.88
0.9849999999999999 0.88
0.9875 0.88
0.9875 0.88
0.99 0.88
0.99 0.88
0.99 0.9
0.99 0.9
0.99 0.9
0.99 0.9
0.99 0.9
0.99 0.92
0.99 0.92
0.99 0.92
0.99 0.92
0.99 0.92
0.99 0.92
0.99 0.92
0.99 0.92
0.99 0.92
0.9925 0.92
0.9925 0.92
0.9925 0.92
0.995 0.92
0.9