In [6]:
import mil_pytorch as mil
import utils
import train
import numpy
import math
from mill_tree.create_bags import create_bags
import torch
from torch.utils.data import DataLoader

from sklearn.datasets import make_classification

In [7]:
# Seed the RNGs this cell can reach so that Restart & Run All regenerates the same
# synthetic data and the same DataLoader shuffle order.
# NOTE(review): create_bags and mil.train_test_split live outside this notebook; whether they
# draw from the numpy / torch global RNGs is not visible here -- confirm.
SEED = 0
numpy.random.seed(SEED)
torch.manual_seed(SEED)

# Synthetic instance-level data: 500 samples, 5 informative features, 10 classes.
source_data, source_labels = make_classification(
    n_samples=500,
    n_features=5,
    n_informative=5,
    n_redundant=0,
    n_repeated=0,
    n_classes=10,
    class_sep=1.0,
    n_clusters_per_class=1,
    random_state=SEED,
)

# Group the instances into bags; returns the bag data, the bag ids and the bag labels.
data, ids, labels = create_bags(source_data, source_labels, pos=10, neg=10, max_subbags=3, max_instances=3)

# Build the tensors directly in their target dtype. torch.Tensor(x) would first create a
# float32 tensor, rounding the features (and any id above 2**24) before the later
# .double() / .long() cast. torch.tensor copies its input, so the in-place label edit below
# cannot leak back into the arrays returned by create_bags.
data = torch.tensor(data, dtype=torch.double)
ids = torch.tensor(ids, dtype=torch.long)
labels = torch.tensor(labels, dtype=torch.long)

# Recode label 0 as -1 (presumably the +1/-1 encoding the hinge loss expects -- confirm).
labels[labels == 0] = -1

dataset = mil.MilDataset(data, ids, labels, normalize=True)
train_ds, test_ds = mil.train_test_split(dataset, test_size=0.2)

# Dataloaders: full-batch train, mini-batch train (batch size 2), and full-batch test.
train_dl = DataLoader(train_ds, batch_size=len(train_ds), shuffle=True, collate_fn=mil.collate)
train_batch_dl = DataLoader(train_ds, batch_size=2, shuffle=True, collate_fn=mil.collate)
test_dl = DataLoader(test_ds, batch_size=len(test_ds), shuffle=False, collate_fn=mil.collate)

INFO: data normalized


In [8]:
# Hyperparameters: widths of the three hidden linear layers and the Adam settings.
n_neurons1, n_neurons2, n_neurons3 = 5, 5, 5
learning_rate, weight_decay = 1e-3, 1e-4

# Networks applied before ("prep") and after ("after") the aggregation function.
# The Linear layers are created in the same order as before so that the randomly
# initialised weights are drawn identically.
prepNN1 = torch.nn.Sequential(
    torch.nn.Linear(len(dataset.data[0]), n_neurons1),  # input width = length of one data row
    torch.nn.ReLU(),
    torch.nn.Linear(n_neurons1, n_neurons2),
    torch.nn.ReLU(),
)

# First level applies no transformation after aggregation.
afterNN1 = torch.nn.Sequential(torch.nn.Identity())

prepNN2 = torch.nn.Sequential(
    torch.nn.Linear(n_neurons2, n_neurons3),
    torch.nn.ReLU(),
)

# Single Tanh output unit, i.e. a score in [-1, 1].
afterNN2 = torch.nn.Sequential(
    torch.nn.Linear(n_neurons3, 1),
    torch.nn.Tanh(),
)

# Model: two stacked BagModels, each using torch.mean as its aggregation, cast to float64.
first_level = mil.BagModel(prepNN1, afterNN1, torch.mean)
second_level = mil.BagModel(prepNN2, afterNN2, torch.mean)
model = torch.nn.Sequential(first_level, second_level).double()

# Loss function and optimizer.
criterion = mil.MyHingeLoss()
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=learning_rate,
    weight_decay=weight_decay,
)

In [9]:
# Alternative kept for reference: a single fit on the train/test loaders instead of cross-validation.
# train.fit(model, optimizer, criterion, train_dl, test_dl, epochs = 1000)

# k-fold cross-validation on the training split, 300 epochs per call.
# NOTE(review): the same model and optimizer objects are passed in for every fold; whether
# k_fold_cv re-initialises them between folds is not visible here -- confirm.
cv_result = train.k_fold_cv(model, optimizer, criterion, train_ds, epochs=300)
cv_result

Epoch:  50 Loss: 0.9400529181071849
Epoch: 100 Loss: 0.9102542517754277
Epoch: 150 Loss: 0.8744165979575649
Epoch: 200 Loss: 0.8396825631221034
Epoch: 250 Loss: 0.7983179251883162
Epoch: 300 Loss: 0.7568962812257061
Epoch:  50 Loss: 0.9517688196743607
Epoch: 100 Loss: 0.9292619309382159
Epoch: 150 Loss: 0.853829873033688
Epoch: 200 Loss: 0.6189418009352985
Epoch: 250 Loss: 0.3167082446670323
Epoch: 300 Loss: 0.2013635143443837
Epoch:  50 Loss: 1.0027655846423016
Epoch: 100 Loss: 0.8189040557132663
Epoch: 150 Loss: 0.7329478685525056
Epoch: 200 Loss: 0.69075415672422
Epoch: 250 Loss: 0.6356382228941947
Epoch: 300 Loss: 0.5383579282532414
Epoch:  50 Loss: 0.9899055429398801
Epoch: 100 Loss: 0.9733602472024854
Epoch: 150 Loss: 0.9480418125885044
Epoch: 200 Loss: 0.9145269180843419
Epoch: 250 Loss: 0.888476390172592
Epoch: 300 Loss: 0.8612287773899024
Epoch:  50 Loss: 1.0303363363893427
Epoch: 100 Loss: 1.0176172646758257
Epoch: 150 Loss: 0.9854142629675872
Epoch: 200 Loss: 0.9245918933607

tensor(1.1732)