In [1]:
from torch_geometric.datasets import Planetoid

dataset = Planetoid(root='/tmp/Cora', name='Cora')

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!


In [2]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = GCNConv(dataset.num_node_features, 16)
        self.conv2 = GCNConv(16, dataset.num_classes)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)

        return F.log_softmax(x, dim=1)

In [19]:
data = dataset[0]
data.edge_index
data.x.shape[0]
print('Number of nodes = ', data.x.shape[0])
print('Number of features = ', data.x.shape[1])

Number of nodes =  2708
Number of features =  1433


In [36]:
data.edge_attr.shape[-1]

AttributeError: 'NoneType' object has no attribute 'shape'

In [35]:
data.__dict__

{'x': tensor([[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]]),
 'edge_index': tensor([[   0,    0,    0,  ..., 2707, 2707, 2707],
         [ 633, 1862, 2582,  ...,  598, 1473, 2706]]),
 'edge_attr': None,
 'y': tensor([3, 4, 4,  ..., 3, 3, 3]),
 'pos': None,
 'norm': None,
 'face': None,
 'train_mask': tensor([ True,  True,  True,  ..., False, False, False]),
 'val_mask': tensor([False, False, False,  ..., False, False, False]),
 'test_mask': tensor([False, False, False,  ...,  True,  True,  True])}

In [32]:
conv1 = GCNConv(dataset.num_node_features, 16)
conv2 = GCNConv(16, dataset.num_classes)

In [26]:
x, edge_index = data.x, data.edge_index

In [34]:
F.log_softmax(conv2(F.dropout(F.relu(conv1(x, edge_index))), edge_index), dim=1).shape

torch.Size([2708, 7])

In [1]:
import numpy as np


In [2]:
def bin_array(num, m):
    """Convert a positive integer num into an m-bit bit vector"""
    return np.array(list(np.binary_repr(num).zfill(m))).astype(np.int8)

In [5]:
bin_array(3, 2)

array([1, 1], dtype=int8)

In [11]:
np.array([bin_array(x, 3) for x in np.random.randint(7, size = (10))])

array([[1, 1, 0],
       [0, 1, 1],
       [0, 1, 0],
       [0, 1, 1],
       [0, 0, 1],
       [1, 1, 0],
       [0, 1, 1],
       [0, 0, 0],
       [1, 0, 1],
       [1, 0, 1]], dtype=int8)

In [14]:
d_arr = sorted(np.random.randint(140, size = (10)))
d_arr

[11, 13, 33, 33, 38, 49, 94, 106, 120, 137]

In [17]:
d_of_the_week_arr = np.mod(d_arr,7)
d_of_the_week_arr

array([4, 6, 5, 5, 3, 0, 3, 1, 1, 4], dtype=int32)

In [25]:
d_arr_bin = np.array([bin_array(x, 10) for x in d_arr])
d_arr_bin

array([[0, 0, 0, 0, 0, 0, 1, 0, 1, 1],
       [0, 0, 0, 0, 0, 0, 1, 1, 0, 1],
       [0, 0, 0, 0, 1, 0, 0, 0, 0, 1],
       [0, 0, 0, 0, 1, 0, 0, 0, 0, 1],
       [0, 0, 0, 0, 1, 0, 0, 1, 1, 0],
       [0, 0, 0, 0, 1, 1, 0, 0, 0, 1],
       [0, 0, 0, 1, 0, 1, 1, 1, 1, 0],
       [0, 0, 0, 1, 1, 0, 1, 0, 1, 0],
       [0, 0, 0, 1, 1, 1, 1, 0, 0, 0],
       [0, 0, 1, 0, 0, 0, 1, 0, 0, 1]], dtype=int8)

In [26]:
d_of_the_week_arr_bin = np.array([bin_array(x, 3) for x in d_of_the_week_arr])
d_of_the_week_arr_bin

array([[1, 0, 0],
       [1, 1, 0],
       [1, 0, 1],
       [1, 0, 1],
       [0, 1, 1],
       [0, 0, 0],
       [0, 1, 1],
       [0, 0, 1],
       [0, 0, 1],
       [1, 0, 0]], dtype=int8)

In [27]:
np.hstack((d_arr_bin,d_of_the_week_arr_bin))

array([[0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0],
       [0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0],
       [0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1],
       [0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1],
       [0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1],
       [0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1],
       [0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1],
       [0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1],
       [0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0]], dtype=int8)

In [32]:
c = ['ADOPTION PAY',
'AGE RELATED PAY',
'ATTENDANCE ALLOWANCE',
'BEREAVEMENT ALLOWANCE',
'BEREAVEMENT PAYMENT',
'CARERS ALLOWANCE',
'CHRISTMAS BONUS',
'COLD WEATHER PAY',
'CONTRIBUTORY ESA',
'DISABILITY LIVING ALLOWANCE',
'EESA',
'ESA',
'HOUSING BENEFIT',
'INCAPACITY BENEFIT',
'INCOME SUPPORT',
'INDUSTRIAL INJURIES BENEFIT',
'INDUSTRIAL INJURIES DISABLEMENT BENEFIT',
'JSA',
'MATERNITY ALLOWANCE',
'MATERNITY PAY',
'PATERNITY PAY',
'PENSION CREDIT',
'PERSONAL INDEPENDENCE PAYMENTS',
'SHARED PARENTAL LEAVE PAYMENTS',
'SICK PAY',
'STATE PENSION',
'TAX CREDITS',
'UNIVERSAL CREDIT',
'ADULTCARE',
'AIRLINES',
'BANK CHARGES',
'BANK REWARDS',
'BANK TRANSFERS',
'BEAUTY',
'BUILDING AND CONTENTS INSURANCE',
'BURSERY',
'BUSES',
'CAR INSURANCE',
'CAR PARKS',
'CASH DEPOSIT',
'CASH WITHDRAWAL',
'CHARITY',
'CHILD MAINTENANCE',
'CHILDCARE',
'COUNCIL TAX',
'COURT FINES',
'CREDIT CARD',
'DCA',
'DIVIDENDS',
'ENERGY',
'FUEL',
'GAMBLING',
'GROCERIES',
'HEALTH INSURANCE',
'HEALTHCARE',
'HIRE PURCHASE',
'HOME COMMUNICATIONS',
'HOME REPAIRS',
'INTEREST',
'LIFE INSURANCE',
'MECHANICS',
'MOBILE PHONES',
'MORTGAGE',
'OTHER EXPENDITURE',
'OTHER EXPENDITURE REFUNDS',
'OTHER INCOME',
'PETCARE',
'PHARMACIES',
'PRIVATE PENSION',
'PUBS AND BARS',
'RENT',
'RESTAURANT AND CAFE',
'RETAIL',
'RETURNED CHEQUE OR DD',
'ROAD TAX',
'SALARY',
'SAVINGS',
'SECURED LOAN',
'TAKE AWAY',
'TAX',
'TAXI',
'TOLL ROADS',
'TRAINS',
'TV LICENCE',
'UNSECURED LOAN',
'WATER']

from itertools import product
import pandas as pd

pd.DataFrame([list(x) for x in product(c,c) if x[0]> x[1]]).to_excel("output.xlsx")


In [33]:
out_list = []

for i, c_i in enumerate(c):
    for j in range(i+1, len(c)):
        c_j = c[j]
        
        out_list = out_list +[[c_i, c_j]]

In [35]:
pd.DataFrame(out_list).to_excel("output.xlsx")

In [95]:
target = np.random.randint(0,2,(4,214,214)).astype(np.float32)
pred = np.random.uniform(size = (4,214,214))

In [96]:
target.shape

(4, 214, 214)

In [83]:
pred.shape

(4, 214, 214)

### test new loss function

In [120]:
import itertools

#make fake data in np arrays
target = np.random.randint(0,2,(4,214,214)).astype(np.float32)
pred = np.random.uniform(size = (4,214,214))

#permute the targets and broadcast the arrays
perm_targets = np.array([target[np.array(x), :, :] for x in itertools.permutations([0,1,2,3])])
broadcast_preds = np.array([pred for _ in range(24)])

#turn into tensors
broadcast_preds_ten = torch.tensor(broadcast_preds).type(torch.DoubleTensor)
perm_targets_ten = torch.tensor(perm_targets).type(torch.float)

In [121]:
torch.min(torch.mean(F.binary_cross_entropy_with_logits(broadcast_preds_ten, perm_targets_ten, reduction = 'none'), dim = (1,2,3)))

tensor(0.7332)

In [128]:
smooth = 1
intersection = (broadcast_preds_ten*perm_targets_ten).sum(dim=2).sum(dim=2)
loss = (1 - ((2. * intersection + smooth) / (broadcast_preds_ten.sum(dim=2).sum(dim=2) + perm_targets_ten.sum(dim=2).sum(dim=2) + smooth)))

In [132]:
torch.min(torch.mean(loss, dim = 1))

tensor(0.4990, dtype=torch.float64)

In [161]:
#make fake data in np arrays
target = np.random.randint(0,2,(4,214,214)).astype(np.float32)
pred = np.random.uniform(size = (4,214,214))

#permute the targets and broadcast the arrays
perm_targets = np.array([target[np.array(x), :, :] for x in itertools.permutations([0,1,2,3])])
broadcast_preds = np.array([pred for _ in range(24)])

#turn into tensors
broadcast_preds_ten = torch.tensor(broadcast_preds).type(torch.DoubleTensor)
perm_targets_ten = torch.tensor(perm_targets).type(torch.float)


def calc_loss(pred, perm_targets, metrics, bce_weight=0.5):
    
    broadcast_preds = pred.unsqueeze(0).repeat(24,1,1,1)
    
    #bce loss
    bce = torch.min(torch.mean(F.binary_cross_entropy_with_logits(broadcast_preds, perm_targets, reduction = 'none'), dim = (1,2,3)))
   
    #dice loss
    pred = F.sigmoid(pred)    
    smooth = 1
    intersection = (broadcast_preds_ten*perm_targets_ten).sum(dim=2).sum(dim=2)
    loss = (1 - ((2. * intersection + smooth) / (broadcast_preds_ten.sum(dim=2).sum(dim=2) + perm_targets_ten.sum(dim=2).sum(dim=2) + smooth)))
    dice = torch.min(torch.mean(loss, dim = 1))

    loss = bce * bce_weight + dice * (1 - bce_weight)

#     metrics['bce'] += bce.data.cpu().numpy() * target.size(0)
#     metrics['dice'] += dice.data.cpu().numpy() * target.size(0)
#     metrics['loss'] += loss.data.cpu().numpy() * target.size(0)

    return loss

calc_loss(torch.tensor(pred).type(torch.DoubleTensor), perm_targets_ten, defaultdict(float), bce_weight=0.5)

tensor(0.6161, dtype=torch.float64)

### Now do it with batches

In [181]:
batch_size = 8

def get_a_target_arr():
    return np.random.randint(0,2,(4,214,214)).astype(np.float32)

batch_perm_targets_ten = torch.tensor(np.array([np.array([get_a_target_arr()[np.array(x), :, :] for x in itertools.permutations([0,1,2,3])]) for _ in range(num_batches)])).type(torch.float)

batch_perm_targets_ten.shape #(batch_size, permutation_of_labels, labels, image_x, image_y)

batch_pred = torch.tensor(np.random.uniform(size = (batch_size, 4,214,214))).type(torch.DoubleTensor)

batch_pred.shape

torch.Size([8, 4, 214, 214])

In [None]:
broadcast_preds = batch_pred.unsqueeze(1).repeat(1,24,1,1,1)

batch_mins, 

In [207]:
broadcast_preds = batch_pred.unsqueeze(1).repeat(1,24,1,1,1)

batch_mins, _ = torch.min(torch.mean(F.binary_cross_entropy_with_logits(broadcast_preds, batch_perm_targets_ten, reduction = 'none'), dim = (2,3,4)), dim = 1)
bce = torch.mean(batch_mins)

bce

batch_pred = F.sigmoid(batch_pred)    
smooth = 1

# intersection = (broadcast_preds_ten*perm_targets_ten).sum(dim=2).sum(dim=2)
# loss = (1 - ((2. * intersection + smooth) / (broadcast_preds_ten.sum(dim=2).sum(dim=2) + perm_targets_ten.sum(dim=2).sum(dim=2) + smooth)))
# dice = torch.min(torch.mean(loss, dim = 1))

tensor(0.7325)

In [169]:
np.array([[np.random.randint(0,2,(num_batches, 4,214,214))[np.array(x), :, :] for x in itertools.permutations([0,1,2,3])]]).shape

(1, 24, 4, 4, 214, 214)

In [159]:
from collections import defaultdict
defaultdict(float)

defaultdict(float, {})

In [146]:
pred_ten = torch.tensor(pred).type(torch.DoubleTensor)

In [151]:
broadcast_preds_ten = pred_ten.unsqueeze(0).repeat(24,1,1,1)

tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)


In [125]:
broadcast_preds_ten.sum(dim=2).sum(dim=2).shape

torch.Size([24, 4])

In [126]:
broadcast_preds_ten.shape

torch.Size([24, 4, 214, 214])

In [127]:
intersection.shape

torch.Size([24, 4, 214, 214])

In [None]:
loss

In [75]:
broadcast_preds = np.array([pred for _ in range(24)])

In [76]:
broadcast_preds.shape

(24, 4, 214, 214)

In [79]:
perm_targets.shape

(24, 4, 214, 214)

In [109]:
import torch
import torch.nn.functional as F
from Unet.loss import dice_loss

unreduced_loss = F.binary_cross_entropy_with_logits(torch.tensor(pred).type(torch.DoubleTensor), torch.tensor(target).type(torch.float), reduction = 'none')
torch.mean(unreduced_loss, dim = (0,1,2))


tensor(0.7338)

In [106]:
F.binary_cross_entropy_with_logits(torch.tensor(pred).type(torch.DoubleTensor), torch.tensor(target).type(torch.float), reduction = 'mean')

tensor(0.7338)

In [100]:
torch.tensor(target).type(torch.int)

tensor([[[1, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 1, 1, 0],
         [1, 0, 0,  ..., 1, 0, 1],
         ...,
         [1, 0, 0,  ..., 1, 1, 0],
         [0, 0, 1,  ..., 0, 1, 1],
         [1, 1, 1,  ..., 0, 1, 1]],

        [[0, 0, 0,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 1, 0],
         [1, 0, 0,  ..., 1, 1, 1],
         ...,
         [1, 0, 1,  ..., 1, 0, 0],
         [0, 0, 0,  ..., 1, 0, 1],
         [1, 0, 1,  ..., 1, 1, 1]],

        [[0, 1, 0,  ..., 1, 1, 0],
         [1, 0, 1,  ..., 0, 0, 0],
         [1, 0, 0,  ..., 1, 0, 1],
         ...,
         [1, 0, 0,  ..., 1, 1, 0],
         [1, 1, 1,  ..., 1, 0, 1],
         [0, 0, 0,  ..., 1, 0, 1]],

        [[1, 1, 1,  ..., 1, 1, 0],
         [0, 1, 1,  ..., 0, 1, 1],
         [1, 1, 0,  ..., 1, 1, 1],
         ...,
         [1, 1, 1,  ..., 0, 1, 0],
         [1, 0, 1,  ..., 0, 1, 0],
         [0, 1, 0,  ..., 0, 1, 1]]], dtype=torch.int32)

In [99]:
torch.tensor(pred).type(torch.DoubleTensor)

tensor([[[1.7174e-02, 9.4924e-01, 2.8111e-01,  ..., 4.6864e-02,
          6.6449e-01, 1.0025e-01],
         [6.0906e-01, 7.2659e-01, 5.8833e-01,  ..., 4.2880e-01,
          8.3214e-01, 8.9257e-01],
         [8.9081e-01, 4.7198e-01, 3.2546e-02,  ..., 9.7560e-01,
          8.5477e-01, 4.0709e-01],
         ...,
         [8.7397e-02, 2.3085e-01, 8.3511e-01,  ..., 7.5738e-01,
          7.3927e-01, 8.7057e-01],
         [4.1357e-01, 2.0404e-01, 7.4593e-01,  ..., 2.5145e-01,
          6.3137e-01, 5.5587e-01],
         [4.1039e-01, 1.3681e-01, 7.7471e-05,  ..., 3.0337e-01,
          9.2200e-01, 2.1884e-01]],

        [[6.7311e-01, 8.3811e-01, 9.0856e-01,  ..., 3.4986e-02,
          6.2916e-01, 4.3828e-01],
         [7.8609e-01, 3.4771e-02, 1.9450e-01,  ..., 8.6038e-02,
          6.1469e-02, 9.2251e-02],
         [1.9180e-02, 3.9999e-01, 9.0191e-01,  ..., 3.8970e-01,
          5.6312e-01, 9.5934e-01],
         ...,
         [7.6972e-01, 6.7038e-01, 6.5055e-01,  ..., 4.2093e-01,
          1.208

In [112]:
torch.min(torch.mean(F.binary_cross_entropy_with_logits(torch.tensor(broadcast_preds).type(torch.DoubleTensor), torch.tensor(perm_targets).type(torch.float), reduction = 'none'), dim = (1,2,3)))

tensor(0.7339)

In [None]:
def calc_loss(pred, target, metrics, bce_weight=0.5):
    bce = F.binary_cross_entropy_with_logits(pred, target)

    pred = F.sigmoid(pred)
    dice = dice_loss(pred, target)

    loss = bce * bce_weight + dice * (1 - bce_weight)

    metrics['bce'] += bce.data.cpu().numpy() * target.size(0)
    metrics['dice'] += dice.data.cpu().numpy() * target.size(0)
    metrics['loss'] += loss.data.cpu().numpy() * target.size(0)

In [117]:
smooth = 1
intersection = (torch.tensor(broadcast_preds).type(torch.DoubleTensor)*torch.tensor(perm_targets).type(torch.float)).sum(dim=2).sum(dim=2)
loss = (1 - ((2. * intersection + smooth) / (pred.sum(dim=2).sum(dim=2) + target.sum(dim=2).sum(dim=2) + smooth)))

TypeError: _sum() got an unexpected keyword argument 'dim'

In [None]:
def dice_loss(pred, target, smooth = 1.):
    pred = pred.contiguous()
    target = target.contiguous()    

    intersection = (pred * target).sum(dim=2).sum(dim=2)
    
    loss = (1 - ((2. * intersection + smooth) / (pred.sum(dim=2).sum(dim=2) + target.sum(dim=2).sum(dim=2) + smooth)))
    
    return loss.mean()