In [1]:
import torch
import numpy as np

from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
DEVICE

device(type='cuda', index=0)

In [3]:
class GG:
    def __init__(self) -> None:
        pass

    def torch(self, X: np.ndarray, p: float = 2, progress = False):
        X = torch.Tensor(X).to(DEVICE)
        n = X.shape[0]
        F = torch.cdist(X, X, p = p)**p
        F.fill_diagonal_(float('inf'))

        adj = torch.zeros((n,n), dtype=torch.bool).to(DEVICE)
        if progress:
          iterador = tqdm(range(n-1))
        else:
          iterador = range(n-1)
        for i in iterador:
            A = F[i]+F[i+1:]
            idx_min = torch.argmin(A, axis=1)
            a = A[torch.arange(A.shape[0]), idx_min] - F[i, i+1:]
            adj[i, i+1:] = torch.where(a > 0, 1, 0)
        adj = adj + adj.T
        return adj.cpu()

    def divorciado(self, X_train: np.ndarray, X_test: np.ndarray, p: float = 2, progress = False):
        X_train = torch.Tensor(X_train).to(DEVICE)
        X_test = torch.Tensor(X_test).to(DEVICE)
        n = X_train.shape[0]
        N = X_test.shape[0]
        F = torch.cdist(X_train, X_train, p = p)**p
        F.fill_diagonal_(float('inf'))
        Ft = torch.cdist(X_test, X_train, p = p)**p

        adj = torch.zeros((N, n), dtype=torch.bool).to(DEVICE)
        if progress:
          iterador = tqdm(range(N))
        else:
          iterador = range(N)
        for i in iterador:
            A = Ft[i] + F
            idx_min = torch.argmin(A, axis=1)
            a = A[torch.arange(A.shape[0]), idx_min] - Ft[i]
            adj[i, :] = torch.where(a > 0, 1, 0)
        return adj.cpu()

In [4]:
H_train = torch.load('data/H_train.pt')
H_test = torch.load('data/H_test.pt')

In [6]:
# p = 16
# Ft = torch.cdist(H_test.to(DEVICE), H_train.to(DEVICE), p = p)**p
# torch.sum(torch.isinf(Ft)) / 50000 / 10000, torch.sum(Ft == 0) / 50000 / 10000

(tensor(0., device='cuda:0'), tensor(0., device='cuda:0'))

In [5]:
ggclass = GG()
p = 128

tol = 0.01
eta = 0.5
K = int(np.log(tol) / np.log(1 - eta**2))

N = H_train.shape[0]
btsz = int(N * eta)

In [6]:
idx = np.arange(N)
adj = torch.ones((N, N), dtype=torch.bool)
for epoch in (range(K)):
    print(epoch)
    np.random.shuffle(idx)
    for b in range(0, N, btsz):
        idx_batch = idx[b:min(b+btsz, N)]
        X_batch = H_train[idx_batch, :]
        adj[np.ix_(idx_batch, idx_batch)] *= ggclass.torch(X_batch / 4, p = p, progress = True)

0


100%|██████████| 24999/24999 [02:36<00:00, 159.35it/s] 
100%|██████████| 24999/24999 [02:36<00:00, 159.32it/s] 


1


100%|██████████| 24999/24999 [02:36<00:00, 159.33it/s] 
100%|██████████| 24999/24999 [02:36<00:00, 159.38it/s] 


2


100%|██████████| 24999/24999 [02:36<00:00, 159.41it/s] 
100%|██████████| 24999/24999 [02:36<00:00, 159.38it/s] 


3


100%|██████████| 24999/24999 [02:36<00:00, 159.38it/s] 
100%|██████████| 24999/24999 [02:36<00:00, 159.40it/s] 


4


100%|██████████| 24999/24999 [02:36<00:00, 159.42it/s] 
100%|██████████| 24999/24999 [02:36<00:00, 159.38it/s] 


5


100%|██████████| 24999/24999 [02:36<00:00, 159.39it/s] 
100%|██████████| 24999/24999 [02:36<00:00, 159.39it/s] 


6


100%|██████████| 24999/24999 [02:36<00:00, 159.42it/s] 
100%|██████████| 24999/24999 [02:36<00:00, 159.43it/s] 


7


100%|██████████| 24999/24999 [02:36<00:00, 159.37it/s] 
100%|██████████| 24999/24999 [02:36<00:00, 159.39it/s] 


8


100%|██████████| 24999/24999 [02:36<00:00, 159.42it/s] 
100%|██████████| 24999/24999 [02:36<00:00, 159.36it/s] 


9


100%|██████████| 24999/24999 [02:36<00:00, 159.39it/s] 
100%|██████████| 24999/24999 [02:36<00:00, 159.41it/s] 


10


100%|██████████| 24999/24999 [02:37<00:00, 158.99it/s] 
100%|██████████| 24999/24999 [02:36<00:00, 159.40it/s] 


11


100%|██████████| 24999/24999 [02:36<00:00, 159.33it/s] 
100%|██████████| 24999/24999 [02:36<00:00, 159.40it/s] 


12


100%|██████████| 24999/24999 [02:36<00:00, 159.38it/s] 
100%|██████████| 24999/24999 [02:36<00:00, 159.42it/s] 


13


100%|██████████| 24999/24999 [02:36<00:00, 159.38it/s] 
100%|██████████| 24999/24999 [02:36<00:00, 159.38it/s] 


14


100%|██████████| 24999/24999 [02:36<00:00, 159.41it/s] 
100%|██████████| 24999/24999 [02:36<00:00, 159.38it/s] 


15


100%|██████████| 24999/24999 [02:36<00:00, 159.41it/s] 
100%|██████████| 24999/24999 [02:36<00:00, 159.38it/s] 


In [8]:
torch.save(adj, 'data/gg_train_bootstrap.pt')

In [5]:
ggclass = GG()
p = 5

tol = 0.01
eta = 0.5
K = int(np.log(tol) / np.log(1 - eta)) + 1

N = H_train.shape[0]
n = H_test.shape[0]
btsz = int(N * eta)
K

7

In [6]:
idx = np.arange(N)
adjb = torch.ones((n, N), dtype=torch.bool)
for epoch in (range(K)):
    print(epoch)
    np.random.shuffle(idx)
    for b in range(0, N, btsz):
        idx_batch = idx[b:min(b+btsz, N)]
        X_batch = H_train[idx_batch, :]
        # adjb[:, idx_batch] *= ggclass.divorciado(X_batch / 4, H_test / 4, p = p, progress = True)
        adjb[:, idx_batch] *= ggclass.divorciado(X_batch, H_test, p = p, progress = True)

0


100%|██████████| 10000/10000 [02:03<00:00, 80.86it/s]
100%|██████████| 10000/10000 [02:03<00:00, 80.86it/s]


1


100%|██████████| 10000/10000 [02:03<00:00, 80.85it/s]
100%|██████████| 10000/10000 [02:03<00:00, 80.85it/s]


2


100%|██████████| 10000/10000 [02:03<00:00, 80.86it/s]
100%|██████████| 10000/10000 [02:03<00:00, 80.88it/s]


3


100%|██████████| 10000/10000 [02:03<00:00, 80.86it/s]
100%|██████████| 10000/10000 [02:03<00:00, 80.86it/s]


4


100%|██████████| 10000/10000 [02:03<00:00, 80.86it/s]
100%|██████████| 10000/10000 [02:03<00:00, 80.86it/s]


5


100%|██████████| 10000/10000 [02:03<00:00, 80.86it/s]
100%|██████████| 10000/10000 [02:03<00:00, 80.86it/s]


6


100%|██████████| 10000/10000 [02:03<00:00, 80.86it/s]
100%|██████████| 10000/10000 [02:03<00:00, 80.86it/s]


In [7]:
torch.save(adjb, 'data/gg_test_bootstrap_05.pt')

In [29]:
# X = torch.Tensor(H_train[:1000, :]).to(DEVICE)
# X.shape

torch.Size([1000, 512])

In [42]:
# F = torch.cdist(X / 4, X / 4, p = p)**p

In [43]:
# (F == 0).sum(), torch.isinf(F).sum() / 100 / 100

(tensor(1012, device='cuda:0'), tensor(0., device='cuda:0'))

In [44]:
# F = torch.cdist(X / 4, torch.Tensor(H_test[:1000, :]).to(DEVICE) / 4, p = p)**p

In [45]:
# (F == 0).sum(), torch.isinf(F).sum()

(tensor(31, device='cuda:0'), tensor(0, device='cuda:0'))