In [3]:
import wandb
from src.models.TAE import TAE
from src.datasets.polynomial import PolynomialDataset
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn.functional as F
import torch.nn as nn
from sklearn.cluster import AgglomerativeClustering

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
run = wandb.init(project='DTC', name='test')

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mtristanbester1[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [5]:
model_artifact = run.use_artifact('tristanbester1/DTC/TAE-3qwd8j4j:v999', type='model')
model_artifact_dir = model_artifact.download()

In [6]:
model = TAE(
        input_dim=1,
        seq_len=100,
        cnn_kernel=10,
        cnn_stride=3,
        mp_kernel=10,
        mp_stride=3,
        lstm_hidden_dim=8,
        upsample_scale=2,
        deconv_kernel=10,
        deconv_stride=6,
    )

In [7]:
model.load_state_dict(torch.load('/Users/tristan/Documents/CS/Research/DTC/artifacts/TAE-3qwd8j4j:v999/model.pt', map_location=torch.device('cpu')))    

<All keys matched successfully>

In [8]:
dataset = PolynomialDataset(
    run,
    "tristanbester1/DTC/polynomial_dataset_X:v0",
    "tristanbester1/DTC/polynomial_dataset_Y:v0",
)

In [9]:
data_loader = DataLoader(dataset, 1)

In [10]:
preds = []

for x, _ in data_loader:
    pred = model(x)
    preds.append(pred.detach())
preds = torch.cat(preds)

In [11]:
def euclidean_distance_matrix(x):
    '''(batch_size, n_time_steps, n_dim)'''
    a = x.squeeze(-1).repeat(x.shape[0], 1, 1)
    b = x.repeat(1,1,x.shape[0]).permute(0, 2, 1)
    return torch.sum((a-b) ** 2, dim=2)

In [12]:
x = euclidean_distance_matrix(preds)

In [13]:
x.shape

torch.Size([300, 300])

In [14]:
preds.shape

torch.Size([300, 100, 1])

In [15]:
cluster_algo = AgglomerativeClustering(
    n_clusters=3,
    affinity='precomputed',
    linkage='complete',
)

In [16]:
assignments = cluster_algo.fit_predict(x)

In [17]:
centroids = []

for i in np.unique(assignments):
    centroid = preds[assignments == i].mean(dim = 1).unsqueeze(0)
    centroids.append(centroid)

centroids = torch.cat(centroids)    

In [18]:
centroids.shape

torch.Size([3, 100, 1])

In [84]:
def euclidean_distance(x, y):
    '''
    Return (x.shape[0], y.shape[0]) matrix where each element is d(x_i, y_i) 
    where x_i is the i-th time series in x => x_i = x[i].
    '''
    a = x.repeat(1,1,y.shape[0]).permute(0,2,1)
    b = y.repeat(x.shape[0],1,1).reshape(a.shape)
    return torch.sqrt(torch.sum((a - b)**2, dim=2))

def complexity_estimate(x):
    x_back_shift = x[:, :-1]
    x_forward_shift = x[:, 1:]
    return torch.sqrt(torch.sum((x_forward_shift - x_back_shift)**2))

def complexitity_factor(x,y):
    ce = torch.tensor([complexity_estimate(x), complexity_estimate(y)])
    return torch.max(ce) / torch.min(ce)

def complexity_invariant_similarity(x, y):
    ed = euclidean_distance(x, y)
    cf = complexitity_factor(x, y)
    return ed * cf

def correlation_based_similarity(x, y):
    t = torch.vstack(
        (
            x.squeeze(2),
            y.squeeze(2)
        )
    )
    p = torch.corrcoef(t)[0,1]
    return torch.sqrt(2 * (1 - p))

In [85]:
x.squeeze(2)

tensor([[ 0., 10.,  3.,  2.]])

In [86]:
y.squeeze(2)

tensor([[11.,  1., 21.,  3.]])

In [87]:
t = torch.vstack((x.squeeze(2), y.squeeze(2)))
t

tensor([[ 0., 10.,  3.,  2.],
        [11.,  1., 21.,  3.]])

In [88]:
p = torch.corrcoef(t)[0, 1]

In [89]:
torch.sqrt(2 * (1 - p))

tensor(1.7158)

In [90]:
correlation_based_similarity(x, y)

tensor(1.7158)

In [58]:
torch.corrcoef(torch.cat((x.flatten(), y.flatten())))

tensor(1.)

In [69]:
x = torch.tensor([
    [[0.0],[10],[3], [2]],
])
y = torch.tensor([
    [[11.0],[1],[21], [3]],
])

In [27]:
euclidean_distance(x, y)

tensor([[22.9565]])

In [187]:
def euclidean_distance(x, y):
    '''
    Return (x.shape[0], y.shape[0]) matrix where each element is d(x_i, y_i) 
    where x_i is the i-th time series in x => x_i = x[i].
    '''
    a = x.repeat(1,1,y.shape[0]).permute(0,2,1)
    b = y.repeat(x.shape[0],1,1).reshape(a.shape)
    return torch.sum((a - b)**2, dim=2)

def students_t_distribution_kernel(x, alpha):
    num = torch.pow((1 + x/alpha), -(alpha+1)/2)
    denom = num.sum(dim=1).reshape(-1,1).repeat(1, 2)
    return num / denom

def target_distribution(Q):
    F = Q.sum(dim=0)
    num = (Q**2) / F
    denom = num.sum(dim=1).reshape(-1,1).repeat(1,2)
    return num / denom

In [188]:
d = euclidean_distance(x, centroids).to(torch.float)

In [189]:
d

tensor([[0., 3.],
        [3., 0.]])

In [190]:
alpha=3

In [191]:

num

tensor([[0.4784, 0.0150],
        [0.0299, 0.2404],
        [0.0142, 0.2792],
        [0.0299, 0.2404]])

In [192]:
Q = students_t_distribution_kernel(d, 3)

In [193]:
F = Q.sum(dim=0)
F

tensor([1., 1.])

In [194]:
Q ** 2

tensor([[0.6400, 0.0400],
        [0.0400, 0.6400]])

In [195]:
num = (Q**2) / F
num


tensor([[0.6400, 0.0400],
        [0.0400, 0.6400]])

In [196]:
denom = num.sum(dim=1).reshape(-1,1).repeat(1,2)
denom

tensor([[0.6800, 0.6800],
        [0.6800, 0.6800]])

In [197]:
num / denom

tensor([[0.9412, 0.0588],
        [0.0588, 0.9412]])

In [198]:
P = target_distribution(Q)

In [199]:
P

tensor([[0.9412, 0.0588],
        [0.0588, 0.9412]])

In [200]:
log_P = torch.log(P)
log_Q = torch.log(Q)

In [201]:
loss = nn.KLDivLoss(log_target=True, reduction='batchmean')

In [202]:
loss(log_Q, log_P)

tensor(0.0810)

In [203]:
P

tensor([[0.9412, 0.0588],
        [0.0588, 0.9412]])

In [204]:
Q

tensor([[0.8000, 0.2000],
        [0.2000, 0.8000]])

In [205]:
log_P

tensor([[-0.0606, -2.8332],
        [-2.8332, -0.0606]])

In [206]:
log_Q

tensor([[-0.2231, -1.6094],
        [-1.6094, -0.2231]])

In [95]:
centroids.shape

torch.Size([2, 3, 1])

In [185]:
x = torch.tensor([
    [[0],[0],[0]],
    [[1],[1],[1]],
])

In [109]:
x = torch.tensor([
    [[0],[0],[0]],
    [[1],[1],[1]],
    [[2],[2],[2]],
    [[3],[3],[3]],
])

In [186]:
centroids = torch.tensor([
    [[0],[0],[0]],
    [[1],[1],[1]],
])

In [69]:
n_centroids = 2
batch_size = 4

In [70]:
a = x.repeat(1,1,n_centroids).permute(0,2,1)
a

tensor([[[0, 0, 0],
         [0, 0, 0]],

        [[1, 1, 1],
         [1, 1, 1]],

        [[2, 2, 2],
         [2, 2, 2]],

        [[3, 3, 3],
         [3, 3, 3]]])

In [92]:
b = centroids.repeat(batch_size,1,1).reshape(a.shape)
b

tensor([[[0, 0, 0],
         [1, 1, 1]],

        [[0, 0, 0],
         [1, 1, 1]],

        [[0, 0, 0],
         [1, 1, 1]],

        [[0, 0, 0],
         [1, 1, 1]]])

In [93]:
a.shape

torch.Size([4, 2, 3])

In [94]:
b.shape

torch.Size([4, 2, 3])

In [133]:
x.shape

torch.Size([300, 100, 1])

In [17]:
x = torch.tensor([
    [[0],[0],[0]],
    [[1],[1],[1]],
    [[2],[2],[2]],
    [[3],[3],[3]],
])
batch_size = 4
n_time_steps = 3
n_dims = 1

In [18]:
x.shape

torch.Size([4, 3, 1])

In [19]:
x.squeeze(-1)

tensor([[0, 0, 0],
        [1, 1, 1],
        [2, 2, 2],
        [3, 3, 3]])

In [20]:
a = x.squeeze(-1).repeat(batch_size,1,1)

In [21]:
a

tensor([[[0, 0, 0],
         [1, 1, 1],
         [2, 2, 2],
         [3, 3, 3]],

        [[0, 0, 0],
         [1, 1, 1],
         [2, 2, 2],
         [3, 3, 3]],

        [[0, 0, 0],
         [1, 1, 1],
         [2, 2, 2],
         [3, 3, 3]],

        [[0, 0, 0],
         [1, 1, 1],
         [2, 2, 2],
         [3, 3, 3]]])

In [22]:
a.shape

torch.Size([4, 4, 3])

In [28]:
c = x.repeat(1,1,batch_size).permute(0, 2, 1)

In [29]:
c.permute(0, 2, 1)

tensor([[[0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0]],

        [[1, 1, 1, 1],
         [1, 1, 1, 1],
         [1, 1, 1, 1]],

        [[2, 2, 2, 2],
         [2, 2, 2, 2],
         [2, 2, 2, 2]],

        [[3, 3, 3, 3],
         [3, 3, 3, 3],
         [3, 3, 3, 3]]])

In [179]:
b = x.repeat(1,1,batch_size).squeeze(-1).reshape(batch_size,batch_size,n_time_steps)

In [180]:
a

tensor([[[0, 0, 0],
         [1, 1, 1],
         [2, 2, 2],
         [3, 3, 3]],

        [[0, 0, 0],
         [1, 1, 1],
         [2, 2, 2],
         [3, 3, 3]],

        [[0, 0, 0],
         [1, 1, 1],
         [2, 2, 2],
         [3, 3, 3]],

        [[0, 0, 0],
         [1, 1, 1],
         [2, 2, 2],
         [3, 3, 3]]])

In [181]:
b

tensor([[[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],

        [[1, 1, 1],
         [1, 1, 1],
         [1, 1, 1],
         [1, 1, 1]],

        [[2, 2, 2],
         [2, 2, 2],
         [2, 2, 2],
         [2, 2, 2]],

        [[3, 3, 3],
         [3, 3, 3],
         [3, 3, 3],
         [3, 3, 3]]])

In [30]:
((a - c) ** 2).sum(dim=2)

tensor([[ 0,  3, 12, 27],
        [ 3,  0,  3, 12],
        [12,  3,  0,  3],
        [27, 12,  3,  0]])

In [None]:
x

In [29]:
X = preds

In [45]:
def euclidean(x, y):
    return torch.sum((x-y) ** 2)

In [81]:
x = torch.tensor([
    [[0,0,0]],
    [[1,1,1]],
    [[2,2,2]],
])

In [98]:
x.shape

torch.Size([3, 1, 3])

In [108]:
x_1 = x.repeat(1, 3, 1).reshape(9, 1, 3)

In [109]:
x_2 = x.repeat(3, 1, 1)

In [113]:
torch.sum((x_1 - x_2) ** 2, dim=2).reshape(3,3)

tensor([[ 0,  3, 12],
        [ 3,  0,  3],
        [12,  3,  0]])

In [186]:
print(torch.sum((x[0] - x[0]) ** 2))
print(torch.sum((x[0] - x[1]) ** 2))
print(torch.sum((x[0] - x[2]) ** 2))
print(torch.sum((x[0] - x[3]) ** 2))

tensor(0)
tensor(3)
tensor(12)
tensor(27)


In [83]:
t_1 = x[0]

In [90]:
torch.sum((t_1.expand(3, 1, 3) - x) ** 2, dim=2)

tensor([[ 0],
        [ 3],
        [12]])

In [82]:
x - x

tensor([[[0, 0, 0]],

        [[0, 0, 0]],

        [[0, 0, 0]]])

In [74]:
z = x.expand((x.shape[0], x.shape[0], 3))

In [75]:
mse = torch.sqrt(torch.sum((z - x.unsqueeze(1)) ** 2, dim=2))

In [76]:
mse

tensor([[[0.0000, 0.0000, 0.0000],
         [1.4142, 1.4142, 1.4142]],

        [[1.4142, 1.4142, 1.4142],
         [0.0000, 0.0000, 0.0000]]])

In [77]:
torch.transpose(mse, 0, 1)

tensor([[[0.0000, 0.0000, 0.0000],
         [1.4142, 1.4142, 1.4142]],

        [[1.4142, 1.4142, 1.4142],
         [0.0000, 0.0000, 0.0000]]])

In [33]:
cluster_algo = AgglomerativeClustering(
    n_clusters=3,
    affinity='euclidean',
    linkage='complete',
)

In [34]:
cluster_algo.fit_predict(X)

ValueError: Found array with dim 3. AgglomerativeClustering expected <= 2.