In [1]:
import torch
import numpy as np

## Task1
The goal is to show how broadcasting can speed up computations, and that PyTorch
with a GPU can accelerate computations other than deep learning.

You have seen in the linear regression lecture an RBF kernel which is a matrix

$$
\phi\left(x_{i}, t_{j}\right)=\exp \left(-\frac{\|X[i, :]-T[j, :]\|^{2}}{\gamma}\right)
$$

Inside the exponential is the squared $\ell_2$ distance between $X[i, :]$ and $T[j, :]$;
this is the so-called RBF kernel. The shapes are:

$$X.size() = (N, D)$$
$$T.size() = (P, D)$$

Use pytorch to compute
$$k\left(x_{i}, z_{j}\right)=\sum_{d} \min \left(x_{i}^{(d)}, z_{j}^{(d)}\right)$$

the histogram intersection kernel (HIK) matrix.

- X are features from one dataset with dimensionality D and sample size N (`feats1` in the code below).
- Z are features from another dataset with dimensionality D and sample size L (`feats2` in the code below).

In [12]:
# Problem size: two sample sets that share one feature dimensionality.
numdata1, numdata2 = 2500, 500
dims = 30

# Generate random non-negative "histogram" features by squaring Gaussian draws.
feats1 = np.square(np.random.normal(size=(numdata1, dims)))
feats2 = np.square(np.random.normal(size=(numdata2, dims)))

# Shapes are reported as (rows, cols), i.e. (samples, dims).
print(feats1.shape, feats2.shape)

(2500, 30) (500, 30)


In [13]:
# Scale every row by its own sum so that each sample's features add up to 1.
feats1 = feats1 / feats1.sum(axis=1, keepdims=True)
feats2 = feats2 / feats2.sum(axis=1, keepdims=True)
print(feats1.shape, feats2.shape)

(2500, 30) (500, 30)


In [25]:
def forloopdists(feats1,feats2):
    """Histogram-intersection kernel computed with plain Python loops.

    Entry ``[i, j]`` of the result is ``sum_d min(feats1[i, d], feats2[j, d])``.
    This is the slow reference implementation: it visits every
    (row of feats1, row of feats2, dimension) triple one at a time.

    Args:
        feats1: 2-D array of shape ``(N, D)``.
        feats2: 2-D array of shape ``(L, D)``; must have the same ``D``.

    Returns:
        ``numpy.ndarray`` of shape ``(N, L)`` with the kernel values.
    """
    assert feats1.shape[1] == feats2.shape[1]
    n_rows, n_dims = feats1.shape
    n_cols, n_dims = feats2.shape

    kernel = np.zeros((n_rows, n_cols))
    for i, row_x in enumerate(feats1):
        for j, row_z in enumerate(feats2):
            # Accumulate the element-wise minima in dimension order.
            acc = 0
            for d in range(n_dims):
                acc += min(row_x[d], row_z[d])
            kernel[i, j] = acc
    return kernel

In [26]:
# Reference kernel matrix from the pure-Python nested-loop implementation.
k_forloop = forloopdists(feats1,feats2)

In [28]:
# Show the loop-based kernel matrix as the cell output.
k_forloop

array([[0.3933693 , 0.30999872, 0.36893095, ..., 0.3790115 , 0.45168505,
        0.3608769 ],
       [0.30651823, 0.28050431, 0.32435223, ..., 0.37838237, 0.23918952,
        0.2705609 ],
       [0.39620163, 0.29213952, 0.50180438, ..., 0.39440891, 0.34903443,
        0.4019443 ],
       ...,
       [0.49728528, 0.38309871, 0.44581552, ..., 0.45918649, 0.24001065,
        0.38618867],
       [0.4271406 , 0.30888384, 0.37961515, ..., 0.47824132, 0.31815212,
        0.2754624 ],
       [0.38110014, 0.44029108, 0.3280549 , ..., 0.41341018, 0.34257043,
        0.3868733 ]])

In [None]:
def pytorchdists(feats1,feats2,device):
    """Compute the histogram-intersection kernel with PyTorch broadcasting.

    Vectorised counterpart of ``forloopdists``: entry ``[i, j]`` of the result
    is ``sum_d min(feats1[i, d], feats2[j, d])``.

    Args:
        feats1: Array-like of shape ``(N, D)``.
        feats2: Array-like of shape ``(L, D)``; must have the same ``D``.
        device: Device the computation runs on, in any form ``torch`` accepts
            (e.g. ``"cpu"``, ``"cuda"`` or a ``torch.device``).

    Returns:
        ``numpy.ndarray`` of shape ``(N, L)`` holding the kernel matrix.

    Note:
        The broadcast intermediate has ``N * L * D`` elements, so memory use
        grows with the product of all three sizes.
    """
    x = torch.as_tensor(feats1, device=device)
    z = torch.as_tensor(feats2, device=device)
    # Same precondition as forloopdists; without it a size-1 feature axis
    # would silently broadcast instead of failing.
    assert x.shape[1] == z.shape[1]

    # (N, 1, D) against (1, L, D) broadcasts to (N, L, D); summing the last
    # axis collapses the feature dimension and leaves the (N, L) kernel.
    dist = torch.minimum(x[:, None, :], z[None, :, :]).sum(dim=-1)

    return dist.cpu().numpy()