In [1]:
import toy_data.cov_shift as data_gen
import toy_data as td
import bokeh.io
bokeh.io.output_notebook()

In [2]:
m_sine = td.models.Sine_1D(frequency=3)
#data = data_gen.Gaussian_Shift_1D(m_sine, n_samples=200, tst_ratio=0.5, tr_X_mean=1,
#                                  tst_X_mean_shift=0.5, tr_X_sd=0.5, tst_X_sd=0.3,
#                                  noise_sd=0.3)
data = data_gen.Gaussian_Shift_1D(m_sine)
data_gen.visualize_1D_regression(data)

### Using a naïve least square regression

In [3]:
import numpy as np
import bokeh.plotting as bp

tr = data.tr
tst = data.tst
X_n = tr.X - np.mean(tr.X, 0)
y_n = tr.y - np.mean(tr.y, 0)
w = np.linalg.solve(X_n.T.dot(X_n), y_n.T.dot(X_n))
b = np.mean(tr.y, 0) - w*np.mean(tr.X, 0)

def regress_linear_1D(x):
    return np.ravel(w*x + b)

data_gen.visualize_1D_regression(data, regress_linear_1D)

## Using kernel mean matching 

## 1. Finding reweightging ratio $\beta$

### Finding kernel $k(x_i^{tr}, x_j^{tr})$  and $\kappa_i = \frac{n_{tr}}{n_{te}}\sum_{j=1}^{n_{te}} k(x_i^{tr}, x_j^{te})$ using RBF

In [4]:
def rbf_dot(A, B, sigma=1):
    """
        A, B: sample × feature
    """
    A2 = np.sum(A**2, 1).reshape(1, -1).T
    B2 = np.sum(B**2, 1).reshape(1, -1)
    distance = A2 + B2 - 2*A.dot(B.T)
    return np.exp(-distance/(2*sigma**2))

In [5]:
n_tr = tr.X.shape[0]
n_tst = tst.X.shape[0]
# tr_X = add_offset(tr.X)
# tst_X = add_offset(tst.X)
K = rbf_dot(tr.X, tr.X)
K = (K + K.T)/2
kappa = np.sum(rbf_dot(tr.X, tst.X), axis=1)*n_tr/n_tst

def heat_map(data, height=None, width=None):
    n_y, n_x = data.shape
    print(data.shape)
    p = bp.figure(
        x_range=(0, n_x),
        y_range=(0, n_y),
    )
    if height:
        p.height=height
    if width:
        p.width=width
    p.image(image=[data], dw=[n_x], dh=[n_y], palette="Spectral11")
    bp.show(p)

def plot(X):
    p = bp.figure()
    x = np.ravel(X)
    p.scatter(range(x.size), x)
    bp.show(p)
    
heat_map(K)

(160, 160)


### Formulate the optimization problem

$$
\begin{align*}
    \newcommand{\argmin}{\mathop{\mathrm{argmin}}}
    &\argmin_{\beta} &\frac{1}{n_{tr}^2} \beta^\top K \beta
    - \frac{2}{n_{tr}^2} \kappa^\top \beta \\
    &\text{s.t.} &\beta_i \in [0, B] \\
    & &\lvert \sum_{i=1}^{n_{tr}} \beta_i - n_{tr} \rvert \leq \epsilon
\end{align*}
$$

In [6]:
# B = 500
B = 10
epsilon = B/np.sqrt(n_tr)
# epsilon = np.sqrt(n_tr) - 1/np.sqrt(n_tr)

import cvxopt as opt
    
# X=solvers.qp(P, q, G, h, A, b) attempts to solve the quadratic programming problem:
#              min 0.5*x'*P*x + q'*x   
# subject to:  G*x <= h 
#              A*x = b

_P = opt.matrix(K)
_q = -opt.matrix(kappa*2)
constraint_coefs = (
    -np.eye(n_tr),
    np.eye(n_tr),
    -np.ones(n_tr),
    np.ones(n_tr)
)
_G = opt.matrix(np.vstack(constraint_coefs))
constraints = (
    np.zeros(n_tr),
    np.ones(n_tr)*B,
    np.array(n_tr*(1 - epsilon)).reshape(1),
    np.array(n_tr*(1 + epsilon)).reshape(1)
)
_h = opt.matrix(np.concatenate(constraints))
solution = opt.solvers.qp(_P, _q, _G, _h)
beta = np.array(solution['x'])
plot(beta)
data_gen.visualize_1D_regression_with_tr_weights(data, beta)

     pcost       dcost       gap    pres   dres
 0: -2.7130e+04 -3.0923e+05  6e+05  4e-01  8e-16
 1: -2.6102e+04 -1.7780e+05  2e+05  9e-02  7e-16
 2: -2.5838e+04 -3.9348e+04  1e+04  1e-04  4e-16
 3: -2.8679e+04 -3.2603e+04  4e+03  2e-05  4e-15
 4: -2.9278e+04 -3.1125e+04  2e+03  8e-06  2e-15
 5: -2.9543e+04 -3.0016e+04  5e+02  4e-07  1e-15
 6: -2.9625e+04 -2.9764e+04  1e+02  1e-07  1e-15
 7: -2.9642e+04 -2.9751e+04  1e+02  6e-08  1e-15
 8: -2.9675e+04 -2.9690e+04  1e+01  4e-09  1e-15
 9: -2.9678e+04 -2.9685e+04  7e+00  2e-09  3e-15
10: -2.9680e+04 -2.9683e+04  3e+00  3e-10  2e-15
11: -2.9681e+04 -2.9682e+04  3e-01  2e-11  1e-15
12: -2.9681e+04 -2.9681e+04  4e-03  3e-13  2e-15
Optimal solution found.


## 2. Run linear regression with weights

Let $\bar{\beta}$ be ```diag(```$\beta$ ```)```
$$
\alpha = (\lambda \bar{\beta}^{-1} + K)^{-1} y
$$
$$
(\lambda \bar{\beta}^{-1} + K)\alpha = y
$$
where $\lambda$ is the regularizer

In [7]:
l = 1
# add_offset = lambda X: np.hstack((X, np.ones((X.shape[0], 1))))
beta_inverse = np.diag(1/np.ravel(beta))
alpha_kmm = np.linalg.solve(l*beta_inverse + K, tr.y)

$$
\hat y^\prime = y^T (K + \lambda \beta)^{-1} \kappa 
$$
$$
\hat y^\prime = \alpha \kappa 
$$

In [8]:
def regress_1D_kmm(x):
    X = x.reshape(-1, 1)
    n_X = x.shape[0]
    kappa = rbf_dot(tr.X, X)
    return np.ravel(kappa.T.dot(alpha_kmm))

data_gen.visualize_1D_regression(data, regress_1D_kmm)

### Try kernel regresion without weights

In [9]:
l = 1
alpha = np.linalg.solve(l*np.eye(n_tr) + K, tr.y)

def regress_1D_kernel_ridge(x):
    X = x.reshape(-1, 1)
    n_X = x.shape[0]
    kappa = rbf_dot(tr.X, X)
    return np.ravel(kappa.T.dot(alpha))

data_gen.visualize_1D_regression(data, regress_1D_kernel_ridge)