In [93]:
import toy_data.cov_shift as data_gen
import toy_data as td
import bokeh.io
import numpy as np
import bokeh.plotting as bp
import tensorflow as tf
bokeh.io.output_notebook()

In [2]:
# m_ = td.models.LinearBinary(2)
m_ = td.models.rotatedSine2D(phase=np.pi/2, frequency=1)
data = data_gen.Gaussian_Shift_2D_BinaryClassification(m_, tst_X_mean_shift=(-1.9, -1.0), noise_sd=0.2)
data_gen.visualize_2D_classification(data, classifyF=m_, fig_width=800)

### Using logistic regression for classification

In [4]:
tr = data.tr
tst = data.tst

x = tf.placeholder(tf.float32, [None, 2])
W = tf.Variable(tf.random_uniform(shape=[2, 1]), dtype=tf.float32)
b = tf.Variable(tf.random_uniform(shape=[1]), dtype=tf.float32)
logits = tf.matmul(x, W) + b
y = tf.nn.sigmoid(logits)
y_ = tf.placeholder(tf.float32, [None, 1])

loss1 = tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=y_)
cross_entropy = tf.reduce_mean(loss1)
# train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)
train_step = tf.train.RMSPropOptimizer(0.01).minimize(cross_entropy)

init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

for i in range(1000):
    sess.run(train_step, feed_dict={x: tr.X, y_: tr.y})
    
correct_prediction = tf.equal(y > 0.5, y_>0.5)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(sess.run(cross_entropy, feed_dict={x: tr.X, y_: tr.y}))
print(sess.run(accuracy, feed_dict={x: tst.X, y_: tst.y}))

def classifierF(X):
    return sess.run(y, feed_dict={x: X})

data_gen.visualize_2D_classification(data, classifyF=classifierF, fig_width=800)

0.298249
0.625


## Using kernel mean matching 

## 1. Finding reweightging ratio $\beta$

### Finding kernel $k(x_i^{tr}, x_j^{tr})$  and $\kappa_i = \frac{n_{tr}}{n_{te}}\sum_{j=1}^{n_{te}} k(x_i^{tr}, x_j^{te})$ using RBF

In [5]:
def rbf_dot(A, B, sigma=1):
    """
        A, B: sample × feature
    """
    A2 = np.sum(A**2, 1).reshape(1, -1).T
    B2 = np.sum(B**2, 1).reshape(1, -1)
    distance = A2 + B2 - 2*A.dot(B.T)
    return np.exp(-distance/(2*sigma**2))

In [8]:
n_tr = tr.X.shape[0]
n_tst = tst.X.shape[0]
# tr_X = add_offset(tr.X)
# tst_X = add_offset(tst.X)
K = rbf_dot(tr.X, tr.X)
K = (K + K.T)/2
kappa = np.sum(rbf_dot(tr.X, tst.X), axis=1)*n_tr/n_tst

def heat_map(data, height=None, width=None):
    n_y, n_x = data.shape
    print(data.shape)
    p = bp.figure(
        x_range=(0, n_x),
        y_range=(0, n_y),
    )
    if height:
        p.height=height
    if width:
        p.width=width
    p.image(image=[data], x=0, y=0, dw=[n_x], dh=[n_y], palette="Spectral11")
    bp.show(p)

def plot(X):
    p = bp.figure()
    x = np.ravel(X)
    p.scatter(range(x.size), x)
    bp.show(p)
    
heat_map(K)

(160, 160)


### Formulate the optimization problem

$$
\begin{align*}
    \newcommand{\argmin}{\mathop{\mathrm{argmin}}}
    &\argmin_{\beta} &\frac{1}{n_{tr}^2} \beta^\top K \beta
    - \frac{2}{n_{tr}^2} \kappa^\top \beta \\
    &\text{s.t.} &\beta_i \in [0, B] \\
    & &\lvert \sum_{i=1}^{n_{tr}} \beta_i - n_{tr} \rvert \leq \epsilon
\end{align*}
$$

In [9]:
# B = 500
B = 10
epsilon = B/np.sqrt(n_tr)
# epsilon = np.sqrt(n_tr) - 1/np.sqrt(n_tr)

import cvxopt as opt
    
# X=solvers.qp(P, q, G, h, A, b) attempts to solve the quadratic programming problem:
#              min 0.5*x'*P*x + q'*x   
# subject to:  G*x <= h 
#              A*x = b

_P = opt.matrix(K)
_q = -opt.matrix(kappa*2)
constraint_coefs = (
    -np.eye(n_tr),
    np.eye(n_tr),
    -np.ones(n_tr),
    np.ones(n_tr)
)
_G = opt.matrix(np.vstack(constraint_coefs))
constraints = (
    np.zeros(n_tr),
    np.ones(n_tr)*B,
    np.array(n_tr*(1 - epsilon)).reshape(1),
    np.array(n_tr*(1 + epsilon)).reshape(1)
)
_h = opt.matrix(np.concatenate(constraints))
solution = opt.solvers.qp(_P, _q, _G, _h)
beta = np.array(solution['x'])
plot(beta)
data_gen.visualize_2D_classification_with_tr_weights(data, beta)

     pcost       dcost       gap    pres   dres
 0: -1.8188e+04 -2.8251e+05  5e+05  3e-01  5e-16
 1: -1.7825e+04 -1.9563e+05  3e+05  1e-01  9e-16
 2: -1.6062e+04 -5.0399e+04  3e+04  6e-16  8e-16
 3: -1.8343e+04 -2.4628e+04  6e+03  6e-16  1e-15
 4: -1.9023e+04 -2.1502e+04  2e+03  4e-16  9e-16
 5: -1.9310e+04 -2.0021e+04  7e+02  7e-16  5e-16
 6: -1.9438e+04 -1.9579e+04  1e+02  2e-15  2e-15
 7: -1.9468e+04 -1.9512e+04  4e+01  3e-16  3e-15
 8: -1.9480e+04 -1.9492e+04  1e+01  1e-15  2e-14
 9: -1.9484e+04 -1.9486e+04  2e+00  6e-16  2e-14
10: -1.9485e+04 -1.9485e+04  6e-01  1e-15  5e-14
11: -1.9485e+04 -1.9485e+04  1e-01  3e-16  8e-14
12: -1.9485e+04 -1.9485e+04  7e-03  1e-15  2e-13
Optimal solution found.


In [None]:
# KMM 

## 2. Run logistic regression with weights

$$
\begin{align*}
    &\text{minimize}_{\theta} \sum_{i=1}^{n_{tr}} - \beta_i \log p(y_i^{tr}|x_i^{tr}, \theta)\\
    =&\text{minimize}_{\theta} \sum_{i=1}^{n_{tr}} - \beta_i \log I(y_i = \hat{y_i})\\
    =&\text{minimize}_{\theta} \sum_{i=1}^{n_{tr}} - \beta_i (\log \frac{1}{1+e^{-\theta X_i}} I(y_i = 1)
    + \log (1 - \frac{1}{1+e^{-\theta X_i}}) I(y_i = 0))\\ 
\end{align*}
$$

In [11]:
x = tf.placeholder(tf.float32, [None, 2])
W = tf.Variable(tf.random_uniform(shape=[2, 1]), dtype=tf.float32)
b = tf.Variable(tf.random_uniform(shape=[1]), dtype=tf.float32)
logits = tf.matmul(x, W) + b
y = tf.nn.sigmoid(logits)
y_ = tf.placeholder(tf.float32, [None, 1])
sample_weights = tf.placeholder(tf.float32, [None, 1])

loss1 = tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=y_)
cross_entropy = tf.reduce_mean(sample_weights*loss1)
# train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)
train_step = tf.train.RMSPropOptimizer(0.01).minimize(cross_entropy)

init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

for i in range(1000):
    sess.run(train_step, feed_dict={x: tr.X, y_: tr.y, sample_weights: beta})
    
correct_prediction = tf.equal(y > 0.5, y_>0.5)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(sess.run(cross_entropy, feed_dict={x: tr.X, y_: tr.y, sample_weights: beta}))
print(sess.run(accuracy, feed_dict={x: tst.X, y_: tst.y, sample_weights: beta}))

def classifierF(X):
    return sess.run(y, feed_dict={x: X, sample_weights: beta})

data_gen.visualize_2D_classification_with_tr_weights(data, beta, classifyF=classifierF, fig_width=800)

0.296874
0.9


### RBA

#### Density estimation using logistic regression
\begin{align}
    \frac{P(X_{\text{tr}})}{P(X_{\text{tst}})} = \frac{P(X\mid s=\text{tr})}{P(X\mid s=\text{tst})}
    &= \frac{\frac{P(s=\text{tr}\mid X)P(X)}{P(s=\text{tr})}}{\frac{P(s=\text{tst}\mid X)P(X)}{P(s=\text{tst})}} \\
    &= \frac{P(s=\text{tr}\mid X)P(s=\text{tst})}{P(s=\text{tst}\mid X)P(s=\text{tr})} \\
    &= \frac{P(s=\text{tst})}{P(s=\text{tr})}\frac{1-P(s=\text{tst}\mid X)}{P(s=\text{tst}\mid X)} \\
    &= \frac{P(s=\text{tst})}{P(s=\text{tr})}(\frac{1}{P(s=\text{tst}\mid X)} - 1)
\end{align}

Hence we can find $P(s=\text{tst}\mid X)$ first and then calculate the ratio from it
\begin{align}
\text{minimize}_{w} -\log p(s=\text{tst}\mid x; w),
\end{align}
which is logistic regression on $x, s$

We just reuse the previous LR model here:

In [83]:
n_tr = tr.y.shape[0]
n_tst = tst.y.shape[0]

x = tf.placeholder(tf.float32, [None, 2])
W = tf.Variable(tf.random_uniform(shape=[2, 1]), dtype=tf.float32)
b = tf.Variable(tf.random_uniform(shape=[1]), dtype=tf.float32)
logits = tf.matmul(x, W) + b
y = tf.nn.sigmoid(logits)
y_ = tf.placeholder(tf.float32, [None, 1])

cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=y_)
# loss = tf.reduce_mean(cross_entropy + tf.nn.l2_loss(W))
loss = tf.reduce_mean(cross_entropy)
# train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)
train_step = tf.train.RMSPropOptimizer(0.01).minimize(cross_entropy)

init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

s = np.concatenate((np.zeros(n_tr), np.ones(n_tst))).reshape(-1, 1)

for i in range(1000):
    sess.run(train_step, feed_dict={x: np.vstack((tr.X, tst.X)), y_: s})
    
correct_prediction = tf.equal(y > 0.5, y_>0.5)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(sess.run(loss, feed_dict={x: np.vstack((tr.X, tst.X)), y_: s}))
print(sess.run(accuracy, feed_dict={x: np.vstack((tr.X, tst.X)), y_: s}))

def logi_classify_F(X):
    return sess.run(y, feed_dict={x: X})

data_gen.visualize_2D_classification(data, classifyF=logi_classify_F, fig_width=400)

0.363683
0.82


* The the ratio is: 
\begin{align}
    &\frac{P(s=\text{tst})}{P(s=\text{tr})}(\frac{1}{P(s=\text{tst}\mid X)} - 1) \\
    &=\frac{P(s=\text{tst})}{P(s=\text{tr})}(\frac{1}{\text{sigmoid}(-wX)} - 1) \\
    &=\frac{P(s=\text{tst})}{P(s=\text{tr})}e^{-wX}
\end{align}

In [97]:
def logi_reweight_F(X):
    _R = sess.run(n_tst/n_tr*(tf.exp(logits)), feed_dict={x: X})
    # _R = sess.run(n_tst/n_tr*(tf.exp(-logits)), feed_dict={x: X})
    return _R
z = np.array([[-2,2]]).T.T
logi_reweight_F(z)

array([[ 0.06622934]], dtype=float32)

In [98]:
p = bp.figure()
p.circle(range(0, n_tst), np.ravel(1/logi_reweight_F(tst.X)), color='red')
p.circle(range(0, n_tr), np.ravel(1/logi_reweight_F(tr.X)), color='blue')
bp.show(p)

In [123]:
min_eps = 0.0001
max_iter = 50000
lr = 0.001
l_theta = 0.1

In [137]:
sigmoid = lambda x: .5 * (1 - np.tanh(-.5 * x))

def moments_1st(x, y_):
    _moments = (np.ones(x.shape[0]),
               x[:, 0],
               x[:, 1]
              )
    _moments = tuple(y_*m for m in _moments)
    return np.vstack(_moments)

def moments_2nd(x, y_):
    _moments = (np.ones(x.shape[0]),
               x[:, 0]**2,
               x[:, 1]**2,
               x[:, 0] * x[:, 1],
               x[:, 0],
               x[:, 1]
              )
    _moments = tuple(y_*m for m in _moments)
    return np.vstack(_moments)

def moments_3rd(x, y_):
    _moments = (np.ones(x.shape[0]),
               x[:, 0]**3,
               x[:, 1]**3,
               x[:, 0]**2 * x[:, 1],
               x[:, 1]**2 * x[:, 0],
               x[:, 0]**2,
               x[:, 1]**2,
               x[:, 0] * x[:, 1],
               x[:, 0],
               x[:, 1]
              )
    _moments = tuple(y_*m for m in _moments)
    return np.vstack(_moments)

theta = np.zeros((6, 1))
get_moments = moments_2nd

m_t = get_moments(tr.X, True)
m_f = get_moments(tr.X, False)
m_c = m_t.copy()
m_c[:, np.ravel(tr.y) == False] = m_f[:, np.ravel(tr.y) == False]
beta_log = 1/logi_reweight_F(tr.X)

_iter = 0
while True:
    _iter += 1
    p_t = 1 - sigmoid(-beta_log*(m_t - m_f).T.dot(theta))
    dL = m_c.mean(axis=1, keepdims=True) - (m_t.dot(p_t) + m_f.dot(1 - p_t))/n_tr
    theta += lr * (dL - l_theta*theta)
    eps = np.linalg.norm(dL - l_theta*theta)
    if not (_iter % 1000): # and _iter % 1001 and _iter % 1002):
        print(eps, np.linalg.norm(theta))
        print(theta.T)
    if eps < min_eps or _iter >= max_iter: 
        break
        
def RBA_F(x):
    _m_t = get_moments(x, True)
    _m_f = get_moments(x, False)
    print(_m_f.sum())
    # logi_reweight_F
    return 1 - sigmoid(-1/logi_reweight_F(x)*(_m_t - _m_f).T.dot(theta))

0.10910306174 0.0980706867376
[[-0.0266266   0.01366963 -0.01347127  0.00386584 -0.05685578  0.07275318]]
0.0254611818443 0.1313322879
[[-0.03771695  0.01571932 -0.0208172   0.01197291 -0.07543453  0.09649576]]
0.0160847257724 0.151292666253
[[-0.04491538  0.01718588 -0.02425117  0.0148527  -0.08554714  0.11157818]]
0.0111056611545 0.164531378957
[[-0.05046396  0.01811132 -0.02626205  0.01720524 -0.0923734   0.12115025]]
0.00793916305005 0.173807657615
[[-0.05457143  0.01872694 -0.02752242  0.01888659 -0.09716337  0.12777111]]
0.00604363208801 0.180552631075
[[-0.0576264   0.01889207 -0.02813049  0.02034905 -0.10068688  0.13258312]]
0.00498337519459 0.18577349185
[[-0.05998982  0.01857753 -0.02818712  0.02191816 -0.10354679  0.13627557]]
0.00419580553697 0.190052116855
[[-0.06188983  0.01813002 -0.0280549   0.02342626 -0.10599266  0.13924959]]
0.00355148820936 0.193643517026
[[-0.06345811  0.01769979 -0.02789003  0.02476032 -0.10808396  0.14171989]]
0.00301454994241 0.196686224644
[[-0

In [138]:
p = bp.figure()
p.circle(range(0, n_tr), np.ravel(RBA_F(tr.X)))
bp.show(p)

In [139]:
data_gen.visualize_2D_classification(data, classifyF=RBA_F, fig_width=600)