In [1]:
import autograd.numpy as np
from autograd import grad, hessian
from autograd import elementwise_grad
from scipy.optimize import fmin_l_bfgs_b, fmin_bfgs, fmin_cg, fmin_ncg
import matplotlib.pyplot as plt
from scipy.linalg import cho_factor, cho_solve, cholesky
from sklearn.decomposition import FactorAnalysis, PCA
from sklearn.linear_model import LogisticRegression
from autograd.misc.optimizers import adam
import copy
import glob
import imageio
import skimage
from skimage import data, io, filters


In [2]:
img_arr = np.array([skimage.img_as_float(imageio.imread(file)) for file in glob.glob('yale/*png')])

In [4]:
img_arr_rescaled = np.array([skimage.img_as_float(skimage.transform.rescale(imageio.imread(file),1.0 / 4.0)) for file in glob.glob('yale/*png')])

In [5]:
img_arr_rescaled.shape

(130, 62, 50)

In [None]:
skimage.io.imshow(img_arr_rescaled[0])
skimage.io.show()
skimage.io.imshow(img_arr[0])
skimage.io.show()

In [6]:
img_arr.shape

(130, 250, 200)

In [None]:
x = np.reshape(img_arr, (img_arr.shape[0], img_arr.shape[1]*img_arr.shape[2]))

In [7]:
x = np.reshape(img_arr_rescaled, (img_arr_rescaled.shape[0], img_arr_rescaled.shape[1]*img_arr_rescaled.shape[2]))

In [8]:
y = np.zeros(x.shape[0])
for i in range(x.shape[0]):
    if any(s in glob.glob('yale/*png')[i] for s in ('sad', 'wink', 'surprised', 'sleepy', 'happy')):
        y[i] = 1
    

In [9]:
x.shape

(130, 3100)

In [25]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def objective_logit (params, x, y, latent_dim, lambda_e, opt): #check the order of arguments!
    N = x.shape[0]
    D = x.shape[1]
    f, bias_x, cov_noise, w = decode_parameters_fast(params, D, latent_dim, opt)
    L = f.shape[1]
        
    icn = (1.0 / cov_noise).reshape((-1, 1))
    xn = (x - bias_x).T
    Ax = icn * xn
    Au = icn * f
    C = np.eye(L) + np.dot(f.T, Au)
    temp1 = Ax - np.dot(Au, np.linalg.solve(C, np.dot(f.T, Ax)))
    sign, log_det_cov_x = np.linalg.slogdet(C)
    log_det_cov_x += np.sum(np.log(cov_noise))
    
    unnorm_log_pdf_x = np.einsum("nd,dn->n", x - bias_x, temp1)
    
    mean_z = np.einsum("dl,dn->nl", f, temp1)
    temp2 = np.einsum("l,nl->n", w[1:], mean_z) + w[0]
    log_bern_pdf_y = np.log(sigmoid(np.multiply((2*y-1),temp2)))
    log_prior = 0 
    lambda_r = 1
    #reg1 = lambda_r*np.sum(f**2)
    #reg2 = lambda_r*np.sum(w**2)
    #reg = reg1+reg2
    reg = 0
    ll = N*D*np.log(2*np.pi)/2 + log_det_cov_x*0.5*N + np.sum(unnorm_log_pdf_x)*0.5 
    ll = ll - lambda_e*np.sum(log_bern_pdf_y) - log_prior + reg
    return ll

def decode_parameters(params, D, latent_dim, opt):
    size_f = D*latent_dim
    f =  params[:size_f]
    #f = f*np.array([0,1]) + np.array([1,0])
    f =  f.reshape(D, latent_dim)
    bias_x = params[size_f:size_f+D]
    if (opt=="ppca"):
        var = params[size_f+D]
        cov_noise= np.diag(np.ones(D)*np.log(1+np.exp(var)))
        #cov_noise= np.diag(np.ones(D)*np.exp(var))
        w = params[size_f+D+1:]
    else:
        var = params[size_f+D:size_f+D*2]
        cov_noise= np.diag(np.log(1+np.exp(var)))
        w = params[size_f+D*2:]
    return f, bias_x, cov_noise, w

def transform(f, bias, cov_noise, x):
    cov_x = np.einsum("dl,ml->dm",f,f) + cov_noise
    temp = np.linalg.solve(cov_x, (x - bias).T)
    mean_z = np.einsum("dl,dn->nl", f, temp)
    return mean_z

def compute_ll(f,bias,cov_noise, x):
    N = x.shape[0]
    D = x.shape[1]
    cov_x = np.einsum("dl,ml->dm",f,f) + cov_noise
    sign, log_det_cov_x = np.linalg.slogdet(cov_x)
    temp1 = np.linalg.solve(cov_x, (x - bias).T)
    ll = N*D*np.log(2*np.pi)/2
    ll += log_det_cov_x*N/2
    ll += np.sum(np.einsum("nd,dn->n", x - bias, temp1))/2
    return -ll

def compute_pl_logit(f, bias_x, cov_noise, w, x, y):
    cov_x = np.einsum("dl,ml->dm",f,f) + cov_noise
    temp1 = np.linalg.solve(cov_x, (x - bias_x).T)
    mean_z = np.einsum("dl,dn->nl", f, temp1)
    temp2 = np.einsum("l,nl->n", w[1:], mean_z) + w[0]
    log_bern_pdf_y = np.log(sigmoid(np.multiply((2*y-1),temp2)))
    return np.sum(log_bern_pdf_y)

def decode_parameters_fast(params, D, latent_dim, opt):
    size_f = D*latent_dim
    f =  params[:size_f]
    #f = f*np.array([0,1]) + np.array([1,0])
    f =  f.reshape(D, latent_dim)
    bias_x = params[size_f:size_f+D]
    if (opt=="ppca"):
        var = params[size_f+D]
        cov_noise= np.ones(D)*np.log(1+np.exp(var))
        #cov_noise= np.diag(np.ones(D)*np.exp(var))
        w = params[size_f+D+1:]
    else:
        var = params[size_f+D:size_f+D*2]
        cov_noise = np.log(1+np.exp(var))
        w = params[size_f+D*2:]
    return f, bias_x, cov_noise, w

In [11]:
latent_dim = 10
x_dim = x.shape[1]      
size_f = latent_dim*x_dim

params_size_logit_fa = x_dim*latent_dim + x_dim + x_dim + latent_dim + 1
params_size_norm_fa = x_dim*latent_dim + x_dim + x_dim + latent_dim
params_size_logit_ppca = x_dim*latent_dim + x_dim + 1 + latent_dim + 1
params_size_norm_ppca = x_dim*latent_dim + x_dim + 1 + latent_dim 

In [12]:
opt = "ppca"
if (opt == "fa"):
    params_size = params_size_logit_fa
else:
    params_size = params_size_logit_ppca

In [13]:
transformer = PCA(latent_dim)
x_proj_pca = transformer.fit_transform(x)
clf = LogisticRegression().fit(x_proj_pca, y)
acc_score_pca = clf.score(x_proj_pca, y)



In [21]:
transformer_fa = FactorAnalysis(latent_dim)
x_proj_fa = transformer.fit_transform(x)
clf = LogisticRegression().fit(x_proj_fa, y)
acc_score_fa = clf.score(x_proj_fa, y)

AttributeError: 'FactorAnalysis' object has no attribute 'components_'

In [22]:
acc_score_fa

0.6923076923076923

In [30]:
acc_score_pca

0.6923076923076923

In [15]:
clf = LogisticRegression().fit(x, y)
acc_score_orig = clf.score(x, y)

In [16]:
acc_score_orig

1.0

In [None]:
f_sklearn = transformer.components_.T
cov_noise_sklearn = np.diag(np.ones(x_dim)*transformer.noise_variance_)
bias_sklearn = np.mean(x, axis = 0)
print "F sklearn", f_sklearn 
print "mean of x \n", bias_sklearn 
print "cov noise sklearn\n", cov_noise_sklearn 


In [None]:
compute_ll(f_sklearn, bias_sklearn, cov_noise_sklearn, x)


In [17]:
init_params = np.random.rand(params_size)
f, bias, cov_noise, w =  decode_parameters(init_params, x_dim, latent_dim, opt)
print "Initial parameters"
print "F:", f
print "slope", f[1]/f[0]
print "bias: ", bias
print "cov_noise matrix: ", cov_noise
print "bias for y:\n", w[0]
print "logistic regression weights:\n", w[1:]
print "initial obj value logit", objective_logit(init_params, x, y, latent_dim, 0, opt)


Initial parameters
F: [[0.86361742 0.2355222  0.88850065 ... 0.67123869 0.88760575 0.21885433]
 [0.50260063 0.27849017 0.43636393 ... 0.09593587 0.42263038 0.61544172]
 [0.46564178 0.53018685 0.80685484 ... 0.02530548 0.37167352 0.09453595]
 ...
 [0.33170219 0.29020874 0.01553651 ... 0.26673531 0.10267912 0.16523587]
 [0.65992233 0.25550432 0.01537221 ... 0.34414316 0.48902544 0.33284171]
 [0.40025097 0.25679528 0.11733277 ... 0.48657463 0.40348414 0.33505653]]
slope [0.5819714  1.18243702 0.49112393 0.85361542 0.27801165 6.01039355
 8.18134554 0.14292364 0.47614651 2.8121066 ]
bias:  [0.69522956 0.1238199  0.76589619 ... 0.25150356 0.72394408 0.5949079 ]
cov_noise matrix:  [[0.97335665 0.         0.         ... 0.         0.         0.        ]
 [0.         0.97335665 0.         ... 0.         0.         0.        ]
 [0.         0.         0.97335665 ... 0.         0.         0.        ]
 ...
 [0.         0.         0.         ... 0.97335665 0.         0.        ]
 [0.         0.     

In [None]:
grad_objective_logit = grad(objective_logit, argnum = 0)
lambda_e = 100000
import time

t0 = time.time()
params_optimizied_logit, obj, dict = fmin_l_bfgs_b(objective_logit, x0 = init_params, fprime = grad_objective_logit, args = (x, y, latent_dim, lambda_e, opt))#, callback=callback)
t1 = time.time()
print t1-t0


In [None]:
dict


In [31]:
f_logit, bias_logit, cov_noise_logit, w_logit = decode_parameters(params_optimizied_logit, x_dim, latent_dim, opt)
x_proj_logit = transform(f_logit, bias_logit, cov_noise_logit, x)
clf_pc = LogisticRegression().fit(x_proj_logit, y)
acc_scores = clf_pc.score(x_proj_logit, y)

In [32]:
acc_scores

0.6923076923076923

In [None]:
lambda_e_1 = 0.4
lambda_e_2 = 0.51
step  = 0.01
lambda_es = np.arange(lambda_e_1,lambda_e_2,step)

acc_scores = np.zeros(lambda_es.shape[0])
obj_values_logit = np.zeros(lambda_es.shape[0])
ll_logit = np.zeros(lambda_es.shape[0])
pred_l_logit = np.zeros(lambda_es.shape[0])


def callback(params):
    print objective_logit_1(params, transformer.noise_variance_, x, y, latent_dim, lambda_es[i], opt)
    print grad_objective_logit_1(params, transformer.noise_variance_, x, y, latent_dim, lambda_es[i], opt)
    #print params[1]/params[0]
    #print params[-2],"-",params[-1]
    pass
    print '\n'
grad_objective_logit = grad(objective_logit, argnum = 0)

for i in range(lambda_es.shape[0]):
    params_optimizied_logit, obj, dict = fmin_l_bfgs_b(objective_logit, x0 = init_params, fprime = grad_objective_logit, args = (x, y, latent_dim, lambda_es[i], opt))#, callback=callback)
    f_logit, bias_logit, cov_noise_logit, w_logit = decode_parameters(params_optimizied_logit, x_dim, latent_dim, opt)
    #cov_noise_logit = np.diag(np.ones(x_dim)*transformer.noise_variance_)
    x_proj_logit = transform(f_logit, bias_logit, cov_noise_logit, x)
    clf_pc = LogisticRegression().fit(x_proj_logit, y)
    acc_scores[i] = clf_pc.score(x_proj_logit, y)
    ll_logit[i] = compute_ll(f_logit, bias_logit, cov_noise_logit, x)
    obj_values_logit[i]=  obj
    pred_l_logit[i] =  compute_pl_logit(f_logit, bias_logit, cov_noise_logit, w_logit, x, y) 
    
acc_scores_adam = np.zeros(lambda_es.shape[0])
obj_values_logit_adam = np.zeros(lambda_es.shape[0])
ll_logit_adam = np.zeros(lambda_es.shape[0])
pred_l_logit_adam = np.zeros(lambda_es.shape[0])

for i in range(lambda_es.shape[0]):
    print i
    params_opt_adam_logit = adam(get_grad_obj(init_params, x, y, latent_dim, lambda_es[i], opt, iter), init_params, step_size = step_size,  num_iters=num_iters)
    
    f_logit, bias_logit, cov_noise_logit, w_logit = decode_parameters(params_opt_adam_logit, x_dim, latent_dim, opt)
    x_proj_logit = transform(f_logit, bias_logit, cov_noise_logit, x)
    
    clf_pc = LogisticRegression().fit(x_proj_logit, y)
    acc_scores_adam[i] = clf_pc.score(x_proj_logit, y)
    
    ll_logit_adam[i] = compute_ll(f_logit, bias_logit, cov_noise_logit, x)
    obj_values_logit_adam[i]=  objective_logit(params_opt_adam_logit, x, y, latent_dim, lambda_es[i], opt)
    pred_l_logit_adam[i] =  compute_pl_logit(f_logit, bias_logit, cov_noise_logit, w_logit, x, y) 

In [None]:
decode_parameters(params_opt_adam_logit, x_dim, latent_dim, opt)

In [None]:
lambda_es

In [None]:
plt.figure(figsize=(8,20))

plt.subplot(411)
plt.title("Final obj values ")
plt.xticks(np.arange(lambda_e_1,lambda_e_2, step))
plt.plot(lambda_es, obj_values_logit,  label = "l_bfgs_b")
plt.plot(lambda_es, obj_values_logit_adam,  label = "adam")
plt.legend()

plt.subplot(412)
plt.title("Final log-likelihood")
plt.xticks(np.arange(lambda_e_1,lambda_e_2, step))
plt.plot(lambda_es, ll_logit,  label = "l_bfgs_b")
plt.plot(lambda_es, ll_logit_adam,  label = "adam")
plt.legend()

plt.subplot(413)
plt.title("Train accuracy")
plt.xticks(np.arange(lambda_e_1,lambda_e_2, step))
plt.plot(lambda_es, acc_scores,  label = "l_bfgs_b")
plt.plot(lambda_es, acc_scores_adam,  label = "adam", )
plt.legend()

plt.subplot(414)
plt.title("Prediction log-likelihood")
plt.xticks(np.arange(lambda_e_1,lambda_e_2, step))
plt.plot(lambda_es, pred_l_logit, label = "l_bfgs_b")
plt.plot(lambda_es, pred_l_logit_adam,  label = "adam")
plt.legend()
plt.xlabel("lambda_e")

plt.savefig("adam_vs_l_bfgs_b_lambda_e{},{}.png".format(lambda_e_1, lambda_e_2))
plt.show()


In [None]:

#params_optimizied_logit_1, obj_value_logit, dict = fmin_l_bfgs_b(objective_logit_1, x0 = params, fprime = grad_objective_logit_1, args = (transformer.noise_variance_, x, y, latent_dim, lambda_e, opt))

params_optimizied_logit_1, obj_value_logit_1, dict = fmin_l_bfgs_b(objective_logit, x0 = params, fprime = grad_objective_logit, args = (x, y, latent_dim, lambda_e_1, opt))
params_optimizied_logit_2, obj_value_logit_2, dict = fmin_l_bfgs_b(objective_logit, x0 = params, fprime = grad_objective_logit, args = (x, y, latent_dim, lambda_e_2, opt))


f_logit_1, bias_logit_1, cov_noise_logit_1, w_logit_1 = decode_parameters(params_optimizied_logit_1, x_dim, latent_dim, opt)
ll_1=  compute_ll(f_logit_1, bias_logit_1, cov_noise_logit_1, x)
pl_1 = compute_pl_logit(f_logit_1, bias_logit_1, cov_noise_logit_1, w_logit_1, x, y) 

f_logit_2, bias_logit_2, cov_noise_logit_2, w_logit_2 = decode_parameters(params_optimizied_logit_2, x_dim, latent_dim, opt)
ll_2=  compute_ll(f_logit_2, bias_logit_2, cov_noise_logit_2, x)
pl_2 = compute_pl_logit(f_logit_2, bias_logit_2, cov_noise_logit_2, w_logit_2, x, y) 

#lambda_es = np.arange(0,1,0.1)
lambda_es = np.array([lambda_e_1, lambda_e_2])
obj_values_logit = np.ones((2, lambda_es.shape[0]))

for i in range(lambda_es.shape[0]):
    obj_values_logit[0,i] = objective_logit(params_optimizied_logit_1, x, y, latent_dim, lambda_es[i], opt)
    obj_values_logit[1,i] = objective_logit(params_optimizied_logit_2, x, y, latent_dim, lambda_es[i], opt)

In [None]:
interp = np.arange(0, 1.0, 0.01)
obj1 = [objective_logit(params_optimizied_logit_1 * i + (1.0-i) * params_optimizied_logit_2, x, y, latent_dim, lambda_e_1, opt) for i in np.arange(0, 1, 0.01)]
obj2 = [objective_logit(params_optimizied_logit_1 * i + (1.0-i) * params_optimizied_logit_2, x, y, latent_dim, lambda_e_2, opt) for i in np.arange(0, 1, 0.01)]

plt.plot(interp, obj1)
plt.plot(interp, obj2)

In [None]:
interp = np.arange(-0., 1.0, 0.01)
lik =[]
plik = []
obj = []
grad_objective_logit_1 = grad(objective_logit_1)
params_start= np.random.rand(params_size)
for i in interp:
    slope = 3.83374105 * i + (1.0 - i) * -7.39114372
    f_logit = np.array([1., slope]).reshape((2,1))
    print f_logit
    params_optimizied_logit, obj_value_logit, dict = fmin_l_bfgs_b(objective_logit_1, x0 = params_start, fprime = grad_objective_logit_1, args = (f_logit, x, y, latent_dim, lambda_e_1, opt))
    _, bias_logit, cov_noise_logit, w_logit = decode_parameters(params_optimizied_logit, x_dim, latent_dim, opt)
    ll =  compute_ll(f_logit, bias_logit, cov_noise_logit, x)
    pl = compute_pl_logit(f_logit, bias_logit, cov_noise_logit, w_logit, x, y) 
    lik.append(ll)
    plik.append(pl)
    obj.append(obj_value_logit)
    
plt.plot(interp, lik, label='lik')
#plt.plot(interp, plik, label='predictive')
plt.legend()

In [None]:
plt.plot(interp, plik, label='predictive')


In [None]:
plt.plot(interp, obj)

In [None]:
obj[0], obj[-1]

In [None]:
obj_values_logit

In [None]:
w_logit_1

In [None]:
w_logit_2

In [None]:
lambda_es

In [None]:
plt.figure(figsize=(24,7))
plt.title("Final obj values ")
plt.xticks(lambda_es)
plt.plot(lambda_es, obj_values_logit[0, :], label = "lambda_e_1 = {}".format(lambda_e_1))
plt.plot(lambda_es, obj_values_logit[1, :], label = "lambda_e_2 = {}".format(lambda_e_2))
plt.legend()
plt.show()

In [None]:
plt.figure(figsize=(15,15))

plt.subplot(221) 
#plt.axis('equal')

plt.axis('equal')
plt.title("sklearn PCA projection")
x_proj_sklearn = transform(f_sklearn, bias_sklearn, cov_noise_sklearn, x)

plt.plot(x_proj_sklearn[:n_samples_class,0], np.ones(n_samples_class), 'x')
plt.plot(x_proj_sklearn[n_samples_class:,0], np.zeros(n_samples_class), 'o')


plt.subplot(222) 
plt.title("PC PPCA projection: lambda_e = {}".format(lambda_e_1))

x_proj_logit_1 = transform(f_logit_1, bias_logit_1, cov_noise_logit_1, x)
plt.plot(x_proj_logit_1[:n_samples_class], np.ones(n_samples_class), 'x')
plt.plot(x_proj_logit_1[n_samples_class:], np.zeros(n_samples_class), 'o')


plt.subplot(223)
plt.title("PC PPCA projection: lambda_e = {}".format(lambda_e_2))
x_proj_logit_2 = transform(f_logit_2, bias_logit_2, cov_noise_logit_2, x)
plt.plot(x_proj_logit_2[:n_samples_class], np.ones(n_samples_class), 'x')
plt.plot(x_proj_logit_2[n_samples_class:], np.zeros(n_samples_class), 'o')


plt.subplot(224) 
plt.axis('equal')
clf_pc_1 = LogisticRegression().fit(x_proj_logit_1, y)
acc_scores_1 = clf_pc.score(x_proj_logit_1, y)
clf_pc_2 = LogisticRegression().fit(x_proj_logit_2, y)
acc_scores_2 = clf_pc.score(x_proj_logit_2, y)

    
plt.plot(x[:n_samples_class,0], x[:n_samples_class,1], 'x')
plt.plot(x[n_samples_class:,0], x[n_samples_class:,1], 'o')

x_lim_1 = -5
x_lim_2 = 7


a =  np.array(range(x_lim_1, x_lim_2)) 

b_pc_fa_logit_1 = f_logit_1[1]/f_logit_1[0]*(a + bias_logit_1[0]) + bias_logit_1[1]
b_pc_fa_logit_2 = f_logit_2[1]/f_logit_2[0]*(a + bias_logit_2[0]) + bias_logit_2[1]
b_sklearn = f_sklearn[1]/f_sklearn[0]*(a + bias_sklearn [0]) + bias_sklearn [1]

plt.plot(a, b_sklearn, label='sklearn pca')
plt.plot(a, b_pc_fa_logit_1, label="pc ppca lambda_e = {}".format(lambda_e_1))
plt.plot(a, b_pc_fa_logit_2, label="pc ppca lambda_e = {}".format(lambda_e_2))
plt.ylim(x_lim_1,x_lim_2)

plt.legend()
plt.savefig("data_lambda_e_1={}_lambda_e_2={}_logistic_pc_ppca_vs_sklearn_pca.png".format(lambda_e_1, lambda_e_2))

plt.show()


In [None]:
plt.figure(figsize=(15,15))

plt.subplot(221) 
#plt.axis('equal')

plt.axis('equal')
plt.title("sklearn PCA projection")
x_proj_sklearn = transform(f_sklearn, bias_sklearn, cov_noise_sklearn, x)

plt.plot(x_proj_sklearn[:n_samples_class,0], np.ones(n_samples_class), 'x')
plt.plot(x_proj_sklearn[n_samples_class:,0], np.zeros(n_samples_class), 'o')


plt.subplot(222) 

plt.title("PC PPCA projection opt fmin_l_bfgs_b")

f_logit_1, bias_logit_1, cov_noise_logit_1, w_logit_1 = decode_parameters(params_optimizied_logit, x_dim, latent_dim, opt)
x_proj_logit_1 = transform(f_logit_1, bias_logit_1, cov_noise_logit_1, x)
plt.plot(x_proj_logit_1[:n_samples_class], np.ones(n_samples_class), 'x')
plt.plot(x_proj_logit_1[n_samples_class:], np.zeros(n_samples_class), 'o')


plt.subplot(223)

plt.title("PC PPCA projection opt ADAM")

f_logit_2, bias_logit_2, cov_noise_logit_2, w_logit_2 = decode_parameters(params_opt_adam_logit, x_dim, latent_dim, opt)
x_proj_logit_2 = transform(f_logit_2, bias_logit_2, cov_noise_logit_2, x)
plt.plot(x_proj_logit_2[:n_samples_class], np.ones(n_samples_class), 'x')
plt.plot(x_proj_logit_2[n_samples_class:], np.zeros(n_samples_class), 'o')


plt.subplot(224) 
plt.axis('equal')
clf_pc_1 = LogisticRegression().fit(x_proj_logit_1, y)
acc_scores_1 = clf_pc.score(x_proj_logit_1, y)
clf_pc_2 = LogisticRegression().fit(x_proj_logit_2, y)
acc_scores_2 = clf_pc.score(x_proj_logit_2, y)

plt.title("train acc at lambda_e = {} is {} for fmin_l_bfgs_b, and {} for ADAM".format(lambda_es[i], acc_scores[i], acc_scores_adam[i]))


plt.plot(x[:n_samples_class,0], x[:n_samples_class,1], 'x')
plt.plot(x[n_samples_class:,0], x[n_samples_class:,1], 'o')

x_lim_1 = -5
x_lim_2 = 7


a =  np.array(range(x_lim_1, x_lim_2)) 

b_pc_fa_logit_1 = f_logit_1[1]/f_logit_1[0]*(a + bias_logit_1[0]) + bias_logit_1[1]
b_pc_fa_logit_2 = f_logit_2[1]/f_logit_2[0]*(a + bias_logit_2[0]) + bias_logit_2[1]
b_sklearn = f_sklearn[1]/f_sklearn[0]*(a + bias_sklearn [0]) + bias_sklearn [1]

plt.plot(a, b_sklearn, label='sklearn pca')
plt.plot(a, b_pc_fa_logit_1, label="fmin_l_bfgs_b")
plt.plot(a, b_pc_fa_logit_2, label="adam")
plt.ylim(x_lim_1,x_lim_2)

plt.legend()
plt.savefig("data_lambda_e_=_{}_logistic_pc_ppca_adam_vs_l_bfgs_b_vs_sklearn_pca.png".format(lambda_es[i]))

plt.show()


In [None]:
b_pc_fa_logit_1[(b_pc_fa_logit_1<np.max(x[:,1] and b_pc_fa_logit_1>np.min(x[:,1]))].shape

In [None]:
b_pc_fa_logit_1[b_pc_fa_logit_1[b_pc_fa_logit_1>np.min(x[:,1])]< np.max(x[:,1])] 

In [None]:
print ll_1
print pl_1
print ll_2

print pl_2

print "Final parameters_logit 1"
print "logit slope\n", f_logit_1[1]/f_logit_1[0]
print "F:\n", f_logit_1
print "bias for x:\n", bias_logit_1
print "var :\n",cov_noise_logit_1
print "bias for y:\n", w_logit_1[0]
print "logistic regression weights:\n", w_logit_1[1:]

print "Final parameters_logit 2"
print "logit slope\n", f_logit_2[1]/f_logit_2[0]
print "F:\n", f_logit_2
print "bias for x:\n", bias_logit_2
print "cov_noise matrix:\n", cov_noise_logit_2
print "bias for y:\n", w_logit_2[0]
print "logistic regression weights:\n", w_logit_2[1:]

In [None]:
t = np.array([[ 1.        ], [-7.39114372]])

In [None]:
np.einsum("dl,ml->dm",t,t)

In [None]:
np.einsum("dl,ml->dm",f_logit_2,f_logit_2)