In [1]:
import numpy as np
import os
from collections import OrderedDict
import io
import pdb
from csls import CSLS
from data import Language, WordDictionary
from utils import to_numpy

In [2]:
# Paths and language pair
BASE_DIR = "data"
CROSSLINGUAL = os.path.join(BASE_DIR, "crosslingual", "dictionaries")
src, tgt = "en", "zh"
train_file_path = os.path.join(CROSSLINGUAL, f"{src}-{tgt}.0-5000.txt")
eval_file_path = os.path.join(CROSSLINGUAL, f"{src}-{tgt}.5000-6500.txt")

# Per-side preprocessing flags; each inner mapping is later unpacked as
# keyword arguments when the corresponding Language object is created.
params = OrderedDict(
    (side, OrderedDict([("mean_center", True), ("unit_norm", True)]))
    for side in ("src", "tgt")
)

In [3]:
# Create and load one Language per side, source first, then register both
# under their language codes. `gpu` is passed through to Language as-is.
gpu = False
src_language = Language(src, gpu, **params["src"])
src_language.load(f"wiki.{src}.vec", "data")
tgt_language = Language(tgt, gpu, **params["tgt"])
tgt_language.load(f"wiki.{tgt}.vec", "data")
languages = OrderedDict([(src, src_language), (tgt, tgt_language)])

In [4]:
# Build the src -> tgt WordDictionary from the training dictionary file.
# Its `word_map` attribute (two columns: source, target) is what the
# alignment-matrix cell below reads.
training_mapping = WordDictionary(languages[src], languages[tgt], train_file_path)

In [5]:
# Deduplicate the words on each side of the training dictionary. The inverse
# indices give, for every training pair, the position of its source / target
# word inside unique_src / unique_tgt.
unique_src, src_indices = np.unique(training_mapping.word_map[:, 0], return_inverse=True)
unique_tgt, tgt_indices = np.unique(training_mapping.word_map[:, 1], return_inverse=True)

# A has shape (number of unique src words, number of unique tgt words).
# A[i, j] is 1 if unique_src[i] and unique_tgt[j] are aligned, 0 otherwise.
# A single fancy-index assignment marks every pair at once; a repeated pair
# simply writes 1 again, so the result matches the element-by-element loop.
A = np.zeros((unique_src.shape[0], unique_tgt.shape[0]))
A[src_indices, tgt_indices] = 1

# Embeddings of the unique words (presumably returned in the same order as
# unique_src / unique_tgt, i.e. aligned with the rows / columns of A -- confirm).
Xs = languages[src].get_embeddings(unique_src)
Xt = languages[tgt].get_embeddings(unique_tgt)

# Procrustes Solution

In [None]:
import torch
from torch.autograd import Variable
from torch import Tensor

In [None]:
# Closed-form Procrustes map: factor Xs^T Xt = U S V^T and take W = U V^T.
cross_cov = torch.mm(Xs.t(), Xt)
left_vecs, _, right_vecs = torch.svd(cross_cov)
mapping_matrix = torch.mm(left_vecs, right_vecs.t())
def mapfunction(mapping_matrix):
    """Return a callable that right-multiplies embeddings by ``mapping_matrix``.

    The returned function takes ``src_embed`` and evaluates
    ``torch.mm(src_embed, mapping_matrix)``.
    """
    return lambda src_embed: torch.mm(src_embed, mapping_matrix)
# Full-vocabulary embeddings of both languages.
src_embeddings = languages[src].embeddings
tgt_embeddings = languages[tgt].embeddings

# Only the source side is mapped; the target side is converted unchanged.
map_fn = mapfunction(mapping_matrix)
tgt_transform = to_numpy(tgt_embeddings, gpu)
src_transform = to_numpy(map_fn(src_embeddings), gpu)

In [None]:
# Sum of squared entries of (Xs W - Xt) for the closed-form map W.
residual = torch.mm(Xs, mapping_matrix) - Xt
optval = (residual ** 2).sum()
print(optval)

# Procrustes Optimization

In [None]:
import autograd.numpy as anp
from pymanopt.manifolds import Product, Stiefel, PositiveDefinite
from pymanopt import Problem
from pymanopt.solvers import SteepestDescent, ConjugateGradient
import os

In [None]:
# Hide every CUDA device from this process (empty device list).
# NOTE(review): this presumably only affects libraries that have not yet
# initialised CUDA when the cell runs -- confirm.
os.environ['CUDA_VISIBLE_DEVICES'] = ''

In [None]:
# Convert both training embedding matrices with to_numpy (source first).
Xs_numpy, Xt_numpy = (to_numpy(emb, gpu) for emb in (Xs, Xt))
# Row counts of each side; d is assigned twice and ends up as Xt's second dimension.
(ns, d), (nt, d) = Xs.shape, Xt.shape

In [None]:
# Search space: a Product manifold with a single Stiefel(d, d) factor, so the
# optimisation variable is a one-element sequence (read as theta[0] in the cost).
manifold = Product([Stiefel(d, d)])
def ProcrustesCost(Xs, Xt):
    """Build the Procrustes objective for fixed source / target matrices.

    Returns a function of ``theta``; its first element ``theta[0]`` is the
    mapping applied to ``Xs``. The function evaluates the sum of squared
    entries of ``Xt - Xs . theta[0]``.
    """
    def ProcrustesCostHelper(theta):
        residual = Xt - anp.dot(Xs, theta[0])
        return (residual ** 2).sum()
    return ProcrustesCostHelper

In [None]:
# Bind the training matrices into the cost and pose the problem on `manifold`.
# No gradient is supplied here -- presumably pymanopt derives it from the
# autograd.numpy-based cost; confirm against the installed pymanopt version.
cost_fn = ProcrustesCost(Xs_numpy, Xt_numpy)
problem = Problem(manifold=manifold, cost=cost_fn)

In [None]:
# Conjugate gradient with no wall-clock limit (maxtime=inf), capped at 2000 iterations.
solver = ConjugateGradient(maxtime=float('inf'), maxiter=2000)
# The result is indexed as theta[0] by the cell that applies the mapping.
theta = solver.solve(problem)

In [None]:
# Full-vocabulary embeddings of both sides, converted with to_numpy (source first).
src_embeddings, tgt_embeddings = (
    to_numpy(languages[code].embeddings, gpu) for code in (src, tgt)
)
# The target side is used as-is; only the source side is mapped by theta[0].
tgt_transform = tgt_embeddings
src_transform = np.dot(src_embeddings, theta[0])

In [None]:
# Make only CUDA device 0 visible again after the CPU-only section above.
# NOTE(review): presumably has no effect on a library that already
# initialised CUDA in this kernel -- confirm.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

# Manifold Learning

In [6]:
# Now the Manifold learning part
import os

# Select the GPU *before* anything imports Theano. The recorded output of this
# cell shows Theano mapping a CUDA device as part of the imports, so assigning
# CUDA_VISIBLE_DEVICES after `import theano` is too late to influence which
# device it picks.
os.environ['CUDA_VISIBLE_DEVICES'] = "0"

# NOTE: from this cell on, `np` refers to autograd.numpy and shadows the plain
# numpy imported at the top of the notebook.
import autograd.numpy as np
from pymanopt.manifolds import Product, Stiefel, PositiveDefinite
from pymanopt import Problem
from pymanopt.solvers import SteepestDescent, ConjugateGradient
import theano
from theano import shared
import theano.tensor as T
# import tensorflow as tf

Can not use cuDNN on context None: cannot compile with cuDNN. We got this error:
b'/tmp/try_flags_qc6hnuxa.c:4:19: fatal error: cudnn.h: No such file or directory\n #include <cudnn.h>\n                   ^\ncompilation terminated.\n'
Mapped name None to device cuda: TITAN X (Pascal) (0000:02:00.0)


In [7]:
# Convert both training embedding matrices with to_numpy (source first).
Xs_numpy, Xt_numpy = (to_numpy(emb, gpu) for emb in (Xs, Xt))
# Row counts of each side; d is assigned twice and ends up as Xt's second dimension.
(ns, d), (nt, d) = Xs.shape, Xt.shape
print(ns, nt, d)

5000 6599 300


In [8]:
# Product manifold for the three factors optimised jointly. The factor order
# (Stiefel, PositiveDefinite, Stiefel) matches the order in which the symbolic
# variables are handed to Problem below (arg=[U1, B, U2]).
manifold = Product([Stiefel(d, d), PositiveDefinite(d), Stiefel(d, d)])

In [9]:
# From the paper directly
# def cost_paper(Xs_t, Xt_t, lbda=10):
#     Xs = Xs_t.transpose()  # d x n1
#     Xt = Xt_t.transpose()  # d x n2
#     XsXs_t = np.dot(Xs, Xs.transpose())
#     XtXt_t = np.dot(Xt, Xt.transpose())
#     def cost_helper(theta):
#         Us = theta[0]
#         B = theta[1]
#         Ut = theta[2]
#         UtBUs_t = np.dot(np.dot(Ut, B), Us.transpose())
#         partial = np.dot(UtBUs_t, XsXs_t)
#         partial = np.dot(partial, UtBUs_t.transpose())
#         partial = np.dot(partial, XtXt_t)
#         tracecost = np.trace(partial)
#         simcost = 0
#         for ix in range(Xs.shape[0]):
#             simcost += np.dot(np.dot(Xs[:, ix], UtBUs_t.transpose()), Xt[:, ix])
#         simcost = -2 * simcost
#         regcost = lbda * (B ** 2).sum()
#         return regcost + simcost + tracecost
#     return cost_helper
# Symbolic optimisation variables.
U1, U2, B = T.matrix(), T.matrix(), T.matrix()
Lambda = 1e3  # weight of the squared-entry penalty on B (applied with a 0.5 factor)

# Training matrices wrapped as Theano shared variables.
Xs_shared = shared(Xs_numpy)
Xt_shared = shared(Xt_numpy)

# Fit term: squared error between Xs U1 B U2^T Xt^T and the 0/1 alignment matrix A.
scores = Xs_shared.dot(U1.dot(B.dot(U2.T))).dot(Xt_shared.T)
fit_term = T.sum((scores - A) ** 2)

# Total cost = fit term + 0.5 * Lambda * sum(B ** 2).
cost = fit_term + 0.5 * Lambda * (T.sum(B**2))

In [10]:
# Stopping criteria handed to the solver as maxtime / maxiter.
max_opt_time, max_opt_iter = 5000, 150
solver = ConjugateGradient(maxtime=max_opt_time, maxiter=max_opt_iter)

# The symbolic cost is paired with its argument list [U1, B, U2].
problem = Problem(
    manifold=manifold,
    cost=cost,
    arg=[U1, B, U2],
    verbosity=3,
)

In [11]:
# Run the solver. In the recorded run this cell failed with
# CUDA_ERROR_OUT_OF_MEMORY while computing the dense (5000 x 300) . (300 x 6599)
# product, so `theta` was not produced by that run.
theta = solver.solve(problem)

Compiling cost function...
Computing gradient of cost function...
Optimizing...
 iter		   cost val	    grad. norm


GpuArrayException: b'cuMemAlloc: CUDA_ERROR_OUT_OF_MEMORY: out of memory'
Apply node that caused the error: GpuDot22(GpuDot22.0, InplaceGpuDimShuffle{1,0}.0)
Toposort index: 9
Inputs types: [GpuArrayType<None>(float32, matrix), GpuArrayType<None>(float32, matrix)]
Inputs shapes: [(5000, 300), (300, 6599)]
Inputs strides: [(1200, 4), (4, 1200)]
Inputs values: ['not shown', 'not shown']
Outputs clients: [[GpuElemwise{sub,no_inplace}(GpuDot22.0, GpuArrayConstant{[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]})]]

Backtrace when the node is created(use Theano flag traceback.limit=N to make it longer):
  File "/zfsauton/home/bpatra/miniconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/zfsauton/home/bpatra/miniconda3/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/zfsauton/home/bpatra/miniconda3/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/zfsauton/home/bpatra/miniconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2662, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/zfsauton/home/bpatra/miniconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2785, in _run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/zfsauton/home/bpatra/miniconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2901, in run_ast_nodes
    if self.run_code(code, result):
  File "/zfsauton/home/bpatra/miniconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2961, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-9-9ba9006b3274>", line 29, in <module>
    shared(Xt_numpy).T) - A)**2)
  File "/zfsauton/home/bpatra/miniconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/zfsauton/home/bpatra/miniconda3/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/zfsauton/home/bpatra/miniconda3/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/zfsauton/home/bpatra/miniconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2662, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/zfsauton/home/bpatra/miniconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2785, in _run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/zfsauton/home/bpatra/miniconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2901, in run_ast_nodes
    if self.run_code(code, result):
  File "/zfsauton/home/bpatra/miniconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2961, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-9-9ba9006b3274>", line 29, in <module>
    shared(Xt_numpy).T) - A)**2)

HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.

In [None]:
# Unpack the three optimised factors and write each one to its own .npy file.
Us, B, Ut = theta
for filename, factor in (("Us.npy", Us), ("B.npy", B), ("Ut.npy", Ut)):
    np.save(filename, arr=factor)

In [None]:
# Reload the three factors from disk, in the order Us, B, Ut.
Us, B, Ut = (np.load(path) for path in ("Us.npy", "B.npy", "Ut.npy"))

In [None]:
# Square-root factor of B built from its SVD: B = U diag(s) V^H gives
# b_sqrt = U diag(sqrt(s)) V^H. This is a true matrix square root when B is
# symmetric positive (semi-)definite, which is assumed here.
left_vecs, singular_vals, right_vecs_h = np.linalg.svd(B, full_matrices=True)
sqrt_spectrum = np.diag(np.sqrt(singular_vals))
b_sqrt = np.dot(left_vecs, np.dot(sqrt_spectrum, right_vecs_h))

In [None]:
# Map both vocabularies into the shared space.
# Source side: embeddings . Us . b_sqrt; target side: embeddings . Ut . b_sqrt.
src_embeddings = to_numpy(languages[src].embeddings, gpu)
tgt_embeddings = to_numpy(languages[tgt].embeddings, gpu)
src_transform = np.dot(np.dot(src_embeddings, Us), b_sqrt)
tgt_transform = np.dot(np.dot(tgt_embeddings, Ut), b_sqrt)

In [None]:
# Make only CUDA device 3 visible for the evaluation below.
# NOTE(review): presumably has no effect on a library that already
# initialised CUDA in this kernel -- confirm.
os.environ['CUDA_VISIBLE_DEVICES'] = '3'

# NN Evaluation

In [None]:
# CSLS object over the transformed source / target spaces. Note that gpu=True
# is passed explicitly here, unlike the notebook-level `gpu = False` flag.
csls = CSLS(src_transform, tgt_transform, gpu=True)

In [None]:
# Project-local evaluation module; the Evaluator is built for the same
# language pair and reads from the "data" directory.
import evaluate as evl
evaluator = evl.Evaluator(languages[src], languages[tgt], data_dir="data")

In [None]:
# Supervised evaluation using the CSLS object; the second argument is an empty dict.
metrics = evaluator.supervised(csls, {})

In [None]:
# Show the evaluation result returned by evaluator.supervised.
print(metrics)

In [None]:
# Standalone pymanopt example.
# NOTE: this cell rebinds `manifold`, `cost`, `problem` and `solver` used earlier.
# (1) Instantiate a manifold
manifold = Stiefel(5, 2)

# (2) Define the cost function (here using autograd.numpy)
def cost(X):
    """Return the sum of all entries of ``X``."""
    return np.sum(X)

# Pose the problem: minimise `cost` over `manifold` (no explicit gradient is passed).
problem = Problem(manifold=manifold, cost=cost)

# (3) Instantiate a Pymanopt solver
solver = SteepestDescent()

# let Pymanopt do the rest
Xopt = solver.solve(problem)
# Print the point returned by the solver.
print(Xopt)