# So you want to convert TensorFlow to MXNet, eh?

Let's try parsing the APIs and generating a mapping between the two. This might take a while.

In [32]:
import mxnet as mx
import tensorflow as tf
import pkgutil, types, pprint
import numpy as np
from random import shuffle

def doc_dump(base, docs=None):
    """Collect the docstrings of plain Python functions found under ``base``.

    ``base`` is a dotted expression such as ``'mx'`` or ``'tf.nn'``. It is
    resolved with ``eval`` against this module's globals, so import aliases
    work. Every attribute stored in the resolved object's ``__dict__`` that
    is a ``types.FunctionType`` is recorded, then every name reported by
    ``pkgutil.walk_packages`` for the object's ``__path__`` is visited
    recursively.

    Args:
        base: Dotted name to resolve and scan.
        docs: Optional dict to update in place. A new dict is created when
            omitted, so separate calls never share results.

    Returns:
        The dict, mapping ``'<base>.<function>'`` to that same qualified
        name followed by the function's docstring ('' when it has none).
    """
    if docs is None:
        docs = {}
    try:
        # NOTE: eval() executes arbitrary expressions; only pass trusted,
        # hard-coded names here. It is evaluated once and reused below.
        module = eval(base)
        namespace = module.__dict__
        for attr in dir(module):
            member = namespace.get(attr)
            if isinstance(member, types.FunctionType):
                qualified = base + '.' + attr
                docs[qualified] = qualified + (member.__doc__ or '')
        for _, modname, _ in pkgutil.walk_packages(path=module.__path__,
                                                   prefix=base + '.',
                                                   onerror=lambda name: None):
            doc_dump(modname, docs)
    except Exception:
        # Deliberately best-effort: names that cannot be resolved, objects
        # without a __path__ (plain modules), and modules that fail while
        # being inspected are skipped; whatever was gathered so far is kept.
        pass
    return docs
        
# Harvest the function docstrings of each framework into its own fresh dict.
tf_docs = doc_dump(base='tf', docs={})
mx_docs = doc_dump(base='mx', docs={})

In [33]:
# A negative SUB_SAMPLE means "keep every entry"; a non-negative value keeps
# only the first SUB_SAMPLE entries. Slicing directly with [:-1] would
# silently drop the last entry of each framework.
SUB_SAMPLE = -1
_limit = None if SUB_SAMPLE < 0 else SUB_SAMPLE
_tf_items = list(tf_docs.items())[:_limit]
_mx_items = list(mx_docs.items())[:_limit]
# zip(*[]) produces nothing to unpack, so fall back to two empty tuples when
# a framework yielded no documented functions.
tf_labels, tf_values = zip(*_tf_items) if _tf_items else ((), ())
mx_labels, mx_values = zip(*_mx_items) if _mx_items else ((), ())

In [None]:
# Dimension of the square recurrent matrix W and of the state vector h;
# also passed to the Dense layer as its number of units.
HIDDEN_UNITS = 100
# NOTE(review): not referenced anywhere in the visible code; possibly a leftover.
LAYERS = 1
# Passed to trainer.step() during training.
BATCH_SIZE = 1
# Number of columns of the input weight matrix U.
INPUT_SIZE = 1

from scipy import sparse
from scipy.sparse import linalg as s_linalg

# Random sparse recurrent weights, rescaled so the largest eigenvalue modulus
# (the spectral radius) becomes 0.9.
W = sparse.rand(HIDDEN_UNITS, HIDDEN_UNITS, density=0.1)
# eigs() returns complex eigenvalues (the largest-magnitude ones by default).
# Take the modulus explicitly instead of relying on the ordering of complex
# values, and skip the eigenvectors, which are not needed.
spectral_radius = np.abs(s_linalg.eigs(W, return_eigenvectors=False)).max()
W = W*(0.9/spectral_radius)
# Random sparse input weights with entries scaled by 0.8.
U = 0.8*sparse.rand(HIDDEN_UNITS, INPUT_SIZE, density=0.1)

def _reservoir_state(text):
    """Feed ``text`` through the fixed random recurrence and return the final state.

    Starting from an all-zero (HIDDEN_UNITS, 1) state, each character updates
    the state as ``tanh(W.dot(h) + U*ord(c))``, i.e. the character's code
    point is the scalar input. The final state is wrapped in an MXNet NDArray.
    An empty ``text`` yields the all-zero state.
    """
    h = np.zeros((HIDDEN_UNITS, 1))
    for c in text:
        h = np.tanh(W.dot(h) + U*ord(c))
    return mx.nd.array(h)


# One final state per documented function, in the same order as the
# corresponding *_values / *_labels tuples.
tf_states = [_reservoir_state(v) for v in tf_values]
mx_states = [_reservoir_state(v) for v in mx_values]

In [None]:
# One fully connected layer with HIDDEN_UNITS outputs, Xavier-initialised on
# the CPU and trained with plain SGD against an L2 loss.
ctx = mx.cpu()
net = mx.gluon.nn.Dense(HIDDEN_UNITS)
params = net.collect_params()
params.initialize(mx.init.Xavier(), ctx=ctx)

l2loss = mx.gluon.loss.L2Loss()
trainer = mx.gluon.Trainer(params, 'sgd', {'learning_rate': .1})

# The combined list is identical for every epoch and every inner loop, so
# build it once instead of concatenating on each iteration.
all_states = tf_states + mx_states

for epoch in range(10):
    avg_loss = 0.0
    # Train on every ordered pair of states (quadratic in the number of functions).
    for i, x in enumerate(all_states):
        for j, y in enumerate(all_states):
            with mx.autograd.record():
                output_x = net(x)
                output_y = net(y)
                # NOTE(review): when i == j both outputs are identical, so the
                # loss is 0 and the -1 sign has no effect; every other pair is
                # pulled together. Confirm this is the intended objective.
                loss = (-1 if i == j else 1) * l2loss(output_x, output_y)
                loss.backward()
            trainer.step(BATCH_SIZE)
            avg_loss += mx.nd.mean(loss).asscalar()

    # Mean loss over all len(all_states)**2 pairs.
    print(epoch, avg_loss / len(all_states)**2)

    # Spot check: for 5 random TensorFlow functions, list the 3 MXNet
    # functions whose network outputs are closest. The states are looked up
    # by index here; reusing the x / y left over from the training loop would
    # compare the same pair every time.
    for i in np.random.randint(len(tf_states), size=5):
        output_x = net(tf_states[i])
        losses = []
        for j, state in enumerate(mx_states):
            output_y = net(state)
            # Reduce to a Python float, as done for avg_loss above, so the
            # candidates sort numerically.
            distance = mx.nd.mean(l2loss(output_x, output_y)).asscalar()
            losses.append((distance, j))
        print(tf_labels[i], ' ==> ', [mx_labels[j] for _, j in sorted(losses)[:3]])
    
    
    