In [1]:
import os
import json
import glob

import imageio
import numpy as np
import moviepy.editor as mp

import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from matplotlib.backends.backend_pgf import FigureCanvasPgf
# Select the PGF backend and register its canvas as the handler for 'pdf'
# output.
mpl.use("pgf")
mpl.backend_bases.register_backend('pdf', FigureCanvasPgf)
# Typeset all figure text with LaTeX.
mpl.rc('text', usetex=True)
# Do not derive the PGF fonts from rcParams, and load the LaTeX `color`
# package in the PGF preamble.
mpl.rc("pgf", rcfonts=False, preamble=r'\usepackage{color}')
# NOTE(review): `%matplotlib inline` presumably replaces the "pgf" backend
# chosen above for the rest of the session — confirm which backend the
# later `plt.savefig` calls actually use.
%matplotlib inline
# Request high-resolution ('retina') inline figure output.
%config InlineBackend.figure_format='retina'

from profit.utils.data_utils.tokenizers import AminoAcidTokenizer

Using TensorFlow backend.


In [2]:
# Create tmp folder to save results (no-op if it already exists)
savedir = "../../results"
os.makedirs(savedir, exist_ok=True)

tokenizer = AminoAcidTokenizer('aa20')
vocab_size = tokenizer.vocab_size
# Table of `vocab_size` RGBA rows sampled from viridis; row i is the colour
# used for vocabulary index i. `plt.get_cmap` is used instead of
# `mpl.cm.get_cmap`, which was removed from recent Matplotlib releases.
viridis = plt.get_cmap('viridis', vocab_size).colors

# Render one PNG frame per JSON dump. Files are visited in sorted-name order
# and numbered from 1; that number is used as the epoch in titles/filenames.
files = sorted(glob.glob("../../dumps/2020-Mar-26-19:57:10/*.json"))
for epoch, file in enumerate(files, start=1):
    # Read the dump and release the file handle before any plotting happens
    with open(file, "r") as fp:
        dump = json.load(fp)

    # Obtain original (encoded) data and latent (z) space
    target_seqs, z = dump["target_seqs"], np.array(dump["z"])
    # One row of token indices per target sequence. The builtin `int` replaces
    # the `np.int` alias, which no longer exists in current NumPy.
    # NOTE(review): the 2-D indexing below assumes every encoded sequence has
    # the same length — confirm against the dump format.
    data = np.array([tokenizer.encode(seq) for seq in target_seqs], dtype=int)

    plt.figure(constrained_layout=True, figsize=(6, 4))
    for vocab_idx in np.unique(data):
        # Find all (row, column) positions in `data` holding this vocab index
        xidx, yidx = np.where(data == vocab_idx)
        # Plot the first two latent components at those positions. The colour
        # is passed via `color=`: the original passed it as `cmap=` without
        # `c`, so it was ignored and default-cycle colours were drawn instead.
        plt.scatter(z[xidx, yidx, 0], z[xidx, yidx, 1], marker="o",
                    alpha=1.0, color=viridis[vocab_idx])
    plt.title(f"Latent ($z$) vector (epoch ${epoch}$)", fontsize=18)
    plt.xlabel("$q(z_{0}|x)$", fontsize=16)
    plt.ylabel("$q(z_{1}|x)$", fontsize=16)
    plt.xticks(fontsize=16)
    plt.yticks(fontsize=16)
    # Fixed axis limits keep every frame on the same scale for the animation
    plt.xlim([-5, 5])
    plt.ylim([-5, 5])
    plt.savefig(os.path.join(savedir, "ours-iter-{:04d}.png".format(epoch)), bbox_inches="tight", dpi=100)
    # Close the figure so open figures do not accumulate across epochs
    plt.close()

In [3]:
# Collect the rendered "ours" frames; the zero-padded epoch in each filename
# makes the sorted order equal to epoch order.
filenames = sorted(glob.glob(os.path.join(savedir, "ours-iter-*.png")))

# Append every frame to a single animated GIF
gif_path = os.path.join(savedir, 'ours.gif')
with imageio.get_writer(gif_path, mode='I', duration=0.2) as writer:
    for filename in filenames:
        writer.append_data(imageio.imread(filename))

# Re-encode the GIF as an MP4. Using the clip as a context manager closes it
# (and the reader it holds on the GIF) once the video has been written;
# previously the clip was left open.
with mp.VideoFileClip(gif_path) as clip:
    clip.write_videofile(os.path.join(savedir, 'ours.mp4'))

t:  10%|█         | 5/50 [00:00<00:00, 47.18it/s, now=None]

Moviepy - Building video ../../results/ours.mp4.
Moviepy - Writing video ../../results/ours.mp4



                                                            

Moviepy - Done !
Moviepy - video ready ../../results/ours.mp4


In [4]:
# Render one PNG frame per JSON dump of this run. Dumps are visited in
# sorted-name order and numbered from 1; that number is shown as the epoch.
files = sorted(glob.glob("../../dumps/2020-Mar-26-21:30:07/*.json"))
for epoch, dump_path in enumerate(files, start=1):
    # Only the latent (z) array is read from each dump
    with open(dump_path, "r") as fp:
        z = np.array(json.load(fp)["z"])

    frame_path = os.path.join(savedir, "cbas-iter-{:04d}.png".format(epoch))

    # Scatter the first two columns of z as black points
    plt.scatter(z[:, 0], z[:, 1], marker="o", alpha=1.0, c='k')
    plt.title(f"Latent ($z$) vector (epoch ${epoch}$)", fontsize=18)
    plt.xlabel("$q(z_{0}|x)$", fontsize=16)
    plt.ylabel("$q(z_{1}|x)$", fontsize=16)
    plt.xticks(fontsize=16)
    plt.yticks(fontsize=16)
    # Fixed axis limits keep every frame on the same scale
    plt.xlim([-5, 5])
    plt.ylim([-5, 5])
    plt.savefig(frame_path, bbox_inches="tight", dpi=100)
    # Discard the implicitly created figure before the next epoch
    plt.close()

In [5]:
# Collect the rendered "cbas" frames; the zero-padded epoch in each filename
# makes the sorted order equal to epoch order.
filenames = sorted(glob.glob(os.path.join(savedir, "cbas-iter-*.png")))

# Append every frame to a single animated GIF
gif_path = os.path.join(savedir, 'cbas.gif')
with imageio.get_writer(gif_path, mode='I', duration=0.2) as writer:
    for filename in filenames:
        writer.append_data(imageio.imread(filename))

# Re-encode the GIF as an MP4. Using the clip as a context manager closes it
# (and the reader it holds on the GIF) once the video has been written;
# previously the clip was left open.
with mp.VideoFileClip(gif_path) as clip:
    clip.write_videofile(os.path.join(savedir, 'cbas.mp4'))

t:  16%|█▌        | 8/50 [00:00<00:00, 76.11it/s, now=None]

Moviepy - Building video ../../results/cbas.mp4.
Moviepy - Writing video ../../results/cbas.mp4



                                                            

Moviepy - Done !
Moviepy - video ready ../../results/cbas.mp4
