In [1]:
"""
Imports
"""
import numpy as np
import tensorflow as tf
%matplotlib inline
import matplotlib.pyplot as plt
import time
import os
import pickle
#from tensorflow.models.rnn.ptb import reader

<h1> Generate new text </h1>

In [2]:
# Parse the whitespace-separated embedding file "vectors.txt": on every line
# the first field is the token and the remaining fields are the components of
# its vector.
l_key = []
l_val = []
with open("vectors.txt") as f:
    for line in f:
        line_content = line.split()
        if not line_content:
            # A blank or whitespace-only line splits to an empty list; skip it
            # instead of failing with IndexError on line_content[0].
            continue
        key = line_content[0]
        val = [float(x) for x in line_content[1:]]
        l_key.append(key)
        l_val.append(np.asarray(val))
# Token -> vector lookup; if a token occurs more than once, the last
# occurrence in the file wins.
dictionary = dict(zip(l_key, l_val))

In [3]:
# NOTE: pickle.load can execute arbitrary code while deserialising, so
# 'title_file' must come from a trusted source.
with open('title_file', 'rb') as f:
    title_ph_filtered = pickle.load(f)

# Flatten the nested collection (one inner sequence per entry) into a single
# flat list, keeping the original order.
datawords = []
for title_words in title_ph_filtered:
    datawords.extend(title_words)

In [6]:
# Distinct tokens of the corpus; the vocabulary is exactly this set.
words_unique = set(datawords)
vocab = words_unique
vocab_size = len(vocab)

# Integer ids are assigned in the iteration order of the set.
# NOTE(review): set iteration order is arbitrary, so ids are only meaningful
# together with whatever was trained using the same ordering - confirm that
# this matches the mapping used when the checkpoint was written.
idx_to_vocab = {index: token for index, token in enumerate(vocab)}
vocab_to_idx = {token: index for index, token in idx_to_vocab.items()}

# The corpus re-expressed as a flat list of integer ids.
data_temp = [vocab_to_idx[token] for token in datawords]

# The nested title structure is not needed once the corpus has been encoded.
del title_ph_filtered

In [7]:
# Row i holds the pretrained vector of idx_to_vocab[i], so row numbers line up
# with the integer ids used for the embedding lookup. A plain 2-D ndarray is
# built here; NumPy no longer recommends the np.matrix class.
# Raises KeyError if a vocabulary token has no entry in `dictionary`.
emb_matrix = np.array([dictionary[idx_to_vocab[i]] for i in range(vocab_size)])

In [8]:
# Alias the id-encoded corpus under the global name `data`, which is the name
# gen_epochs reads when it builds batches.
data=data_temp

In [9]:
"""
Utility functions: epoch generator, graph reset and model graph construction
"""

def gen_epochs(n, num_steps, batch_size):
    """Lazily yield one batch iterator per epoch, `n` times in total.

    Each yielded item is ptb_iterator(data, batch_size, num_steps), where
    `data` is the module-level id sequence.

    NOTE(review): ptb_iterator is not defined in the visible notebook (the
    `reader` import in the first cell is commented out) - it must be provided
    before this generator is iterated.
    """
    for _ in range(n):
        epoch_batches = ptb_iterator(data, batch_size, num_steps)
        yield epoch_batches

def reset_graph():
    """Close the global session `sess` (if one exists and is truthy), then
    reset TensorFlow's default graph."""
    open_session = globals().get('sess')
    if open_session:
        open_session.close()
    tf.reset_default_graph()

def build_graph(
    cell_type = None,
    num_weights_for_custom_cell = 5,
    state_size = 100,
    num_classes = vocab_size,
    batch_size = 32,
    num_steps = 200,
    num_layers = 2,
    build_with_dropout=True,
    temperature = 1,
    learning_rate = 5e-4,
    keep_prob = 0.8):
    """Build the RNN language-model graph and return handles to its main nodes.

    The default graph is reset first, so any previously built graph is
    discarded.

    Args:
        cell_type: 'Custom', 'GRU', 'LSTM' or 'LN_LSTM'; any other value
            (including None) selects a BasicRNNCell.
        num_weights_for_custom_cell: forwarded to CustomCell when cell_type
            is 'Custom'.
        state_size: size passed to the cell constructor; also the first
            dimension of the softmax weight matrix.
        num_classes: number of output classes. The default is the value of
            `vocab_size` at the time this function is defined.
        batch_size: first dimension of the input/label placeholders.
        num_steps: second dimension of the input/label placeholders.
        num_layers: number of times the cell is repeated in the MultiRNNCell.
        build_with_dropout: if true, a DropoutWrapper with `input_keep_prob`
            is applied to the cell before stacking and a second one with
            `output_keep_prob` is applied to the stack.
        temperature: the logits are divided by this value before both the
            softmax and the loss.
        learning_rate: learning rate of the Adam optimizer.
        keep_prob: keep probability used by both DropoutWrappers; ignored
            when build_with_dropout is false.

    Returns:
        dict with the keys 'x', 'y', 'init_state', 'final_state',
        'total_loss', 'train_step', 'preds' and 'saver'.
    """

    reset_graph()

    x = tf.placeholder(tf.int32, [batch_size, num_steps], name='input_placeholder')
    y = tf.placeholder(tf.int32, [batch_size, num_steps], name='labels_placeholder')

    # Explicit float32 so that an integer keep_prob (e.g. 1) still yields a
    # float constant, as the literal 0.8 did.
    dropout = tf.constant(keep_prob, dtype=tf.float32)

    # The embedding variable starts from the pretrained matrix; it is created
    # as a variable, not a constant.
    init_emb = tf.constant(emb_matrix.astype(np.float32))
    embeddings = tf.get_variable('embedding_matrix', initializer=init_emb)

    rnn_inputs = tf.nn.embedding_lookup(embeddings, x)

    # NOTE(review): CustomCell and LayerNormalizedLSTMCell are not defined in
    # the visible notebook; those two cell types need them to be in scope.
    if cell_type == 'Custom':
        cell = CustomCell(state_size, num_weights_for_custom_cell)
    elif cell_type == 'GRU':
        cell = tf.nn.rnn_cell.GRUCell(state_size)
    elif cell_type == 'LSTM':
        cell = tf.nn.rnn_cell.LSTMCell(state_size, state_is_tuple=True)
    elif cell_type == 'LN_LSTM':
        cell = LayerNormalizedLSTMCell(state_size)
    else:
        cell = tf.nn.rnn_cell.BasicRNNCell(state_size)

    if build_with_dropout:
        cell = tf.nn.rnn_cell.DropoutWrapper(cell, input_keep_prob=dropout)

    # NOTE(review): the same cell object is repeated for every layer. How the
    # layers' variables are created/shared in that case depends on the
    # TensorFlow version; it is left unchanged so the variable layout keeps
    # matching existing checkpoints.
    if cell_type in ('LSTM', 'LN_LSTM'):
        cell = tf.nn.rnn_cell.MultiRNNCell([cell] * num_layers, state_is_tuple=True)
    else:
        cell = tf.nn.rnn_cell.MultiRNNCell([cell] * num_layers)

    if build_with_dropout:
        cell = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=dropout)

    init_state = cell.zero_state(batch_size, tf.float32)
    rnn_outputs, final_state = tf.nn.dynamic_rnn(cell, rnn_inputs, initial_state=init_state)

    with tf.variable_scope('softmax'):
        W = tf.get_variable('W', [state_size, num_classes])
        b = tf.get_variable('b', [num_classes], initializer=tf.constant_initializer(0.0))

    # Flatten the outputs to rows of width state_size and the labels to one
    # dimension, so that one matmul scores every position.
    rnn_outputs = tf.reshape(rnn_outputs, [-1, state_size])
    y_reshaped = tf.reshape(y, [-1])

    logits = (tf.matmul(rnn_outputs, W) + b)/temperature

    predictions = tf.nn.softmax(logits)

    total_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,labels=y_reshaped))
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(total_loss)

    return dict(
        x = x,
        y = y,
        init_state = init_state,
        final_state = final_state,
        total_loss = total_loss,
        train_step = train_step,
        preds = predictions,
        saver = tf.train.Saver()
    )

In [10]:
def generate_characters(g, checkpoint, num_chars, prompt='A', pick_top_chars=None):
    """Sample a sequence of vocabulary items from a restored model.

    Args:
        g: graph dict as returned by build_graph; the entries 'x',
            'init_state', 'final_state', 'preds' and 'saver' are used. The
            input placeholder is fed a 1x1 batch, so the graph has to be
            built with batch_size=1 and num_steps=1.
        checkpoint: checkpoint path handed to the saver's restore().
        num_chars: number of items to sample after the prompt.
        prompt: vocabulary item that seeds the generation; must be a key of
            vocab_to_idx.
        pick_top_chars: if not None, sampling is restricted to this many
            most probable items (renormalised); otherwise the full predicted
            distribution is sampled.

    Returns:
        The prompt followed by the sampled items, joined by single spaces.
        The same text is also printed.

    Raises:
        KeyError: if `prompt` is not in the vocabulary.
    """
    # Fail early, before a session is opened and a checkpoint restored.
    if prompt not in vocab_to_idx:
        raise KeyError("prompt %r is not in the vocabulary" % (prompt,))

    with tf.Session() as sess:
        # tf.initialize_all_variables is deprecated in favour of this call.
        sess.run(tf.global_variables_initializer())
        g['saver'].restore(sess, checkpoint)

        state = None
        current_char = vocab_to_idx[prompt]
        chars = [current_char]

        for _ in range(num_chars):
            feed_dict = {g['x']: [[current_char]]}
            if state is not None:
                # Carry the recurrent state over from the previous step; on
                # the first step the graph's own initial state is used.
                feed_dict[g['init_state']] = state

            preds, state = sess.run([g['preds'], g['final_state']], feed_dict)

            p = np.squeeze(preds)
            if pick_top_chars is not None:
                # Zero everything except the pick_top_chars largest
                # probabilities, then renormalise so they sum to one.
                p[np.argsort(p)[:-pick_top_chars]] = 0
                p = p / np.sum(p)
            current_char = np.random.choice(vocab_size, 1, p=p)[0]

            chars.append(current_char)

    # Build the text once from a concrete sequence: a lazy map() object would
    # be exhausted by the first join, leaving an empty return value.
    text = " ".join(idx_to_vocab[idx] for idx in chars)
    print(text)
    return text

In [166]:
# The graph is rebuilt with batch_size=1 and num_steps=1 because
# generate_characters feeds one token at a time.
# Dropout is switched off for sampling: with the default
# build_with_dropout=True, units would still be dropped at random while
# generating. NOTE(review): this relies on DropoutWrapper creating no
# variables, so the checkpoint restores unchanged - confirm on the installed
# TensorFlow version.
g = build_graph(cell_type='GRU', num_steps=1, batch_size=1, temperature=1.1,
                build_with_dropout=False)
out=generate_characters(g, "saves_title/abstract_epochs", 5000, prompt='a', pick_top_chars=3)

Instructions for updating:
Use `tf.global_variables_initializer` instead.
INFO:tensorflow:Restoring parameters from saves_title/abstract_epochs
a holographic superconductor model with magnetic field # the spectrum of the su sector in su yang mills higgs theory # the supersymmetric dual of supersymmetric quantum mechanics # on the quantum hall effect on a torus # on the classification of vacua of su yang mills theory # on the relation between the mandelstam and a gauge couplings # the type iib string on the conifold of pp # quantum field theory # the open spin chain # a note on a class of the radion in the world sheet # the brane world # the quantum information of the cosmological constant # a new perspective on the cosmological constant # the cosmological constant problem and a quantum field theory # the effective average action for the effective action of the supermembrane # on the effective action of string field theory in de sitter backgrounds # a note on the zakharov shabat theory 

Manipulate titles

In [172]:
import re


def _whole_word(phrase):
    """Regex source matching `phrase` only as complete whitespace-delimited word(s)."""
    return r"(?<!\S)" + re.escape(phrase) + r"(?!\S)"


def _word_start(prefix):
    """Regex source matching `prefix` only where a whitespace-delimited word begins."""
    return r"(?<!\S)" + re.escape(prefix)


# Ordered (compiled pattern, replacement) pairs. Order matters: longer names
# are listed before the shorter names they contain (e.g. the three-name
# Kazakov rule precedes the two-name one). No replacement contains a
# backslash, so the replacement strings are inserted literally.
_TITLE_RULES = [(re.compile(source), replacement) for source, replacement in (
    (_word_start("w "), "W-"),
    (_whole_word("rg"), "RG"),
    (re.escape("feynman"), "Feynman"),
    # NOTE(review): these two rules also swallow the surrounding spaces
    # ("a x b" -> "aXb"); the intended output is unclear, so the behaviour is
    # kept as it was - confirm.
    (re.escape(" y "), "Y"),
    (re.escape(" x "), "X"),
    (re.escape("hamilton jacobi"), "Hamilton-Jacobi"),
    (re.escape("klt"), "KLT"),
    (re.escape("cft"), "CFT"),
    (_whole_word("ads"), "AdS"),
    (re.escape("sachdev ye kitaev"), "Sachdev-Ye-Kitaev"),
    (re.escape("klebanov strassler"), "Klebanov-Strassler"),
    (re.escape("wilson"), "Wilson"),
    (re.escape("gribov"), "Gribov"),
    (re.escape("zwanziger"), "Zwanziger"),
    (re.escape("brst"), "BRST"),
    (re.escape("coulomb"), "Coulomb"),
    (re.escape("dirac"), "Dirac"),
    # Anchored at a word start so that "and branes" or "quantum theory" are
    # not mistaken for "d brane" / "m theory".
    (_word_start("d brane"), "D-brane"),
    (_word_start("p brane"), "p-brane"),
    (re.escape("chern simons"), "Chern-Simons"),
    (re.escape("cardy verlinde"), "Cardy-Verlinde"),
    (_word_start("gl "), "GL-"),
    (_word_start("o "), "O-"),
    (_word_start("m theory"), "M-theory"),
    (re.escape("atiyah manton"), "Atiyah-Manton"),
    (re.escape("space time"), "space-time"),
    (re.escape("faddeev jackiw"), "Faddeev-Jackiw"),
    (re.escape("skyrme faddeev"), "Skyrme-Faddeev"),
    (re.escape("kazakov migdal kontsevich"), "Kazakov-Migdal-Kontsevich"),
    (re.escape("knizhnik zamolodchikov"), "Knizhnik-Zamolodchikov"),
    (_whole_word("gr"), "GR"),
    (_whole_word("s matrix"), "S-matrix"),
    (re.escape("planck"), "Planck"),
    (_whole_word("b"), "B"),
    (_whole_word("klein"), "Klein"),
    (_whole_word("abj"), "ABJ"),
    (re.escape("yang mills"), "Yang-Mills"),
    (_whole_word("brans dicke"), "Brans-Dicke"),
    (_whole_word("sym"), "SYM"),
    (_whole_word("iib"), "IIB"),
    (_whole_word("iia"), "IIA"),
    (_whole_word("casimir"), "Casimir"),
    (_whole_word("pp wave"), "$pp$-wave"),
    (_whole_word("born infeld"), "Born-Infeld"),
    (_whole_word("birkhoff"), "Birkhoff"),
    (_whole_word("kazakov migdal"), "Kazakov-Migdal"),
    (re.escape(" : "), ": "),
    (re.escape(" , "), ", "),
    (re.escape(" / "), "/"),
)]


def manipulate_string_title(s):
    """Turn a lower-case, space-separated title into display form.

    The title is lower-cased, a fixed list of physics terms is re-cased and
    hyphenated (e.g. "yang mills" -> "Yang-Mills", "ads" -> "AdS",
    "m theory" -> "M-theory"), spaces around ":", "," and "/" are tidied, and
    finally the first character is upper-cased.

    Rules for short tokens ("w", "o", "b", "rg", "ads", "d brane",
    "m theory", ...) only apply to whole whitespace-delimited words, so they
    also match at the very start or end of the title and never inside a
    longer word such as "quantum theory" or "and branes". Rules for
    distinctive names ("feynman", "wilson", "cft", ...) match anywhere.

    Args:
        s: the title as a string of space-separated words.

    Returns:
        The formatted title; an empty string stays empty.
    """
    # Rewrite on the lower-cased text and capitalise last, so that a term at
    # the very start of the title is still recognised by its lower-case rule.
    s = s.lower()
    for pattern, replacement in _TITLE_RULES:
        s = pattern.sub(replacement, s)
    return s[:1].upper() + s[1:]

In [173]:
import re

# `out` is one long string of generated words in which "#" separates titles.
# The first and the last piece are discarded; NOTE(review): presumably because
# the first starts with the prompt and the last is cut off by the word limit -
# confirm.
# Each remaining piece is stripped of surrounding whitespace (rather than
# blindly removing one character from each end), and pieces that end up empty
# are skipped so no blank titles are produced.
out_final = [
    manipulate_string_title(title)
    for title in (piece.strip() for piece in out.split("#")[1:-1])
    if title
]

In [175]:
# Write the formatted titles to 'titles_out', one per line.
with open('titles_out', 'w') as f:
    f.writelines("%s\n" % title for title in out_final)