<a href="https://colab.research.google.com/github/eniktab/MoE_nlp/blob/master/AnEconomistPlease.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

##### Copyright 2019 The TensorFlow Authors.

In [26]:
import tensorflow as tf
import numpy as np
import os
import time
import random


trained_model_path = ["https://storage.googleapis.com/bucket-1-free/%20philip/ArthurGrimes.txt", 
                    "https://storage.googleapis.com/bucket-1-free/%20philip/ShamubeelEaqubs.txt"]
!rm -r /content/training_checkpoints
for i in trained_model_path:
        tf.keras.utils.get_file(
            i.split("/")[-1], i, cache_subdir=os.path.abspath("/content"))

ecolist = ['/content/ArthurGrimes.txt', '/content/ShamubeelEaqubs.txt']
temtxt= random.choice(ecolist)     
# Read, then decode for py2 compat.
text = open(temtxt, 'rb').read().decode(encoding='utf-8')
print('Length of text: {} characters'.format(len(text)))
# The unique characters in the file
vocab = sorted(set(text))
# Lookup tables in both directions; a character's id is its position in the
# sorted vocabulary.
char2idx = dict(zip(vocab, range(len(vocab))))
idx2char = np.array(vocab)
# The whole text re-expressed as an array of character ids.
text_as_int = np.array([char2idx[ch] for ch in text])
# The maximum length sentence you want for a single input in characters
seq_length = 100
# Each example consumes seq_length + 1 characters: the input plus the
# one-step-shifted target.
examples_per_epoch = len(text) // (seq_length + 1)
# Create training examples / targets
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)
sequences = char_dataset.batch(seq_length + 1, drop_remainder=True)
def split_input_target(chunk):
    """Split one chunk into an (input, target) pair offset by one position.

    The input is every element except the last; the target is every element
    except the first, so ``target[i]`` is the element that follows
    ``input[i]`` in the original chunk.

    Args:
        chunk: Any sliceable sequence.

    Returns:
        A 2-tuple ``(chunk[:-1], chunk[1:])``.
    """
    return chunk[:-1], chunk[1:]
# Turn every chunk into an (input, target) pair.
dataset = sequences.map(split_input_target)
# Batch size
BATCH_SIZE = 64
# Buffer size handed to Dataset.shuffle() below.
BUFFER_SIZE = 10000
# Shuffle the examples, then group them into fixed-size batches; a final
# batch with fewer than BATCH_SIZE examples is dropped.
dataset = dataset.shuffle(BUFFER_SIZE)
dataset = dataset.batch(BATCH_SIZE, drop_remainder=True)
# Model hyperparameters: number of distinct characters, embedding width and
# number of recurrent units.
vocab_size = len(vocab)
embedding_dim = 256
rnn_units = 1024
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
    """Assemble the character model: Embedding -> stateful GRU -> Dense.

    Args:
        vocab_size: Number of distinct characters; sizes both the embedding
            table and the output layer.
        embedding_dim: Width of each embedding vector.
        rnn_units: Number of units in the GRU layer.
        batch_size: Batch size fixed into the model's input shape (the
            sequence-length dimension is left as None).

    Returns:
        An uncompiled ``tf.keras.Sequential`` model.  The output layer is
        created without an activation argument; the loss code in this file
        passes ``from_logits=True`` when consuming its output.
    """
    embedding = tf.keras.layers.Embedding(
        vocab_size, embedding_dim, batch_input_shape=[batch_size, None])
    recurrent = tf.keras.layers.GRU(
        rnn_units,
        return_sequences=True,
        stateful=True,
        recurrent_initializer='glorot_uniform')
    projection = tf.keras.layers.Dense(vocab_size)
    return tf.keras.Sequential([embedding, recurrent, projection])
# Instantiate the network at the training batch size.
# NOTE: `model` is assigned again by a second build_model() call further
# down, before the custom training loop runs.
model = build_model(vocab_size, embedding_dim, rnn_units, batch_size=BATCH_SIZE)
def loss(labels, logits):
    """Sparse categorical cross-entropy between integer labels and logits.

    Args:
        labels: Target class ids.
        logits: Unnormalised model outputs (``from_logits=True`` is passed).

    Returns:
        Whatever ``tf.keras.losses.sparse_categorical_crossentropy`` returns
        for these inputs.
    """
    return tf.keras.losses.sparse_categorical_crossentropy(
        y_true=labels, y_pred=logits, from_logits=True)
# Compile the model built above with Adam and the logits-based loss.
model.compile(optimizer='adam', loss=loss)

# Directory where the checkpoints will be saved.  It is named after the
# corpus that was actually picked (temtxt) instead of a fixed author, so
# weights trained on one economist are never filed under another's name.
checkpoint_dir = os.path.join(
    './training_checkpoints',
    os.path.splitext(os.path.basename(temtxt))[0])
# Name of the checkpoint files
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

# Weights-only checkpoint callback using the same filename pattern.
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True
    )

# Build a fresh model for the custom training loop; this replaces the
# compiled instance bound to `model` above.
model = build_model(
    vocab_size=len(vocab),
    embedding_dim=embedding_dim,
    rnn_units=rnn_units,
    batch_size=BATCH_SIZE)

# Optimizer applied by train_step().
optimizer = tf.keras.optimizers.Adam()

@tf.function
def train_step(inp, target):
    """Run one optimisation step on a batch and return its mean loss.

    Uses the module-level ``model`` and ``optimizer``.

    Args:
        inp: Batch of input id sequences fed to the model.
        target: Batch of target ids compared against the model's logits.

    Returns:
        The sparse categorical cross-entropy averaged with
        ``tf.reduce_mean`` over the batch.
    """
    with tf.GradientTape() as tape:
        logits = model(inp)
        per_token_loss = tf.keras.losses.sparse_categorical_crossentropy(
            target, logits, from_logits=True)
        batch_loss = tf.reduce_mean(per_token_loss)

    variables = model.trainable_variables
    gradients = tape.gradient(batch_loss, variables)
    optimizer.apply_gradients(zip(gradients, variables))
    return batch_loss

def generate_text(model, start_string, num_generate=1000, temperature=0.5):
    """Sample text from the model one character at a time.

    Evaluation step (generating text using the learned model).

    Args:
        model: Model called on a batch of one id sequence; its recurrent
            state is reset before sampling starts.
        start_string: Non-empty seed text.  Every character must be a key
            of the module-level ``char2idx`` table.
        num_generate: Number of characters to generate.
        temperature: Divisor applied to the logits before sampling.  Low
            temperature results in more predictable text, higher
            temperature in more surprising text.  Must be positive.

    Returns:
        ``start_string`` followed by the generated characters.

    Raises:
        ValueError: If ``start_string`` is empty or ``temperature`` is not
            positive.
        KeyError: If ``start_string`` contains a character that is missing
            from ``char2idx``.
    """
    if not start_string:
        raise ValueError('start_string must contain at least one character')
    if temperature <= 0:
        raise ValueError('temperature must be positive')

    # Converting our start string to numbers (vectorizing), then adding a
    # leading batch dimension of size one.
    input_eval = tf.expand_dims([char2idx[s] for s in start_string], 0)

    # Generated characters, joined once at the end.
    text_generated = []

    model.reset_states()
    for _ in range(num_generate):
        # Remove the batch dimension from the model output.
        predictions = tf.squeeze(model(input_eval), 0)

        # Sample from the categorical distribution defined by the scaled
        # logits; [-1, 0] keeps only the draw for the last position.
        predictions = predictions / temperature
        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()

        # Pass the predicted character as the next input to the model
        # along with the previous hidden state
        input_eval = tf.expand_dims([predicted_id], 0)

        text_generated.append(idx2char[predicted_id])

    return start_string + ''.join(text_generated)
  
# Training step
EPOCHS = 1500

for epoch in range(EPOCHS):
    start = time.time()

    # resetting the hidden state at the start of every epoch
    model.reset_states()

    # Kept under its own name so the module-level loss() function is not
    # overwritten by a tensor; stays None if the dataset yields no batches.
    batch_loss = None
    for batch_n, (inp, target) in enumerate(dataset):
        batch_loss = train_step(inp, target)

        if batch_n % 100 == 0:
            template = 'Epoch {} Batch {} Loss {}'
            print(template.format(epoch + 1, batch_n, batch_loss))

    # saving (checkpoint) the model every 5 epochs; the file name carries
    # the zero-based epoch index while the log lines are one-based
    if (epoch + 1) % 5 == 0:
        model.save_weights(checkpoint_prefix.format(epoch=epoch))

    print('Epoch {} Loss {:}'.format(epoch + 1, batch_loss))
    print('Time taken for 1 epoch {} sec\n'.format(time.time() - start))

# Persist the weights of the last epoch regardless of the 5-epoch cadence.
model.save_weights(checkpoint_prefix.format(epoch=epoch))

# Locate the most recent checkpoint before replacing the trained model;
# latest_checkpoint() returns None when the directory holds no checkpoint.
latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
if latest_checkpoint is None:
    raise FileNotFoundError(
        'No checkpoint found in {!r}; train the model first.'.format(checkpoint_dir))

# Rebuild the network with a batch size of 1 and restore the saved weights
# into it.
model = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)
model.load_weights(latest_checkpoint)
model.build(tf.TensorShape([1, None]))



Streaming output truncated to the last 5000 lines.
Epoch 251 Batch 0 Loss 0.04342662915587425
Epoch 251 Loss 0.0911531075835228
Time taken for 1 epoch 0.13294124603271484 sec

Epoch 252 Batch 0 Loss 0.042454976588487625
Epoch 252 Loss 0.08568805456161499
Time taken for 1 epoch 0.1099553108215332 sec

Epoch 253 Batch 0 Loss 0.043992504477500916
Epoch 253 Loss 0.09024076163768768
Time taken for 1 epoch 0.11803007125854492 sec

Epoch 254 Batch 0 Loss 0.04118701443076134
Epoch 254 Loss 0.09083137661218643
Time taken for 1 epoch 0.11141562461853027 sec

Epoch 255 Batch 0 Loss 0.04077591374516487
Epoch 255 Loss 0.09088481962680817
Time taken for 1 epoch 0.13321232795715332 sec

Epoch 256 Batch 0 Loss 0.042754337191581726
Epoch 256 Loss 0.08252063393592834
Time taken for 1 epoch 0.11565089225769043 sec

Epoch 257 Batch 0 Loss 0.03834758326411247
Epoch 257 Loss 0.08907270431518555
Time taken for 1 epoch 0.12285661697387695 sec

Epoch 258 Batch 0 Loss 0.039091020822525024
Epoch 25

In [23]:
# Turn the chosen corpus path (".../<Name>.txt") into the prefix
# "<Name> Says: " and print it in front of a sample seeded with "life ".
add = temtxt.split("/")[-1].replace(".txt", " Says: ")
print(add + generate_text(model, start_string=u"life "))



life rarke in Drug use remains widespread and, crucially, the social costs and personal harms are still large. Prork. Worse, exposing people who use drugs to the criminal world and prisons because of drug use, or nefits are important, they are difficult to measure, they may be realised in different timeframes to the world to take a health-focused approach to reduce the harm from the problematic use of drugs. The use and possession of all illicit drugs but to keep their supply illegal average rent is over a quarter of gross income of an average family.  
Renting has not traditionally National and Labour policies – reducing supply. is uncertain.
The fundamentals still suck
Whatever the noise in the housing market, the problem ise in health spending needs to happen now, regardless of other policy.
Conservatively, we estimate t outcomes for people who avoid convictions, and better life outcomes for their children. These are he US suggests that, while recreational use rises after legali