<a href="https://colab.research.google.com/github/avlis-MMO/Deep_Learning/blob/main/Book_GNN/Book_GNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the notebook's dependencies in one resolver pass.
# %pip (instead of !pip) installs into the environment of the running kernel,
# and -q keeps the cell output short.
%pip install -q tensorflow numpy matplotlib scipy pandas

# **Load Data**

In [None]:
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
import pandas as pd
import os
import re

In [None]:
# Dataset
# NOTE(review): the path looks like a mounted Google Drive folder on Colab;
# the mount step is not part of this notebook - confirm it runs beforehand.
path_to_file = ("../content/drive/MyDrive/archive/Book.txt")

# Read the whole book into memory. The context manager guarantees the file
# handle is closed, even if reading fails part-way.
with open(path_to_file, encoding="utf8") as text_file:
  text = text_file.read()

# Quick sanity check: total number of characters and the first 50 of them.
print(len(text))
print(text[:50])

# **Tokenize**

In [None]:
# Drop every character that is neither a word character nor whitespace
# (i.e. punctuation and other symbols).
text = re.compile(r'[^\w\s]').sub('', text)
# Lower-casing (text = text.lower()) is currently disabled, so upper- and
# lower-case letters receive separate ids.
vocab = list(set(text))
vocab.sort()
print(vocab)

# Character -> integer id, numbered in sorted vocabulary order
char2idx = dict(zip(vocab, range(len(vocab))))

# Integer id -> character (the inverse lookup, as an indexable array)
idx2char = np.array(vocab)

# Tokenizer: string -> tensor of character ids
def text_to_token(text):
  """Encode `text` character by character using the `char2idx` lookup.

  Returns a tensor holding one integer id per character, in order.
  A character missing from `char2idx` raises KeyError.
  """
  return tf.convert_to_tensor([char2idx[ch] for ch in text])

# Encode the entire cleaned corpus once; training batches are sliced from
# this tensor later on.
text_as_token = text_to_token(text)
print(text_as_token)

In [None]:

# Show the first 13 characters of the corpus next to their token ids.
print("Text:", text[:13])
print("Token:", text_to_token(text[:13]))

In [None]:
def int_to_text(ints):
  """Decode token ids back into a string using the `idx2char` lookup.

  Args:
    ints: integer ids as a NumPy array, a list, or any object exposing a
      callable `.numpy()` (such as an eager tensor).

  Returns:
    The decoded characters joined into one string.
  """
  # Convert explicitly instead of wrapping the call in a bare `except`, so a
  # genuine conversion failure surfaces with its original error rather than
  # as a confusing indexing error further down.
  to_numpy = getattr(ints, "numpy", None)
  if callable(to_numpy):
    ints = to_numpy()
  return ''.join(idx2char[ints])

# **Create Training Examples**

In [None]:
# Number of characters in each input/target window
seq_length = 100
# Number of windows drawn per training batch
batch_size = 32
# Number of distinct characters; sizes the embedding input and the output layer
vocab_size = len(vocab)
# Width of the character embedding vectors
embedding_dim = 256
# Number of units in the LSTM layer
rnn_units = 1024
# Count of non-overlapping (seq_length + 1)-character chunks in the corpus;
# the training loop uses it to size an epoch
examples_per_epoch = len(text)//(seq_length+1)

In [None]:
def split_input_target(vect_text, seq_length, batch_size):
  """Sample a random batch of (input, target) windows from a token sequence.

  Every target window is its input window shifted one position to the right,
  so at each time step the target is the token that follows the input token.

  Args:
    vect_text: 1-D sequence of integer token ids (array-like or eager tensor).
    seq_length: number of tokens in each input/target window.
    batch_size: number of windows to draw; start positions are sampled
      independently, so windows may overlap or repeat.

  Returns:
    Tuple (x_batch, y_batch) of NumPy arrays, each of shape
    [batch_size, seq_length].

  Raises:
    ValueError: if vect_text holds fewer than seq_length + 1 tokens.
  """
  # Convert once up front; slicing a tensor sample-by-sample in a Python loop
  # is far slower than a single NumPy fancy-index.
  tokens = np.asarray(vect_text)

  # A window starting at i needs tokens i .. i + seq_length (inclusive) to
  # cover the shifted target, so valid starts are 0 .. len - seq_length - 1.
  num_starts = len(tokens) - seq_length
  if num_starts < 1:
    raise ValueError(
        "vect_text needs at least seq_length + 1 = {} tokens, got {}".format(
            seq_length + 1, len(tokens)))

  idx = np.random.choice(num_starts, batch_size)

  # Broadcasting builds a [batch_size, seq_length] grid of positions:
  # row b holds idx[b], idx[b] + 1, ..., idx[b] + seq_length - 1.
  window = idx[:, None] + np.arange(seq_length)

  x_batch = tokens[window]      # e.g. "Hous"
  y_batch = tokens[window + 1]  # e.g. "ouse"

  return x_batch, y_batch

# Draw one sample batch; x and y are reused by the later cells that inspect
# the untrained model's predictions and loss.
x, y = split_input_target(text_as_token, seq_length, batch_size)



In [None]:
# Inspect the sampled batch: token ids of the inputs, then of the targets.
print(x)
print(y)

# **Building Model**

In [None]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
  """Build the character-level model: Embedding -> LSTM -> Dense.

  Args:
    vocab_size: number of distinct tokens; sizes both the embedding table and
      the output layer.
    embedding_dim: width of the embedding vectors.
    rnn_units: number of LSTM units.
    batch_size: batch dimension fixed into the model's input shape; the
      sequence length is left variable.

  Returns:
    An uncompiled tf.keras.Sequential model that outputs one raw score
    (logit) per vocabulary entry at every time step.
  """
  model = tf.keras.Sequential([
      tf.keras.layers.Embedding(vocab_size, embedding_dim, batch_input_shape = [batch_size, None]),
      tf.keras.layers.LSTM(rnn_units, return_sequences=True, recurrent_initializer='glorot_uniform',  recurrent_activation='sigmoid'),
      # No activation on the output layer: the loss is computed with
      # from_logits=True and sampling uses tf.random.categorical, both of
      # which expect unbounded logits. A ReLU here would clamp every negative
      # logit to zero and block gradients through those units.
      tf.keras.layers.Dense(vocab_size)
                               ])
  return model

# Build the training model with the configured batch size and print its
# layer-by-layer summary.
model = build_model(vocab_size, embedding_dim, rnn_units, batch_size)
model.summary()

# **Creating a Loss Function**

In [None]:
# Run the sample batch through the model as built so far and show the
# output shape.
example_batch_predictions = model(x)
print(example_batch_predictions.shape)

# For the first sequence in the batch, sample one character id per time step
# from the model's output scores, then drop the trailing sample axis.
sampled_indices = tf.squeeze(
    tf.random.categorical(example_batch_predictions[0], num_samples=1),
    axis=-1,
).numpy()

# Decode the input and the sampled predictions back into text.
print("Input:\n", int_to_text(x[0]), end="\n\n")
print("Next Char Predictions:\n", int_to_text(sampled_indices))

In [None]:
# Print the raw model output for the whole sample batch
# (one score per vocabulary entry, for every time step of every sequence).
print(example_batch_predictions)

In [None]:
# Print the output for the first sequence in the batch only
# (all of its time steps).
print(example_batch_predictions[0])

In [None]:
# Print the output at the first time step of the first sequence:
# a single vector of scores, one per vocabulary entry.
print(example_batch_predictions[0][0])

In [None]:
def loss(labels, logits):
  """Sparse categorical cross-entropy between integer labels and raw logits.

  `logits` are treated as unnormalised scores (from_logits=True). The value
  from the functional Keras loss is returned as-is; no further reduction is
  applied here.
  """
  per_step_loss = tf.keras.losses.sparse_categorical_crossentropy(
      y_true=labels, y_pred=logits, from_logits=True)
  return per_step_loss

# Loss of the model's predictions on the sample batch, as a sanity check
# before training.
example_batch_loss = loss(y, example_batch_predictions)

# Report the prediction shape and the loss averaged over all returned values.
print("Prediction shape: ", example_batch_predictions.shape, " # (batch_size, sequence_length, vocab_size)")
print("scalar_loss:      ", example_batch_loss.numpy().mean())

# **Compile Model**

In [None]:
# Adam optimizer with a learning rate of 0.001
optimizer = tf.keras.optimizers.Adam(0.001)

# Create checkpoints
# Directory that receives the per-epoch weight files
checkpoint_dir = './training_checkpoints'

# Filename template; "{epoch}" is filled in with the epoch number on save
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

# Keras callback that would save weights-only checkpoints during model.fit().
# NOTE(review): nothing in the visible notebook passes this callback to a
# fit() call - the custom training loop saves weights directly - so it
# appears to be unused; confirm before removing.
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True
)
# Shape of the sample input batch
print(x.shape)

# **Train Model**

In [None]:
epochs = 100

# Every step consumes batch_size sequences, so one pass over roughly
# examples_per_epoch sequences takes examples_per_epoch // batch_size steps
# (at least one, so tiny corpora still train).
steps_per_epoch = max(1, examples_per_epoch // batch_size)

# Custom training loop
for epoch in range(epochs):
    # The LSTM is built without stateful=True, so no recurrent state carries
    # over between batches and nothing has to be reset at the epoch start.

    for iteration in range(steps_per_epoch):
        # Draw a fresh random batch of (input, target) windows
        x_batch, y_batch = split_input_target(text_as_token, seq_length, batch_size)

        with tf.GradientTape() as tape:
            # Forward pass
            predictions = model(x_batch, training=True)

            # loss() returns one value per predicted character; reduce it to
            # a scalar so the gradient is that of the mean loss and the value
            # can be formatted as a single number below.
            batch_loss = tf.reduce_mean(loss(y_batch, predictions))

        # Compute gradients of the mean loss w.r.t. all trainable weights
        grads = tape.gradient(batch_loss, model.trainable_variables)

        # Update model parameters
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

    # Save model weights after each epoch
    model.save_weights(checkpoint_prefix.format(epoch=epoch))

    # Print epoch-wise information (mean loss of the epoch's last batch)
    print("Epoch {}/{} - Loss: {:.4f}".format(epoch + 1, epochs, batch_loss.numpy()))

# Training complete
print("Training finished.")

# **Loading the Model**

In [None]:
# Rebuild the same architecture with a batch size of 1 for text generation;
# this rebinds `model`, replacing the training model.
model = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)

In [None]:
# tf.train.latest_checkpoint returns None when checkpoint_dir contains no
# checkpoint; fail early with a readable message instead of letting
# load_weights(None) raise an opaque error.
latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
if latest_checkpoint is None:
  raise FileNotFoundError(
      "No checkpoint found in {!r}; run the training cell first.".format(checkpoint_dir))

# Restore the most recently saved weights into the batch-size-1 model.
model.load_weights(latest_checkpoint)
model.build(tf.TensorShape([1, None]))

In [None]:
def generate_text(model, start_string, num_generate=800, temperature=2.0,
                  context_length=100):
  """Generate text one character at a time, seeded with `start_string`.

  The model is not stateful, so the running context (the seed plus everything
  generated so far, capped at `context_length` characters) is fed back in on
  every step. Feeding only the last character would make each prediction
  depend on that single character alone.

  Args:
    model: model built for batch size 1 that maps a [1, time] batch of token
      ids to [1, time, vocab] logits.
    start_string: non-empty seed text; every character must be in the
      vocabulary.
    num_generate: number of characters to generate.
    temperature: positive divisor applied to the logits before sampling;
      lower values give more predictable text, higher values more surprising
      text.
    context_length: positive maximum number of most recent characters fed to
      the model on each step.

  Returns:
    `start_string` followed by the generated characters.

  Raises:
    ValueError: if `start_string` is empty or `temperature` is not positive.
  """
  if not start_string:
    raise ValueError("start_string must contain at least one character")
  if temperature <= 0:
    raise ValueError("temperature must be positive")

  # Shape [1, len(start_string)]: a batch holding the single seed sequence.
  input_eval = text_to_token(start_string)
  input_eval = tf.expand_dims(input_eval, 0)

  text_generated = []

  model.reset_states()
  for _ in range(num_generate):
    predictions = model(input_eval)

    # Only the final time step predicts the next, not yet seen character.
    last_logits = predictions[:, -1, :] / temperature

    # Sample the next character id; `sampled` has shape [1, 1].
    sampled = tf.random.categorical(last_logits, num_samples=1)
    predicted_id = sampled[0, 0].numpy()

    # Append the new id (cast to the context's dtype) and keep only the most
    # recent `context_length` ids as the next input.
    input_eval = tf.concat(
        [input_eval, tf.cast(sampled, input_eval.dtype)], axis=-1)
    input_eval = input_eval[:, -context_length:]

    text_generated.append(idx2char[predicted_id])
  return (start_string + ''.join(text_generated))

In [None]:
# Ask the user for a seed string and print the text generated from it.
# Characters missing from the vocabulary make text_to_token raise KeyError.
imp = input("Type a starting string: ")
print(generate_text(model, imp))