# Pagero 3rd Line GPT

### Import general libraries

In [None]:
# These __future__ features are already the default on Python 3; the line only
# matters if the notebook is ever executed by a Python 2 interpreter.
from __future__ import absolute_import, division, print_function, unicode_literals

import sys

# Disabled install commands recording the TensorFlow versions this notebook was tried with.
# !pip install tensorflow==2.11.0
# !pip install tensorflow==2.14.0
import tensorflow as tf

# Fix TensorFlow's global random seed so repeated runs start from the same state.
tf.random.set_seed(1234)
# Short alias for tf.data's autotune sentinel.
AUTO = tf.data.experimental.AUTOTUNE

# Disabled install command recording the tensorflow-datasets version used.
# !pip install tensorflow-datasets==4.1.0
import tensorflow_datasets as tfds

# NOTE(review): sys, os, re, numpy, pandas and tfds are not referenced in the
# cells visible here; they may be used further down the notebook.
import os
import re
import numpy as np
from time import time
import pandas as pd



# Record the TensorFlow version in the cell output for reproducibility.
print("Tensorflow version {}".format(tf.__version__))

### Initialize TPU

In [None]:
# Try to locate a TPU. The except clause below treats a ValueError from the
# resolver as "no TPU available" and falls back to the default strategy.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Running on TPU {}'.format(tpu.cluster_spec().as_dict()['worker']))
except ValueError:
    tpu = None

if tpu is not None:
    # The TPU system must be connected and initialised before a strategy is built on it.
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    # tf.distribute.experimental.TPUStrategy is deprecated; use the stable
    # tf.distribute.TPUStrategy (available since TF 2.3) instead.
    strategy = tf.distribute.TPUStrategy(tpu)
else:
    # No TPU: use whatever strategy is currently active (the default one on CPU/GPU).
    strategy = tf.distribute.get_strategy()

# The replica count is reused below to scale the global batch size.
print("REPLICAS: {}".format(strategy.num_replicas_in_sync))

### HyperParameters

In [None]:
# Run counter for this experiment (not referenced in the cells visible here).
ATTEMPT = 1
# Maximum sequence length in words (ticket subject + description).
# The accuracy metric below reshapes labels to MAX_LENGTH - 1 columns.
MAX_LENGTH = 20 

# Maximum number of samples to preprocess
MAX_SAMPLES = 70000 # 0 = use all data; otherwise the number of samples to keep

# Cut-off value for words in the dictionary.
# NOTE(review): the name is misspelled ("TRESHOLD"); it is kept as-is because
# other code may reference it.
TRESHOLD_VALUE = 1

# For tf.data.Dataset
# 8 samples per replica, so the global batch grows with the number of replicas.
BATCH_SIZE = 8 * strategy.num_replicas_in_sync
BUFFER_SIZE = 20000 # Size of the shuffle buffer used by Dataset.shuffle

# For Transformer: these are passed straight to transformer(...) when the model is built.
NUM_LAYERS = 2
D_MODEL = 512
NUM_HEADS = 16
# NOTE(review): presumably the hidden width of the feed-forward sub-layer — confirm in transformer/.
UNITS = 512
DROPOUT = 0.3

# Number of passes over the training set given to model.fit.
EPOCHS = 4
# Fraction of the samples (taken from the front of the data) used for training.
TRAINING_RATIO = 0.9

### Prepare dataset

In [None]:
import pickle

# NOTE(security): pickle.load can execute arbitrary code while deserialising.
# Only load a data.pkl that was produced by a trusted preprocessing step.
with open('data.pkl', 'rb') as file:
    loaded_data = pickle.load(file)

# Fail early, with a readable message, if the file lacks an expected entry.
_missing_keys = {'questions', 'answers', 'VOCAB_SIZE'} - set(loaded_data)
if _missing_keys:
    raise KeyError("data.pkl is missing expected keys: {}".format(sorted(_missing_keys)))

tickets = loaded_data['questions']
teams = loaded_data['answers']
VOCAB_SIZE = loaded_data['VOCAB_SIZE']

# Inputs and labels are paired by position when the datasets are built, so a
# length mismatch would otherwise only surface later as an obscure tensor error.
if len(tickets) != len(teams):
    raise ValueError(
        "data.pkl holds {} questions but {} answers".format(len(tickets), len(teams)))

In [None]:
import math

# Index that separates the training prefix from the held-out suffix.
# NOTE(review): the split is purely positional — it assumes data.pkl is not
# ordered in a way that would bias the held-out tail; confirm.
training_len = math.floor(TRAINING_RATIO * len(tickets))

# Everything before the boundary trains the model; the rest is used for validation.
train_questions, test_questions = tickets[:training_len], tickets[training_len:]
train_answers, test_answers = teams[:training_len], teams[training_len:]

In [30]:
# Display the vocabulary size that was read from data.pkl.
VOCAB_SIZE

100

In [None]:
def make_dataset(data_questions, data_answers):
    """Wrap paired inputs and labels in a tf.data pipeline.

    The two arguments are sliced along their first axis and paired by
    position. Each element is a 2-tuple of dicts: ``{'inputs': ...}`` for the
    features and ``{'outputs': ...}`` for the labels.

    The pipeline caches the sliced elements, shuffles them with a buffer of
    ``BUFFER_SIZE``, groups them into batches of ``BATCH_SIZE`` and prefetches
    with an autotuned buffer.

    Args:
        data_questions: Model inputs, one entry per sample.
        data_answers: Labels, aligned with ``data_questions``.

    Returns:
        The resulting ``tf.data.Dataset``.
    """
    features = {'inputs': data_questions}
    labels = {'outputs': data_answers}

    return (
        tf.data.Dataset.from_tensor_slices((features, labels))
        .cache()
        .shuffle(BUFFER_SIZE)
        .batch(BATCH_SIZE)
        .prefetch(tf.data.experimental.AUTOTUNE)
    )

In [None]:
# Build one pipeline per split; both go through the same make_dataset steps.
train_dataset = make_dataset(
    data_questions=train_questions,
    data_answers=train_answers,
)
test_dataset = make_dataset(
    data_questions=test_questions,
    data_answers=test_answers,
)

### Set Vocab size

### Feed to transformer

In [32]:
from LossFunction import loss_function
from transformer.Transformer import transformer
from CustomScheduler import CustomSchedule

# Drop Keras state left over from a previous run of this cell.
tf.keras.backend.clear_session()


def accuracy(y_true, y_pred):
    """Sparse categorical accuracy with labels reshaped to a fixed width.

    Args:
        y_true: Integer labels; reshaped to ``(-1, MAX_LENGTH - 1)`` before
            the comparison.
        y_pred: Model predictions, passed through unchanged.
            NOTE(review): presumably ``(batch, MAX_LENGTH - 1, vocab)`` — confirm
            against the transformer's output layer.

    Returns:
        The tensor produced by ``tf.keras.metrics.sparse_categorical_accuracy``.
    """
    # ensure labels have shape (batch_size, MAX_LENGTH - 1)
    y_true = tf.reshape(y_true, shape=(-1, MAX_LENGTH - 1))
    return tf.keras.metrics.sparse_categorical_accuracy(y_true, y_pred)


# Everything that creates variables (model, optimizer and its schedule) must be
# built inside the strategy scope so that the variables are placed on the
# replicas; creating the optimizer outside the scope can fail under TPUStrategy.
with strategy.scope():
    learning_rate = CustomSchedule(D_MODEL)

    optimizer = tf.keras.optimizers.Adam(
        learning_rate, beta_1=0.9, beta_2=0.98, epsilon=1e-9)

    model = transformer(
        vocab_size=VOCAB_SIZE,
        num_layers=NUM_LAYERS,
        units=UNITS,
        d_model=D_MODEL,
        num_heads=NUM_HEADS,
        dropout=DROPOUT)

    model.compile(optimizer=optimizer, loss=loss_function, metrics=[accuracy])

# NOTE(review): the recorded output of this cell is a TypeError raised by
# MultiHeadAttention.call() (missing 'value'). That comes from the project-local
# transformer package, not from this cell — it has to be fixed there.
model.summary()

TypeError: MultiHeadAttention.call() missing 1 required positional argument: 'value'

### Train the model

In [None]:
# Time only the fit call, so that writing the checkpoint below does not
# inflate the training time reported in the summary.
start_time = time()
history = model.fit(train_dataset, validation_data=test_dataset, epochs=EPOCHS)
training_time = time() - start_time

# Persist the trained weights next to the notebook.
filename = 'weights.h5'
model.save_weights(filename)

### Evaluate model

In [None]:
# NOTE(review): y and r are not used in the lines visible here — presumably
# layout values for a later plotting step; confirm.
y = 0.15
r = 0.02

# Human-readable summary of the run: final metrics, model and dataset settings, timing.
params_print = [
    "ACCURACY :{}".format(history.history['accuracy'][-1]),
    "VAL_ACCURACY :{}".format(history.history['val_accuracy'][-1]),
    "------ TRANSFORMER---------- :",
    "UNITS :{}".format(UNITS),
    "LAYERS :{}".format(NUM_LAYERS),
    "DROPOUT :{}".format(DROPOUT),
    "D_MODEL :{}".format(D_MODEL),
    "NUM_HEADS :{}".format(NUM_HEADS),
    "------ DATASET---------- :",
    "BATCH_SIZE :{}".format(BATCH_SIZE),
    "TRAINING_RATIO :{}".format(TRAINING_RATIO),
    "BUFFER_SIZE :{}".format(BUFFER_SIZE),
    "------ TRAINING---------- :",
    "EPOCHS:{}".format(EPOCHS),
    # Elapsed seconds with millisecond precision.
    "TIME:{:.3f}".format(training_time),
    # Same duration split into whole minutes and remaining seconds.
    "TIME:{} m{} s".format(training_time // 60, training_time % 60),
    "REPLICAS:{}".format(strategy.num_replicas_in_sync),
]