In [4]:
import pandas as pd
import numpy as np
from tqdm import tqdm

import tensorflow as tf

In [6]:
import sys
sys.path.append('../src')

from positionalencoding import PositionalEncoding
from encoder import Encoder
from transformer import Transformer

SyntaxError: invalid syntax. Perhaps you forgot a comma? (encoder.py, line 13)

In [1]:
# NOTE(review): this cell's execution count is lower than the import cells above it,
# i.e. the notebook was run out of order — confirm it survives Restart & Run All.

# CSV files handed to load_preprocess(); each is read for its 'inputs' and 'targets' columns.
TRAIN_PADDED = '../data/clean/train_padded.csv'
VAL_PADDED = '../data/clean/val_padded.csv'

# Default batch size used by load_preprocess().
BATCH_SIZE = 32
# Passed to Transformer as both max_len_input and max_len_output.
# presumably the length every sentence was padded to — verify against the data-cleaning step.
MAX_SENTENCE = 50

In [7]:
#load and preprocess
def load_preprocess(df_file_path, batch_size=BATCH_SIZE):
    """Load a CSV of input/target pairs into a batched, prefetched tf.data.Dataset.

    Args:
        df_file_path: path to a CSV file containing 'inputs' and 'targets' columns.
        batch_size: number of examples per batch (defaults to BATCH_SIZE).

    Returns:
        A tf.data.Dataset yielding (inputs, targets) batches cast to tf.int64.

    Raises:
        KeyError: if the CSV lacks an 'inputs' or 'targets' column.
    """
    # load data from csv -- parse the file once instead of once per column
    frame = pd.read_csv(df_file_path)
    input_data = frame['inputs'].tolist()
    target_data = frame['targets'].tolist()

    # create tensorflow dataset
    dataset = tf.data.Dataset.from_tensor_slices((input_data, target_data))

    # processing function
    def preprocess(dat_input, dat_target):
        # NOTE(review): assumes both columns already hold numeric values; if the
        # padded sequences are stored as strings (e.g. "[1, 2, 0]") this cast
        # will fail and the column must be parsed first -- confirm against the CSV.
        dat_input = tf.cast(dat_input, tf.int64)
        dat_target = tf.cast(dat_target, tf.int64)
        return dat_input, dat_target

    # apply processing function
    dataset = dataset.map(preprocess)

    # batch and prefetch
    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(tf.data.AUTOTUNE)

    return dataset

In [8]:
# Build the training and validation pipelines (in that order) with the default batch size.
train_dataset, val_dataset = [
    load_preprocess(csv_path) for csv_path in (TRAIN_PADDED, VAL_PADDED)
]

2024-03-25 15:14:52.472000: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2 Pro
2024-03-25 15:14:52.472034: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 32.00 GB
2024-03-25 15:14:52.472042: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 10.67 GB
2024-03-25 15:14:52.472064: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-03-25 15:14:52.472093: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [9]:
def create_mask(input, target):
    """Build the padding and look-ahead masks for a batch of token-id sequences.

    Token id 0 is treated as padding. In every returned mask a value of 1.0
    marks a position to hide and 0.0 a position to keep.
    NOTE(review): confirm the attention layers in Transformer use the same
    "1 means masked" convention.

    Args:
        input: rank-2 tensor of encoder token ids, indexed as (batch, position).
        target: rank-2 tensor of decoder token ids, indexed as (batch, position).

    Returns:
        (encoding_padding_mask, future_mask, decoding_padding_mask) where
        encoding_padding_mask is (batch, 1, 1, input_len), decoding_padding_mask
        is (batch, 1, 1, target_len) and future_mask is
        (batch, 1, target_len, target_len).
    """
    # encoding mask
    encoding_padding_mask = tf.cast(tf.math.equal(input, 0), tf.float32)
    encoding_padding_mask = encoding_padding_mask[:, tf.newaxis, tf.newaxis, :]

    # decoding mask
    # NOTE(review): this is derived from `target`; if the decoder's cross-attention
    # attends over encoder outputs, the mask it needs is usually the encoder
    # padding mask -- verify how Transformer consumes the third return value.
    decoding_padding_mask = tf.cast(tf.math.equal(target, 0), tf.float32)
    decoding_padding_mask = decoding_padding_mask[:, tf.newaxis, tf.newaxis, :]

    # future mask
    # The previous code asked tf.ones for a shape containing None (invalid) and
    # used the lower triangle directly, which flagged past positions instead of
    # future ones. Build a (target_len, target_len) matrix from the runtime
    # length and invert the lower triangle so only strictly-future positions are 1.
    target_len = tf.shape(target)[1]
    look_ahead = 1.0 - tf.linalg.band_part(tf.ones((target_len, target_len)), -1, 0)
    # Broadcasts to (batch, 1, target_len, target_len): hidden if padded OR in the future.
    future_mask = tf.maximum(decoding_padding_mask, look_ahead)

    return encoding_padding_mask, future_mask, decoding_padding_mask

In [10]:
# parameters
# Model-size settings, passed positionally to Transformer in this order.
num_layers = 2
# NOTE(review): multi-head attention implementations usually require
# embedding_dimension % num_heads == 0; 10 is not divisible by 4 -- confirm
# against the Transformer / Encoder implementation in ../src.
embedding_dimension = 10
num_heads = 4
ff_dimension = 4
# presumably upper bounds on the token ids found in the 'inputs' / 'targets'
# columns -- verify against the tokenizer's vocabulary.
input_vocab_size = 900
target_vocab_size = 900
# Passed to tf.keras.optimizers.Adam.
# NOTE(review): 0.2 is far above Adam's default of 0.001 and is likely to make
# training diverge -- confirm this value is intentional.
learning_rate = 0.2
# Number of passes over train_dataset in the training loop.
num_epochs = 200

In [11]:
# Instantiate the network, the per-token loss and the optimizer.
model = Transformer(
    num_layers,
    embedding_dimension,
    num_heads,
    ff_dimension,
    input_vocab_size,
    target_vocab_size,
    max_len_input=MAX_SENTENCE,
    max_len_output=MAX_SENTENCE,
)

# reduction='none' keeps one loss value per token instead of a single scalar.
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
    reduction='none',
    from_logits=True,
)

optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

NameError: name 'Transformer' is not defined

In [None]:
# training loop
def _masked_loss_sum(labels, logits):
    """Return (summed loss over non-padding tokens, number of non-padding tokens).

    loss_object is configured with reduction='none', so it yields one loss value
    per token; positions whose label is 0 (the padding id used by create_mask)
    are zeroed out so they influence neither the gradients nor the metric.
    """
    per_token_loss = loss_object(labels, logits)
    keep = tf.cast(tf.math.not_equal(labels, 0), per_token_loss.dtype)
    return tf.reduce_sum(per_token_loss * keep), tf.reduce_sum(keep)


for epoch in range(num_epochs):
    for inputs, targets in train_dataset:
        # create the masks
        encoding_padding_mask, future_mask, decoding_padding_mask = create_mask(inputs, targets)

        # NOTE(review): the decoder is fed `targets` and is also scored against
        # `targets`. Teacher forcing normally feeds targets[:, :-1] and scores
        # against targets[:, 1:]; unless Transformer shifts internally, the model
        # can see the token it is asked to predict -- confirm.
        with tf.GradientTape() as tape:
            predictions, _, _ = model(inputs, targets, True, encoding_padding_mask, future_mask, decoding_padding_mask)
            loss_sum, token_count = _masked_loss_sum(targets, predictions)
            # mean over real tokens; the guard avoids 0/0 on an all-padding batch
            loss = loss_sum / tf.maximum(token_count, 1.0)

        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    # validation loop
    total_val_loss = 0.0
    total_val_tokens = 0.0
    for inputs, targets in val_dataset:
        encoding_padding_mask, future_mask, decoding_padding_mask = create_mask(inputs, targets)
        predictions, _, _ = model(inputs, targets, False, encoding_padding_mask, future_mask, decoding_padding_mask)
        val_loss_sum, val_token_count = _masked_loss_sum(targets, predictions)
        # tensors expose .numpy() in eager mode (there is no .np() method)
        total_val_loss += float(val_loss_sum.numpy())
        total_val_tokens += float(val_token_count.numpy())

    # print average validation loss (per non-padding token; safe on an empty validation set)
    ave_val_loss = total_val_loss / max(total_val_tokens, 1.0)
    print(f"Epoch {epoch+1}, Validation Loss: {ave_val_loss:.4f}")