In [1]:
# Installs the package that ships the `pynvml` module imported further down.
# `%pip` (rather than `!pip`) installs into the environment of the running kernel.
%pip install nvidia-ml-py3



In [2]:
!nvidia-smi

Fri Jun  7 04:39:26 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   68C    P8              11W /  70W |      0MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [3]:
from pynvml import *

def print_gpu_utilization(device_index=0):
    """Print how much memory is currently in use on one NVIDIA GPU.

    Args:
        device_index: NVML index of the GPU to query. Defaults to 0, which
            is the device this helper always queried before the parameter
            was introduced.

    The printed figure is the ``used`` field of the NVML memory info,
    floor-divided by 1024**2.
    """
    nvmlInit()
    try:
        handle = nvmlDeviceGetHandleByIndex(device_index)
        used_bytes = nvmlDeviceGetMemoryInfo(handle).used
    finally:
        # Balance every nvmlInit() with an nvmlShutdown(), even when the
        # query fails, so repeated calls do not pile up initialisations.
        nvmlShutdown()
    print(f"GPU memory occupied: {used_bytes//1024**2} MB.")

In [4]:
def activations_memory(num_layers, seq_len, batch_size, hidden_dim, num_heads, bits_per_param=4):
    """Estimate the memory taken by a Transformer's intermediate activations.

    Args:
        num_layers: Number of transformer layers.
        seq_len: Sequence length in tokens.
        batch_size: Number of sequences per batch.
        hidden_dim: Model hidden size.
        num_heads: Number of attention heads.
        bits_per_param: Width of one stored value. NOTE(review): the result
            is treated as a byte count, so despite the name this appears to
            be *bytes* per value (4 would correspond to 32-bit floats).

    Returns:
        A string of the form ``"RAM required in GB: <value>"`` where the
        value is the estimate divided by 10**9 and rounded to 2 decimals.
    """
    # Terms of the per-element multiplier, kept in the original summation order.
    fixed_term = 16 + 2 / bits_per_param
    attention_term = 2 * num_heads * seq_len / hidden_dim
    scaled_attention_term = num_heads * seq_len / (bits_per_param * hidden_dim)
    multiplier = fixed_term + attention_term + scaled_attention_term

    element_bytes = num_layers * bits_per_param * batch_size * seq_len * hidden_dim
    total_bytes = element_bytes * multiplier

    gigabytes = round(total_bytes / 10**9, 2)
    return f"RAM required in GB: {gigabytes}"

In [5]:
# Activation-memory estimate for a 12-layer, 12-head model with hidden size 768,
# sequence length 1024 and batch size 4.
activations_memory(
    num_layers=12,
    seq_len=1024,
    batch_size=4,
    hidden_dim=768,
    num_heads=12,
    bits_per_param=4,
)

'RAM required in GB: 7.93'

In [6]:
import pandas as pd
import tensorflow as tf
import numpy as np
import time
import os

In [7]:
# Project root on the mounted Google Drive; every other path hangs off it.
base_path = "/content/drive/MyDrive/Colab Notebooks/Projects/Project - Large Language Model"

# Python source file that is executed further down to load the model code.
model_path = f"{base_path}/transformer_decoder.py"
# Directory scanned by the training loop for tokenized CSV shards.
data_path = f"{base_path}/python-tokenized-data"

### Import model code

In [8]:
# Execute the model source file in this notebook's global namespace so the
# names it defines (presumably including TransformerDecoderModel, which is
# used below but not defined in this notebook) become available here.
# `execfile` is not a Python 3 builtin and only exists where the environment
# injects it, so the file is read, compiled and exec'd explicitly instead.
# Compiling with the real filename keeps tracebacks pointing at the file.
with open(model_path, encoding="utf-8") as model_source_file:
    exec(compile(model_source_file.read(), model_path, "exec"), globals())

### Define model config

In [9]:
# Vocabulary size; per the variable name, that of the StarCoder tokenizer.
star_coder_tokenizer_vocab_size = 49152
# Single width shared by the embedding and hidden dimensions below.
transformer_decoder_dim = 768

# Hyper-parameters handed to TransformerDecoderModel.
# NOTE(review): "droptout_rate" looks like a misspelling of "dropout_rate".
# The key is kept exactly as-is because the model code that reads this dict
# is not visible here -- confirm against transformer_decoder.py before renaming.
config = dict(
    num_heads=12,
    is_masked=True,
    embedding_dim=transformer_decoder_dim,
    hidden_dim=transformer_decoder_dim,
    first_layer_size=4 * transformer_decoder_dim,
    droptout_rate=0.2,
    vocab_size=star_coder_tokenizer_vocab_size,
    max_seq_length=1024,
    num_decoder_blocks=12,
)

In [10]:
# try:
#     resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu="")
#     tf.config.experimental_connect_to_cluster(resolver)
#     tf.tpu.experimental.initialize_tpu_system(resolver)
#     strategy = tf.distribute.TPUStrategy(resolver)
# except ValueError:
#     print("TPU not found")

# with strategy.scope():
#     model = TransformerDecoderModel(config)

### Initialize and compile model

In [11]:
# Build the model from the hyper-parameter dict defined above.
# TransformerDecoderModel is not defined in this notebook; it is presumably
# provided by the model source file executed earlier.
model = TransformerDecoderModel(config)

# Integer class labels; from_logits=True tells the loss to treat predictions
# as unnormalised scores. NOTE(review): this assumes the model's final layer
# applies no softmax -- confirm against the model code.
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
# Adam with the library's default settings (no arguments are overridden).
optimizer = tf.keras.optimizers.Adam()

model.compile(loss=loss, optimizer=optimizer)

### Define training callbacks

In [12]:
# Checkpoint callback: writes the model to a single .keras file and, because
# save_best_only=True with monitor="loss", only when the training loss improves.
model_save_path = f"{base_path}/transformer_decoder_model.keras"
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=model_save_path,
    monitor="loss",
    save_best_only=True,
)

# TensorBoard callback: each notebook run logs into its own directory named
# after the local time at which this cell was executed.
date_and_time = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
tensorboard_logs_dir = f"{base_path}/tensorboard_logs/{date_and_time}"
tensorboard_callback = tf.keras.callbacks.TensorBoard(
    log_dir=tensorboard_logs_dir,
    histogram_freq=1,
)

### Model training

In [None]:
# Train on every tokenized shard, one `fit` call per (x, y) CSV pair.
# Layout implied by the path construction below:
#   <data_path>/<name>/x_<name>_part_<i>.csv
#   <data_path>/<name>/y_<name>_part_<i>.csv      for i = 1..N
#
# os.listdir returns entries in arbitrary order, so the listing is sorted to
# make the training order reproducible. Non-directory entries are skipped so
# a stray file under `data_path` does not abort the run.
data_files = sorted(
    entry for entry in os.listdir(data_path)
    if os.path.isdir(os.path.join(data_path, entry))
)

# Running epoch counter shared by all `fit` calls. Without it every call
# would start again at epoch 0 and the shared TensorBoard callback would log
# each shard at the same step.
epochs_completed = 0

for data_file in data_files:
    data_file_subdir = os.path.join(data_path, data_file)

    # Count the x-shards by name rather than halving the directory size, so
    # unrelated files in the directory do not distort the number of parts.
    x_file_prefix = "x_" + data_file + "_part_"
    num_x_y_files = sum(
        1 for file_name in os.listdir(data_file_subdir)
        if file_name.startswith(x_file_prefix) and file_name.endswith(".csv")
    )

    for i in range(1, num_x_y_files + 1):
        x_file_path = "x_" + data_file + "_part_" + str(i)
        y_file_path = "y_" + data_file + "_part_" + str(i)

        x_data_path = os.path.join(data_file_subdir, x_file_path + ".csv")
        y_data_path = os.path.join(data_file_subdir, y_file_path + ".csv")

        # NOTE(review): read_csv treats the first row as a header by default;
        # confirm the shard files were written with one.
        x_train = pd.read_csv(x_data_path, dtype=np.int32)
        y_train = pd.read_csv(y_data_path, dtype=np.int32)

        # `epochs` is the index of the final epoch, so this still runs exactly
        # one pass over the shard while labelling it with a unique epoch.
        model.fit(x_train,
                  y_train,
                  initial_epoch=epochs_completed,
                  epochs=epochs_completed + 1,
                  batch_size=4,
                  callbacks=[model_checkpoint_callback, tensorboard_callback])
        epochs_completed += 1

