In [5]:
!pip install mlflow

Collecting mlflow
  Downloading mlflow-3.9.0-py3-none-any.whl.metadata (31 kB)
Collecting mlflow-skinny==3.9.0 (from mlflow)
  Downloading mlflow_skinny-3.9.0-py3-none-any.whl.metadata (32 kB)
Collecting mlflow-tracing==3.9.0 (from mlflow)
  Downloading mlflow_tracing-3.9.0-py3-none-any.whl.metadata (19 kB)
Collecting Flask-CORS<7 (from mlflow)
  Downloading flask_cors-6.0.2-py3-none-any.whl.metadata (5.3 kB)
Collecting Flask<4 (from mlflow)
  Downloading flask-3.1.2-py3-none-any.whl.metadata (3.2 kB)
Collecting alembic!=1.10.0,<2 (from mlflow)
  Downloading alembic-1.18.3-py3-none-any.whl.metadata (7.2 kB)
Collecting cryptography<47,>=43.0.0 (from mlflow)
  Downloading cryptography-46.0.4-cp311-abi3-manylinux_2_34_x86_64.whl.metadata (5.7 kB)
Collecting graphene<4 (from mlflow)
  Downloading graphene-3.4.3-py2.py3-none-any.whl.metadata (6.9 kB)
Collecting gunicorn<24 (from mlflow)
  Downloading gunicorn-23.0.0-py3-none-any.whl.metadata (4.4 kB)
Collecting huey<3,>=2.5.4 (from mlflow)


In [1]:
# Imports and notebook-wide configuration

import os, sys
# sys.path += ["/teamspace/studios/this_studio"]

import torch
import random
from torch import nn

from typing import Iterator, Tuple
import json
import numpy as np
import pandas as pd
from gpt2tiny.tokenizer import Tokenizer
import glob
from dataclasses import dataclass
import math
from pytorch_lightning.loggers import MLFlowLogger

from gpt2tiny.model import GPT2, GPTConfig
# from dataset import PreTokDataset
from gpt2tiny.dataset import PreTokDataset 
from gpt2tiny.trainer import GPT2Module, TrainingConfig
import torch.distributed as dist
from typing import Iterator, Tuple
from pathlib import Path
from torch.utils.data import DataLoader
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint
import mlflow

# Notebook-wide configuration. Both settings fall back to the original
# hard-coded values but can be overridden through the environment, so the
# notebook is not tied to one machine's layout.
# NOTE(review): this points the global MLflow client at an HTTP server, while
# the MLFlowLogger cell is given a local "{BASE_DIR}/mlruns" directory —
# confirm which tracking store is actually intended.
mlflow.set_tracking_uri(os.environ.get("MLFLOW_TRACKING_URI", "http://127.0.0.1:5000"))

# Root of the gpt2tiny checkout; tokenizer and MLflow paths are derived from it.
BASE_DIR = os.environ.get("GPT2TINY_BASE_DIR", "/teamspace/studios/this_studio/gpt2tiny")
DEFAULT_DATA_DIR = f"{BASE_DIR}/data/TinyStories_all_data/"

__file__:  /teamspace/studios/this_studio/gpt2tiny/gpt2tiny/dataset.py


In [2]:
# MLflow logger handed to the Lightning trainer; runs go to the "pretok" experiment.
# NOTE(review): the import cell calls mlflow.set_tracking_uri("http://127.0.0.1:5000"),
# but this logger is given a local directory instead — confirm which store is intended.
mlf_logger = MLFlowLogger(
    experiment_name="pretok",
    tracking_uri=f"{BASE_DIR}/mlruns",  # local directory under BASE_DIR (file-based store)
)

  return FileStore(store_uri, store_uri)


In [3]:
# Sanity check: can PyTorch see a CUDA device in this session?
torch.cuda.is_available()

True

In [4]:
# Model hyperparameters: only `flash` is set explicitly; every other field keeps
# GPTConfig's default. (A tiny debug variant was tried earlier with
# n_layer=1, n_head=2, n_embed=8.)
config = GPTConfig(flash=True)

In [5]:
# Show the resolved model configuration, defaults included.
config

GPTConfig(block_size=512, vocab_size=4096, n_layer=8, n_head=8, n_embed=512, dropout=0.2, bias=False, use_rotary=False, flash=True)

In [6]:
# Training hyperparameters: batch size and DataLoader worker count are set here;
# all remaining fields keep TrainingConfig's defaults.
trainer_config = TrainingConfig(batch_size=64, num_workers=4)

In [7]:
# Show the resolved training configuration, defaults included.
trainer_config

TrainingConfig(learning_rate=0.0006, max_iters=30000, weight_decay=0.1, beta1=0.9, beta2=0.95, grad_clip=1.0, decay_lr=True, warmup_iters=1000, lr_decay_iters=30000, min_lr=6e-05, eval_interval=100, log_interval=10, eval_iters=200, gradient_accumulation_steps=4, batch_size=64, num_workers=4, device='cuda', dtype='bfloat16', compile=True)

In [8]:
# Load the tokenizer from the model file stored under BASE_DIR/data.
# NOTE(review): presumably the 4096 in the file name matches GPTConfig.vocab_size — confirm.
tokenizer = Tokenizer(f"{BASE_DIR}/data/tok4096.model")

In [9]:
def _build_loader(split: str) -> DataLoader:
    """Return a DataLoader over ``PreTokDataset`` for the requested ``split``.

    Batch size and worker count are both taken from ``trainer_config``.
    """
    # NOTE(review): the dataset's first positional argument receives
    # trainer_config.batch_size — confirm PreTokDataset really expects a batch
    # size there (and not, e.g., a sequence length).
    dataset = PreTokDataset(trainer_config.batch_size, split=split)
    return DataLoader(
        dataset,
        batch_size=trainer_config.batch_size,
        num_workers=trainer_config.num_workers,
    )


train_dataloader = _build_loader("train")
eval_dataloader = _build_loader("validation")

In [10]:
# Lightning module wrapping the GPT-2 model; the tokenizer is passed in alongside
# the model config. (gen_max_new_tokens=100 was also tried as an extra argument.)
model = GPT2Module(config, tokenizer, gen_every_n_epochs=500)

In [11]:
# Checkpointing: retain only the single checkpoint with the lowest "val_loss".
checkpoint_cb = ModelCheckpoint(
    filename="best-{step}-{val_loss:.4f}",
    monitor="val_loss",  # has to equal the key used in self.log("val_loss", ...)
    mode="min",          # lower is better
    save_top_k=1,        # drop everything except the best checkpoint
)

In [12]:
# Lightning trainer; run length, validation cadence, logging cadence, gradient
# accumulation and clipping all come from trainer_config.
trainer = pl.Trainer(
    accelerator="auto",
    devices=1,  # single device
    # fp16 automatic mixed precision.
    # NOTE(review): trainer_config.dtype is 'bfloat16' — confirm that "16-mixed"
    # (rather than "bf16-mixed") is what the target GPU should use.
    precision="16-mixed",
    max_steps=trainer_config.max_iters,  # total optimizer steps (defines run length)
    # An integer here counts training *batches*, so validation runs every
    # trainer_config.eval_interval batches (not optimizer steps when
    # gradient accumulation is enabled).
    val_check_interval=trainer_config.eval_interval,
    # Cap each validation loop; taken from the config instead of a duplicated literal.
    limit_val_batches=trainer_config.eval_iters,
    logger=mlf_logger,
    callbacks=[checkpoint_cb],
    log_every_n_steps=trainer_config.log_interval,
    accumulate_grad_batches=trainer_config.gradient_accumulation_steps,
    gradient_clip_val=trainer_config.grad_clip,
)

Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores


In [None]:
# Launch training; the second loader is used for the periodic validation runs.
trainer.fit(
    model,
    train_dataloaders=train_dataloader,
    val_dataloaders=eval_dataloader,
)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
/home/zeus/miniconda3/envs/cloudspace/lib/python3.12/site-packages/pytorch_lightning/utilities/model_summary/model_summary.py:242: Precision 16-mixed is not supported by the model summary.  Estimated model size in MB will not be accurate. Using 32 bits instead.


Output()

In [15]:
# Display the module tree (repr) of the Lightning module and the GPT-2 model inside it.
model

GPT2Module(
  (model): GPT2(
    (transformer): ModuleDict(
      (wte): Embedding(4096, 512)
      (drop): Dropout(p=0.2, inplace=False)
      (h): ModuleList(
        (0-7): 8 x Block(
          (ln_1): RMSNorm((512,), eps=None, elementwise_affine=True)
          (attn): CausalSelfAttention(
            (c_attn): Linear(in_features=512, out_features=1536, bias=False)
            (c_proj): Linear(in_features=512, out_features=512, bias=False)
            (attn_dropout): Dropout(p=0.2, inplace=False)
            (resid_dropout): Dropout(p=0.2, inplace=False)
          )
          (ln_2): RMSNorm((512,), eps=None, elementwise_affine=True)
          (ffd): FeedForward(
            (w1): Linear(in_features=512, out_features=1365, bias=False)
            (w2): Linear(in_features=1365, out_features=512, bias=False)
            (w3): Linear(in_features=512, out_features=1365, bias=False)
            (dropout): Dropout(p=0.2, inplace=False)
          )
        )
      )
      (ln_f): RMSNorm(

In [18]:
# Sample a short continuation from the model.
# Another prompt tried earlier: "There once was a man".
prompt = "A dragon in a cave"

raw_model = model.model
# Switch the wrapped network to eval mode (turns its Dropout layers off).
# A mid-cell expression is not displayed, so no `_ = ...` is needed — assigning
# to `_` would also shadow IPython's last-output variable.
raw_model.eval()

# Sampling needs no gradients; skip building the autograd graph.
with torch.no_grad():
    # NOTE(review): the positional 55 is presumably the number of new tokens — confirm.
    output = model.generate(prompt, 55, top_k=50, top_p=None, tokenizer=tokenizer, temperature=1.5)
print(output)

A dragon in a cave One day, two friends were walking to the cave they noticed many trees and trees. They went over to think it sounded funny and fun to hear what the loud lights diren of planets. It seemed scary and a little bug started to shiver over the dark.
