In [40]:

import sys
import os
import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.optim import AdamW
import yaml

# Locate the project root so the local `src` directory can be put on sys.path.
# `__file__` only exists when this code runs as a script; inside a notebook
# kernel it is undefined, so fall back to the current working directory.
try:
    project_root = os.path.dirname(os.path.abspath(__file__))
except NameError:
    project_root = os.getcwd()

# Prepend `src` once so re-running the cell does not add duplicate entries.
src_path = os.path.join(project_root, 'src')
if src_path not in sys.path:
    sys.path.insert(0, src_path)

from model import MiniGPT
from utils import *

# Relative path of the YAML configuration file used by this notebook.
CONFIG_PATH = 'config.yaml'


In [41]:
# Load the run configuration from the YAML file named by CONFIG_PATH.
# The same constant is used for opening the file and for the error message,
# so the reported path is always the one that was actually tried.
try:
    with open(CONFIG_PATH, 'r') as f:
        config = yaml.safe_load(f)
except FileNotFoundError:
    print(f"Error: Configuration file not found at {CONFIG_PATH}")
    # SystemExit stops execution with a non-zero status without relying on
    # the interactive `exit()` helper.
    raise SystemExit(1)
except Exception as e:
    print(f"Error loading configuration file: {e}")
    raise SystemExit(1)

# yaml.safe_load returns None for an empty document; later cells call
# config.get(...), so fall back to an empty mapping (all defaults apply).
if config is None:
    config = {}




In [None]:
def _cfg(key, fallback):
    """Return the value stored under `key` in `config`, or `fallback` if the key is absent."""
    return config.get(key, fallback)


# --- Data ---
corpus_path = _cfg('corpus_path', 'data/sample.txt')
train_split = _cfg('split', 0.9)  # note: the config key is 'split'

# --- Model parameters ---
context_len = _cfg('context_len', 128)
n_emb = _cfg('n_emb', 384)
n_heads = _cfg('n_heads', 6)
n_layers = _cfg('n_layers', 6)

# --- Training parameters ---
batch_size = _cfg('batch_size', 64)
train_iterations = _cfg('train_iterations', 5000)
learning_rate = _cfg('learning_rate', 3e-4)


/content/drive/MyDrive/GPT


In [42]:
# Output location for the trained weights: the directory part of the
# 'model_load_path' config entry, resolved against project_root.
model_save_dir = os.path.join(
    project_root,
    os.path.dirname(config.get('model_load_path', 'model/mini_gpt_model.pth')),
)
model_save_filename = 'mini_gpt_model.pth'  # you can change the name if you want to save the model weights with a different name.
model_save_path = os.path.join(model_save_dir, model_save_filename)

# Corpus path resolved against project_root.
full_corpus_path = os.path.join(project_root, corpus_path)

# Width of each attention head; integer division drops any remainder.
head_size = n_emb // n_heads

# Store a device *name*, not the bool returned by is_available(): the value is
# later handed to `.to(device)`, which needs a device string.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [37]:
# Fix the random seed so repeated runs draw the same random numbers.
SEED = 146432
torch.manual_seed(SEED)

# Ask the runtime whether CUDA is present instead of comparing a variable to
# the string 'cuda'; seed every visible GPU, not only the current one.
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)


/content/drive/My Drive/GPT
/content/drive/MyDrive/GPT


In [None]:
# Load the corpus, encode it, and split it into training and validation parts.
# NOTE(review): load_data, encode_data and train_test_split are presumably
# provided by `from utils import *` — confirm.
# Use the project-root-resolved corpus path so the cell does not depend on the
# kernel's working directory.
text = load_data(full_corpus_path)
vocab_size, stoi, itos, enc_data = encode_data(text)
# Honour the configured split fraction instead of a hard-coded 0.9.
x_train, x_val = train_test_split(enc_data, train_split)

'/content/drive/MyDrive/GPT'

In [None]:
# Build the model, optimiser and gradient scaler, then run the training loop.
# NOTE(review): GradScaler, encode, decode and generate_batch are not imported
# explicitly in this notebook; presumably they come from `from utils import *`
# — confirm. GradScaler is used without an autocast context here.
model = MiniGPT(vocab_size, n_emb, head_size, n_heads, context_len, stoi, itos, encode, decode, n_layers)
model = model.to(device)
opt = AdamW(model.parameters(), lr=learning_rate)
scaler = GradScaler()

eval_interval = 500  # report train/validation loss every this many iterations

for i in range(train_iterations):
    # --- one optimisation step on a freshly sampled training batch ---
    x_train_batch, y_train_batch = generate_batch(x_train, batch_size, context_len)
    logits, loss = model(x_train_batch, y_train_batch)
    opt.zero_grad()
    scaler.scale(loss).backward()
    scaler.step(opt)
    scaler.update()

    # Run the validation forward pass only when its result is reported
    # (every `eval_interval` steps and on the final step) instead of on
    # every iteration, where it was computed and then thrown away.
    if i % eval_interval == 0 or i == train_iterations - 1:
        model.eval()
        with torch.no_grad():  # Disable gradient calculation during validation
            x_val_batch, y_val_batch = generate_batch(x_val, batch_size, context_len)
            logits_val, loss_val = model(x_val_batch, y_val_batch)
        model.train()

        # .item() prints plain numbers rather than tensor reprs.
        print(f'-train--loss : {loss.item()},\n -val--loss : {loss_val.item()}')

print('training_done')

'/content/train.ipynb'

In [None]:
# Write the trained weights to disk, creating the target directory if needed.
checkpoint_dir = os.path.split(model_save_path)[0]
os.makedirs(checkpoint_dir, exist_ok=True)

weights = model.state_dict()
torch.save(weights, model_save_path)
print("Model saved successfully.")