In [1]:
import os
from google.colab import drive

# Attach Google Drive to the Colab VM; the project directory and the corpus
# referenced by later cells both live under this mount point.
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Mounted at /content/drive


In [2]:
import sys

# Make the project directory on the mounted Drive importable. The local modules
# imported further down (model, gpt_config, data_utils, utils) are presumably
# located here — confirm against the Drive layout.
project_dir = '/content/drive/MyDrive/GPT-anvit-opt'

# Guard the append so that re-running this cell does not pile up duplicate
# entries in sys.path.
if project_dir not in sys.path:
    sys.path.append(project_dir)

In [3]:
!pip install tiktoken

Collecting tiktoken
  Downloading tiktoken-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Downloading tiktoken-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.2/1.2 MB 15.1 MB/s eta 0:00:00
Installing collected packages: tiktoken
Successfully installed tiktoken-0.9.0


In [4]:
import os
import torch
from torch.utils.data import DataLoader
from torch import nn, optim
from tqdm.notebook import tqdm
from model import GPTModel
from gpt_config import GPTConfig
from data_utils import load_tokenizer, load_data
from utils import generate_text
import time


In [5]:
import warnings

# Training corpus (plain-text file on the mounted Drive) and the tokenizer.
data_files = ["/content/drive/MyDrive/GPT-anvit/data/finance_corpus.txt"]  # your data
tokenizer = load_tokenizer("gpt2")

# pad_id is later handed to CrossEntropyLoss(ignore_index=...), so every target
# equal to it is dropped from the loss. Taking element [0] is only meaningful if
# "<|pad|>" encodes to exactly one id; when the tokenizer has no such special
# token the string is split into several ordinary pieces and pad_id silently
# becomes the id of the first piece. The value is kept as before, but the
# situation is surfaced instead of passing unnoticed.
# NOTE(review): decide which id (if any) the dataset really pads with and use
# that here — cannot be determined from this notebook alone.
pad_token_ids = tokenizer.encode("<|pad|>")
if len(pad_token_ids) != 1:
    warnings.warn(
        f"'<|pad|>' is not a single token for this tokenizer (encoded to "
        f"{len(pad_token_ids)} ids); pad_id={pad_token_ids[0]} is the id of its "
        f"first piece, so the loss will ignore ordinary occurrences of that token."
    )
pad_id = pad_token_ids[0]

In [6]:
# block_size is forwarded to load_data; vocab_size is used further down when
# the model configuration is built.
block_size = 256
vocab_size = tokenizer.n_vocab

# load_data hands back the training and validation datasets as a pair.
train_dataset, val_dataset = load_data(
    data_files,
    tokenizer,
    block_size=block_size,
)

# Options common to both loaders; only the training loader shuffles.
loader_options = dict(batch_size=16, num_workers=4, pin_memory=True)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
val_loader = DataLoader(val_dataset, **loader_options)

In [7]:
# Prefer the GPU when one is visible, otherwise stay on the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

# Architecture hyperparameters handed to GPTConfig.
config = GPTConfig(
    vocab_size=vocab_size,
    max_len=1024,
    n_layer=6,
    n_head=8,
    n_embd=512,
)

# Build the network on the chosen device, then wrap it with torch.compile;
# from here on `model` refers to the compiled wrapper.
model = torch.compile(GPTModel(config).to(device))

In [8]:
from torch.amp import autocast, GradScaler

# Train for a fixed number of epochs, reporting the mean training loss, the mean
# validation loss and the validation perplexity after each one, plus wall-clock
# timings per epoch and overall.

# Mixed precision (fp16 autocast + gradient scaling) is only switched on when
# the model actually lives on a CUDA device. `device` falls back to "cpu" when
# no GPU is available, and in that case autocast and the scaler are created
# disabled so the loop runs in full precision instead of requesting CUDA
# autocast on a CPU run.
use_amp = device.type == "cuda"

total_start_time = time.time()

epochs = 10
learning_rate = 3e-4
# Targets equal to pad_id do not contribute to the loss.
criterion = nn.CrossEntropyLoss(ignore_index=pad_id)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# With enabled=False, scale() returns the loss unchanged, step() simply calls
# optimizer.step() and update() does nothing.
scaler = GradScaler(device=device.type, enabled=use_amp)

for epoch in range(1, epochs + 1):
    start_time = time.time()
    model.train()
    total_loss = 0.0
    progress_bar = tqdm(train_loader, desc=f"Epoch {epoch}/{epochs}", leave=False)
    for inputs, targets in progress_bar:
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        with autocast(device_type=device.type, dtype=torch.float16, enabled=use_amp):
            outputs = model(inputs)
            # Collapse all leading dimensions so the loss sees one row of
            # scores per target position.
            loss = criterion(outputs.view(-1, outputs.size(-1)), targets.view(-1))

        # Backward on the scaled loss; step() skips the update if the
        # gradients contain inf/NaN, update() then adjusts the scale factor.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # Read the scalar once: every .item() forces a device-to-host sync.
        batch_loss = loss.item()
        total_loss += batch_loss
        progress_bar.set_postfix(loss=batch_loss)

    end_time = time.time()
    epoch_time = end_time - start_time

    print(f"\nEpoch {epoch} completed in {epoch_time:.2f} seconds.")
    print(f"Total training loss: {total_loss:.4f}")

    # Mean of the per-batch losses (each batch counts equally).
    avg_loss = total_loss / len(train_loader)
    print(f"Epoch {epoch} Training Loss: {avg_loss:.4f}")

    # Validation
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for inputs, targets in val_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            with autocast(device_type=device.type, dtype=torch.float16, enabled=use_amp):
                outputs = model(inputs)
                loss = criterion(outputs.view(-1, outputs.size(-1)), targets.view(-1))
            val_loss += loss.item()
    avg_val_loss = val_loss / len(val_loader)
    # Perplexity is the exponential of the mean validation loss.
    perplexity = torch.exp(torch.tensor(avg_val_loss))
    print(f"Epoch {epoch} Validation Loss: {avg_val_loss:.4f}, Perplexity: {perplexity:.4f}")


total_end_time = time.time()
total_duration = total_end_time - total_start_time
print(f"Total training time: {total_duration:.2f} seconds")


Epoch 1/10:   0%|          | 0/2278 [00:00<?, ?it/s]


Epoch 1 completed in 109.14 seconds.
Total training loss: 9882.4771
Epoch 1 Training Loss: 4.3382
Epoch 1 Validation Loss: 3.3659, Perplexity: 28.9602


Epoch 2/10:   0%|          | 0/2278 [00:00<?, ?it/s]


Epoch 2 completed in 78.06 seconds.
Total training loss: 7170.6866
Epoch 2 Training Loss: 3.1478
Epoch 2 Validation Loss: 2.8785, Perplexity: 17.7879


Epoch 3/10:   0%|          | 0/2278 [00:00<?, ?it/s]


Epoch 3 completed in 77.84 seconds.
Total training loss: 6248.0356
Epoch 3 Training Loss: 2.7428
Epoch 3 Validation Loss: 2.6500, Perplexity: 14.1536


Epoch 4/10:   0%|          | 0/2278 [00:00<?, ?it/s]


Epoch 4 completed in 78.00 seconds.
Total training loss: 5694.7608
Epoch 4 Training Loss: 2.4999
Epoch 4 Validation Loss: 2.5192, Perplexity: 12.4184


Epoch 5/10:   0%|          | 0/2278 [00:00<?, ?it/s]


Epoch 5 completed in 77.98 seconds.
Total training loss: 5304.4513
Epoch 5 Training Loss: 2.3286
Epoch 5 Validation Loss: 2.4445, Perplexity: 11.5248


Epoch 6/10:   0%|          | 0/2278 [00:00<?, ?it/s]


Epoch 6 completed in 78.03 seconds.
Total training loss: 5003.4903
Epoch 6 Training Loss: 2.1964
Epoch 6 Validation Loss: 2.3841, Perplexity: 10.8497


Epoch 7/10:   0%|          | 0/2278 [00:00<?, ?it/s]


Epoch 7 completed in 77.93 seconds.
Total training loss: 4761.3335
Epoch 7 Training Loss: 2.0901
Epoch 7 Validation Loss: 2.3341, Perplexity: 10.3203


Epoch 8/10:   0%|          | 0/2278 [00:00<?, ?it/s]


Epoch 8 completed in 78.07 seconds.
Total training loss: 4561.3361
Epoch 8 Training Loss: 2.0023
Epoch 8 Validation Loss: 2.2980, Perplexity: 9.9539


Epoch 9/10:   0%|          | 0/2278 [00:00<?, ?it/s]


Epoch 9 completed in 77.84 seconds.
Total training loss: 4390.9755
Epoch 9 Training Loss: 1.9276
Epoch 9 Validation Loss: 2.2975, Perplexity: 9.9489


Epoch 10/10:   0%|          | 0/2278 [00:00<?, ?it/s]


Epoch 10 completed in 77.84 seconds.
Total training loss: 4243.2082
Epoch 10 Training Loss: 1.8627
Epoch 10 Validation Loss: 2.2515, Perplexity: 9.5025
Total training time: 842.49 seconds


In [None]:
# Persist the trained weights to Drive. The path is kept in one variable so the
# confirmation message always names the file that was actually written.
# NOTE(review): `model` is the torch.compile wrapper here, so the saved keys are
# expected to carry an `_orig_mod.` prefix — confirm before loading this file
# into a plain (uncompiled) GPTModel.
checkpoint_path = "/content/drive/MyDrive/GPT-anvit-opt/finance_gpt_tik.pth"
torch.save(model.state_dict(), checkpoint_path)
print(f"Model saved to {checkpoint_path}")


Model saved to finance_gpt.pth


In [None]:
# Disabled snippet: when uncommented it loads weights from a checkpoint file
# into `model`, mapping tensors onto `device`. Note that it points at
# finance_gpt.pth, whereas the save cell above writes finance_gpt_tik.pth.
#model.load_state_dict(torch.load("/content/drive/MyDrive/GPT-anvit-opt/finance_gpt.pth", map_location=device))

<All keys matched successfully>

In [17]:
# Ask the model to continue a prompt and show the resulting text.
prompt = "Which stocks are promising?"

# Keyword options forwarded to generate_text.
sampling_options = dict(max_length=248, top_k=20, device=device)
generated = generate_text(model, tokenizer, prompt, **sampling_options)
print(generated)


Which stocks are promising?

Vertex Pharmaceuticals Incorporated (VRTX)

Zacks Rank #2 (Buy) stands against other Renaissance Technologies Holding (FV) stocks with low P/E ratio of 1.06. The forward P/E is unScope, significantly higher than the sector’s mean of 8.47.

Over the past five years, the industry traded as high as 4.49, low as 13.43% annually, indicates growth opportunities continue to grow.

The company’s shares have benefited from continued momentum in its share of this growth. For 2025, the company has achieved an impressive sales trajectory, expanding by more than 25% in the past, suggesting that growth are temporarily buoyed to the stock. As of Feb. 18, 2025, the sector’s forward earnings multiple sits at a P/E of 22.6.41X, higher than 20.Zacks Investment Research

Image Source: Zacks Investment Research

Should VRTX Stock Going to Hold?

The company recently secured a solid growth forecast for 2025. The 2025 adjusted earnings is expected to grow in mid-single digits ove