In [None]:
import os
from google.colab import drive

# Attach Google Drive to the Colab filesystem so the project folder is reachable.
mount_point = '/content/drive'
drive.mount(mount_point)

Mounted at /content/drive


In [None]:
# Sanity check: show which files are present in the project's data folder.
data_dir = '/content/drive/MyDrive/GPT-anvit-opt/data'
data_dir_entries = os.listdir(data_dir)
print(data_dir_entries)

['finance_corpus.txt']


In [None]:
import sys

# Put the project folder on the import path so its local modules can be imported.
project_root = '/content/drive/MyDrive/GPT-anvit-opt'
sys.path.append(project_root)

In [None]:
!pip install tiktoken

Collecting tiktoken
  Downloading tiktoken-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Downloading tiktoken-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.2 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m67.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tiktoken
Successfully installed tiktoken-0.9.0


In [None]:
import os
import torch
from torch.utils.data import DataLoader
from torch import nn, optim
from tqdm.notebook import tqdm
from model import GPTModel
from gpt_config import GPTConfig
from data_utils import load_tokenizer, load_data
from utils import generate_text, calculate_perplexity
import time


In [None]:
# Read the corpus from the same project folder (GPT-anvit-opt) that the data
# listing, the sys.path entry and the checkpoint path in this notebook use.
data_files = ["/content/drive/MyDrive/GPT-anvit-opt/data/finance_corpus.txt"]  # your data
tokenizer = load_tokenizer("gpt2")

# pad_id is only used as the loss ignore_index in this notebook.
# NOTE(review): a stock GPT-2 BPE has no "<|pad|>" entry, so encoding that string
# yields several ordinary tokens. Taking element [0] would then make the loss
# silently skip every target equal to that first ordinary token. Only trust the
# encoding when it is a single id; otherwise use -100 (the CrossEntropyLoss
# default), which matches no real token. If load_data pads sequences with a
# specific id, set pad_id to that id instead — TODO confirm against data_utils.
pad_token_ids = tokenizer.encode("<|pad|>")
pad_id = pad_token_ids[0] if len(pad_token_ids) == 1 else -100

In [None]:
# Context length passed to load_data, and the tokenizer's vocabulary size for the model config.
block_size = 256
vocab_size = tokenizer.n_vocab

# load_data returns the training and validation datasets built from the corpus files.
train_dataset, val_dataset = load_data(data_files, tokenizer, block_size=block_size)

# Both loaders share batch size, worker count and pinned memory; only training shuffles.
loader_kwargs = dict(batch_size=16, num_workers=4, pin_memory=True)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_dataset, **loader_kwargs)

In [None]:
# Run on the GPU when one is visible, otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

# Model hyperparameters: 6 layers, 8 heads, 512-dim embeddings, max length 1024.
config = GPTConfig(
    vocab_size=vocab_size,
    max_len=1024,
    n_layer=6,
    n_head=8,
    n_embd=512,
)

# Build the model on the target device, then wrap it with torch.compile.
model = torch.compile(GPTModel(config).to(device))

In [None]:
from torch.amp import autocast, GradScaler

# Train for a fixed number of epochs with mixed precision, running a full
# validation pass after each epoch and reporting loss, perplexity and timing.

total_start_time = time.time()

epochs = 10
learning_rate = 3e-4

# fp16 autocast and gradient scaling are only enabled on CUDA. `device` falls
# back to CPU when no GPU is available, and in that case both are switched off
# so the loop runs in plain fp32 instead of requesting CUDA autocast.
use_amp = device.type == "cuda"
amp_dtype = torch.float16 if use_amp else torch.bfloat16  # unused while disabled

criterion = nn.CrossEntropyLoss(ignore_index=pad_id)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
scaler = GradScaler(device=device.type, enabled=use_amp)

for epoch in range(1, epochs + 1):
    start_time = time.time()
    model.train()
    total_loss = 0.0
    progress_bar = tqdm(train_loader, desc=f"Epoch {epoch}/{epochs}", leave=False)
    for inputs, targets in progress_bar:
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        with autocast(device_type=device.type, dtype=amp_dtype, enabled=use_amp):
            outputs = model(inputs)
            # Collapse every leading dimension so each position is scored
            # against its target over the last (class) dimension.
            loss = criterion(outputs.view(-1, outputs.size(-1)), targets.view(-1))

        # With the scaler disabled these calls reduce to a plain
        # backward() / optimizer.step().
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # Read the scalar once; each .item() call forces a device-to-host sync.
        batch_loss = loss.item()
        total_loss += batch_loss
        progress_bar.set_postfix(loss=batch_loss)

    end_time = time.time()
    epoch_time = end_time - start_time

    print(f"\nEpoch {epoch} completed in {epoch_time:.2f} seconds.")
    print(f"Total training loss: {total_loss:.4f}")

    # Mean of the per-batch losses over the epoch.
    avg_loss = total_loss / len(train_loader)
    print(f"Epoch {epoch} Training Loss: {avg_loss:.4f}")

    # Validation
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for inputs, targets in val_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            with autocast(device_type=device.type, dtype=amp_dtype, enabled=use_amp):
                outputs = model(inputs)
                loss = criterion(outputs.view(-1, outputs.size(-1)), targets.view(-1))
            val_loss += loss.item()
    avg_val_loss = val_loss / len(val_loader)
    # Perplexity is reported as exp of the mean per-batch validation loss.
    perplexity = torch.exp(torch.tensor(avg_val_loss))
    print(f"Epoch {epoch} Validation Loss: {avg_val_loss:.4f}, Perplexity: {perplexity:.4f}")


total_end_time = time.time()
total_duration = total_end_time - total_start_time
print(f"Total training time: {total_duration:.2f} seconds")


Epoch 1/10:   0%|          | 0/2278 [00:00<?, ?it/s]


Epoch 1 completed in 112.39 seconds.
Total training loss: 9887.3820
Peak GPU memory used: 4100.64 MB
Epoch 1 Training Loss: 4.3404
Epoch 1 Validation Loss: 3.3807, Perplexity: 29.3918


Epoch 2/10:   0%|          | 0/2278 [00:00<?, ?it/s]


Epoch 2 completed in 82.42 seconds.
Total training loss: 7157.9705
Peak GPU memory used: 4150.07 MB
Epoch 2 Training Loss: 3.1422
Epoch 2 Validation Loss: 2.8819, Perplexity: 17.8484


Epoch 3/10:   0%|          | 0/2278 [00:00<?, ?it/s]


Epoch 3 completed in 82.23 seconds.
Total training loss: 6240.1186
Peak GPU memory used: 4098.94 MB
Epoch 3 Training Loss: 2.7393
Epoch 3 Validation Loss: 2.6539, Perplexity: 14.2095


Epoch 4/10:   0%|          | 0/2278 [00:00<?, ?it/s]


Epoch 4 completed in 81.75 seconds.
Total training loss: 5690.3147
Peak GPU memory used: 4098.60 MB
Epoch 4 Training Loss: 2.4979
Epoch 4 Validation Loss: 2.5292, Perplexity: 12.5439


Epoch 5/10:   0%|          | 0/2278 [00:00<?, ?it/s]


Epoch 5 completed in 82.33 seconds.
Total training loss: 5301.2090
Peak GPU memory used: 4098.60 MB
Epoch 5 Training Loss: 2.3271
Epoch 5 Validation Loss: 2.4384, Perplexity: 11.4542


Epoch 6/10:   0%|          | 0/2278 [00:00<?, ?it/s]


Epoch 6 completed in 82.46 seconds.
Total training loss: 5002.2311
Peak GPU memory used: 4098.60 MB
Epoch 6 Training Loss: 2.1959
Epoch 6 Validation Loss: 2.3778, Perplexity: 10.7811


Epoch 7/10:   0%|          | 0/2278 [00:00<?, ?it/s]


Epoch 7 completed in 81.74 seconds.
Total training loss: 4761.4781
Peak GPU memory used: 4098.60 MB
Epoch 7 Training Loss: 2.0902
Epoch 7 Validation Loss: 2.3296, Perplexity: 10.2742


Epoch 8/10:   0%|          | 0/2278 [00:00<?, ?it/s]


Epoch 8 completed in 81.96 seconds.
Total training loss: 4560.7066
Peak GPU memory used: 4098.65 MB
Epoch 8 Training Loss: 2.0021
Epoch 8 Validation Loss: 2.3032, Perplexity: 10.0066


Epoch 9/10:   0%|          | 0/2278 [00:00<?, ?it/s]


Epoch 9 completed in 82.07 seconds.
Total training loss: 4390.6338
Peak GPU memory used: 4098.60 MB
Epoch 9 Training Loss: 1.9274
Epoch 9 Validation Loss: 2.2834, Perplexity: 9.8101


Epoch 10/10:   0%|          | 0/2278 [00:00<?, ?it/s]


Epoch 10 completed in 81.88 seconds.
Total training loss: 4242.4526
Peak GPU memory used: 4098.60 MB
Epoch 10 Training Loss: 1.8624
Epoch 10 Validation Loss: 2.2572, Perplexity: 9.5559
Total training time: 884.40 seconds


In [None]:
# `model` is the torch.compile wrapper, whose state_dict keys are prefixed with
# "_orig_mod.". Save the wrapped module's weights instead so the checkpoint
# loads into a plain (uncompiled) GPTModel. To load it into a compiled model,
# call load_state_dict on its `_orig_mod` attribute.
checkpoint_path = "/content/drive/MyDrive/GPT-anvit-opt/finance_gpt_tik.pth"
model_to_save = getattr(model, "_orig_mod", model)
torch.save(model_to_save.state_dict(), checkpoint_path)
# Report the path that was actually written.
print(f"Model saved to {checkpoint_path}")


Model saved to finance_gpt.pth


In [None]:
#model.load_state_dict(torch.load("/content/drive/MyDrive/GPT-anvit-opt/finance_gpt.pth", map_location=device))

<All keys matched successfully>

In [None]:
# Prompt asking for a structured stock analysis; adjacent literals are
# concatenated into one string with embedded newlines.
prompt = (
    "Write a financial analysis of promising stocks. Include:\n"
    "- A general market overview\n"
    "- Mention of specific companies (e.g. IBM)\n"
    "- Key financial stats (earnings, revenue, beta)\n"
    "- Analyst sentiment\n"
    "- Final investment suggestion"
)

# Generate a continuation of the prompt with the trained model and print it.
generated = generate_text(
    model,
    tokenizer,
    prompt,
    max_length=500,
    top_k=20,
    device=device,
)
print(generated)


Write a financial analysis of promising stocks. Include:
- A general market overview
- Mention of specific companies (e.g. IBM)
- Key financial stats (earnings, revenue, beta)
- Analyst sentiment
- Final investment suggestion, but not limited to long-term potential. The historical price action remains unchanged. Despite a high historical price, there are signs that there are other factors driving IBM’s share price growth prospects and share price momentum. While the current share price may hold a fairly volatile period, this may reflect the fundamental factors such as the market’s expectations. After the end of the day, peers the short-term challenges may cause the company to remain weak.

Over the last five years, IBM has achieved a total shareholder return of 183.74%, reflecting a substantial return. This performance was fueled by substantial gain of 15.06%. This performance reflects a challenging market conditions in relation to the company's strategic initiatives which have led to 