In [None]:
from gpt2 import GPT124M
from  generate import generate_text
import tiktoken
from batch_config  import token_ids_to_text , text_to_token_ids

In [72]:
GPT_CONFIG_124M = {
    "vocab_size": 50304,
    "context_size": 32,   
    "emb_dim": 32, 
    "n_heads": 4,          
    "n_layers": 4,         
    "drop_rate": 0.5,     
    "qkv_bias": True        
}

In [73]:
model = GPT124M(GPT_CONFIG_124M)

In [41]:
import torch

In [74]:
model.load_state_dict(torch.load('model_final_v2.pth', map_location='cuda'))

<All keys matched successfully>

In [44]:
tokenizer = tiktoken.encoding_for_model("gpt2")

In [75]:
sample = "hi?"

In [76]:
sample = text_to_token_ids(sample, tokenizer)

In [77]:
sample

tensor([[5303,   30]])

In [79]:
out = generate_text(model, sample, GPT_CONFIG_124M['context_size'], 10000, 0.7, 60).to('cuda')

In [80]:
print(token_ids_to_text(out, tokenizer))

hi? Good!

While Sessions is far is not intend about whom he says Eduardo. And Sessions can spoke to ignore him strong,’ss lead in the same nowadays. And it’s ambassador. He says Eduardo. It’s capacity at the Democratic leader Nancy Pelosi proposed a preview of the donation OK to serve as attorney general in the race.
’s favor.


In fact, Sanders was charged by attorney general. He says Eduardo. He says he says Eduardo. He did not going to win this together from a Trump is a large stature believe that was far.
’s ambassador to ignore him any states with G-involved slayings of removing himself himself himself addressed concerns to be criminal harassment and Sanders said this week with Sanders’s one thing to the Democratic leader Chuck Schumer agreed a calendar that we could be employed by Sessions’s ambassador—who should be the Democratic leader Nancy Pelosi proposed a couple of the United States, who are him strong, Sanders’s ambassador.
’s one ad Mook wrote. He says T. He says Eduardo

In [30]:
number_of_params  =  sum(p.numel() for p in  model.parameters())

In [32]:
number_of_params   *  2

30207232

In [37]:
# Dados do seu treino
B = 32  # batch size
T = 256  # sequence length (context_size)
num_batches = 2848

# Tokens processados por época
tokens_por_epoca = B * T * num_batches
print(f"Tokens processados por época: {tokens_por_epoca:,}")
# = 32 × 256 × 2848 = 23,330,816 tokens (~23.3M tokens)

# FLOPs totais por época
parametros = 15_103_616
flops_forward = 6 * parametros * T  # por sequência
flops_por_epoca = flops_forward * (tokens_por_epoca / T) * 3  # ×3 para backward

print(f"\nFLOPs por época: {flops_por_epoca:.2e}")
# = 6.35 × 10¹⁵ FLOPs

# GPU RTX 4050
flops_gpu = 70e12  # 70 TFLOPs

# Tempo teórico (ideal, 100% eficiência)
tempo_teorico = flops_por_epoca / flops_gpu
print(f"Tempo teórico: {tempo_teorico:.1f} segundos = {tempo_teorico/60:.2f} minutos")
# ≈ 90.7 segundos = 1.5 minutos

# Tempo real observado
tempo_real = 37.5 * 60  # 37.5 minutos em segundos
eficiencia = (flops_por_epoca / tempo_real) / flops_gpu * 100

print(f"\n=== COMPARAÇÃO ===")
print(f"Tempo teórico (100%): {tempo_teorico/60:.1f} minutos")
print(f"Tempo real: {tempo_real/60:.1f} minutos")
print(f"Eficiência da GPU: {eficiencia:.2f}%")
print(f"Throughput: {tokens_por_epoca/tempo_real:.0f} tokens/segundo")

Tokens processados por época: 23,330,816

FLOPs por época: 6.34e+15
Tempo teórico: 90.6 segundos = 1.51 minutos

=== COMPARAÇÃO ===
Tempo teórico (100%): 1.5 minutos
Tempo real: 37.5 minutos
Eficiência da GPU: 4.03%
Throughput: 10369 tokens/segundo


In [34]:
# FLOPs do modelo (forward pass completo)
parametros = 15_103_616
seq_length = 256

flops_modelo = 6 * parametros * seq_length
print(f"FLOPs do modelo: {flops_modelo:.2e}")  # 2.32e10 FLOPs

# FLOPs da GPU RTX 4050
flops_gpu = 70e12  # 70 TFLOPs (FP16)

# Tempo
tempo_segundos = flops_modelo / flops_gpu
tempo_minutos = tempo_segundos / 60

print(f"\n=== RESULTADO ===")
print(f"Tempo: {tempo_segundos:.10f} segundos")
print(f"Tempo: {tempo_minutos:.10f} minutos")
print(f"Tempo: {tempo_minutos * 1e6:.4f} microssegundos")

FLOPs do modelo: 2.32e+10

=== RESULTADO ===
Tempo: 0.0003314165 segundos
Tempo: 0.0000055236 minutos
Tempo: 5.5236 microssegundos
