![RadioGPT Banner](https://openfileserver.chloelavrat.com/workshops/RadioGPT/assets/radiogpt-banner.png)



> 💡 **PLEASE CONNECT USING A GPU SESSION FOR MORE COMPUTE POWER:**
>
> `Runtime > Change runtime type > T4 GPU > Save`


In [6]:
#@title Initialize the notebook
# Fetch the RadioGPT sources (skipped when the folder already exists, so the
# cell can be re-run), switch to the workshop branch and install the Python
# dependencies. Quiet flags hide progress output but, unlike redirecting
# everything to /dev/null, keep error messages visible when a step fails.
![ -d RadioGPT ] || git clone -q https://github.com/chloelavrat/RadioGPT.git
!cd RadioGPT && git checkout -q clavrat/first-version
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install -q torch datasets tqdm transformers

# 📀 Dataset Overview

In [8]:
#@title Select your radio dataset
#@markdown Please select your favorite radio station! Then run the cell to load the dataset 🌱
import os
import subprocess
from RadioGPT.gptmodel.core.dataset import AlpacaDataset

radio_station = 'France Inter' # @param ["France Inter", "Mouv’", "France Culture"]

def download_dataset(url, destination):
    """Download ``url`` to the local file ``destination`` using ``wget``.

    Args:
        url: Address of the file to fetch.
        destination: Local file path passed to ``wget -O``. Its parent
            directory is created when it does not exist yet.

    Raises:
        subprocess.CalledProcessError: If ``wget`` exits with a non-zero
            status (for example when the URL cannot be fetched).
    """
    print("Downloading dataset...")
    # Create the folder that will actually hold the file rather than assuming
    # it is always "dataset".
    parent_dir = os.path.dirname(destination)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    # check=True turns a failed download into an exception, so the success
    # message below is only printed when wget really succeeded.
    subprocess.run(["wget", url, "-O", destination], check=True)
    print("Dataset downloaded!")

base_url = "https://openfileserver.chloelavrat.com/workshops/RadioGPT/dataset/"

# Per-station settings: (block_size, remote file name, local destination).
# None marks a station that is offered in the form but has no dataset wired up yet.
station_datasets = {
    'France Inter': (64, "Acquiesce_data_110k_instructions.json", "dataset/inter.json"),
    'Mouv’': None,  # TODO: load Mouv’ dataset
    'France Culture': None,  # TODO: load France Culture dataset
}

# Fail with an explicit message instead of a NameError on `file`/`destination`
# further down when the selected station cannot be loaded.
if radio_station not in station_datasets:
    raise ValueError(f"Unknown radio station: {radio_station!r}")
station_settings = station_datasets[radio_station]
if station_settings is None:
    raise NotImplementedError(
        f"No dataset is available yet for {radio_station!r}; please select 'France Inter'."
    )
block_size, file, destination = station_settings

# Fetch the JSON file, then build the dataset object used by the following cells.
download_dataset(base_url+file, destination)
dataset = AlpacaDataset(destination, block_size)
print("Dataset loaded !")

Downloading dataset...
Dataset downloaded!
Loaded 110368 conversations
Maximum sequence length: 64
Dataset loaded !


# 🧠 Model definition

In [9]:
#@title Loading ...
# Load RadioGPT's checkpoint :)
from RadioGPT.gptmodel.core.model import GPTlite
from RadioGPT.gptmodel.core.utils import load_model
import os, subprocess, torch
# Pick the compute device: "mps" when available, otherwise "cuda", otherwise the CPU.
device = (
    torch.device("mps") if torch.backends.mps.is_available() else
    torch.device("cuda") if torch.cuda.is_available() else
    torch.device("cpu")
)

# Download the pretrained checkpoint into the local "models" folder.
print("Downloading model...")
os.makedirs("models", exist_ok=True)
# check=True stops the cell right here when wget fails, instead of letting a
# missing or truncated file surface later as an obscure checkpoint-loading error.
subprocess.run(
    [
        "wget",
        "https://openfileserver.chloelavrat.com/workshops/RadioGPT/models/model_gpt_chat_best.pth",
        "-O",
        "models/model_gpt_chat_best.pth",
    ],
    check=True,
)

def load_model(model_path, device, config):
    """Build a ``GPTlite`` from ``config`` and fill it with saved weights.

    This definition replaces the ``load_model`` imported from
    ``RadioGPT.gptmodel.core.utils`` at the top of the cell.

    Args:
        model_path: Path of the checkpoint file read with ``torch.load``.
        device: Passed to ``torch.load`` as ``map_location``.
        config: Configuration handed to the ``GPTlite`` constructor.

    Returns:
        The ``GPTlite`` instance after ``load_state_dict`` was applied. The
        model itself is not moved to ``device`` here.
    """
    # NOTE(review): weights_only=False lets torch.load unpickle arbitrary
    # objects from a file that was just downloaded. The result is passed
    # straight to load_state_dict, so weights_only=True may be sufficient --
    # confirm against the published checkpoint before switching.
    state = torch.load(model_path, weights_only=False, map_location=device)

    network = GPTlite(config)
    network.load_state_dict(state)
    return network

# Architecture hyperparameters used to build the network
block_size, n_embd, n_head, n_layer = 64, 512, 8, 10
dropout = 0.2

# Configuration handed to load_model; the vocabulary size comes from the dataset
config = dict(
    context_size=block_size,
    vocab_size=dataset.vocab_size,
    embedding_dim=n_embd,
    num_heads=n_head,
    num_layers=n_layer,
    dropout=dropout,
)

# Restore the checkpoint and move the network to the selected device
print("Loading model...")
model = load_model("models/model_gpt_chat_best.pth", device, config).to(device)
print("Model loaded !")

# Add up the element count of every parameter tensor
total_params = 0
for weight in model.parameters():
    total_params += weight.numel()

print(f"You will use your {device}")
print(f"Total number of parameters   {total_params / 1e6:.1f}M")

Downloading model...
Loading model...
Model loaded !
You will use your cpu
Total number of parameters   83.1M


# ⛳️ RadioGPT Training



In [10]:
# Training parameters read by the training-loop cell below
learning_rate = 1e-3  # passed as `lr` to the AdamW optimizer
epochs = 1000  # number of loop iterations; one batch is drawn per iteration
batch_size = 128  # argument given to dataset.get_batch for every batch

In [None]:
#@title Casual Training Loop
import torch
import torch.nn.functional as F
from tqdm import tqdm

# Initialize the optimizer
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=0.1, betas=(0.9, 0.999))

# Initialize the gradient scaler
# NOTE(review): the forward pass below is not wrapped in an autocast context,
# so the scaler only wraps the backward/step calls -- confirm whether mixed
# precision was intended.
scaler = torch.amp.GradScaler()

print("Let's fry some eggs!! (your loss should be less than 5, restart cell if not...)")
print("-" * 5)

# Training loop with tqdm progress bar
pbar = tqdm(range(epochs), desc="Training", ncols=120)


@torch.no_grad()
def eval_loss():
    """Compute the loss on one freshly drawn batch without tracking gradients.

    The model is switched to eval mode for the forward pass and always put
    back into train mode afterwards.

    Returns:
        The loss returned by the model for that batch.
    """
    # NOTE(review): the batch is drawn with the same get_batch call as the
    # training batches; whether it is held-out data is not visible here.
    model.eval()
    try:
        idx, targets = dataset.get_batch(batch_size)
        idx, targets = idx.to(device), targets.to(device)
        _, loss = model(idx, targets)
    finally:
        model.train()
    return loss


# The generation cells call model.eval(); make sure training always runs in
# train mode, even when this cell is re-run after them.
model.train()

# Most recent evaluation loss, shown next to the training loss in the bar
last_eval = None

# Training loop
for steps in pbar:
    # Get a batch of training data
    idx, targets = dataset.get_batch(batch_size)
    idx, targets = idx.to(device), targets.to(device)

    # Clear the gradients of the previous step; without this every backward
    # pass adds to the gradients already stored on the parameters.
    optimizer.zero_grad()

    # Forward pass
    logits, loss = model(idx, targets)

    # Backward pass with gradient scaling
    scaler.scale(loss).backward()
    scaler.step(optimizer)
    scaler.update()

    # Every 100 steps, log an evaluation loss above the progress bar
    if steps % 100 == 0:
        last_eval = eval_loss().item()
        pbar.write(f" Eval {last_eval:.2f}")

    # Show the training loss (and the latest evaluation loss) on the bar
    postfix = {"loss": f"{loss.item():.2f}"}
    if last_eval is not None:
        postfix["eval_loss"] = f"{last_eval:.2f}"
    pbar.set_postfix(postfix)


# 🎨 Let's Play

In [None]:
#@title Selected prompt

# play with the model
def generate_response(model, dataset, prompt, device, max_new_tokens):
    """Generate a continuation of ``prompt`` and return only the new text.

    Args:
        model: Object providing ``eval()`` and ``generate(tokens, max_new_tokens)``.
        dataset: Object providing ``encode(text)`` and ``decode(ids)``.
        prompt: Text the model should continue.
        device: Device the encoded prompt is moved to.
        max_new_tokens: Forwarded to ``model.generate``.

    Returns:
        The decoded output with its first ``len(prompt)`` characters removed.
        This presumes the decoded text starts with the prompt itself.
    """
    model.eval()

    # Turn the prompt into a batch containing a single sequence
    prompt_ids = dataset.encode(prompt)
    batch = prompt_ids.unsqueeze(0).to(device)

    # Sample and decode without tracking gradients
    with torch.no_grad():
        output_ids = model.generate(batch, max_new_tokens)
        full_text = dataset.decode(output_ids[0].tolist())

    # Drop the leading characters that correspond to the prompt
    prefix_length = len(prompt)
    return full_text[prefix_length:]


prompt = 'Prépare une recette de pâtes à la carbonara.' # @param ["Prépare une recette de pâtes à la carbonara.", "Quel est l'élément chimique avec le numéro atomique 29 ?", "Rédige un court paragraphe sur le thème de l'amitié et de la confiance."]
max_new_tokens = 106 # @param {type:"slider", min:5, max:500, step:1}

# Wrap the selected question in the Question/Answer template, append the
# generated continuation to it, then display the whole exchange.
prompt = f"Question: {prompt}\nAnswer:"
prompt += generate_response(model, dataset, prompt, device, max_new_tokens)
print(prompt)

In [None]:
#@title Let's prompt it!

# play with the model
def generate_response(model, dataset, prompt, device, max_new_tokens):
    """Generate a continuation of ``prompt`` and return only the new text.

    Args:
        model: Object providing ``eval()`` and ``generate(tokens, max_new_tokens)``.
        dataset: Object providing ``encode(text)`` and ``decode(ids)``.
        prompt: Text the model should continue.
        device: Device the encoded prompt is moved to.
        max_new_tokens: Forwarded to ``model.generate``.

    Returns:
        The decoded output with its first ``len(prompt)`` characters removed.
        This presumes the decoded text starts with the prompt itself.
    """
    model.eval()

    # Turn the prompt into a batch containing a single sequence
    prompt_ids = dataset.encode(prompt)
    batch = prompt_ids.unsqueeze(0).to(device)

    # Sample and decode without tracking gradients
    with torch.no_grad():
        output_ids = model.generate(batch, max_new_tokens)
        full_text = dataset.decode(output_ids[0].tolist())

    # Drop the leading characters that correspond to the prompt
    prefix_length = len(prompt)
    return full_text[prefix_length:]


prompt = 'Décris les différences entre le modèle GPT-2 et le modèle GPT-3.' # @param {type:"string"}
max_new_tokens = 180 # @param {type:"slider", min:5, max:500, step:1}

# Wrap the typed question in the Question/Answer template, append the
# generated continuation to it, then display the whole exchange.
prompt = f"Question: {prompt}\nAnswer:"
prompt += generate_response(model, dataset, prompt, device, max_new_tokens)
print(prompt)