# GPT‑OSS Active Learning Template — From HF Link to Lesson

[Open in Colab](https://colab.research.google.com/github/daniel-p-green/alain-ai-learning-platform/blob/main/path/to/this.ipynb)

- Outcomes: Turn a Hugging Face model link into an engaging lesson with minimal setup, MCQs, a golden‑set evaluation, and token/latency logging.
- Audience: Practitioners evaluating or teaching local/open models.
- Time: ~45–75 minutes.


## Parameters (Colab form)
Provide model and runtime settings.

(When using Colab, this cell appears as a form; values are stored in variables.)


In [None]:
#@title Model and Runtime
# Notebook-wide settings. The `#@param` markers make Colab render this cell as
# a form; the chosen values are stored in the variables below.

# Hugging Face repo id used by the Transformers runtime and by the model-card
# and license lookups. 'org/name' looks like a placeholder - replace it with a
# real repo id before running those cells.
HF_MODEL = 'org/name' #@param {type:'string'}
# Backend selector: 'gpt-oss' routes prompts to chat_oss; any other value
# falls through to infer_transformers.
RUNTIME = 'gpt-oss' #@param ['gpt-oss','transformers']
# Default model name chat_oss sends to the endpoint.
GPT_OSS_MODEL = 'gpt-oss:20b' #@param {type:'string'}
# Base URL handed to the OpenAI client; the Diagnostics cell probes it too.
OPENAI_BASE_URL = 'http://localhost:11434/v1' #@param {type:'string'}
# Default sampling temperature used by chat_oss when none is passed.
TEMPERATURE = 0.0 #@param {type:'number'}


## Install (pinned)
If running on Colab, uncomment and run.


In [None]:
# !pip -q install openai==1.43.0 transformers==4.44.2 datasets==2.20.0 ipywidgets==8.1.3 requests==2.32.3


## Setup & Seeds
Environment info, seeds, and device checks.


In [None]:
import os
import platform
import random
import sys
import time

# Environment report: interpreter version and OS.
print('Python:', sys.version)
print('Platform:', platform.platform())

import numpy as np

# One seed shared by every random number generator seeded in this cell.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# torch is optional. When it imports, seed it and report the CUDA device;
# any failure in this section is reported and the notebook carries on.
try:
    import torch
    torch.manual_seed(SEED)
    if not torch.cuda.is_available():
        print('CUDA not available')
    else:
        print('CUDA device:', torch.cuda.get_device_name(0))
except Exception as e:
    print('Torch not installed; skipping torch seed.', e)


## Secrets
Keys are read from the environment when needed; nothing is hardcoded.


In [None]:
# Credentials come from the environment only; nothing is hardcoded.
# 'ollama' is a placeholder key used when no real key is configured. It is
# also applied when the variable is set but empty, so the client never
# receives an empty key.
_env_openai_key = os.getenv('OPENAI_API_KEY')
OPENAI_API_KEY = _env_openai_key or 'ollama'
ANTHROPIC_API_KEY = os.getenv('ANTHROPIC_API_KEY')
# Report whether a real key was supplied; the placeholder does not count.
print('Have OPENAI_API_KEY:', bool(_env_openai_key))


## Quickstart
Run the smallest working example via GPT‑OSS or Transformers, depending on `RUNTIME`.


In [None]:
def chat_oss(prompt: str, model: str = None, temperature: float = None):
    """Send a single-turn chat request to the OpenAI-compatible endpoint.

    Prints the reply, the total token count when the response carries usage
    data, and the latency of the request.

    Args:
        prompt: Text sent as the only (user) message of the conversation.
        model: Model name; falls back to ``GPT_OSS_MODEL`` when falsy.
        temperature: Sampling temperature; falls back to ``TEMPERATURE`` only
            when ``None``, so an explicit ``0.0`` is honoured.

    Returns:
        The reply text, or ``''`` when the response has no text content.
    """
    from openai import OpenAI
    model = model or GPT_OSS_MODEL
    temperature = temperature if temperature is not None else TEMPERATURE
    client = OpenAI(base_url=OPENAI_BASE_URL, api_key=OPENAI_API_KEY)
    # perf_counter is monotonic, so the measured interval cannot be skewed by
    # system clock adjustments the way time.time() can.
    t0 = time.perf_counter()
    resp = client.chat.completions.create(
        model=model,
        messages=[{'role':'user','content':prompt}],
        temperature=temperature
    )
    dt = time.perf_counter()-t0
    # content can be None; normalise to '' so callers always receive a string.
    txt = resp.choices[0].message.content or ''
    usage = getattr(resp,'usage',None)
    print(txt)
    if usage:
        print('Tokens total:', getattr(usage,'total_tokens',None))
    print(f'Latency: {dt:.2f}s')
    return txt

# Holds the most recently built text-generation pipeline, keyed by model id,
# so repeated calls (golden set, batch run) do not reload the weights.
_GENERATOR_CACHE = {}

def infer_transformers(inp: str):
    """Generate a short continuation of ``inp`` with the ``HF_MODEL`` checkpoint.

    The tokenizer, model and pipeline are built on first use and reused on
    later calls for the same ``HF_MODEL``. Changing ``HF_MODEL`` discards the
    previous pipeline so only one model is kept alive at a time.

    Args:
        inp: Prompt text passed to the text-generation pipeline.

    Returns:
        The pipeline's ``generated_text`` for the first result (up to 32 new
        tokens), or ``''`` if loading or generation fails. The failure is
        printed rather than raised.
    """
    from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
    try:
        gen = _GENERATOR_CACHE.get(HF_MODEL)
        if gen is None:
            tok = AutoTokenizer.from_pretrained(HF_MODEL)
            mdl = AutoModelForCausalLM.from_pretrained(HF_MODEL)
            gen = pipeline('text-generation', model=mdl, tokenizer=tok)
            # Keep a single entry: drop any pipeline built for another model.
            _GENERATOR_CACHE.clear()
            _GENERATOR_CACHE[HF_MODEL] = gen
        out = gen(inp, max_new_tokens=32)[0]['generated_text']
        print(out)
        return out
    except Exception as e:
        print('Transformers fallback failed:', e)
        return ''

# Smoke test: send one prompt through whichever backend RUNTIME selects.
if RUNTIME == 'gpt-oss':
    _ = chat_oss('Say hello in five words.')
else:
    _ = infer_transformers('Hello, world')


## Guided Steps
1. Explore core parameters (e.g., temperature, max tokens).
2. Add a structured output (JSON) or schema validation.
3. Run a small batch; measure latency and (if exposed) tokens.
4. Optional: simple RAG/fine‑tune illustration for this model family.


## Release Notes & License
Key details to know before you ship or evaluate.


In [None]:
# Print Hub metadata (last-modified date, license) and the first lines of the
# most relevant model-card sections. Best-effort: any failure (missing
# package, no network, unknown repo) is reported instead of stopping the run.
try:
    from huggingface_hub import HfApi
    import re, requests
    api=HfApi()
    info=api.model_info(HF_MODEL)
    # NOTE(review): the attribute spelling appears to differ between
    # huggingface_hub releases, so both are tried - confirm against the
    # installed version.
    modified=getattr(info,'last_modified',None) or getattr(info,'lastModified',None)
    print('Last modified:', modified)
    # NOTE(review): the license may not be a top-level attribute; fall back to
    # the card metadata and then to a 'license:<id>' tag - confirm against the
    # installed huggingface_hub version.
    lic=getattr(info,'license',None)
    if not lic:
        card=getattr(info,'card_data',None) or getattr(info,'cardData',None)
        lic=card.get('license') if isinstance(card,dict) else getattr(card,'license',None)
    if not lic:
        for tag in (getattr(info,'tags',None) or []):
            if isinstance(tag,str) and tag.startswith('license:'):
                lic=tag.split(':',1)[1]
                break
    print('License:', lic)
    url=f'https://huggingface.co/{HF_MODEL}/raw/main/README.md'
    r=requests.get(url,timeout=10)
    if r.status_code==200:
        md=r.text
        def extract(section):
            """Return the body under the first heading starting with `section`, or None."""
            # The lookahead stops at the next heading line, so an empty
            # section yields no text instead of swallowing the next section.
            m=re.search(r'^#+[ \t]*'+re.escape(section)+r'[^\n]*\n(.*?)(?=^#|\Z)', md, re.M|re.S|re.I)
            body=m.group(1).strip() if m else ''
            return body or None
        for sec in ['Intended Use','Use cases','Limitations','Risks','Training data','Model details','License']:
            txt=extract(sec)
            if txt:
                print(f'--- {sec} ---')
                # Show at most the first 20 lines of each section.
                print('\n'.join(txt.splitlines()[:20]))
    else:
        print('README not available; HTTP status:', r.status_code)
except Exception as e:
    print('Could not load release notes/license details:', e)


### License Snippet & What It Means
Pulls license file text when available and summarizes practical implications.


In [None]:
# Plain-language summaries keyed by lower-case license identifier.
LICENSE_SUMMARIES = {
    'mit':'Permissive: commercial use allowed, attribution and license notice required; no warranty.',
    'apache-2.0':'Permissive with patent grant: commercial use allowed; keep NOTICE; mind patents/trademarks.',
    'bsd-3-clause':'Permissive: commercial use allowed; attribution required; no endorsement.',
    'gpl-3.0':'Copyleft: derivatives must be GPL; not suitable for closed-source distribution.',
    'lgpl-3.0':'Weak copyleft: dynamic linking OK; modified library must remain LGPL.',
    'agpl-3.0':'Network copyleft: providing as a service triggers source-sharing obligations.',
    'cc-by-4.0':'Attribution required; commercial use allowed; keep notices.',
    'cc-by-nc-4.0':'Non-commercial: no commercial use permitted; attribution required.',
    'openrail':'Responsible AI License: usage restrictions apply; review terms for safety/commercial limits.',
    'openrail++':'Responsible AI License: stricter usage constraints; check allowed and disallowed uses.',
}


def explain_license(lic):
    """Return the summary for a license identifier, or None if it is unknown.

    An exact (case-insensitive) match wins. Otherwise the longest known
    identifier that appears as a whole token inside `lic` is used, so
    'creativeml-openrail-m' maps to 'openrail' while 'lgpl-3.0' is never
    mistaken for 'gpl-3.0' and 'openrail++' is never mistaken for 'openrail'.

    Args:
        lic: License identifier; may be None, a string, or a list/tuple of
            strings (the first entry is used).

    Returns:
        The summary string from LICENSE_SUMMARIES, or None.
    """
    import re
    if isinstance(lic, (list, tuple)):
        lic = lic[0] if lic else None
    key = str(lic or '').strip().lower()
    if not key:
        return None
    if key in LICENSE_SUMMARIES:
        return LICENSE_SUMMARIES[key]
    # Longest identifier first, matched only on non-alphanumeric boundaries.
    for name in sorted(LICENSE_SUMMARIES, key=len, reverse=True):
        if re.search(r'(?<![a-z0-9])' + re.escape(name) + r'(?![a-z0-9])', key):
            return LICENSE_SUMMARIES[name]
    return None


# Look up the model's license, show the start of its license file when one can
# be downloaded, and print a plain-language summary. Best-effort: any failure
# is reported instead of stopping the notebook.
try:
    from huggingface_hub import HfApi, hf_hub_download
    import os
    api=HfApi()
    info=api.model_info(HF_MODEL)
    # NOTE(review): the license may not be a top-level attribute; fall back to
    # the card metadata and then to a 'license:<id>' tag - confirm against the
    # installed huggingface_hub version.
    lic=getattr(info,'license',None)
    if not lic:
        card=getattr(info,'card_data',None) or getattr(info,'cardData',None)
        lic=card.get('license') if isinstance(card,dict) else getattr(card,'license',None)
    if not lic:
        for tag in (getattr(info,'tags',None) or []):
            if isinstance(tag,str) and tag.startswith('license:'):
                lic=tag.split(':',1)[1]
                break
    print('License identifier:', lic)
    path=None
    for name in ['LICENSE','LICENSE.txt','LICENSE.md','LICENSE.MD','license','LICENSE.rst']:
        try:
            path=hf_hub_download(repo_id=HF_MODEL, filename=name, revision='main')
            break
        except Exception:
            # This candidate file could not be downloaded; try the next name.
            continue
    if path and os.path.exists(path):
        # Context manager closes the file handle promptly.
        with open(path,'r',encoding='utf-8',errors='ignore') as fh:
            text=fh.read()
        print('\n--- License snippet (first 1500 chars) ---\n')
        print(text[:1500])
    else:
        print('License file not found; see model card.')
    explain=explain_license(lic)
    print('\n--- What this license means for you ---\n')
    if explain:
        print(explain)
    else:
        print('Review the license in the model card. Typical items: attribution, redistribution terms, commercial-use scope, and safety constraints.')
except Exception as e:
    print('License lookup error:', e)


### Intended Use & Limitations (from Model Card)
Quickly scan the most relevant sections from the README.


In [None]:
import re, requests
def fetch_readme(org_model: str, timeout: int = 10):
    """Download the raw README.md of a Hugging Face repo.

    Args:
        org_model: Repo id interpolated into the huggingface.co raw-file URL.
        timeout: Request timeout in seconds.

    Returns:
        The README text on HTTP 200; otherwise None. Exceptions raised while
        fetching are printed, not propagated.
    """
    readme_url = f'https://huggingface.co/{org_model}/raw/main/README.md'
    body = None
    try:
        response = requests.get(readme_url, timeout=timeout)
        if response.status_code == 200:
            body = response.text
    except Exception as e:
        print('Fetch error:', e)
    return body
def extract_sections(md_text: str, sections):
    """Collect the body text under selected markdown headings.

    A heading matches when its text starts with the section name
    (case-insensitive), so 'Limitations' also matches
    '## Limitations and Bias'. The body runs up to the next line that starts
    with '#', or to the end of the text.

    Args:
        md_text: Markdown source; empty or None yields an empty result.
        sections: Iterable of section names to look for.

    Returns:
        Dict mapping each section name that was found with a non-empty body
        to that body, stripped of surrounding whitespace.
    """
    out = {}
    if not md_text:
        return out
    for sec in sections:
        # The lookahead stops at the next heading line without consuming it,
        # so an empty section does not swallow the section that follows.
        pattern = r'^#+[ \t]*' + re.escape(sec) + r'[^\n]*\n(.*?)(?=^#|\Z)'
        m = re.search(pattern, md_text, re.M | re.S | re.I)
        body = m.group(1).strip() if m else ''
        if body:
            out[sec] = body
    return out
# Fetch the model card once and print the first 30 lines of each section found.
md = fetch_readme(HF_MODEL)
if md is None:
    # fetch_readme returns None on a non-200 response or a request error.
    print('README not available; see the model card on the Hub.')
secs = extract_sections(md or '', ['Intended Use','Use cases','Limitations','Risks','Training data'])
for k,v in secs.items():
    print(f'\n--- {k} ---\n')
    print('\n'.join(v.splitlines()[:30]))


In [None]:
# Structured output hint (Pydantic)
# Pick the message inside the try/except, then print it once at the end.
try:
    from pydantic import BaseModel

    class Item(BaseModel):
        # Example schema for a structured model answer.
        title: str
        rating: int

    _hint = 'Define JSON schema with Item.model_json_schema() and validate outputs.'
except Exception:
    _hint = 'Install pydantic to validate structured outputs.'
print(_hint)


## Evaluation (Golden Set)
Deterministic checks for success criteria.


In [None]:
# Golden set: each prompt is paired with a substring the answer must contain.
golden = [
    {'prompt':'2+2?','expect':'4'},
    {'prompt':'Capital of France?','expect':'Paris'},
]

ok = 0
for ex in golden:
    # Route the prompt through the backend selected by RUNTIME.
    if RUNTIME == 'gpt-oss':
        out = chat_oss(ex['prompt'])
    else:
        out = infer_transformers(ex['prompt'])
    # Case-insensitive containment check; a missing answer counts as a miss.
    if ex['expect'].lower() in (out or '').lower():
        ok += 1

acc = ok / len(golden)
print(f'Accuracy: {acc:.2%} ({ok}/{len(golden)})')


## MCQ — Understanding Parameters


In [None]:
def parse_choice(raw, n_options):
    """Parse a typed answer into an option index.

    Args:
        raw: Text typed by the user (surrounding whitespace is ignored).
        n_options: Number of available options.

    Returns:
        The integer index when `raw` is a whole number in
        ``[0, n_options)``; otherwise None.
    """
    try:
        idx = int(str(raw).strip())
    except (TypeError, ValueError):
        return None
    return idx if 0 <= idx < n_options else None


# Multiple-choice question: shown as a dropdown when ipywidgets works,
# otherwise answered through a plain text prompt.
question = 'Which parameter most reduces randomness?'
options = ['top_p','temperature','max_tokens','presence_penalty']
correct_index = 1
explanation = 'Lower temperature yields more deterministic outputs.'
print(question)
for i,o in enumerate(options):
    print(f'  {i}) {o}')
try:
    import ipywidgets as W
    from IPython.display import display
    dd = W.Dropdown(options=[(o,i) for i,o in enumerate(options)], description='Answer:')
    btn = W.Button(description='Submit')
    out = W.Output()
    def on_click(_):
        """Show feedback for the currently selected dropdown value."""
        with out:
            out.clear_output()
            print('Correct!' if dd.value==correct_index else f'Not quite. {explanation}')
    btn.on_click(on_click)
    display(dd, btn, out)
except Exception:
    # Text fallback. Unparseable, out-of-range or unavailable input counts as
    # a wrong answer instead of raising.
    try:
        raw = input(f'Your choice (0-{len(options)-1}): ')
    except (EOFError, OSError):
        raw = ''
    choice = parse_choice(raw, len(options))
    print('Correct!' if choice==correct_index else f'Not quite. {explanation}')


## Cost & Observability
Show tokens (if exposed) and latency; run a small batch and summarize.


In [None]:
# Run a small batch through the selected backend and time the whole loop.
prompts = ['List 3 cities in 5 words','Name 3 fruits in 5 words']
outs = []
t0 = time.time()
for p in prompts:
    if RUNTIME == 'gpt-oss':
        outs.append(chat_oss(p))
    else:
        outs.append(infer_transformers(p))
dt = time.time() - t0
print('Batch latency:', round(dt,2),'s for', len(prompts),'items')


## Exercises
- Add a JSON schema and validate outputs.
- Introduce a tool/function and parse its response.
- Expand the golden set to 20 items and report accuracy.
- Log average latency over 10 trials at two temperatures.


## Troubleshooting
- Connection errors: verify `OPENAI_BASE_URL` and that your server exposes `/models`.
- 401/403: check keys/permissions; never hardcode secrets.
- OOM: reduce sequence length/batch; switch to CPU or smaller model.
- JSON parse errors: strip code fences; validate/repair the text before calling `json.loads`.


In [None]:
# Diagnostics
import requests
# Probe the server's model listing and print the HTTP status plus the first
# 200 characters of the response body; connection problems are reported.
try:
    # Drop any trailing slash so the probe URL never contains '//models'.
    r = requests.get(OPENAI_BASE_URL.rstrip('/') + '/models', timeout=5)
    print(r.status_code, r.text[:200])
except Exception as e:
    print('Conn error:', e)
