# Day 3
Structured generation example using Outlines

In [2]:
import sys
if 'google.colab' in sys.modules:  # If in Google Colab environment
    # Mount google drive to enable access to data files
    from google.colab import drive
    drive.mount('/content/drive', force_remount=True)

    # Installing requisite packages
    !pip install transformers accelerate outlines pydantic pymupdf &> /dev/null

    # Change working directory to day_3
    %cd /content/drive/MyDrive/LLM4BeSci_2025MetaRep/day_3

Mounted at /content/drive
/content/drive/MyDrive/LLM4BeSci_2025MetaRep/day_3


In [3]:
import os
from typing import List

import fitz
import outlines
from outlines.inputs import Chat
from pydantic import BaseModel
from transformers import AutoModelForCausalLM, AutoTokenizer

In [None]:
# Hugging Face access token. It is read from the HF_TOKEN environment variable
# instead of being pasted into the notebook, so the secret is never saved or
# shared together with the file. If the variable is unset (or empty) this is
# None, and the Hugging Face libraries fall back to their own credential
# lookup (e.g. a stored login or a Colab secret named HF_TOKEN).
hf_token = os.environ.get("HF_TOKEN") or None

# Checkpoint shared by the model and its tokenizer; defined once so the two
# can never get out of sync.
model_id = "google/gemma-3-1b-it"

# Load the text generation model (placed on the GPU) and its tokenizer
hf_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="cuda",
    token=hf_token,
)
hf_tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)

# Wrap both in an Outlines model, which is what the later cells call
model = outlines.from_transformers(hf_model, hf_tokenizer)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


## Creating a character
A generic example using Outlines.

In [4]:
# Two-turn conversation: a system instruction followed by the user's request.
system_turn = {"role": "system", "content": "You are a helpful assistant."}
user_turn = {"role": "user", "content": "Create a character"}

prompt = Chat([system_turn, user_turn])


# Output schema for the generated character; it is passed to the model as
# `output_type` in the generation cell.
class Character(BaseModel):
    name: str  # the character's name
    age: int  # the character's age, as an integer
    skills: List[str]  # the character's skills, one string per skill

In [5]:
# Ask the model for a character, passing the Character schema as the output
# type and capping the reply at 200 newly generated tokens.
result = model(
    prompt,
    output_type=Character,
    max_new_tokens=200,
)
print(result)

{ "name": "Elowen Blackwood", "age":34, "skills": ["Herbalism", "Tracking", "Woodcarving", "Basic First Aid", "Lockpicking", "Observational Skills"] }


## Retrieving from PDF

Applying the same approach to the text of an article PDF.



In [5]:
# Extract text from PDF.
# The document is opened in a `with` block so the file handle is released as
# soon as the text has been read, even if extraction raises.
with fitz.open('llm_tutorial.pdf') as pdf:
    # pdf[8:11] selects the pages at 0-based indices 8, 9 and 10; their text
    # is concatenated in page order.
    tutorial = "".join(page.get_text() for page in pdf[8:11])

tutorial[:1000]  # Display the first 1000 characters to verify content extraction

'123\nrichly commented code is available in notebook format at\ngithub.com/Zak-Hussain/LLM4BeSci.git, a GitHub reposi-\ntory with instructions for running the code online in a Google\nColab environment. The repository also provides a means of\nkeepingthecodebaseforthistutorialuptodate.Keepinmind\nthat the Hugging Face ecosystem is in active development,\nmaking it likely that speciﬁc aspects of the code presented\nin this paper will be deprecated by the time of reading. We\nplan to regularly update the GitHub repository and respond to\nupdate requests, which can be submitted as GitHub issues at\ngithub.com/Zak-Hussain/LLM4BeSci/issues/new. For fur-\nther information on Hugging Face, we suggest the Hugging\nFace textbook by Tunstall et al. (2022).\nFeature extraction\nRelating personality measures\nFeature extraction from LLMs is already being leveraged\nin diverse ways to assist research in personality psychol-\nogy (e.g., Abdurahman et al., 2023; Cutler & Condon, 2023;\nWulff & Mata

In [6]:
# The user turn is the extraction instruction followed by the article text
# read from the PDF.
article_question = "List the model names of the LLMs used in the exercises in the following article:\n\n" + tutorial

system_turn = {"role": "system", "content": "You are a helpful assistant."}
user_turn = {"role": "user", "content": article_question}

prompt = Chat([system_turn, user_turn])


# Output schema for the extraction step; it is passed to the model as
# `output_type` in the generation cell.
# NOTE(review): some pydantic v2 releases warn about field names that start
# with `model_` (protected namespace) — confirm against the installed version.
class LLMs(BaseModel):
    model_names: List[str]  # the extracted model names, one string per model

In [7]:
# Ask the model for the model names, passing the LLMs schema as the output
# type and capping the reply at 200 newly generated tokens.
result = model(
    prompt,
    output_type=LLMs,
    max_new_tokens=200,
)
print(result)

{ "model_names": ["distilbert", "BERT"] }
