# Model exploration notebook

Primary backbone: **meta-llama/Llama-3.1-8B**
Reranker: **cross-encoder/ms-marco-MiniLM-L-6-v2**

Baselines: `google/flan-t5-xxl` and `mistralai/Mistral-7B-v0.1`

In [None]:
from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer, pipeline
from sentence_transformers import SentenceTransformer, CrossEncoder
from huggingface_hub import InferenceClient, hf_hub_download
import torch

In [None]:
# Load base LLM -- multi-line open-paren (scanner stitches these)
# Loads the primary backbone named in the notebook header. The repo id sits on
# the line after the opening paren; per the note above, this cell exercises
# the multi-line call form, so keep the call split across lines.
# bfloat16 weights are requested and device placement is left to the library.
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.1-8B",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
# The tokenizer is loaded from the same repo id as the model, single-line form.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.1-8B")

In [None]:
# pipeline() positional
# The model id is the second positional argument, right after the task name.
gen_pipe = pipeline("text-generation", "Qwen/Qwen2.5-7B-Instruct")

# pipeline() model= kwarg
# Same factory, but here the model id is supplied through the model keyword.
# An aggregation strategy of "simple" is passed along to the NER pipeline.
ner_pipe = pipeline("ner", model="dslim/bert-base-NER", aggregation_strategy="simple")

In [None]:
# Retrieval models: a sentence-embedding model plus the cross-encoder reranker
# named in the notebook header. Both ids are single-quoted string literals,
# whereas the other cells visible here use double quotes.
embedder = SentenceTransformer('BAAI/bge-large-en-v1.5')
reranker = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')

In [None]:
# hf_hub_download repo_id= kwarg
# Requests a single named file from the repo identified by the repo_id keyword.
# The call's return value is bound to gguf and echoed as the download location.
gguf = hf_hub_download(
    repo_id="bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
    filename="Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf",
)
print(f"Downloaded to: {gguf}")

In [None]:
# InferenceClient positional
# The model id is the first positional argument of the client constructor.
# One prompt is sent with generation limited to 100 new tokens, then printed.
# NOTE(review): presumably this calls a remote endpoint and needs network
# access and possibly credentials -- confirm before relying on Run All.
client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")
response = client.text_generation("Tell me a joke", max_new_tokens=100)
print(response)

In [None]:
# Variable indirection -- NOT detectable
# The repo id is bound to a constant first and then passed by name, so the
# loader call itself carries no string literal.
# FLAN-T5 is an encoder-decoder (T5) checkpoint, so it is loaded through the
# seq2seq auto class; the causal-LM auto class does not accept T5 configs and
# would raise here instead of loading the baseline.
BASELINE = "google/flan-t5-xxl"
baseline_model = AutoModelForSeq2SeqLM.from_pretrained(BASELINE)

# Multi-line split on open paren -- scanner stitches lines ending with '('
# The repo id is the only argument and sits alone on the line after the
# opening paren; keep this layout, since the line break is what is exercised.
alt = AutoModelForCausalLM.from_pretrained(
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
)

# Local path -- correctly NOT detected
# The argument is a relative filesystem path rather than a hub repo id.
# NOTE(review): presumably the checkpoint directory must exist relative to the
# kernel's working directory for this cell to run -- confirm.
local = AutoModelForCausalLM.from_pretrained("./checkpoints/step-1000")