# <center>Critical AI</center>
<center>ENGL 54.41</center>
<center>Dartmouth College</center>
<center>Winter 2026</center>
<pre>Created: 02/18/2026</pre>

In [None]:
import html
import os
from pprint import pprint

import matplotlib
import matplotlib.colors as mcolors
import numpy as np
import requests
from IPython.display import display, HTML
from openai import OpenAI

## OpenAI API and Dartmouth Chat

In order to use this API, we'll need to obtain our API key from Dartmouth Chat. 
1. Visit https://chat.dartmouth.edu and log in
2. Click on the person icon (far right, top corner)
3. Select "Settings"
4. Select "Account"
5. Select "API keys". Click "Show", then copy and paste the string below (in quotes) as the value of `api_key`

In [None]:
# We'll test this by selecting the GPT OSS 120B model. This is OpenAI's "open weights" model.
# It is running locally on GPU hardware at Dartmouth. Nothing leaves campus. There are no
# token limits when using this local model.
model_name = "openai.gpt-oss-120b"

# An API key is a credential. Prefer exporting DARTMOUTH_CHAT_API_KEY in your environment
# before starting Jupyter so the key is never saved (or committed) with the notebook.
# If that variable is not set, the placeholder string below is used instead; you can
# replace it with your own key, but remember to remove it before sharing the notebook.
api_key = os.environ.get("DARTMOUTH_CHAT_API_KEY", "API_KEY_GOES_HERE")

# OpenAI-compatible client pointed at the Dartmouth Chat endpoint.
client = OpenAI(base_url="https://chat.dartmouth.edu/api",
                api_key=api_key)

In [None]:
# Let's make sure that we are authenticated correctly and obtain a list of models. Running
# this cell should display a list of models. You'll see some familiar ones here along with
# some others that are more specialized and less well-known.
models_response = requests.get(
    "https://chat.dartmouth.edu/api/models",
    headers={"Authorization": "Bearer " + api_key},
    timeout=30,  # requests waits forever by default; don't let a stalled server hang the kernel
)
# Turn a rejected or missing API key into a clear HTTP error here, rather than a confusing
# KeyError / JSON decoding error on the next line.
models_response.raise_for_status()
models = sorted(model["id"] for model in models_response.json()["data"])
pprint(models)

In [None]:
# generation function -- this will submit our prompt to the API with a system prompt.
DEFAULT_SYSTEM_PROMPT = "You are a helpful assistant. You are also a creative writer."


def generate(prompt, system_prompt=DEFAULT_SYSTEM_PROMPT, model=None):
    """Send a single-turn chat request and return the full completion object.

    Parameters
    ----------
    prompt : str
        Text sent as the "user" message.
    system_prompt : str, optional
        Text sent as the "system" message. Defaults to the prompt this
        notebook has always used, so existing calls behave the same.
    model : str or None, optional
        Model identifier to query. When None (the default), the notebook-level
        ``model_name`` is looked up at call time.

    Returns
    -------
    The object returned by ``client.chat.completions.create``. It is returned
    whole (not just the text) so later cells can read usage, the message, and
    the per-token logprobs from it.
    """
    chat_completion = client.chat.completions.create(
        model=model_name if model is None else model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ],
        logprobs=True,   # ask for per-token log-probabilities alongside the text
        stream=False,    # return the complete response in one piece
    )
    return chat_completion

# we'll use this function later to help us interpret generated tokens.
def colorize(logprobs):
    """Render tokens as HTML spans shaded by their relative probability.

    Parameters
    ----------
    logprobs : sequence of (token, logprob) pairs
        Each item holds the token text at index 0 and its log-probability at
        index 1.

    Returns
    -------
    str
        One ``<span>`` per token, concatenated. The background comes from the
        'Greys' colormap: the most probable token in the sequence is lightest
        and the least probable is darkest. Returns an empty string when
        ``logprobs`` is empty.
    """
    # Nothing to render; also avoids calling min()/max() on an empty array.
    if len(logprobs) == 0:
        return ''

    tokens = [pair[0] for pair in logprobs]
    # Convert log-probabilities back into probabilities before scaling.
    probs = np.exp(np.array([pair[1] for pair in logprobs]))
    # Scale relative to this sequence's own min/max, then invert so that
    # low probability maps to the dark end of the colormap.
    norm = mcolors.Normalize(vmin=probs.min(), vmax=probs.max())
    shades = 1 - norm(probs)
    cmap = matplotlib.colormaps['Greys']
    template = '<span class="barcode" style="color: {}; background-color: {}">{}</span>'

    pieces = []
    for tk, shade in zip(tokens, shades):
        background = mcolors.rgb2hex(cmap(shade)[:3])
        # Dark backgrounds need light text, otherwise the least likely tokens
        # (the most interesting ones) are unreadable.
        text_color = 'white' if shade > 0.5 else 'black'
        # Escape the token so characters such as '<' or '&' in model output
        # are shown literally instead of being parsed as markup.
        label = '&nbsp;' + html.escape(tk) + '&nbsp;'
        pieces.append(template.format(text_color, background, label))
    return ''.join(pieces)

In [None]:
# Ask the model for a story and keep the whole returned object, so the cells
# below can pick apart its individual components one at a time.
output = generate(
    "Write a short story about the Robert Frost statue at Dartmouth College."
)

In [None]:
# Display the "usage" information attached to the completion. How many tokens were
# generated? How many did we submit as part of the prompt?
output.usage

In [None]:
# If our model is a "reasoning" model, the trace is returned separately from the answer.
# Not every model/server includes this field, so fall back to None rather than raising
# AttributeError when it is absent.
reasoning_trace = getattr(output.choices[0].message, "reasoning_content", None)

# The message content is the output after the end of the reasoning trace.
response = output.choices[0].message.content

In [None]:
# Let's view the reasoning trace first to see how the model has been fine-tuned to insert
# additional "prompts" in the form of these generated tokens:
print(reasoning_trace)

In [None]:
# And here is the response itself: the message content that follows the reasoning trace.
print(response)

In [None]:
# We asked the model to return "logprobs" for the generated output, which gives us some
# sense of the uncertainty behind each token. A logprob is the logarithm of the token's
# probability, so lower (more negative) values mean a less likely token. The server has
# already computed these for us. Here we pair every generated token with its logprob.
logprobs = [
    [entry.token, entry.logprob]
    for entry in output.choices[0].logprobs.content
]

In [None]:
# Build the HTML for the token/logprob pairs with colorize() and render it in the notebook.
display(HTML(colorize(logprobs)))