# <center>Critical AI</center>
<center>ENGL 54.41</center>
<center>Dartmouth College</center>
<center>Winter 2026</center>
<pre>Created: 02/18/2026</pre>

In [None]:
import html
import os

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from openai import OpenAI
from sklearn.cluster import KMeans
from sklearn.decomposition import TruncatedSVD
from sklearn.feature_extraction.text import CountVectorizer

In [None]:
# Model identifier that generate() passes to the API on every request.
model_name = "openai.gpt-oss-120b"

# Prefer a key supplied through the DARTMOUTH_CHAT_API_KEY environment variable
# so the secret never has to be saved inside the notebook. The placeholder is
# kept as the fallback and can still be replaced by hand.
api_key = os.environ.get("DARTMOUTH_CHAT_API_KEY", "API_KEY_GOES_HERE")

# OpenAI-compatible client pointed at the Dartmouth chat endpoint.
client = OpenAI(base_url="https://chat.dartmouth.edu/api",
                api_key=api_key)

In [None]:
# generation function -- submits our prompt (and an optional system prompt) to the API.
def generate(prompt, system_prompt=None, temperature=0.25):
    """Send a single-turn chat request and return the raw completion object.

    Uses the module-level ``client`` and ``model_name``.

    Parameters
    ----------
    prompt : str
        Text sent as the user message.
    system_prompt : str, optional
        When given, sent as a system message ahead of the user message.
        The default (``None``) sends the user message only.
    temperature : float, optional
        Sampling temperature forwarded to the API. Defaults to 0.25.

    Returns
    -------
    The object returned by ``client.chat.completions.create``. The cells in
    this notebook read the generated text from ``.choices[0].message.content``.
    """
    messages = []
    if system_prompt is not None:
        messages.append({"role": "system", "content": system_prompt})
    messages.append({"role": "user", "content": prompt})

    return client.chat.completions.create(
        model=model_name,
        temperature=temperature,
        messages=messages,
        stream=False)

In [None]:
# Example: Prompting for completion
# The prompt gives an instruction followed by two lines of verse and ends on a
# newline, leaving the continuation for the model to supply.
prompt = """Instructions: please complete the following text. Use only what you have seen in your training data.

The woods are lovely, dark and deep,   
But I have promises to keep,
"""

# Keep the whole completion object; the generated text is extracted from it later.
output = generate(prompt)

In [None]:
# display response: take the message text of the first choice in `output`
# (the completion object assigned by an earlier cell) and print it
response = output.choices[0].message.content
print(response)

In [None]:
# Example: Masked Prompting for completion
# One worked example shows the expected answer format (the word wrapped in
# <token> tags after "Output:"). The second input is left open for the model.
# Both examples use the same "Input:" / "Output:" labels so the demonstrated
# format matches the one being requested.
prompt = """Instructions: In the following replace [MASK] with the correct word. Use only a single word.
Make sure to use only the words found from samples in your training data. You must make a guess, 
even if you are uncertain.

Example: 

Input: Whenever Richard [MASK] went down town,
Output: <token>Cory</token>

Input: But a caged [MASK] stands on the grave of dreams
Output:"""

# Submit the prompt, then print the text of the first returned choice.
output = generate(prompt)
response = output.choices[0].message.content
print(response)

In [None]:
# Example: asking directly for the next token
# The prompt stops mid-sentence and asks for a single-word continuation.
prompt = """What is the next token. Provide just a single word. 
Dartmouth College's mission is to educate promising students and prepare them for a 
lifetime of learning and responsible"""
# Submit the prompt, then print the text of the first returned choice.
output = generate(prompt)
response = output.choices[0].message.content
print(response)

## Iterative Generation

In [None]:
# the prompt that is submitted repeatedly, and how many times to submit it
prompt = "Write a short story about a student's first year attending Dartmouth College. Use normal paragraph structure. Include experiences and events in each of the three major terms: fall, winter, and spring."
iterations = 10

# request one story per iteration, keeping only the generated text of each
outputs = [
    generate(prompt).choices[0].message.content
    for _ in range(iterations)
]

In [None]:
# build the document-term matrix used for modeling: one row per entry in
# `outputs`, one column per vocabulary term (English stop words removed)
vectorizer = CountVectorizer(
    input="content",
    strip_accents="unicode",
    stop_words="english",
)
dtm = vectorizer.fit_transform(outputs)

# reverse lookup: column index -> vocabulary term
idx2voc = dict(zip(vectorizer.vocabulary_.values(),
                   vectorizer.vocabulary_.keys()))

In [None]:
# total occurrences of every vocabulary term, summed over all documents
vocab_sums = dtm.sum(axis=0)

# flatten to 1-D so the lookup below works whether the sum comes back as a
# (1, n_terms) matrix or as a flat array
term_totals = np.asarray(vocab_sums).ravel()

# (term, total) pairs ordered from most to least frequent
sorted_vocab = sorted(
    ((term, term_totals[col]) for term, col in vectorizer.vocabulary_.items()),
    key=lambda pair: pair[1],
    reverse=True,
)

# display top twenty-five words; slicing (rather than indexing 0..24) avoids
# an IndexError when the vocabulary holds fewer than 25 terms
for term, total in sorted_vocab[:25]:
    print(term, "=>", total)

In [None]:
# small helper function for examining the presence of specific words
def term_debug(term):
    """Report how often ``term`` occurs in the document-term matrix.

    Reads the module-level ``vectorizer`` (for the term -> column mapping)
    and ``dtm``.

    Parameters
    ----------
    term : str
        Vocabulary entry to look up, spelled exactly as it is stored in
        ``vectorizer.vocabulary_``.

    Returns
    -------
    pandas.DataFrame or None
        A one-row frame indexed by ``term`` with the columns
        ``'Total Count'`` and ``'Mean Count'`` (the mean is taken over the
        rows of ``dtm``). When the term is not in the vocabulary a message
        is printed and ``None`` is returned.
    """
    idx = vectorizer.vocabulary_.get(term)
    if idx is None:
        print("Error: {0} not in vocabulary".format(term))
        return None

    # Only this term's column is needed, so reduce that column alone instead
    # of summing and averaging the entire matrix.
    column = dtm[:, idx]
    tc = int(column.sum())
    tm = float(column.mean())
    return pd.DataFrame({'Total Count': tc, 'Mean Count': tm}, index=[term])

In [None]:
# Look up the total and mean count of the term 'library' in the document-term
# matrix; if the term is not in the vocabulary, a message is printed instead.
term_debug('library')

In [None]:
# partition the documents into three clusters using their term counts
kmeans = KMeans(n_clusters=3, random_state=0, n_init="auto")
kmeans.fit(dtm)
labels = kmeans.labels_
centroids = kmeans.cluster_centers_

# project documents and centroids to two components with the same fitted SVD
# so that both are plotted in one coordinate system
svd = TruncatedSVD(n_components=2, random_state=0)
dtm_svd = svd.fit_transform(dtm)
clusters_svd = svd.transform(centroids)

# documents coloured by cluster label; centroids drawn as larger outlined markers
doc_x, doc_y = dtm_svd[:, 0], dtm_svd[:, 1]
centroid_x, centroid_y = clusters_svd[:, 0], clusters_svd[:, 1]

fig, ax = plt.subplots()
ax.scatter(doc_x, doc_y, c=labels, alpha=0.5)
ax.scatter(centroid_x, centroid_y, marker='o', s=150, edgecolor='black')
ax.set_title("k-Means Clustering of DTM")
ax.set_xlabel("Component 1")
ax.set_ylabel("Component 2")
plt.show()

In [None]:
from IPython.display import display, HTML

# Show every generated story, each followed by a horizontal rule.
# The text is escaped so that any markup characters in the generated output
# are shown literally rather than interpreted as HTML. `white-space: pre-wrap`
# keeps the paragraph breaks that a plain <div> would collapse.
for story in outputs:
    display(HTML('<div style="white-space: pre-wrap">' + html.escape(story) + '</div>'))
    display(HTML('<hr>'))