In [None]:
from openai import OpenAI
import json

CHALLENGE: Using the prompt engineering tools you learned this week, design a prompt that extracts something you might want to know about the article.
This should be something you might want to convert into data to analyse later, like political slant, sentiment, or a rating of the article's quality

In [None]:
# Placeholder string: replace the text between the triple quotes with your own prompt.
system_prompt = """
   YOUR PROMPT HERE
"""

In [None]:
# Load the whole article into memory as a single UTF-8 decoded string.
with open('./section-2/article.txt', mode='r', encoding='utf-8') as article_file:
    article_content = article_file.read()

In [None]:
# Create a chat completion request to analyze the article.
# The system message carries the instructions stored in `system_prompt`; without
# it the model only receives the generic user request below.
# NOTE(review): `client` is not created in any cell above this one -- confirm an
# OpenAI client has been set up before running this cell.
completion = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": "Analyze this article as instructed:\n" + article_content},
    ],
    # Optional, model-dependent request settings (for example an output-format
    # option) would go here; check the API reference for the model you use.
)

In [None]:
# Pull the reply text out of the first choice and print it as-is (no JSON parsing
# happens here).
# `message.content` is optional in the SDK, and concatenating None to a string
# raises TypeError, so fall back to an empty string.
response_content = completion.choices[0].message.content or ""
print("The model returned:" + response_content)


# Section 2: Article Analyzer

This Colab notebook analyzes the tariff article using OpenAI's **Responses API**.
It demonstrates **structured outputs** (JSON Schema) so you can convert model judgements into clean data (e.g., political slant, sentiment, and article quality).

### What you'll learn
- How to call the Responses API with structured outputs via `text={"format": {"type": "json_schema", ..., "strict": True}}`.
- How to design a robust extraction prompt with clear rubrics to reduce ambiguity.
- How to parse and store the model's JSON into a pandas DataFrame for later analysis.



## 1) Setup

In [9]:
# @title
#@ Title
import requests
import json, pandas as pd
import openai
from openai import OpenAI
import os
from google.colab import userdata

# Resolve the API key from, in order: Colab Secrets, the OPENAI_API_KEY
# environment variable, and finally a hidden interactive prompt.
#
# userdata.get() raises (rather than returning None) when the secret does not
# exist or this notebook has not been granted access to it, so those cases are
# caught here to let the fallbacks below run.
try:
    OPENAI_API_KEY = userdata.get('OPENAI_API_KEY')
except (userdata.SecretNotFoundError, userdata.NotebookAccessError):
    OPENAI_API_KEY = None

# Honour a key that was exported manually, as the error message below suggests.
if not OPENAI_API_KEY:
    OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

if not OPENAI_API_KEY:
    try:
        import getpass
        OPENAI_API_KEY = getpass.getpass("Enter your OpenAI API key (input hidden): ").strip()
    except Exception as exc:
        # Keep the original failure attached so the real cause stays visible.
        raise ValueError("Unable to capture API key input. Set os.environ['OPENAI_API_KEY'] manually.") from exc

# An empty or whitespace-only entry at the prompt ends up here.
if not OPENAI_API_KEY:
    raise ValueError("Missing API key. Please provide a valid OpenAI API key.")

# Make the key available to the SDK
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
openai.api_key = OPENAI_API_KEY

# Client object used by the request cells later in the notebook.
client = OpenAI(api_key=OPENAI_API_KEY)


Loaded from URL: https://raw.githubusercontent.com/calisley/dpi-681/refs/heads/main/section-2/article.txt (5,964 chars)



## 2.0) Working with text in Python

- A useful Python trick for combining strings is using the `+` sign.

For example:



In [10]:
# `+` joins the two string literals end to end; the space comes from the first literal.
text = "Hello " + "World"

print(text) # -> "Hello World"


Hello World


In [11]:
# `+` works the same way when one side is a variable holding a string.
name = "Calvin"


print("Hello " + name) # --> "Hello Calvin"

Hello Calvin


## 2.1) Analyze the article using the Responses API

### First, let's read in the article text. You don't need to understand how this works; the code is shown so you can see that the article text is stored in a variable called `article_text`


In [13]:
# Download the article over HTTP and keep its decoded body in `article_text`.
URL = "https://raw.githubusercontent.com/calisley/dpi-681/refs/heads/main/section-2/article.txt"

res = requests.get(URL, timeout=30)
res.raise_for_status()  # stop here on any 4xx/5xx response

# Only fall back to UTF-8 when the response did not report an encoding.
if not res.encoding:
    res.encoding = "utf-8"

article_text = res.text
print(f"Loaded from URL: {URL} ({len(article_text):,} chars)")


Loaded from URL: https://raw.githubusercontent.com/calisley/dpi-681/refs/heads/main/section-2/article.txt (5,964 chars)


### Here, use the prompt engineering tricks we discussed in class to get the model to tell you something about the article

- We use the string concatenation trick from above to attach the article text to our input prompt

Play around with a few different system prompts and be ready to discuss what you found.

In [None]:

# Send the article to the model: `instructions` takes the system prompt and
# `input` takes the user message.
resp = client.responses.create(
    model="gpt-5-mini",
    instructions="[Your system prompt here]",
    # String trick we learned. The ":\n\n" separator keeps the request from
    # running straight into the first word of the article.
    input="Analyze this article:\n\n" + article_text,
)

print(resp.output_text)



## 3) Large Scale Data Analysis

- For one student or quick experiments, saving to Colab's /content is fine but ephemeral.
- For class use, create a Google Drive folder, e.g. `MyDrive/course_data/section2/`. Keep a stable path.
- If you want students to download a canonical CSV reliably, place it in Drive and share a view-only link, or host it at a public URL (e.g., GitHub raw).

Below is a helper to fetch a CSV from:
- Drive via file ID using `gdown`, or
- URL via `pandas.read_csv(URL)`.


In [None]:

# Optional: pull a CSV from Google Drive by file ID (recommended for class distribution).
#   1) Upload the CSV to Drive, right-click -> "Get link", and copy the file ID out of the URL.
#   2) Paste that ID into GDRIVE_FILE_ID below.
USE_GDRIVE_CSV = False
GDRIVE_FILE_ID = "1abcDEF_fake_id_example"  # <-- replace with real file ID
CSV_OUT = "/content/my_dataset.csv"

if USE_GDRIVE_CSV:
    # Imports live inside the branch, so nothing is imported or downloaded
    # while the flag is False.
    import gdown
    import pandas as pd

    url = "https://drive.google.com/uc?id=" + GDRIVE_FILE_ID
    gdown.download(url, CSV_OUT, quiet=False)

    df_csv = pd.read_csv(CSV_OUT)
    print(df_csv.head())


In [None]:

# Optional alternative: read a CSV straight from a public URL.
# Flip USE_URL_CSV to True to run the branch below.
USE_URL_CSV = False
CSV_URL = "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv"  # example

if USE_URL_CSV:
    # pandas is imported inside the branch, so the cell does nothing while the flag is False.
    import pandas as pd

    df_csv = pd.read_csv(CSV_URL)

    print("Loaded CSV from URL:", CSV_URL)
    print(df_csv.head())
