In [None]:
# Import the functions
from labelgenius import classification_GPT, generate_GPT_finetune_jsonl, finetune_GPT,price_estimation, auto_verification

# Read the OpenAI API key from the OPENAI_API_KEY environment variable so the
# secret never has to be pasted into (and saved with) the notebook. The "XXX"
# placeholder is kept as a fallback so `api_key` is always defined, as before.
import os

api_key = os.environ.get("OPENAI_API_KEY") or "XXX"

Demo 1: Single-Category Classification using N24News Dataset
-------------------------------------------------------------

This demo shows how to classify a single news article into one of 24 categories
using the N24News dataset. Each article in the dataset includes both textual
and visual information.

Source: https://aclanthology.org/2022.lrec-1.729/


Each article contains the following fields:
- 'section': Ground truth label (one of 24 categories)
- 'headline': Title of the article
- 'abstract': Short summary of the article
- 'article': Full text content
- 'article_url': Link to the original article
- 'image': Encoded image or metadata (optional)
- 'caption': Image caption
- 'image_id': Unique image identifier
- 'img_dir': Path to the associated image (e.g., 'N24News/imgs_200_sample1/12345.jpg')
- 'article_id': Unique article identifier

Image file: Multimodal_image

Example categories (see `prompt_D1_GPT` for the complete list):
------------------------
1. Health
2. Science
3. Television
...
24. Global Business

Reference:
----------
Wang, Z., Shan, X., Zhang, X., & Yang, J. (2022).
N24News: A New Dataset for Multimodal News Classification.
In *Proceedings of the Thirteenth Language Resources and Evaluation Conference* (pp. 6768–6775). LREC.


### Demo 1a: Single-Category Text Classification

**Datasets:**
- `D1_1.csv`: Used for initial labeling and fine-tuning.
- `D1_2.csv`: Used for testing the fine-tuned model's performance.


## GPT: third-party API labeling


In [None]:
# The 24 category labels are the strings "1" ... "24"; each number refers to
# the section with the same number in the prompt below.
category_D1_GPT = [str(label) for label in range(1, 25)]


# Instruction prompt for text classification (a one-element list of strings).
# It lists every category with a short description and asks the model to
# answer with the category number only.
prompt_D1_GPT = ["""You are given a short news article. Based on its content, return the most appropriate category label from the list below. Respond with only a number from 1 to 24, corresponding to the best-matching category:

1. Health – medical news, public health, fitness, mental health, wellness
2. Science – scientific discoveries, research studies, space, innovations
3. Television – TV shows, reviews, industry news, streaming
4. Travel – tourism, destinations, travel guides, airlines, vacation trends
5. Movies – film news, reviews, box office, upcoming releases
6. Dance – ballet, contemporary styles, street dance, performances
7. Real Estate – housing trends, property sales, architecture, urban planning
8. Economy – macroeconomics, inflation, markets, GDP, financial policies
9. Sports – competitions, athletes, results, professional sports
10. Theater – plays, Broadway, live performances, stage reviews
11. Opinion – editorials, commentary, analysis
12. Music – albums, artists, concerts, festivals, music trends
13. Books – literature, bestsellers, author interviews, book reviews
14. Art & Design – fine arts, museums, exhibitions, visual/design trends
15. Style – fashion trends, beauty, personal style, aesthetics
16. Media – journalism, publishing, digital media, mass communication
17. Food – restaurants, recipes, cooking, culinary culture
18. Well – lifestyle, self-care, mental well-being, personal development
19. Fashion – clothing, designers, fashion weeks, industry insights
20. Technology – AI, gadgets, software, cybersecurity, tech innovations
21. Your Money – personal finance, investing, budgeting, financial planning
22. Education – schools, universities, learning methods, education policies
23. Automobiles – car news, EVs, reviews, industry trends
24. Global Business – international trade, corporations, mergers, global markets

Return the category label only (e.g., 5), no extra words."""]





## Price estimation

In [None]:
import os
import time
import pandas as pd
from openai import OpenAI
from tqdm.auto import tqdm


# ── CONFIG ────────────────────────────────────────────────────────────────
# OpenAI() is created without arguments, so the key is exported through the
# OPENAI_API_KEY environment variable first.
os.environ["OPENAI_API_KEY"] = api_key
client = OpenAI()

# ── Testing (adapted from OpenAI playground) ──────────────────────────────
# Send one sample request so that its token usage can be fed to
# price_estimation. The developer message reuses prompt_D1_GPT[0] -- the prompt
# that classification_GPT is given below -- instead of a separately maintained
# copy, so the sample reflects the prompt that is actually used for labeling.
response = client.responses.create(
    model="o4-mini",
    input=[
        {
            "role": "developer",
            "content": [{"type": "input_text", "text": prompt_D1_GPT[0]}],
        },
        {
            "role": "user",
            "content": [
                {
                    "type": "input_text",
                    "text": "\"\"\" Our guide to the city's best classical music and opera.\"\"\"\n      ",
                }
            ],
        },
    ],
    text={"format": {"type": "text"}},
    reasoning={"effort": "medium"},
    tools=[],
    store=True,
)

In [None]:
# Display the raw object returned by client.responses.create for inspection.
response

In [None]:
# Pass the sample `response` to price_estimation together with the per-million
# token rates (USD) of the model that produced it (o4-mini: 1.10 input,
# 4.40 output). Prices change over time -- check the pricing page before use:
# https://openai.com/api/pricing/
price_estimation(
    response,
    num_rows=20,
    input_cost_per_million=1.10,
    output_cost_per_million=4.40,
    num_votes=3,
)

## GPT-4o

In [None]:
# Label Demo_data/D1_1.csv with gpt-4o-mini, using the headline, article and
# abstract columns as input, and keep the returned table.
D1a_GPT_4o_inital_lableing = classification_GPT(
    text_path="Demo_data/D1_1.csv",
    mode="text",
    column_4_labeling=["headline", "article", "abstract"],
    category=category_D1_GPT,
    prompt=prompt_D1_GPT,
    model="gpt-4o-mini",
    temperature=1,  # a value ranging from 0 to 2
    num_themes=1,
    num_votes=3,
    output_column_name="D1a_GPT_4o_inital_lableing",
    api_key=api_key,
)

# Save the labeled table so later steps can start from the file.
D1a_GPT_4o_inital_lableing.to_csv(
    "Demo_result/D1a_GPT_4o_inital_lableing.csv", index=False
)

In [None]:
# Display the table returned by classification_GPT for visual inspection.
D1a_GPT_4o_inital_lableing

In [None]:
## Check the accuracy: GPT-4o initial labeling
# Convert the predicted and the ground-truth column to numbers in one step;
# values that cannot be parsed become NaN (errors="coerce").
D1a_GPT_4o_inital_lableing[["D1a_GPT_4o_inital_lableing", "section_numeric"]] = (
    D1a_GPT_4o_inital_lableing[["D1a_GPT_4o_inital_lableing", "section_numeric"]]
    .apply(pd.to_numeric, errors="coerce")
)

# Compare the predictions against the human labels.
auto_verification(
    D1a_GPT_4o_inital_lableing,
    true_cols="section_numeric",
    predicted_cols="D1a_GPT_4o_inital_lableing",
    category=category_D1_GPT,
)

### Note: Fine-Tune GPT models:

Fine-tuning must be run on a *snapshot* model, i.e. a dated, fixed version of a model (for example `gpt-4o-mini-2024-07-18`) rather than a floating alias such as `gpt-4o-mini`. Because a snapshot does not change over time, the fine-tuned model is always built on the same base model.


GPT reasoning models (i.e., the o-series) cannot be fine-tuned for now.

#### Minimum Data Requirement:
Fine-tuning for GPT-4o starts to show effective learning with around 20 examples per category. Insufficient data may not capture nuanced learning effectively.

#### Poor Performance on Tiny Datasets:

When fine-tuning with very small datasets, the optimizer may converge to simply output the majority token as this is the easiest way to minimize loss.

This behavior is not representative of true learning—it reflects a bias towards frequent labels instead of understanding contextual differences.


In [None]:
# Prepare the fine-tuning data for GPT-4o.
# Each JSONL record is built from three parts:
#   1. system_prompt: the coding instruction
#   2. input_col:     the column(s) holding the information to be labeled
#   3. label_col:     the column(s) holding the label
generate_GPT_finetune_jsonl(
    D1a_GPT_4o_inital_lableing,
    system_prompt=prompt_D1_GPT,
    input_col=["headline", "article", "abstract"],
    label_col=["section_numeric"],
    output_path="Demo_result/D1a_GPT_4o_inital_lableing.jsonl",
)


# GPT-4o Fine-Tune Hyperparameters

---

##  **1️⃣ Batch Size Considerations**
The batch size determines how many samples are processed at once. Larger batch sizes may speed up training but can lead to instability, especially with limited data.

###  **Recommendations:**
| Dataset Size         | Recommended Batch Size |
|-----------------------|------------------------|
| **< 1,000 samples**  | `batch_size: 4`       |
| **1,000 - 10,000**   | `batch_size: 8` or `16` |
| **> 10,000**         | `batch_size: 32`      |
| **> 100,000**        | `batch_size: 64`      |

###  **Caution:**
- Increasing batch size can lead to overfitting if the data is not diverse.
- If training loss is unstable, consider lowering the batch size.

---

##  **2️⃣ Learning Rate Multiplier**
The learning rate multiplier scales the base learning rate of the model. A value of **0.1** means the effective learning rate is **10%** of the model's default.

###  **Recommendations:**
| Data Type                         | Learning Rate Multiplier |
|-----------------------------------|--------------------------|
| **High variance text** (e.g., news articles, social media posts) | `0.02` to `0.05` |
| **Domain-specific text** (e.g., scientific abstracts, legal documents) | `0.02` to `0.1`  |
| **Noisy or mixed-domain data** (e.g., user-generated content) | `0.01` to `0.02` |
| **Highly structured data** (e.g., technical manuals) | `0.05` to `0.1` |

###  **Caution:**
- If loss suddenly spikes, reduce the learning rate multiplier.
- If the model underfits (low accuracy and low loss), slightly increase the multiplier.
- Avoid setting too high a learning rate (`> 0.1`) unless you have very clean and structured data.

---

In [None]:
# Fine-tune GPT-4o on the JSONL file generated above. A dated snapshot model
# ID is passed as the base model.
# NOTE(review): batch_size=8 and learning_rate_multiplier=0.01 differ from the
# recommendation tables above (batch size 4 for < 1,000 samples; 0.02-0.05 for
# news articles) -- confirm that this is intentional.
D1a_GPT_4o_model_finetune = finetune_GPT(
    model="gpt-4o-mini-2024-07-18",
    training_file_path="Demo_result/D1a_GPT_4o_inital_lableing.jsonl",
    hyperparameters={"batch_size": 8, "learning_rate_multiplier": 0.01},
    api_key=api_key,
)

In [None]:
# Show what finetune_GPT returned; the same value is passed as `model` to
# classification_GPT in the fine-tuned classification cell below.
D1a_GPT_4o_model_finetune

#### Note: You can wait in the code until the job status is finished.
##### Alternatively, you will receive an email from OpenAI when the fine-tuning job is done.

#### Example email
Hi JCs‘ research,
Your fine-tuning job ftjob-xxxx has successfully completed, and a new model **ft:gpt-4o-mini-2024-07-18:xxx::xxxx** has been created for your use.

Copy the model identifier into the `model` argument of `classification_GPT()`.

In [None]:
# Classify the held-out file Demo_data/D1_2.csv with the fine-tuned 4o model.
D1_GPT_4o_finetune = classification_GPT(
    text_path="Demo_data/D1_2.csv",
    mode="text",
    column_4_labeling=["headline", "article", "abstract"],
    category=category_D1_GPT,
    prompt=prompt_D1_GPT,
    model=D1a_GPT_4o_model_finetune,
    temperature=1,
    num_themes=1,
    num_votes=1,
    output_column_name="D1_GPT_4o_finetune",
    api_key=api_key,
)

# Save the labeled table.
D1_GPT_4o_finetune.to_csv("Demo_result/D1_GPT_4o_finetune.csv", index=False)

In [None]:
## Check the accuracy: fine-tuned GPT-4o labeling of Demo_data/D1_2.csv
auto_verification(
    D1_GPT_4o_finetune,
    true_cols="section_numeric",
    predicted_cols="D1_GPT_4o_finetune",
    category=category_D1_GPT,
);

#### Note:
The error observed here is expected and occurs because OpenAI did not return a result from the anticipated category. The code is designed to handle this automatically by sending another request (Default = 3). However, if you consistently encounter this issue throughout the output, please verify the prompt settings in the Playground.

## GPT-o4 (reasoning model)


In [None]:
# Label Demo_data/D1_1.csv with the o4-mini reasoning model. `effort` is
# passed here where the GPT-4o call passes `temperature`.
D1a_GPT_o4_inital_lableing = classification_GPT(
    text_path="Demo_data/D1_1.csv",
    mode="text",
    column_4_labeling=["headline", "article", "abstract"],
    category=category_D1_GPT,
    prompt=prompt_D1_GPT,
    model="o4-mini",
    effort="medium",
    num_themes=1,
    num_votes=1,
    output_column_name="D1a_GPT_o4_inital_lableing",
    api_key=api_key,
)

# Save the labeled table.
D1a_GPT_o4_inital_lableing.to_csv(
    "Demo_result/D1a_GPT_o4_inital_lableing.csv", index=False
)

In [None]:
## Check the accuracy: GPT o4-mini initial labeling (text)
auto_verification(
    D1a_GPT_o4_inital_lableing,
    true_cols="section_numeric",
    predicted_cols="D1a_GPT_o4_inital_lableing",
    category=category_D1_GPT,
);

#### Note: GPT reasoning models (i.e., the o-series) cannot be fine-tuned for now

# Demo 1b: Classify single-category image data

**Datasets:**
- `imgs_40`: Used for initial labeling and fine-tuning.
- `imgs_40_2`: Used for testing the fine-tuned model's performance.


In [None]:
# The 24 category labels are the strings "1" ... "24"; each number refers to
# the section with the same number in the prompt below.
category_D1_GPT = [str(label) for label in range(1, 25)]


# Instruction prompt for image classification (a one-element list of strings).
# NOTE: this rebinds prompt_D1_GPT, so cells executed after this one use the
# image wording of the prompt.
prompt_D1_GPT = ["""You are given an image. Based on its content, return the most appropriate category label from the list below. Respond with only a number from 1 to 24, corresponding to the best-matching category:

1. Health – medical news, public health, fitness, mental health, wellness
2. Science – scientific discoveries, research studies, space, innovations
3. Television – TV shows, reviews, industry news, streaming
4. Travel – tourism, destinations, travel guides, airlines, vacation trends
5. Movies – film news, reviews, box office, upcoming releases
6. Dance – ballet, contemporary styles, street dance, performances
7. Real Estate – housing trends, property sales, architecture, urban planning
8. Economy – macroeconomics, inflation, markets, GDP, financial policies
9. Sports – competitions, athletes, results, professional sports
10. Theater – plays, Broadway, live performances, stage reviews
11. Opinion – editorials, commentary, analysis
12. Music – albums, artists, concerts, festivals, music trends
13. Books – literature, bestsellers, author interviews, book reviews
14. Art & Design – fine arts, museums, exhibitions, visual/design trends
15. Style – fashion trends, beauty, personal style, aesthetics
16. Media – journalism, publishing, digital media, mass communication
17. Food – restaurants, recipes, cooking, culinary culture
18. Well – lifestyle, self-care, mental well-being, personal development
19. Fashion – clothing, designers, fashion weeks, industry insights
20. Technology – AI, gadgets, software, cybersecurity, tech innovations
21. Your Money – personal finance, investing, budgeting, financial planning
22. Education – schools, universities, learning methods, education policies
23. Automobiles – car news, EVs, reviews, industry trends
24. Global Business – international trade, corporations, mergers, global markets

Return the category label only (e.g., 5), no extra words."""]





## GPT-4o


In [None]:
# Label the images in Demo_data/D1_imgs_1 with gpt-4o-mini (mode="image").
D1b_GPT_4o_inital_lableing = classification_GPT(
    text_path="Demo_data/D1_1.csv",
    image_dir="Demo_data/D1_imgs_1",
    mode="image",
    category=category_D1_GPT,
    prompt=prompt_D1_GPT,
    model="gpt-4o-mini",
    temperature=0.8,
    num_themes=1,
    num_votes=3,
    output_column_name="D1b_GPT_4o_inital_lableing",
    api_key=api_key,
)

# Save the labeled table.
D1b_GPT_4o_inital_lableing.to_csv(
    "Demo_result/D1b_GPT_4o_inital_lableing.csv", index=False
)

In [None]:
# Display the table returned by classification_GPT for visual inspection.
D1b_GPT_4o_inital_lableing

In [None]:
## Check the accuracy: GPT-4o initial labeling (image)
# Merge the image labeling result with the human labels on "image_id".
# The merge is skipped when the ground-truth column is already present, so
# re-running this cell does not merge a second time (a repeated merge would
# suffix the duplicated columns with _x/_y and "section_numeric" would no
# longer be found by auto_verification).
D1b_CLIP_human = pd.read_csv("Demo_data/D1_1.csv")
if "section_numeric" not in D1b_GPT_4o_inital_lableing.columns:
    D1b_GPT_4o_inital_lableing = pd.merge(
        D1b_GPT_4o_inital_lableing, D1b_CLIP_human, on="image_id", how="inner"
    )

# Overwrite the saved result with the version that includes the human labels.
D1b_GPT_4o_inital_lableing.to_csv(
    "Demo_result/D1b_GPT_4o_inital_lableing.csv", index=False
)


auto_verification(
    D1b_GPT_4o_inital_lableing,
    predicted_cols="D1b_GPT_4o_inital_lableing",
    true_cols="section_numeric",
    category=category_D1_GPT,
);

## GPT-o4 (reasoning model)


In [None]:
# Label the images in Demo_data/D1_imgs_1 with the o4-mini reasoning model.
D1b_GPT_o4_inital_lableing = classification_GPT(
    text_path="Demo_data/D1_1.csv",
    image_dir="Demo_data/D1_imgs_1",
    mode="image",
    category=category_D1_GPT,
    prompt=prompt_D1_GPT,
    model="o4-mini",
    effort="medium",
    num_themes=1,
    num_votes=1,
    output_column_name="D1b_GPT_o4_inital_lableing",
    api_key=api_key,
)

# Save the labeled table.
D1b_GPT_o4_inital_lableing.to_csv(
    "Demo_result/D1b_GPT_o4_inital_lableing.csv", index=False
)

In [None]:
# Display the table returned by classification_GPT for visual inspection.
D1b_GPT_o4_inital_lableing

In [None]:
## Check the accuracy: GPT o4-mini initial labeling (image)
# Merge the image labeling result with the human labels on "image_id".
# The merge is skipped when the ground-truth column is already present, so
# re-running this cell does not merge a second time (a repeated merge would
# suffix the duplicated columns with _x/_y and "section_numeric" would no
# longer be found by auto_verification).
D1b_CLIP_human = pd.read_csv("Demo_data/D1_1.csv")
if "section_numeric" not in D1b_GPT_o4_inital_lableing.columns:
    D1b_GPT_o4_inital_lableing = pd.merge(
        D1b_GPT_o4_inital_lableing, D1b_CLIP_human, on="image_id", how="inner"
    )

# Write to the o4 result file. The previous version wrote to the 4o file name
# and thereby overwrote the GPT-4o image results.
D1b_GPT_o4_inital_lableing.to_csv(
    "Demo_result/D1b_GPT_o4_inital_lableing.csv", index=False
)


auto_verification(
    D1b_GPT_o4_inital_lableing,
    predicted_cols="D1b_GPT_o4_inital_lableing",
    true_cols="section_numeric",
    category=category_D1_GPT,
);

#### Note: the current GPT models have strict restrictions on fine-tuning images. So it is not used here.

# Demo 1c: Classify single-category text + image data

**Datasets:**
- `nytimes_40.csv`: Used for initial labeling and fine-tuning.
- `imgs_40`: Used for initial labeling and fine-tuning.

- `nytimes_40_2.csv`: Used for testing the fine-tuned model's performance.
- `imgs_40_2`: Used for testing the fine-tuned model's performance.


The text dataset should contain a column `img_dir` to map the images for each row.

## Category labels and prompt (text + image)

In [None]:
# The 24 category labels are the strings "1" ... "24"; each number refers to
# the section with the same number in the prompt below.
category_D1_GPT = [str(label) for label in range(1, 25)]


# Instruction prompt for combined text + image classification (a one-element
# list of strings).
# NOTE: this rebinds prompt_D1_GPT, so cells executed after this one use the
# text + image wording of the prompt.
prompt_D1_GPT = ["""You are given an article and image. Based on its content, return the most appropriate category label from the list below. Respond with only a number from 1 to 24, corresponding to the best-matching category:

1. Health – medical news, public health, fitness, mental health, wellness
2. Science – scientific discoveries, research studies, space, innovations
3. Television – TV shows, reviews, industry news, streaming
4. Travel – tourism, destinations, travel guides, airlines, vacation trends
5. Movies – film news, reviews, box office, upcoming releases
6. Dance – ballet, contemporary styles, street dance, performances
7. Real Estate – housing trends, property sales, architecture, urban planning
8. Economy – macroeconomics, inflation, markets, GDP, financial policies
9. Sports – competitions, athletes, results, professional sports
10. Theater – plays, Broadway, live performances, stage reviews
11. Opinion – editorials, commentary, analysis
12. Music – albums, artists, concerts, festivals, music trends
13. Books – literature, bestsellers, author interviews, book reviews
14. Art & Design – fine arts, museums, exhibitions, visual/design trends
15. Style – fashion trends, beauty, personal style, aesthetics
16. Media – journalism, publishing, digital media, mass communication
17. Food – restaurants, recipes, cooking, culinary culture
18. Well – lifestyle, self-care, mental well-being, personal development
19. Fashion – clothing, designers, fashion weeks, industry insights
20. Technology – AI, gadgets, software, cybersecurity, tech innovations
21. Your Money – personal finance, investing, budgeting, financial planning
22. Education – schools, universities, learning methods, education policies
23. Automobiles – car news, EVs, reviews, industry trends
24. Global Business – international trade, corporations, mergers, global markets

Return the category label only (e.g., 5), no extra words."""]





## GPT: third-party API labeling


## GPT-4o


In [None]:
# Label each row with gpt-4o-mini using both the text columns and the images
# (mode="both").
D1c_GPT_4o_inital_lableing = classification_GPT(
    text_path="Demo_data/D1_1.csv",
    image_dir="Demo_data/D1_imgs_1",
    mode="both",
    column_4_labeling=["headline", "article", "abstract"],
    category=category_D1_GPT,
    prompt=prompt_D1_GPT,
    model="gpt-4o-mini",
    temperature=1,
    num_themes=1,
    num_votes=1,
    output_column_name="D1c_GPT_4o_inital_lableing",
    api_key=api_key,
)

# Save the labeled table.
D1c_GPT_4o_inital_lableing.to_csv(
    "Demo_result/D1c_GPT_4o_inital_lableing.csv", index=False
)

In [None]:
# Display the table returned by classification_GPT for visual inspection.
D1c_GPT_4o_inital_lableing

In [None]:
## Check the accuracy: GPT-4o initial labeling (text + image)
auto_verification(
    D1c_GPT_4o_inital_lableing,
    true_cols="section_numeric",
    predicted_cols="D1c_GPT_4o_inital_lableing",
    category=category_D1_GPT,
);

## GPT-o4 (reasoning model)

##### The structure and usage are identical to the previous example. However, instead of adjusting the temperature parameter to control randomness, the effort parameter is used to influence the model's reasoning depth and complexity.


In [None]:
# Label each row with the o4-mini reasoning model using both the text columns
# and the images (mode="both").
D1c_GPT_o4_inital_lableing = classification_GPT(
    text_path="Demo_data/D1_1.csv",
    image_dir="Demo_data/D1_imgs_1",
    mode="both",
    column_4_labeling=["headline", "article", "abstract"],
    category=category_D1_GPT,
    prompt=prompt_D1_GPT,
    model="o4-mini",
    effort="medium",
    num_themes=1,
    num_votes=1,
    output_column_name="D1c_GPT_o4_inital_lableing",
    api_key=api_key,
)

# Save the labeled table.
D1c_GPT_o4_inital_lableing.to_csv(
    "Demo_result/D1c_GPT_o4_inital_lableing.csv", index=False
)


In [None]:
## Check the accuracy: GPT o4-mini initial labeling (text + image)
auto_verification(
    D1c_GPT_o4_inital_lableing,
    true_cols="section_numeric",
    predicted_cols="D1c_GPT_o4_inital_lableing",
    category=category_D1_GPT,
);

Demo 2: Multi-Label Theme Classification using News Headlines
--------------------------------------------------------------------

This demo shows how to classify U.S. immigration-related news headlines into
one or two dominant thematic categories using a multi-label text-only classification task.
The coding scheme is adapted from Guo et al. (2023).

Dataset: Guo et al. (2023) Immigration News Dataset
Source: https://doi.org/10.1177/19401612231204535


**Datasets:**
- `D2_1.xlsx`: Used for initial labeling and fine-tuning.
- `D2_2.xlsx`: Used for testing the fine-tuned model's performance.


Each sample in the dataset includes:
- A single news headline (column **Post_Title**)


Themes:
-------
1) Economic consequences  
2) Crime/safety  
3) Family  
4) Immigrant wellbeing  
5) Culture/society  
6) Politics  
7) Legislation/regulation  
8) Public opinion  
99) None of the above


Reference:
----------
Guo, L., Su, C. C., & Chen, H.-T. (2023).
Do News Frames Really Have Some Influence in the Real World?
A Computational Analysis of Cumulative Framing Effects on Emotions and Opinions About Immigration.
The International Journal of Press/Politics. https://doi.org/10.1177/19401612231204535


#### Note: Based on our tests, CLIP did not perform well for multi-category classification. Its embedding-based matching struggles with distinguishing the order of categories and handling missing categories effectively. Therefore, we recommend using GPT models for multi-category classification tasks for improved accuracy and reliability.

## GPT: third-party API labeling


## GPT 4o-mini

In [None]:
# Each of the 8 themes is coded as a binary label: "1" = theme present,
# "0" = theme absent.
category_D2_GPT = ["0", "1"]

# Instruction prompt for the multi-label task. It defines the 8 themes and asks
# for one 0/1 answer per theme in a fixed order, with at most two 1s.
prompt_D2_GPT = '''Here's a news article headline. Please label if it belongs to the following theme.
            Return <1> if this headline belongs to these themes and return <0> if it does not belong to the themes.
            Please code for each of the following 8 topics.
            Please identify up to two dominant themes from the headline, which means you can have a max of 2 <1> in the answer you generated.
            You don't have to label two topics if you don't find that they apply. Just enter 0s.
            - Economic consequences: The story is about economic benefits or costs, or the costs involving immigration-related issues, including: Cost of mass deportation; Economic benefits of immigration (more tax revenue, cheap labor); Economic costs of immigration (taking jobs from Americans, immigrants using healthcare and educational services, overcrowding, housing concerns)
            - Crime/safety: The story is about threats to American's safety, including: Immigration described as a major cause of increased rates of crime, gangs, drug trafficking, etc; Immigrants described as law-breakers who deserve punishment; Immigration described as a threat to national security via terrorism
            - Family: The story is about the impact of immigration on families, including: Separating children from parents; Breaking up multi-generational families; Interfering with children's continued schooling
            - Immigrant wellbeing: This story is about the negative impact of the immigration process on immigrants, including: Prejudice and bias toward immigrants; Physical and/or mental health or safety of immigrants; Immigration policies described as violations of immigrants' civil rights and liberties; Immigration policies regarding illegal immigrants described as unfair to immigrants who have waited to become citizens the legal way
            - Culture/society: This story is about societal-wide factors or consequences related to immigration, including: Immigration as a threat to American cultural identity, way of living, the predominance of English and Christianity, etc.; Immigrants as isolated from the rest of America, unable to assimilate into communities; Immigration as part of the celebrated history of immigration in America / America-as-melting-pot; Immigration policies as exemplars of society's immorality; Impact of immigration on a specific subculture/community in the US
            - Politics: The story is mainly about the political issues around immigration, including: Political campaigns and upcoming elections (e.g., using immigration as a wedge issue or motivating force to get people to the polls); Fighting between the Democratic and Republican parties, or politicians; One political party or one politician’s stance on immigration. Therefore, when the news headline mentions a politician’s name, it often indicates the theme of politics
            - Legislation/regulation: The story is about issues related to regulating immigration through legislation and other institutional measures: New immigration legislation being introduced/argued over; Flaws in current/old legislation; Enforcement of current legislation
            - Public opinion: The story is about the public’s, including a specific community’s, reactions to immigration-related issues, including: Public opinion polls; Protests; Social media backlash; Community outrage; Celebrity responses/protests
            Answer using the following format [0, 0, 0, 0, 0, 0, 0, 0]. Do not provide any other information'''




In [None]:
# Label the headlines in Demo_data/D2_1.xlsx with gpt-4o-mini. num_themes=8
# matches the 8 themes in the prompt; the verification cell below reads the
# results from the columns "D2_GPT_4o_initial_labeling_1" ... "_8".
# `category_D2_GPT` is used instead of a repeated literal so that this call
# stays in sync with the verification and the fine-tuned classification cells.
D2_GPT_4o_inital_lableing = classification_GPT(
    text_path="Demo_data/D2_1.xlsx",
    category=category_D2_GPT,
    prompt=prompt_D2_GPT,
    column_4_labeling=["Post_Title"],
    model="gpt-4o-mini",
    api_key=api_key,
    temperature=0.8,
    mode="text",
    output_column_name="D2_GPT_4o_initial_labeling",
    num_themes=8,
    num_votes=3,
)

# Save the labeled table.
D2_GPT_4o_inital_lableing.to_csv("Demo_result/D2_GPT_4o_inital_lableing.csv", index=False)

In [None]:
# Display the table returned by classification_GPT for visual inspection.
D2_GPT_4o_inital_lableing

In [None]:
# Check the accuracy per theme: predicted column i ("..._labeling_i") is
# paired with the human-coded column "Q3_i", for i = 1 ... 8.
auto_verification(
    D2_GPT_4o_inital_lableing,
    predicted_cols=[f"D2_GPT_4o_initial_labeling_{theme}" for theme in range(1, 9)],
    true_cols=[f"Q3_{theme}" for theme in range(1, 9)],
    category=category_D2_GPT,
)


### Fine-tune: GPT-4o

In [None]:
# Prepare the fine-tuning data for GPT-4o.
# Each JSONL record is built from three parts:
#   1. system_prompt: the coding instruction
#   2. input_col:     the column(s) holding the information to be labeled
#   3. label_col:     the column(s) holding the label
generate_GPT_finetune_jsonl(
    D2_GPT_4o_inital_lableing,
    system_prompt=prompt_D2_GPT,
    input_col=["Post_Title"],
    label_col=["Q3_clean"],
    output_path="Demo_result/D2_GPT_4o_inital_lableing.jsonl",
)


In [None]:
# Fine-tune GPT-4o on the multi-label JSONL file generated above. A dated
# snapshot model ID is passed as the base model.
D2_GPT_4o_model_finetune = finetune_GPT(
    model="gpt-4o-mini-2024-07-18",
    training_file_path="Demo_result/D2_GPT_4o_inital_lableing.jsonl",
    hyperparameters={"batch_size": 8, "learning_rate_multiplier": 0.01},
    api_key=api_key,
)

In [None]:
# Show what finetune_GPT returned; the same value is passed as `model` in the
# cells below.
D2_GPT_4o_model_finetune

In [None]:
# Test the fine-tuned model with a single sample request to see what a raw
# result looks like. The developer message carries the coding instruction and
# the user message carries the text to be labeled.
response = client.responses.create(
    model=D2_GPT_4o_model_finetune,
    input=[
        {
            "role": "developer",
            "content": [{"type": "input_text", "text": prompt_D2_GPT}],
        },
        {
            "role": "user",
            "content": [
                {
                    "type": "input_text",
                    "text": "\"\"\" Our guide to the city's best classical music and opera.\"\"\"\n      ",
                }
            ],
        },
    ],
    text={"format": {"type": "text"}},
    tools=[],
    store=True,
)

In [None]:
# Display the raw object returned by client.responses.create for inspection.
response

In [None]:
# Classify with fine‑tuned 4o model
D2_GPT_4o_finetune = classification_GPT(
    text_path="Demo_data/D2_2.xlsx",
    category = category_D2_GPT,
    prompt = prompt_D2_GPT,
    column_4_labeling=["Post_Title"],
    model = D2_GPT_4o_model_finetune,
    api_key = api_key,
    temperature = 0.8,
    mode = "text",
    output_column_name="D2_GPT_4o_finetune",
    num_themes = 8,
    num_votes = 1)



D2_GPT_4o_finetune.to_csv("Demo_result/D2_GPT_4o_finetune.csv", index=False)

In [None]:
# Display the table returned by classification_GPT for visual inspection.
D2_GPT_4o_finetune

In [None]:
# Check the accuracy of the fine-tuned model per theme: predicted column i
# ("D2_GPT_4o_finetune_i") is paired with the human-coded column "Q3_i",
# for i = 1 ... 8.
auto_verification(
    D2_GPT_4o_finetune,
    predicted_cols=[f"D2_GPT_4o_finetune_{theme}" for theme in range(1, 9)],
    true_cols=[f"Q3_{theme}" for theme in range(1, 9)],
    category=category_D2_GPT,
)


## o4-mini (reasoning model)

##### The structure and usage are identical to the previous example. However, instead of adjusting the temperature parameter to control randomness, the effort parameter is used to influence the model's reasoning depth and complexity.


#### Note: GPT reasoning models (i.e., the o-series) cannot be fine-tuned for now