ollama run llama3.2:1b

In [1]:
import mlflow

mlflow.set_tracking_uri("http://localhost:5000")
mlflow.set_experiment("DSPy")

<Experiment: artifact_location='mlflow-artifacts:/374362034103955121', creation_time=1741686562632, experiment_id='374362034103955121', last_update_time=1741686562632, lifecycle_stage='active', name='DSPy', tags={}>

In [2]:
mlflow.dspy.autolog()

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
import dspy

lm = dspy.LM('ollama_chat/llama3.2:1b', api_base='http://localhost:11434')
dspy.configure(lm=lm)

In [5]:
from typing import Literal

class Categorize(dspy.Signature):
    """Classify historic events."""

    event: str = dspy.InputField()
    category: Literal[
        "Wars and Conflicts",
        "Politics and Governance",
        "Science and Innovation",
        "Cultural and Artistic Movements",
        "Exploration and Discovery",
        "Economic Events",
        "Social Movements",
        "Man-Made Disasters and Accidents",
        "Natural Disasters and Climate",
        "Sports and Entertainment",
        "Famous Personalities and Achievements"
    ] = dspy.OutputField()
    confidence: float = dspy.OutputField()

classify = dspy.Predict(Categorize)

# Here is how we call this module
classification = classify(event="World War II[b] or the Second World War (1 September 1939 – 2 September 1945) was a global conflict between two coalitions: the Allies and the Axis powers. Nearly all of the world's countries participated, with many nations mobilising all resources in pursuit of total war. Tanks and aircraft played major roles, enabling the strategic bombing of cities and delivery of the first and only nuclear weapons ever used in war. World War II was the deadliest conflict in history, resulting in 70 to 85 million deaths, more than half of which were civilians. Millions died in genocides, including the Holocaust, and by massacres, starvation, and disease. After the Allied victory, Germany, Austria, Japan, and Korea were occupied, and German and Japanese leaders were tried for war crimes.")
classification

Prediction(
    category='Wars and Conflicts',
    confidence=0.95
)

In [6]:
classify(event="Second Boer War: In the Battle of Magersfontein the Boers commanded by general Piet Cronjé inflict a defeat on the forces of the British Empire commanded by Lord Methuen trying to relieve the Siege of Kimberley.")

Prediction(
    category='Wars and Conflicts',
    confidence=1.0
)

In [18]:
import pandas as pd

with open('events.txt', 'r') as f:
    events = f.readlines()

def classify_event(event_text):
    result = classify(event=event_text)
    return result.category, result.confidence

events = pd.DataFrame(events, columns=['event_string'])

with dspy.context(lm=dspy.LM('ollama_chat/llama3.2:1b', api_base='http://localhost:11434')):

    classifications = events['event_string'].apply(classify_event)

    events['category_32_1b'] = [result[0] for result in classifications]
    events['confidence_32_1b'] = [result[1] for result in classifications]

events

Unnamed: 0,event_string,category_32_1b,confidence_32_1b
0,627 – Battle of Nineveh: A Byzantine army unde...,Wars and Conflicts,2.0
1,1388 – Maria of Enghien sells the lordship of ...,Wars and Conflicts,1.0
2,1787 – Pennsylvania becomes the second state t...,Politics and Governance,0.95
3,1862 – American Civil War: USS Cairo sinks on ...,Wars and Conflicts,0.5
4,1866 – Oaks explosion: The worst mining disast...,Wars and Conflicts,0.55
5,1870 – Joseph H. Rainey of South Carolina beco...,Politics and Governance,0.5
6,1901 – Guglielmo Marconi receives the first tr...,Science and Innovation,0.95
7,1915 – Yuan Shikai declares the establishment ...,Wars and Conflicts,2.0
8,1917 – Father Edward J. Flanagan founds Boys T...,Wars and Conflicts,2.5
9,"1935 – The Lebensborn Project, a Nazi reproduc...",Wars and Conflicts,0.8


In [19]:
with dspy.context(lm=dspy.LM('fireworks_ai/accounts/fireworks/models/llama-v3p3-70b-instruct')):

    classifications = events['event_string'].apply(classify_event)

    events['category_33_70b'] = [result[0] for result in classifications]
    events['confidence_33_70b'] = [result[1] for result in classifications]

events

Unnamed: 0,event_string,category_32_1b,confidence_32_1b,category_33_70b,confidence_33_70b
0,627 – Battle of Nineveh: A Byzantine army unde...,Wars and Conflicts,2.0,Wars and Conflicts,0.95
1,1388 – Maria of Enghien sells the lordship of ...,Wars and Conflicts,1.0,Politics and Governance,0.9
2,1787 – Pennsylvania becomes the second state t...,Politics and Governance,0.95,Politics and Governance,0.95
3,1862 – American Civil War: USS Cairo sinks on ...,Wars and Conflicts,0.5,Wars and Conflicts,0.95
4,1866 – Oaks explosion: The worst mining disast...,Wars and Conflicts,0.55,Man-Made Disasters and Accidents,1.0
5,1870 – Joseph H. Rainey of South Carolina beco...,Politics and Governance,0.5,Politics and Governance,0.95
6,1901 – Guglielmo Marconi receives the first tr...,Science and Innovation,0.95,Science and Innovation,0.95
7,1915 – Yuan Shikai declares the establishment ...,Wars and Conflicts,2.0,Politics and Governance,0.95
8,1917 – Father Edward J. Flanagan founds Boys T...,Wars and Conflicts,2.5,Social Movements,0.8
9,"1935 – The Lebensborn Project, a Nazi reproduc...",Wars and Conflicts,0.8,Politics and Governance,0.95


In [21]:
# Find rows where the categories disagree
disagreements = events[events['category_32_1b'] != events['category_33_70b']]

# Print each disagreement with both model predictions
for idx, row in disagreements.iterrows():
    print(f"\nEvent: {row['event_string']}")
    print(f"1B Model: {row['category_32_1b']} (confidence: {row['confidence_32_1b']:.2f})")
    print(f"70B Model: {row['category_33_70b']} (confidence: {row['confidence_33_70b']:.2f})")




Event: 1388 – Maria of Enghien sells the lordship of Argos and Nauplia to the Republic of Venice.[2]

1B Model: Wars and Conflicts (confidence: 1.00)
70B Model: Politics and Governance (confidence: 0.90)

Event: 1866 – Oaks explosion: The worst mining disaster in England kills 361 miners and rescuers.[5]

1B Model: Wars and Conflicts (confidence: 0.55)
70B Model: Man-Made Disasters and Accidents (confidence: 1.00)

Event: 1915 – Yuan Shikai declares the establishment of the Empire of China and proclaims himself Emperor.[8]

1B Model: Wars and Conflicts (confidence: 2.00)
70B Model: Politics and Governance (confidence: 0.95)

Event: 1917 – Father Edward J. Flanagan founds Boys Town as a farm village for wayward boys.[9]

1B Model: Wars and Conflicts (confidence: 2.50)
70B Model: Social Movements (confidence: 0.80)

Event: 1935 – The Lebensborn Project, a Nazi reproduction program, is founded by Heinrich Himmler.[10]

1B Model: Wars and Conflicts (confidence: 0.80)
70B Model: Politics a

In [22]:
len(disagreements)

17

In [30]:
def validate_category(example, prediction, trace=None):
    return prediction.category == example.category

In [31]:
cost = sum([x['cost'] for x in lm.history if x['cost'] is not None])  # cost in USD, as calculated by LiteLLM for certain providers
cost

0

In [32]:
# Open and read the 2023 events file
with open('wiki-events-2023.txt', 'r') as f:
    events_2023 = f.readlines()
# Open and read the 2024 events file
with open('wiki-events-2024.txt', 'r') as f:
    events_2024 = f.readlines()

# Combine the events from both years
all_events = events_2023 + events_2024

# Create a new DataFrame with the combined events
all_events_df = pd.DataFrame(all_events, columns=['description'])

# Load environment variables from .env file
from dotenv import load_dotenv
load_dotenv()

import json

with dspy.context(lm=dspy.LM('anthropic/claude-3-5-haiku-20241022')):  
    for event in all_events_df['description']:
        result = classify(event=event)
        # Write the result to a jsonl file in append mode
        with open('classifications.jsonl', 'a') as f:
            json.dump({
                'event': event,
                'category': result.category,
                'confidence': result.confidence
            }, f)
            f.write('\n')

    cost = sum([x['cost'] for x in lm.history if x['cost'] is not None])  # cost in USD, as calculated by LiteLLM for certain providers



In [33]:
cost

0

In [34]:
# Load the classifications from the JSONL file
classifications = []
with open('classifications.jsonl', 'r') as f:
    for line in f:
        classifications.append(json.loads(line))

# Convert to DataFrame
classifications_df = pd.DataFrame(classifications)

shuffled_df = classifications_df.sample(frac=1, random_state=42)

split_point = int(0.8 * len(shuffled_df))

# Split the data
train_df = shuffled_df.iloc[:split_point].reset_index(drop=True)
test_df = shuffled_df.iloc[split_point:].reset_index(drop=True)

# Reset indices
train_df = train_df.reset_index(drop=True)
test_df = test_df.reset_index(drop=True)

print(f"Training set size: {len(train_df)}")
print(f"Testing set size: {len(test_df)}")

Training set size: 390
Testing set size: 98


In [37]:
test_df

Unnamed: 0,event,category,confidence
0,2024 Comorian presidential election: Amid an o...,Politics and Governance,0.95
1,November 27 – After forming a coalition Govern...,Politics and Governance,0.95
2,December 12 – At the COP28 climate summit in D...,Politics and Governance,0.95
3,June 6–9 – The 2024 European Parliament electi...,Politics and Governance,0.95
4,November 29 – 2024 Irish general election: Cen...,Politics and Governance,0.95
...,...,...,...
93,Russian invasion of Ukraine: The Nova Kakhovka...,Wars and Conflicts,0.95
94,April 1 – Israel attacks the Iranian embassy i...,Wars and Conflicts,0.95
95,\n,Politics and Governance,0.50
96,French-Algerian writer Boualem Sansal is put i...,Politics and Governance,0.85


In [45]:
from dspy import Evaluate

testset = [dspy.Example(event=row['event'], category=row['category']).with_inputs("event") for _, row in test_df.iterrows()]
evaluator = Evaluate(devset=testset, num_threads=1, display_progress=True, display_table=5)
evaluator(classify, metric=validate_category)

Average Metric: 33.00 / 98 (33.7%): 100%|██████████| 98/98 [01:16<00:00,  1.28it/s]

2025/03/13 16:45:28 INFO dspy.evaluate.evaluate: Average Metric: 33 / 98 (33.7%)





Unnamed: 0,event,example_category,pred_category,confidence,validate_category
0,"2024 Comorian presidential election: Amid an opposition boycott, i...",Politics and Governance,Politics and Governance,0.62,✔️ [True]
1,November 27 – After forming a coalition Government with the right-...,Politics and Governance,Wars and Conflicts,0.1,
2,"December 12 – At the COP28 climate summit in Dubai, a consensus is...",Politics and Governance,Economic Events,0.5,
3,"June 6–9 – The 2024 European Parliament election is held. The EPP,...",Politics and Governance,Wars and Conflicts,1.0,
4,November 29 – 2024 Irish general election: Centre-right party Fian...,Politics and Governance,Wars and Conflicts,2.5,


33.67

In [46]:
trainset = [dspy.Example(event=row['event'], category=row['category']).with_inputs("event") for _, row in train_df.iterrows()]

lm = dspy.LM('ollama_chat/llama3.2:1b', api_base='http://localhost:11434')
dspy.configure(lm=lm)

# Optimize
tp = dspy.MIPROv2(metric=validate_category, auto="light")
optimized_classify = tp.compile(classify, trainset=trainset, max_labeled_demos=0, max_bootstrapped_demos=0)

2025/03/13 16:46:35 INFO dspy.teleprompt.mipro_optimizer_v2: 
RUNNING WITH THE FOLLOWING LIGHT AUTO RUN SETTINGS:
num_trials: 7
minibatch: True
num_candidates: 7
valset size: 100

2025/03/13 16:46:37 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 1: BOOTSTRAP FEWSHOT EXAMPLES <==
2025/03/13 16:46:37 INFO dspy.teleprompt.mipro_optimizer_v2: These will be used for informing instruction proposal.

2025/03/13 16:46:37 INFO dspy.teleprompt.mipro_optimizer_v2: Bootstrapping N=7 sets of demonstrations...


Bootstrapping set 1/7
Bootstrapping set 2/7


 17%|█▋        | 13/78 [00:08<00:40,  1.61it/s]


Bootstrapped 3 full traces after 13 examples for up to 1 rounds, amounting to 13 attempts.
Bootstrapping set 3/7


 15%|█▌        | 12/78 [00:06<00:38,  1.73it/s]


Bootstrapped 3 full traces after 12 examples for up to 1 rounds, amounting to 12 attempts.
Bootstrapping set 4/7


 10%|█         | 8/78 [00:03<00:29,  2.38it/s]


Bootstrapped 3 full traces after 8 examples for up to 1 rounds, amounting to 8 attempts.
Bootstrapping set 5/7


  5%|▌         | 4/78 [00:01<00:25,  2.94it/s]


Bootstrapped 3 full traces after 4 examples for up to 1 rounds, amounting to 4 attempts.
Bootstrapping set 6/7


  1%|▏         | 1/78 [00:01<01:29,  1.16s/it]


Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 7/7


  6%|▋         | 5/78 [00:06<01:37,  1.33s/it]
2025/03/13 16:47:05 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 2: PROPOSE INSTRUCTION CANDIDATES <==
2025/03/13 16:47:05 INFO dspy.teleprompt.mipro_optimizer_v2: We will use the few-shot examples from the previous step, a generated dataset summary, a summary of the program code, and a randomly selected prompting tip to propose instructions.


Bootstrapped 3 full traces after 5 examples for up to 1 rounds, amounting to 5 attempts.


2025/03/13 16:47:43 INFO dspy.teleprompt.mipro_optimizer_v2: 
Proposing instructions...

2025/03/13 16:48:18 INFO dspy.teleprompt.mipro_optimizer_v2: Proposed Instructions for Predictor 0:

2025/03/13 16:48:18 INFO dspy.teleprompt.mipro_optimizer_v2: 0: Classify historic events.

2025/03/13 16:48:18 INFO dspy.teleprompt.mipro_optimizer_v2: 1: Prompt a Language Model to classify historic political events, such as elections and government appointments, using their language understanding capabilities. The model could be trained on a dataset of labeled examples to learn the patterns and characteristics of these events, allowing it to accurately classify future instances.

2025/03/13 16:48:18 INFO dspy.teleprompt.mipro_optimizer_v2: 2: To classify historic events, I will use a combination of natural language processing (NLP) techniques, specifically part-of-speech tagging, named entity recognition, and dependency parsing. The input event will be tokenized into individual words or subwords, 

Average Metric: 26.00 / 100 (26.0%): 100%|██████████| 100/100 [00:53<00:00,  1.88it/s]

2025/03/13 16:49:12 INFO dspy.evaluate.evaluate: Average Metric: 26 / 100 (26.0%)
2025/03/13 16:49:12 INFO dspy.teleprompt.mipro_optimizer_v2: Default program score: 26.0

2025/03/13 16:49:12 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 2 / 8 - Minibatch ==



Average Metric: 18.00 / 25 (72.0%): 100%|██████████| 25/25 [00:17<00:00,  1.40it/s]

2025/03/13 16:49:30 INFO dspy.evaluate.evaluate: Average Metric: 18 / 25 (72.0%)
2025/03/13 16:49:30 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 72.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 1'].
2025/03/13 16:49:30 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [72.0]
2025/03/13 16:49:30 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [26.0]
2025/03/13 16:49:30 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 26.0


2025/03/13 16:49:30 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 3 / 8 - Minibatch ==



Average Metric: 14.00 / 25 (56.0%): 100%|██████████| 25/25 [00:50<00:00,  2.02s/it]

2025/03/13 16:50:20 INFO dspy.evaluate.evaluate: Average Metric: 14 / 25 (56.0%)
2025/03/13 16:50:20 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 56.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 5'].
2025/03/13 16:50:20 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [72.0, 56.0]
2025/03/13 16:50:20 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [26.0]
2025/03/13 16:50:20 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 26.0


2025/03/13 16:50:20 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 4 / 8 - Minibatch ==



Average Metric: 18.00 / 25 (72.0%): 100%|██████████| 25/25 [00:56<00:00,  2.26s/it]

2025/03/13 16:51:17 INFO dspy.evaluate.evaluate: Average Metric: 18 / 25 (72.0%)
2025/03/13 16:51:17 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 72.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 2'].
2025/03/13 16:51:17 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [72.0, 56.0, 72.0]
2025/03/13 16:51:17 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [26.0]
2025/03/13 16:51:17 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 26.0


2025/03/13 16:51:17 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 5 / 8 - Minibatch ==



Average Metric: 17.00 / 25 (68.0%): 100%|██████████| 25/25 [00:31<00:00,  1.26s/it]

2025/03/13 16:51:48 INFO dspy.evaluate.evaluate: Average Metric: 17 / 25 (68.0%)
2025/03/13 16:51:48 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 68.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 5'].
2025/03/13 16:51:48 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [72.0, 56.0, 72.0, 68.0]
2025/03/13 16:51:48 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [26.0]
2025/03/13 16:51:48 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 26.0


2025/03/13 16:51:48 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 6 / 8 - Minibatch ==



Average Metric: 18.00 / 25 (72.0%): 100%|██████████| 25/25 [00:39<00:00,  1.59s/it]

2025/03/13 16:52:28 INFO dspy.evaluate.evaluate: Average Metric: 18 / 25 (72.0%)
2025/03/13 16:52:28 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 72.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 4'].
2025/03/13 16:52:28 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [72.0, 56.0, 72.0, 68.0, 72.0]
2025/03/13 16:52:28 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [26.0]
2025/03/13 16:52:28 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 26.0


2025/03/13 16:52:28 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 7 / 8 - Minibatch ==



Average Metric: 14.00 / 25 (56.0%): 100%|██████████| 25/25 [00:09<00:00,  2.72it/s]

2025/03/13 16:52:37 INFO dspy.evaluate.evaluate: Average Metric: 14 / 25 (56.0%)
2025/03/13 16:52:37 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 56.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 1'].
2025/03/13 16:52:37 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [72.0, 56.0, 72.0, 68.0, 72.0, 56.0]
2025/03/13 16:52:37 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [26.0]
2025/03/13 16:52:37 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 26.0


2025/03/13 16:52:37 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 8 / 8 - Full Evaluation =====
2025/03/13 16:52:37 INFO dspy.teleprompt.mipro_optimizer_v2: Doing full eval on next top averaging program (Avg Score: 72.0) from minibatch trials...



Average Metric: 68.00 / 100 (68.0%): 100%|██████████| 100/100 [02:28<00:00,  1.49s/it]

2025/03/13 16:55:06 INFO dspy.evaluate.evaluate: Average Metric: 68 / 100 (68.0%)
2025/03/13 16:55:06 INFO dspy.teleprompt.mipro_optimizer_v2: [92mNew best full eval score![0m Score: 68.0
2025/03/13 16:55:06 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [26.0, 68.0]
2025/03/13 16:55:06 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 68.0
2025/03/13 16:55:06 INFO dspy.teleprompt.mipro_optimizer_v2: 

2025/03/13 16:55:06 INFO dspy.teleprompt.mipro_optimizer_v2: Returning best identified program with score 68.0!





In [47]:
evaluator(optimized_classify, metric=validate_category)

Average Metric: 70.00 / 98 (71.4%): 100%|██████████| 98/98 [06:42<00:00,  4.11s/it]

2025/03/13 17:06:32 INFO dspy.evaluate.evaluate: Average Metric: 70 / 98 (71.4%)





Unnamed: 0,event,example_category,pred_category,confidence,validate_category
0,"2024 Comorian presidential election: Amid an opposition boycott, i...",Politics and Governance,Politics and Governance,0.8,✔️ [True]
1,November 27 – After forming a coalition Government with the right-...,Politics and Governance,Politics and Governance,0.8,✔️ [True]
2,"December 12 – At the COP28 climate summit in Dubai, a consensus is...",Politics and Governance,Economic Events,0.8,
3,"June 6–9 – The 2024 European Parliament election is held. The EPP,...",Politics and Governance,Politics and Governance,0.8,✔️ [True]
4,November 29 – 2024 Irish general election: Centre-right party Fian...,Politics and Governance,Politics and Governance,0.8,✔️ [True]


71.43

In [None]:
lm.inspect_history(n=1)

In [49]:
optimized_classify(event="Maria of Enghien sells the lordship of Argos and Nauplia to the Republic of Venice.")

Prediction(
    category='Politics and Governance',
    confidence=0.8
)

In [50]:
def classify_event_optimized(event_text):
    result = optimized_classify(event=event_text)
    return result.category, result.confidence

classifications = events['event_string'].apply(classify_event_optimized)

events['category_31_1b_optimized'] = [result[0] for result in classifications]
events['confidence_31_1b_optimized'] = [result[1] for result in classifications]

events

Unnamed: 0,event_string,category_32_1b,confidence_32_1b,category_33_70b,confidence_33_70b,category_31_1b_optimized,confidence_31_1b_optimized
0,627 – Battle of Nineveh: A Byzantine army under Emperor Heraclius ...,Wars and Conflicts,2.0,Wars and Conflicts,0.95,Wars and Conflicts,0.8
1,1388 – Maria of Enghien sells the lordship of Argos and Nauplia to...,Wars and Conflicts,1.0,Politics and Governance,0.9,Politics and Governance,0.8
2,1787 – Pennsylvania becomes the second state to ratify the US Cons...,Politics and Governance,0.95,Politics and Governance,0.95,Politics and Governance,0.9
3,1862 – American Civil War: USS Cairo sinks on the Yazoo River.[4]\n,Wars and Conflicts,0.5,Wars and Conflicts,0.95,Wars and Conflicts,0.8
4,1866 – Oaks explosion: The worst mining disaster in England kills ...,Wars and Conflicts,0.55,Man-Made Disasters and Accidents,1.0,Wars and Conflicts,0.9
5,1870 – Joseph H. Rainey of South Carolina becomes the second black...,Politics and Governance,0.5,Politics and Governance,0.95,Politics and Governance,0.8
6,1901 – Guglielmo Marconi receives the first transatlantic radio si...,Science and Innovation,0.95,Science and Innovation,0.95,Science and Innovation,0.9
7,1915 – Yuan Shikai declares the establishment of the Empire of Chi...,Wars and Conflicts,2.0,Politics and Governance,0.95,Politics and Governance,0.8
8,1917 – Father Edward J. Flanagan founds Boys Town as a farm villag...,Wars and Conflicts,2.5,Social Movements,0.8,Politics and Governance,0.9
9,"1935 – The Lebensborn Project, a Nazi reproduction program, is fou...",Wars and Conflicts,0.8,Politics and Governance,0.95,Politics and Governance,0.8


In [51]:
# Save optimized model
optimized_classify.save("optimized_classify_31_1b.json")

In [54]:
disagreements = events[events['category_31_1b_optimized'] != events['category_33_70b']]
# len(disagreements)

print("old: ", (len(events) - 17)/len(events))
print("new: ", (len(events) - len(disagreements))/len(events))

old:  0.46875
new:  0.625


In [55]:
from dspy.teleprompt import *

# Load our model
lm = dspy.LM('ollama_chat/llama3.2:1b', api_base='http://localhost:11434')
prompt_gen_lm = dspy.LM('fireworks_ai/accounts/fireworks/models/llama-v3p3-70b-instruct')
dspy.configure(lm=lm)

# Optimize
tp = dspy.MIPROv2(metric=validate_category, auto="light", prompt_model=prompt_gen_lm, task_model=lm)
optimized_classify = tp.compile(classify, trainset=trainset, max_labeled_demos=0, max_bootstrapped_demos=0)

2025/03/13 17:16:12 INFO dspy.teleprompt.mipro_optimizer_v2: 
RUNNING WITH THE FOLLOWING LIGHT AUTO RUN SETTINGS:
num_trials: 7
minibatch: True
num_candidates: 7
valset size: 100

2025/03/13 17:16:13 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 1: BOOTSTRAP FEWSHOT EXAMPLES <==
2025/03/13 17:16:13 INFO dspy.teleprompt.mipro_optimizer_v2: These will be used for informing instruction proposal.

2025/03/13 17:16:13 INFO dspy.teleprompt.mipro_optimizer_v2: Bootstrapping N=7 sets of demonstrations...


Bootstrapping set 1/7
Bootstrapping set 2/7


 17%|█▋        | 13/78 [00:00<00:00, 556.69it/s]


Bootstrapped 3 full traces after 13 examples for up to 1 rounds, amounting to 13 attempts.
Bootstrapping set 3/7


 15%|█▌        | 12/78 [00:00<00:00, 755.42it/s]


Bootstrapped 3 full traces after 12 examples for up to 1 rounds, amounting to 12 attempts.
Bootstrapping set 4/7


 10%|█         | 8/78 [00:00<00:00, 1036.56it/s]


Bootstrapped 3 full traces after 8 examples for up to 1 rounds, amounting to 8 attempts.
Bootstrapping set 5/7


  5%|▌         | 4/78 [00:00<00:00, 682.50it/s]


Bootstrapped 3 full traces after 4 examples for up to 1 rounds, amounting to 4 attempts.
Bootstrapping set 6/7


  1%|▏         | 1/78 [00:00<00:00, 416.68it/s]


Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 7/7


  6%|▋         | 5/78 [00:00<00:00, 845.63it/s]
2025/03/13 17:16:13 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 2: PROPOSE INSTRUCTION CANDIDATES <==
2025/03/13 17:16:13 INFO dspy.teleprompt.mipro_optimizer_v2: We will use the few-shot examples from the previous step, a generated dataset summary, a summary of the program code, and a randomly selected prompting tip to propose instructions.


Bootstrapped 3 full traces after 5 examples for up to 1 rounds, amounting to 5 attempts.


2025/03/13 17:16:49 INFO dspy.teleprompt.mipro_optimizer_v2: 
Proposing instructions...

2025/03/13 17:17:45 INFO dspy.teleprompt.mipro_optimizer_v2: Proposed Instructions for Predictor 0:

2025/03/13 17:17:45 INFO dspy.teleprompt.mipro_optimizer_v2: 0: Classify historic events.

2025/03/13 17:17:45 INFO dspy.teleprompt.mipro_optimizer_v2: 1: Given a historic event description, categorize the event into a specific category, such as "Politics and Governance", and provide a confidence score between 0 and 2.0, indicating the certainty of the categorization. The event description may include dates, names of individuals, countries, and other relevant details. Analyze the event description using natural language processing techniques to extract relevant features and make predictions about the event's category and confidence level.

2025/03/13 17:17:45 INFO dspy.teleprompt.mipro_optimizer_v2: 2: Analyze the given historical event description, which includes specific dates, names of individual

Average Metric: 26.00 / 100 (26.0%): 100%|██████████| 100/100 [00:00<00:00, 3979.30it/s]

2025/03/13 17:17:45 INFO dspy.evaluate.evaluate: Average Metric: 26 / 100 (26.0%)
2025/03/13 17:17:45 INFO dspy.teleprompt.mipro_optimizer_v2: Default program score: 26.0

2025/03/13 17:17:45 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 2 / 8 - Minibatch ==



Average Metric: 15.00 / 25 (60.0%): 100%|██████████| 25/25 [00:32<00:00,  1.32s/it]

2025/03/13 17:18:18 INFO dspy.evaluate.evaluate: Average Metric: 15 / 25 (60.0%)
2025/03/13 17:18:18 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 60.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 1'].
2025/03/13 17:18:18 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [60.0]
2025/03/13 17:18:18 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [26.0]
2025/03/13 17:18:18 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 26.0


2025/03/13 17:18:18 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 3 / 8 - Minibatch ==



Average Metric: 13.00 / 25 (52.0%): 100%|██████████| 25/25 [00:37<00:00,  1.49s/it]

2025/03/13 17:18:55 INFO dspy.evaluate.evaluate: Average Metric: 13 / 25 (52.0%)
2025/03/13 17:18:55 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 52.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 5'].
2025/03/13 17:18:55 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [60.0, 52.0]
2025/03/13 17:18:55 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [26.0]
2025/03/13 17:18:55 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 26.0


2025/03/13 17:18:55 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 4 / 8 - Minibatch ==



Average Metric: 15.00 / 25 (60.0%): 100%|██████████| 25/25 [00:43<00:00,  1.75s/it]

2025/03/13 17:19:39 INFO dspy.evaluate.evaluate: Average Metric: 15 / 25 (60.0%)
2025/03/13 17:19:39 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 60.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 2'].
2025/03/13 17:19:39 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [60.0, 52.0, 60.0]
2025/03/13 17:19:39 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [26.0]
2025/03/13 17:19:39 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 26.0


2025/03/13 17:19:39 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 5 / 8 - Minibatch ==



Average Metric: 14.00 / 25 (56.0%): 100%|██████████| 25/25 [00:28<00:00,  1.13s/it]

2025/03/13 17:20:07 INFO dspy.evaluate.evaluate: Average Metric: 14 / 25 (56.0%)
2025/03/13 17:20:07 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 56.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 5'].
2025/03/13 17:20:07 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [60.0, 52.0, 60.0, 56.0]
2025/03/13 17:20:07 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [26.0]
2025/03/13 17:20:07 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 26.0


2025/03/13 17:20:07 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 6 / 8 - Minibatch ==



Average Metric: 15.00 / 25 (60.0%): 100%|██████████| 25/25 [00:52<00:00,  2.11s/it]

2025/03/13 17:21:00 INFO dspy.evaluate.evaluate: Average Metric: 15 / 25 (60.0%)
2025/03/13 17:21:00 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 60.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 4'].
2025/03/13 17:21:00 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [60.0, 52.0, 60.0, 56.0, 60.0]
2025/03/13 17:21:00 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [26.0]
2025/03/13 17:21:00 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 26.0


2025/03/13 17:21:00 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 7 / 8 - Minibatch ==



Average Metric: 13.00 / 25 (52.0%): 100%|██████████| 25/25 [00:23<00:00,  1.08it/s]

2025/03/13 17:21:23 INFO dspy.evaluate.evaluate: Average Metric: 13 / 25 (52.0%)
2025/03/13 17:21:23 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 52.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 1'].
2025/03/13 17:21:23 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [60.0, 52.0, 60.0, 56.0, 60.0, 52.0]
2025/03/13 17:21:23 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [26.0]
2025/03/13 17:21:23 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 26.0


2025/03/13 17:21:23 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 8 / 8 - Full Evaluation =====
2025/03/13 17:21:23 INFO dspy.teleprompt.mipro_optimizer_v2: Doing full eval on next top averaging program (Avg Score: 60.0) from minibatch trials...



Average Metric: 62.00 / 100 (62.0%): 100%|██████████| 100/100 [02:05<00:00,  1.25s/it]

2025/03/13 17:23:28 INFO dspy.evaluate.evaluate: Average Metric: 62 / 100 (62.0%)
2025/03/13 17:23:28 INFO dspy.teleprompt.mipro_optimizer_v2: [92mNew best full eval score![0m Score: 62.0
2025/03/13 17:23:28 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [26.0, 62.0]
2025/03/13 17:23:28 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 62.0
2025/03/13 17:23:28 INFO dspy.teleprompt.mipro_optimizer_v2: 

2025/03/13 17:23:28 INFO dspy.teleprompt.mipro_optimizer_v2: Returning best identified program with score 62.0!





In [56]:
optimized_classify.save("optimized_classify_31_1b_2.json")

In [57]:
evaluator(optimized_classify, metric=validate_category)

Average Metric: 62.00 / 98 (63.3%): 100%|██████████| 98/98 [05:34<00:00,  3.42s/it]

2025/03/13 17:31:56 INFO dspy.evaluate.evaluate: Average Metric: 62 / 98 (63.3%)





Unnamed: 0,event,example_category,pred_category,confidence,validate_category
0,"2024 Comorian presidential election: Amid an opposition boycott, i...",Politics and Governance,Politics and Governance,0.8,✔️ [True]
1,November 27 – After forming a coalition Government with the right-...,Politics and Governance,Politics and Governance,0.8,✔️ [True]
2,"December 12 – At the COP28 climate summit in Dubai, a consensus is...",Politics and Governance,Politics and Governance,0.8,✔️ [True]
3,"June 6–9 – The 2024 European Parliament election is held. The EPP,...",Politics and Governance,Politics and Governance,0.8,✔️ [True]
4,November 29 – 2024 Irish general election: Centre-right party Fian...,Politics and Governance,Politics and Governance,0.8,✔️ [True]


63.27

In [None]:
classifications = events['event_string'].apply(classify_event_optimized)

events['category_31_1b_optimized_2'] = [result[0] for result in classifications]
events['confidence_31_1b_optimized_2'] = [result[1] for result in classifications]

events