In [1]:
# Patch asyncio so event loops can be nested: the notebook kernel already runs
# a loop, and later cells call asyncio.run(...) from inside it.
import nest_asyncio
nest_asyncio.apply()

In [2]:
import dspy
from dspy.teleprompt.mipro_optimizer_v2 import MIPROv2
from dspy.teleprompt.random_search import BootstrapFewShotWithRandomSearch
from dspy.evaluate import Evaluate
import asyncio
import os
import numpy as np
from dotenv import load_dotenv
from datasets import load_dataset
import logging
import pickle

from nano_graphrag._utils import compute_mdhash_id
from nano_graphrag.entity_extraction.extract import generate_dataset
from nano_graphrag.entity_extraction.module import EntityRelationshipExtractor
from nano_graphrag.entity_extraction.metric import relationship_similarity_metric, entity_recall_metric

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Directory under which this notebook stores its cached datasets and
# optimized-program files.
WORKING_DIR = "./nano_graphrag_cache_finetune_entity_relationship_dspy"

# Seed for NumPy's global RNG; the train/val/dev shuffle further down draws
# from that RNG, so changing this value changes which articles are sampled.
SEED = 1337

# Pull variables from a local .env file into the process environment
# (presumably the DEEPSEEK_* settings read when the LM is configured).
load_dotenv()

# Keep third-party logging quiet, but surface nano-graphrag's debug messages.
logging.basicConfig(level=logging.WARNING)
logging.getLogger("nano-graphrag").setLevel(logging.DEBUG)

np.random.seed(SEED)

In [11]:
# System prompt sent with every request. It is written as explicit pieces so
# the leading newline, the indentation, and the trailing spaces are visible.
system_prompt = (
    "\n"
    "    You are a world-class AI system, capable of complex reasoning and reflection. \n"
    "    Reason through the query, and then provide your final response. \n"
    "    If you detect that you made a mistake in your reasoning at any point, correct yourself.\n"
    "    Think carefully.\n"
)

# Options for dspy's OpenAI client; the API key and base URL are read from the
# environment, and a missing variable raises KeyError here.
lm_options = dict(
    model="deepseek-chat",
    model_type="chat",
    api_key=os.environ["DEEPSEEK_API_KEY"],
    base_url=os.environ["DEEPSEEK_BASE_URL"],
    system_prompt=system_prompt,
    temperature=1.0,
    top_p=1.0,
    max_tokens=4096,
)
lm = dspy.OpenAI(**lm_options)

# Register the client as the LM used by dspy modules in this notebook.
dspy.settings.configure(lm=lm, experimental=True)

In [15]:
# Make sure the cache directory exists before any artifact is written to it.
os.makedirs(WORKING_DIR, exist_ok=True)

# Number of articles sampled for each split.
train_len, val_len, dev_len = 20, 5, 5

# Locations of the cached datasets (.pkl) and optimized programs (.json).
entity_relationship_trainset_path = os.path.join(WORKING_DIR, "entity_relationship_extraction_news_trainset.pkl")
entity_relationship_valset_path = os.path.join(WORKING_DIR, "entity_relationship_extraction_news_valset.pkl")
entity_relationship_devset_path = os.path.join(WORKING_DIR, "entity_relationship_extraction_news_devset.pkl")
entity_relationship_rs_path = os.path.join(WORKING_DIR, "entity_relationship_extraction_news_rs.json")
entity_relationship_miprov2_path = os.path.join(WORKING_DIR, "entity_relationship_extraction_news_miprov2.json")

fin_news = load_dataset("ashraq/financial-news-articles")
fin_train_split = fin_news['train']

# Shuffle all row indices with NumPy's global RNG (seeded in the config cell),
# then carve consecutive, non-overlapping slices for train / val / dev.
fin_shuffled_indices = np.random.permutation(len(fin_train_split))
val_start = train_len
dev_start = val_start + val_len
dev_stop = dev_start + dev_len

train_data = fin_train_split.select(fin_shuffled_indices[:val_start])
val_data = fin_train_split.select(fin_shuffled_indices[val_start:dev_start])
dev_data = fin_train_split.select(fin_shuffled_indices[dev_start:dev_stop])

  table = cls._concat_blocks(blocks, axis=0)


In [None]:
# Preview the text of the first two training articles.
train_data['text'][:2]

In [None]:
# Show the text of every validation article.
val_data['text']

In [None]:
# Preview the text of the first two dev articles.
dev_data['text'][:2]

In [None]:
# Map each training article to a chunk record keyed by the id that
# compute_mdhash_id derives from its text (prefixed with "chunk-").
train_chunks = {
    compute_mdhash_id(text, prefix="chunk-"): {"content": text}
    for text in train_data["text"]
}

# generate_dataset is awaited via asyncio.run; this works inside the notebook
# because nest_asyncio was applied in the first cell. The filepath is
# presumably where the generated examples are saved (a later cell unpickles
# this same path) - confirm against generate_dataset.
trainset = asyncio.run(generate_dataset(chunks=train_chunks, filepath=entity_relationship_trainset_path))

In [8]:
# Keep only training examples that have at least one relationship and at
# least one entity, then display what remains.
trainset = list(
    filter(
        lambda example: not (len(example.relationships) == 0 or len(example.entities) == 0),
        trainset,
    )
)
trainset

 Example({'input_text': "Jan 22 (Reuters) - Shanghai Stock Exchange Filing\n* SHOWS BLOCK TRADE OF YONGHUI SUPERSTORES Co LTd's 166.3 MILLION SHARES INVOLVING 1.63 BILLION YUAN ($254.68 million) ON JAN 22 Source text in Chinese: bit.ly/2yJZikT Further company coverage: ($1 = 6.4003 Chinese yuan renminbi) (Reporting by Hong Kong newsroom)\n ", 'entities': [{'entity_name': 'SHANGHAI STOCK EXCHANGE', 'entity_type': 'ORGANIZATION', 'description': "The Shanghai Stock Exchange is a major stock exchange in China, where the block trade of YONGHUI SUPERSTORES Co Ltd's shares was filed.", 'importance_score': 0.9}, {'entity_name': 'YONGHUI SUPERSTORES CO LTD', 'entity_type': 'ORGANIZATION', 'description': 'YONGHUI SUPERSTORES Co Ltd is a company involved in the block trade of 166.3 million shares, valued at 1.63 billion yuan.', 'importance_score': 0.9}, {'entity_name': '166.3 MILLION SHARES', 'entity_type': 'MONEY', 'description': 'The number of shares involved in the block trade, which is 166.3 

In [None]:
# Print every training-set relationship whose 'order' is 2.
for relationship in (rel for example in trainset for rel in example.relationships):
    if relationship['order'] != 2:
        continue
    print(relationship)

In [None]:
# Print every training-set relationship whose 'order' is 3.
for relationship in (rel for example in trainset for rel in example.relationships):
    if relationship['order'] != 3:
        continue
    print(relationship)

In [None]:
# Inspect the first two relationships of the first training example.
trainset[0].relationships[:2]

In [None]:
# Map each validation article to a chunk record keyed by the id that
# compute_mdhash_id derives from its text (prefixed with "chunk-").
val_chunks = {
    compute_mdhash_id(text, prefix="chunk-"): {"content": text}
    for text in val_data["text"]
}

# generate_dataset is awaited via asyncio.run; this works inside the notebook
# because nest_asyncio was applied in the first cell. The filepath is
# presumably where the generated examples are saved (a later cell unpickles
# this same path) - confirm against generate_dataset.
valset = asyncio.run(generate_dataset(chunks=val_chunks, filepath=entity_relationship_valset_path))

In [7]:
# Keep only validation examples that have at least one relationship and at
# least one entity, then display what remains.
valset = list(
    filter(
        lambda example: not (len(example.relationships) == 0 or len(example.entities) == 0),
        valset,
    )
)
valset

[Example({'input_text': '0 COMMENTS Global stock markets were mixed Friday, with many recovering from declines a day earlier even as investors continued to weigh up the impact of higher inflation and interest rates on equities.\nThe Stoxx Europe 600 was roughly flat around noon in Europe, while most indexes in the Asia-Pacific region were higher.\nGermany’s DAX and France’s CAC 40 rose 0.8% and 1.4% this week, respectively, but a 1.5% fall in Italy’s FTSE MIB weighed on the broader European index.\nU.S. stocks were projected to open higher, with the S&P 500 and Dow Jones Industrial Average futures contracts up 0.2% and 0.4%, respectively. On Thursday, U.S. stocks rose to pare the week’s losses .\nAs 10-year U.S. Treasury yields have ticked higher this year, equity investors are watching bond markets closely for signs that higher yields are affecting corporate performance.\n“Historically, the probability of loss for the S&P 500 increases when the 10-year Treasury yield rises above 3%,” 

In [None]:
# Inspect the first two relationships of the first validation example.
valset[0].relationships[:2]

In [None]:
# Print every validation-set relationship whose 'order' is 2.
for relationship in (rel for example in valset for rel in example.relationships):
    if relationship['order'] != 2:
        continue
    print(relationship)

In [None]:
# Print every validation-set relationship whose 'order' is 3.
for relationship in (rel for example in valset for rel in example.relationships):
    if relationship['order'] != 3:
        continue
    print(relationship)

In [None]:
# Map each dev article to a chunk record keyed by the id that
# compute_mdhash_id derives from its text (prefixed with "chunk-").
dev_chunks = {
    compute_mdhash_id(text, prefix="chunk-"): {"content": text}
    for text in dev_data["text"]
}

# generate_dataset is awaited via asyncio.run; this works inside the notebook
# because nest_asyncio was applied in the first cell. The filepath is
# presumably where the generated examples are saved (a later cell unpickles
# this same path) - confirm against generate_dataset.
devset = asyncio.run(generate_dataset(chunks=dev_chunks, filepath=entity_relationship_devset_path))

In [6]:
# Keep only dev examples that have at least one relationship and at least one
# entity, then display what remains.
devset = list(
    filter(
        lambda example: not (len(example.relationships) == 0 or len(example.entities) == 0),
        devset,
    )
)
devset

[Example({'input_text': 'Kendrick Lamar’s Pulitzer Prize win is the latest sign of the growing recognition of hip-hop—this time from one of America’s highest-profile cultural institutions.\nThe rapper, who won for his album “DAMN.,” is the first winner who isn’t a classical or jazz artist since the first Pulitzer for music was issued in 1943. Aside from previous award winners Wynton Marsalis, Henry Threadgill and Ornette Coleman, it has largely been a prize for classical composers.\n... To Read the Full Story Subscribe Sign In', 'entities': [{'entity_name': 'KENDRICK LAMAR', 'entity_type': 'PERSON', 'description': "A renowned rapper who won the Pulitzer Prize for his album 'DAMN.'. He is the first non-classical or jazz artist to win the Pulitzer for music since its inception in 1943.", 'importance_score': 0.9}, {'entity_name': 'PULITZER PRIZE', 'entity_type': 'AWARD', 'description': "A prestigious award in the United States, given in various categories including journalism, literature,

In [None]:
# Inspect the first two relationships of the first dev example.
devset[0].relationships[:2]

In [None]:
# Print every dev-set relationship whose 'order' is 2.
for relationship in (rel for example in devset for rel in example.relationships):
    if relationship['order'] != 2:
        continue
    print(relationship)

In [None]:
# Print every dev-set relationship whose 'order' is 3.
for relationship in (rel for example in devset for rel in example.relationships):
    if relationship['order'] != 3:
        continue
    print(relationship)

In [None]:
# Display the dev set.
devset

In [5]:
# Reload the train / val / dev examples from their pickle files under
# WORKING_DIR. The paths are rebuilt here so this cell only depends on
# WORKING_DIR and the imports.
entity_relationship_trainset_path = os.path.join(WORKING_DIR, "entity_relationship_extraction_news_trainset.pkl")
entity_relationship_valset_path = os.path.join(WORKING_DIR, "entity_relationship_extraction_news_valset.pkl")
entity_relationship_devset_path = os.path.join(WORKING_DIR, "entity_relationship_extraction_news_devset.pkl")


def _load_pickle(path):
    """Unpickle and return the object stored at ``path``.

    The file is opened in a ``with`` block so the handle is closed even if
    unpickling fails (the previous ``pickle.load(open(...))`` form never
    closed it).

    NOTE: unpickling can execute arbitrary code; only load files that this
    notebook (or another trusted source) produced.

    Raises:
        FileNotFoundError: if ``path`` does not exist.
    """
    with open(path, "rb") as handle:
        return pickle.load(handle)


trainset = _load_pickle(entity_relationship_trainset_path)
valset = _load_pickle(entity_relationship_valset_path)
devset = _load_pickle(entity_relationship_devset_path)

In [12]:
# Instantiate the extraction module (not yet optimized) and display its repr.
model = EntityRelationshipExtractor()
model

extractor = Predict(StringSignature(input_text, entity_types -> rationale, entities, relationships
    instructions='Signature for extracting both entities and relationships from input text.'
    input_text = Field(annotation=str required=True json_schema_extra={'desc': 'The text to extract entities and relationships from.', '__dspy_field_type': 'input', 'prefix': 'Input Text:'})
    entity_types = Field(annotation=str required=True json_schema_extra={'__dspy_field_type': 'input', 'prefix': 'Entity Types:', 'desc': '${entity_types}'})
    rationale = Field(annotation=str required=True json_schema_extra={'prefix': "Reasoning: Let's think step by step in order to", 'desc': '${produce the output fields}. We ...', '__dspy_field_type': 'output'})
    entities = Field(annotation=str required=True json_schema_extra={'desc': '\n        Format:\n        {\n            "entities": [\n                {\n                    "entity_name": "ENTITY NAME",\n                    "entity_type": "ENTITY 

In [17]:
# Score the current model on the dev set, once per metric.
metrics = [entity_recall_metric, relationship_similarity_metric]

# Evaluate settings shared by every metric; only `metric` varies per run.
evaluate_options = dict(
    devset=devset,
    num_threads=os.cpu_count(),
    display_progress=True,
    display_table=5,
)

for metric in metrics:
    evaluate = Evaluate(metric=metric, **evaluate_options)
    evaluate(model)

DEBUG:nano-graphrag:Entities: 7 | Missed Entities: 0 | Total Entities: 7
DEBUG:nano-graphrag:Entities: 5 | Missed Entities: 0 | Total Entities: 5
DEBUG:nano-graphrag:Entities: 10 | Missed Entities: 0 | Total Entities: 10
DEBUG:nano-graphrag:Entities: 7 | Missed Entities: 0 | Total Entities: 7
DEBUG:nano-graphrag:Entities: 8 | Missed Entities: 0 | Total Entities: 8
DEBUG:nano-graphrag:Relationships: 7 | Missed Relationships: 3 | Total Relationships: 10
DEBUG:nano-graphrag:Relationships: 4 | Missed Relationships: 7 | Total Relationships: 11
DEBUG:nano-graphrag:Relationships: 9 | Missed Relationships: 9 | Total Relationships: 18
DEBUG:nano-graphrag:Relationships: 6 | Missed Relationships: 5 | Total Relationships: 11
DEBUG:nano-graphrag:Relationships: 7 | Missed Relationships: 8 | Total Relationships: 15
DEBUG:nano-graphrag:Direct Relationships: 4 | Second-order: 6 | Third-order: 0 | Total Relationships: 10
Average Metric: 1.0 / 1  (100.0):   0%|          | 0/5 [00:00<?, ?it/s]DEBUG:nano-g

Unnamed: 0,input_text,example_entities,example_relationships,pred_entities,pred_relationships,entity_recall_metric
0,"Kendrick Lamar’s Pulitzer Prize win is the latest sign of the growing recognition of hip-hop—this time from one of America’s highest-profile cultural institutions. The rapper,...","[{'entity_name': 'KENDRICK LAMAR', 'entity_type': 'PERSON', 'description': ""A renowned rapper who won the Pulitzer Prize for his album 'DAMN.'. He is the first non-classical or jazz...","[{'src_id': 'KENDRICK LAMAR', 'tgt_id': 'PULITZER PRIZE', 'description': ""Kendrick Lamar won the Pulitzer Prize for music for his album 'DAMN.'. This win is significant as it...","[{'entity_name': 'KENDRICK LAMAR', 'entity_type': 'PERSON', 'description': ""A prominent hip-hop artist who won the Pulitzer Prize for his album 'DAMN.'. He is the first non-classical or...","[{'src_id': 'KENDRICK LAMAR', 'tgt_id': 'PULITZER PRIZE', 'description': ""Kendrick Lamar won the Pulitzer Prize for music for his album 'DAMN.'. This relationship signifies the recognition of...",✔️ [1.0]
1,Cramer Remix: The fall of cloud stocks could be a worrisome sign for tech 11 Hours Ago Jim Cramer deciphers the collapse of the “cloud...,"[{'entity_name': 'CRAMER', 'entity_type': 'PERSON', 'description': ""Jim Cramer, a well-known financial commentator and host of 'Mad Money' on CNBC, who is analyzing the fall of cloud...","[{'src_id': 'CRAMER', 'tgt_id': 'CLOUD STOCKS', 'description': 'Jim Cramer is analyzing the fall of cloud stocks, providing insights into what this decline could mean for the...","[{'entity_name': 'CRAMER', 'entity_type': 'PERSON', 'description': ""Jim Cramer, a well-known financial commentator and host of 'Mad Money' on CNBC, who is analyzing the fall of cloud...","[{'src_id': 'CRAMER', 'tgt_id': 'CLOUD STOCKS', 'description': 'Jim Cramer is analyzing the fall of cloud stocks and its implications.', 'weight': 0.9, 'order': 1}, {'src_id': 'CLOUD STOCKS',...",✔️ [1.0]
2,* Mulberry manufactures 55 pct of leather goods in UK * Import cost of raw materials has gone up after Brexit vote * Brand fully...,"[{'entity_name': 'MULBERRY', 'entity_type': 'ORGANIZATION', 'description': 'A handbag maker committed to manufacturing in Britain, facing higher leather import costs post-Brexit. The company has two manufacturing plants...","[{'src_id': 'MULBERRY', 'tgt_id': 'UK', 'description': 'Mulberry manufactures 55% of its leather goods in the UK and remains committed to the country despite post-Brexit challenges.', 'weight':...","[{'entity_name': 'MULBERRY', 'entity_type': 'ORGANIZATION', 'description': 'A handbag maker committed to manufacturing in Britain, facing higher leather import costs post-Brexit. It produces 55% of its leather...","[{'src_id': 'MULBERRY', 'tgt_id': 'UK', 'description': 'Mulberry manufactures 55% of its leather goods in the UK and considers the UK its biggest market.', 'weight': 0.9, 'order':...",✔️ [0.9090909090909091]
3,"COLUMBIA, Md., April 25, 2018 (GLOBE NEWSWIRE) -- W. R. Grace & Co. (NYSE:GRA) today announced that it has declared a quarterly cash dividend of...","[{'entity_name': 'W. R. GRACE & CO.', 'entity_type': 'ORGANIZATION', 'description': 'A leading global supplier of catalysts and engineered materials, with two industry-leading business segments: Catalysts Technologies...","[{'src_id': 'W. R. GRACE & CO.', 'tgt_id': 'NYSE', 'description': 'W. R. Grace & Co. is listed on the New York Stock Exchange under the symbol...","[{'entity_name': 'W. R. GRACE & CO.', 'entity_type': 'ORGANIZATION', 'description': 'A leading global supplier of catalysts and engineered materials, with two industry-leading business segments: Catalysts Technologies...","[{'src_id': 'W. R. GRACE & CO.', 'tgt_id': 'RICH BADMINGTON', 'description': 'Rich Badmington is the Media Relations contact for W. R. Grace & Co., responsible for...",✔️ [0.5]
4,"Top marketer joins company to lead global marketing and brand strategies FRAMINGHAM, Mass. & DUBLIN--(BUSINESS WIRE)-- Globoforce ® , a leading provider of human applications,...","[{'entity_name': 'MAGGIE FOX', 'entity_type': 'PERSON', 'description': ""A seasoned digital marketing executive who has served as CMO of Aeroplan and senior vice president of digital at...","[{'src_id': 'MAGGIE FOX', 'tgt_id': 'GLOBOFORCE', 'description': ""Maggie Fox has joined Globoforce as its new chief marketing officer, leading global marketing efforts and enhancing the company's...","[{'entity_name': 'MAGGIE FOX', 'entity_type': 'PERSON', 'description': ""A seasoned digital marketing executive who has served as CMO of Aeroplan and senior vice president of digital at...","[{'src_id': 'MAGGIE FOX', 'tgt_id': 'GLOBOFORCE', 'description': 'Maggie Fox joins Globoforce as the chief marketing officer, leading global marketing efforts and driving brand strategies.', 'weight': 1.0,...",✔️ [0.7777777777777778]


DEBUG:nano-graphrag:Entities: 7 | Missed Entities: 0 | Total Entities: 7
DEBUG:nano-graphrag:Entities: 5 | Missed Entities: 0 | Total Entities: 5
DEBUG:nano-graphrag:Entities: 10 | Missed Entities: 0 | Total Entities: 10
DEBUG:nano-graphrag:Entities: 7 | Missed Entities: 0 | Total Entities: 7
DEBUG:nano-graphrag:Entities: 8 | Missed Entities: 0 | Total Entities: 8
DEBUG:nano-graphrag:Relationships: 7 | Missed Relationships: 3 | Total Relationships: 10
DEBUG:nano-graphrag:Relationships: 4 | Missed Relationships: 7 | Total Relationships: 11
DEBUG:nano-graphrag:Relationships: 9 | Missed Relationships: 9 | Total Relationships: 18
DEBUG:nano-graphrag:Relationships: 6 | Missed Relationships: 5 | Total Relationships: 11
DEBUG:nano-graphrag:Relationships: 7 | Missed Relationships: 8 | Total Relationships: 15
DEBUG:nano-graphrag:Direct Relationships: 4 | Second-order: 6 | Third-order: 0 | Total Relationships: 10
DEBUG:nano-graphrag:Direct Relationships: 9 | Second-order: 2 | Third-order: 0 | To

Unnamed: 0,input_text,example_entities,example_relationships,pred_entities,pred_relationships,relationship_similarity_metric
0,"Kendrick Lamar’s Pulitzer Prize win is the latest sign of the growing recognition of hip-hop—this time from one of America’s highest-profile cultural institutions. The rapper,...","[{'entity_name': 'KENDRICK LAMAR', 'entity_type': 'PERSON', 'description': ""A renowned rapper who won the Pulitzer Prize for his album 'DAMN.'. He is the first non-classical or jazz...","[{'src_id': 'KENDRICK LAMAR', 'tgt_id': 'PULITZER PRIZE', 'description': ""Kendrick Lamar won the Pulitzer Prize for music for his album 'DAMN.'. This win is significant as it...","[{'entity_name': 'KENDRICK LAMAR', 'entity_type': 'PERSON', 'description': ""A prominent hip-hop artist who won the Pulitzer Prize for his album 'DAMN.'. He is the first non-classical or...","[{'src_id': 'KENDRICK LAMAR', 'tgt_id': 'PULITZER PRIZE', 'description': ""Kendrick Lamar won the Pulitzer Prize for music for his album 'DAMN.'. This relationship signifies the recognition of...",✔️ [0.51]
1,Cramer Remix: The fall of cloud stocks could be a worrisome sign for tech 11 Hours Ago Jim Cramer deciphers the collapse of the “cloud...,"[{'entity_name': 'CRAMER', 'entity_type': 'PERSON', 'description': ""Jim Cramer, a well-known financial commentator and host of 'Mad Money' on CNBC, who is analyzing the fall of cloud...","[{'src_id': 'CRAMER', 'tgt_id': 'CLOUD STOCKS', 'description': 'Jim Cramer is analyzing the fall of cloud stocks, providing insights into what this decline could mean for the...","[{'entity_name': 'CRAMER', 'entity_type': 'PERSON', 'description': ""Jim Cramer, a well-known financial commentator and host of 'Mad Money' on CNBC, who is analyzing the fall of cloud...","[{'src_id': 'CRAMER', 'tgt_id': 'CLOUD STOCKS', 'description': 'Jim Cramer is analyzing the fall of cloud stocks and its implications.', 'weight': 0.9, 'order': 1}, {'src_id': 'CLOUD STOCKS',...",✔️ [0.56]
2,* Mulberry manufactures 55 pct of leather goods in UK * Import cost of raw materials has gone up after Brexit vote * Brand fully...,"[{'entity_name': 'MULBERRY', 'entity_type': 'ORGANIZATION', 'description': 'A handbag maker committed to manufacturing in Britain, facing higher leather import costs post-Brexit. The company has two manufacturing plants...","[{'src_id': 'MULBERRY', 'tgt_id': 'UK', 'description': 'Mulberry manufactures 55% of its leather goods in the UK and remains committed to the country despite post-Brexit challenges.', 'weight':...","[{'entity_name': 'MULBERRY', 'entity_type': 'ORGANIZATION', 'description': 'A handbag maker committed to manufacturing in Britain, facing higher leather import costs post-Brexit. It produces 55% of its leather...","[{'src_id': 'MULBERRY', 'tgt_id': 'UK', 'description': 'Mulberry manufactures 55% of its leather goods in the UK and considers the UK its biggest market.', 'weight': 0.9, 'order':...",✔️ [0.28888888888888886]
3,"COLUMBIA, Md., April 25, 2018 (GLOBE NEWSWIRE) -- W. R. Grace & Co. (NYSE:GRA) today announced that it has declared a quarterly cash dividend of...","[{'entity_name': 'W. R. GRACE & CO.', 'entity_type': 'ORGANIZATION', 'description': 'A leading global supplier of catalysts and engineered materials, with two industry-leading business segments: Catalysts Technologies...","[{'src_id': 'W. R. GRACE & CO.', 'tgt_id': 'NYSE', 'description': 'W. R. Grace & Co. is listed on the New York Stock Exchange under the symbol...","[{'entity_name': 'W. R. GRACE & CO.', 'entity_type': 'ORGANIZATION', 'description': 'A leading global supplier of catalysts and engineered materials, with two industry-leading business segments: Catalysts Technologies...","[{'src_id': 'W. R. GRACE & CO.', 'tgt_id': 'RICH BADMINGTON', 'description': 'Rich Badmington is the Media Relations contact for W. R. Grace & Co., responsible for...",✔️ [0.15454545454545457]
4,"Top marketer joins company to lead global marketing and brand strategies FRAMINGHAM, Mass. & DUBLIN--(BUSINESS WIRE)-- Globoforce ® , a leading provider of human applications,...","[{'entity_name': 'MAGGIE FOX', 'entity_type': 'PERSON', 'description': ""A seasoned digital marketing executive who has served as CMO of Aeroplan and senior vice president of digital at...","[{'src_id': 'MAGGIE FOX', 'tgt_id': 'GLOBOFORCE', 'description': ""Maggie Fox has joined Globoforce as its new chief marketing officer, leading global marketing efforts and enhancing the company's...","[{'entity_name': 'MAGGIE FOX', 'entity_type': 'PERSON', 'description': ""A seasoned digital marketing executive who has served as CMO of Aeroplan and senior vice president of digital at...","[{'src_id': 'MAGGIE FOX', 'tgt_id': 'GLOBOFORCE', 'description': 'Maggie Fox joins Globoforce as the chief marketing officer, leading global marketing efforts and driving brand strategies.', 'weight': 1.0,...",✔️ [0.38076923076923064]


In [13]:
# Settings for the bootstrap-few-shot random search; candidate programs are
# scored with the relationship similarity metric.
random_search_options = dict(
    metric=relationship_similarity_metric,
    num_threads=os.cpu_count(),
    num_candidate_programs=4,
    max_labeled_demos=5,
    max_bootstrapped_demos=4,
)
optimizer = BootstrapFewShotWithRandomSearch(**random_search_options)

# Compile an optimized copy of `model` using the train and validation sets,
# then display the resulting program.
rs_model = optimizer.compile(model, trainset=trainset, valset=valset)
rs_model

Going to sample between 1 and 4 traces per predictor.
Will attempt to bootstrap 4 candidate sets.


  0%|          | 0/3 [00:00<?, ?it/s]INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
DEBUG:nano-graphrag:Entities: 6 | Missed Entities: 0 | Total Entities: 6
DEBUG:nano-graphrag:Relationships: 5 | Missed Relationships: 5 | Total Relationships: 10
DEBUG:nano-graphrag:Direct Relationships: 10 | Second-order: 0 | Third-order: 0 | Total Relationships: 10
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/compl

Score: 29.15 for set: [0, 0]
New best sscore: 29.15 for seed -3
Scores so far: [29.15]
Best score: 29.15


  0%|          | 0/3 [00:00<?, ?it/s]INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
DEBUG:nano-graphrag:Entities: 10 | Missed Entities: 0 | Total Entities: 10
DEBUG:nano-graphrag:Relationships: 9 | Missed Relationships: 5 | Total Relationships: 14
DEBUG:nano-graphrag:Direct Relationships: 14 | Second-order: 0 | Third-order: 0 | Total Relationships: 14
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/com

Score: 23.24 for set: [5, 5]
Scores so far: [29.15, 23.24]
Best score: 29.15


  0%|          | 0/19 [00:00<?, ?it/s]INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
DEBUG:nano-graphrag:Entities: 14 | Missed Entities: 0 | Total Entities: 14
DEBUG:nano-graphrag:Relationships: 9 | Missed Relationships: 6 | Total Relationships: 15
DEBUG:nano-graphrag:Direct Relationships: 10 | Second-order: 5 | Third-order: 0 | Total Relationships: 15
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/co

Bootstrapped 4 full traces after 5 examples in round 0.


  0%|          | 0/3 [00:00<?, ?it/s]INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
DEBUG:nano-graphrag:Entities: 9 | Missed Entities: 0 | Total Entities: 9
DEBUG:nano-graphrag:Relationships: 8 | Missed Relationships: 4 | Total Relationships: 12
DEBUG:nano-graphrag:Direct Relationships: 12 | Second-order: 0 | Third-order: 0 | Total Relationships: 12
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/compl

Score: 22.57 for set: [5, 5]
Scores so far: [29.15, 23.24, 22.57]
Best score: 29.15
Average of max per entry across top 1 scores: 0.2915151515151515
Average of max per entry across top 2 scores: 0.3015151515151515
Average of max per entry across top 3 scores: 0.3015151515151515
Average of max per entry across top 5 scores: 0.3015151515151515
Average of max per entry across top 8 scores: 0.3015151515151515
Average of max per entry across top 9999 scores: 0.3015151515151515


  0%|          | 0/19 [00:00<?, ?it/s]INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
DEBUG:nano-graphrag:Entities: 13 | Missed Entities: 0 | Total Entities: 13
DEBUG:nano-graphrag:Relationships: 12 | Missed Relationships: 11 | Total Relationships: 23
DEBUG:nano-graphrag:Direct Relationships: 20 | Second-order: 3 | Third-order: 0 | Total Relationships: 23
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/

Bootstrapped 4 full traces after 5 examples in round 0.


  0%|          | 0/3 [00:00<?, ?it/s]INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
DEBUG:nano-graphrag:Entities: 10 | Missed Entities: 0 | Total Entities: 10
DEBUG:nano-graphrag:Relationships: 7 | Missed Relationships: 3 | Total Relationships: 10
DEBUG:nano-graphrag:Direct Relationships: 10 | Second-order: 0 | Third-order: 0 | Total Relationships: 10
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/com

Score: 22.72 for set: [5, 5]
Scores so far: [29.15, 23.24, 22.57, 22.72]
Best score: 29.15
Average of max per entry across top 1 scores: 0.2915151515151515
Average of max per entry across top 2 scores: 0.3015151515151515
Average of max per entry across top 3 scores: 0.3165151515151516
Average of max per entry across top 5 scores: 0.3165151515151516
Average of max per entry across top 8 scores: 0.3165151515151516
Average of max per entry across top 9999 scores: 0.3165151515151516


  0%|          | 0/19 [00:00<?, ?it/s]INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
DEBUG:nano-graphrag:Entities: 8 | Missed Entities: 0 | Total Entities: 8
DEBUG:nano-graphrag:Relationships: 7 | Missed Relationships: 7 | Total Relationships: 14
DEBUG:nano-graphrag:Direct Relationships: 14 | Second-order: 0 | Third-order: 0 | Total Relationships: 14
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/comp

Bootstrapped 2 full traces after 3 examples in round 0.


  0%|          | 0/3 [00:00<?, ?it/s]INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
DEBUG:nano-graphrag:Entities: 7 | Missed Entities: 0 | Total Entities: 7
DEBUG:nano-graphrag:Relationships: 6 | Missed Relationships: 4 | Total Relationships: 10
DEBUG:nano-graphrag:Direct Relationships: 10 | Second-order: 0 | Third-order: 0 | Total Relationships: 10
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/compl

Score: 20.38 for set: [5, 5]
Scores so far: [29.15, 23.24, 22.57, 22.72, 20.38]
Best score: 29.15
Average of max per entry across top 1 scores: 0.2915151515151515
Average of max per entry across top 2 scores: 0.3015151515151515
Average of max per entry across top 3 scores: 0.3165151515151516
Average of max per entry across top 5 scores: 0.3165151515151516
Average of max per entry across top 8 scores: 0.3165151515151516
Average of max per entry across top 9999 scores: 0.3165151515151516


  0%|          | 0/19 [00:00<?, ?it/s]INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
DEBUG:nano-graphrag:Entities: 9 | Missed Entities: 0 | Total Entities: 9
DEBUG:nano-graphrag:Relationships: 9 | Missed Relationships: 3 | Total Relationships: 12
DEBUG:nano-graphrag:Direct Relationships: 8 | Second-order: 4 | Third-order: 0 | Total Relationships: 12
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/compl

Bootstrapped 1 full traces after 2 examples in round 0.


  0%|          | 0/3 [00:00<?, ?it/s]INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
DEBUG:nano-graphrag:Entities: 8 | Missed Entities: 0 | Total Entities: 8
DEBUG:nano-graphrag:Relationships: 7 | Missed Relationships: 3 | Total Relationships: 10
DEBUG:nano-graphrag:Direct Relationships: 10 | Second-order: 0 | Third-order: 0 | Total Relationships: 10
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/compl

Score: 21.97 for set: [5, 5]
Scores so far: [29.15, 23.24, 22.57, 22.72, 20.38, 21.97]
Best score: 29.15
Average of max per entry across top 1 scores: 0.2915151515151515
Average of max per entry across top 2 scores: 0.3015151515151515
Average of max per entry across top 3 scores: 0.3165151515151516
Average of max per entry across top 5 scores: 0.3165151515151516
Average of max per entry across top 8 scores: 0.3165151515151516
Average of max per entry across top 9999 scores: 0.3165151515151516


  0%|          | 0/19 [00:00<?, ?it/s]INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
DEBUG:nano-graphrag:Entities: 10 | Missed Entities: 0 | Total Entities: 10
DEBUG:nano-graphrag:Relationships: 8 | Missed Relationships: 6 | Total Relationships: 14
DEBUG:nano-graphrag:Direct Relationships: 14 | Second-order: 0 | Third-order: 0 | Total Relationships: 14
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/co

Bootstrapped 2 full traces after 3 examples in round 0.


  0%|          | 0/3 [00:00<?, ?it/s]INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
DEBUG:nano-graphrag:Entities: 12 | Missed Entities: 0 | Total Entities: 12
DEBUG:nano-graphrag:Relationships: 8 | Missed Relationships: 2 | Total Relationships: 10
DEBUG:nano-graphrag:Direct Relationships: 10 | Second-order: 0 | Third-order: 0 | Total Relationships: 10
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/com

Score: 20.21 for set: [5, 5]
Scores so far: [29.15, 23.24, 22.57, 22.72, 20.38, 21.97, 20.21]
Best score: 29.15
Average of max per entry across top 1 scores: 0.2915151515151515
Average of max per entry across top 2 scores: 0.3015151515151515
Average of max per entry across top 3 scores: 0.3165151515151516
Average of max per entry across top 5 scores: 0.3165151515151516
Average of max per entry across top 8 scores: 0.3231818181818182
Average of max per entry across top 9999 scores: 0.3231818181818182
7 candidate programs found.


extractor = Predict(StringSignature(input_text, entity_types -> rationale, entities, relationships
    instructions='Signature for extracting both entities and relationships from input text.'
    input_text = Field(annotation=str required=True json_schema_extra={'desc': 'The text to extract entities and relationships from.', '__dspy_field_type': 'input', 'prefix': 'Input Text:'})
    entity_types = Field(annotation=str required=True json_schema_extra={'__dspy_field_type': 'input', 'prefix': 'Entity Types:', 'desc': '${entity_types}'})
    rationale = Field(annotation=str required=True json_schema_extra={'prefix': "Reasoning: Let's think step by step in order to", 'desc': '${produce the output fields}. We ...', '__dspy_field_type': 'output'})
    entities = Field(annotation=str required=True json_schema_extra={'desc': '\n        Format:\n        {\n            "entities": [\n                {\n                    "entity_name": "ENTITY NAME",\n                    "entity_type": "ENTITY 

In [14]:
# Evaluate the random-search-optimized program (`rs_model`) on the dev set,
# once per metric. The aggregate score of each run is kept in `rs_scores`
# (keyed by the metric function's name) and shown as the cell's final value,
# so the headline numbers are not only visible inside the progress-bar text
# that gets interleaved with DEBUG log lines.
metrics = [entity_recall_metric, relationship_similarity_metric]
rs_scores = {}
for metric in metrics:
    evaluate = Evaluate(
        devset=devset,
        metric=metric,
        num_threads=os.cpu_count(),
        display_progress=True,
        display_table=5,  # render the first 5 per-example rows
    )
    # With the default return flags, calling the evaluator returns the
    # aggregate score for this metric -- TODO confirm for the installed DSPy version.
    rs_scores[metric.__name__] = evaluate(rs_model)
rs_scores

DEBUG:nano-graphrag:Entities: 7 | Missed Entities: 0 | Total Entities: 7
DEBUG:nano-graphrag:Relationships: 6 | Missed Relationships: 5 | Total Relationships: 11
DEBUG:nano-graphrag:Entities: 8 | Missed Entities: 0 | Total Entities: 8
DEBUG:nano-graphrag:Direct Relationships: 10 | Second-order: 1 | Third-order: 0 | Total Relationships: 11
DEBUG:nano-graphrag:Relationships: 7 | Missed Relationships: 8 | Total Relationships: 15
Average Metric: 0.5 / 1  (50.0):   0%|          | 0/5 [00:00<?, ?it/s]DEBUG:nano-graphrag:Direct Relationships: 15 | Second-order: 0 | Third-order: 0 | Total Relationships: 15
DEBUG:nano-graphrag:Entities: 7 | Missed Entities: 0 | Total Entities: 7
DEBUG:nano-graphrag:Relationships: 7 | Missed Relationships: 3 | Total Relationships: 10
DEBUG:nano-graphrag:Direct Relationships: 4 | Second-order: 6 | Third-order: 0 | Total Relationships: 10
DEBUG:nano-graphrag:Entities: 5 | Missed Entities: 0 | Total Entities: 5
DEBUG:nano-graphrag:Relationships: 4 | Missed Relation

Unnamed: 0,input_text,example_entities,example_relationships,pred_entities,pred_relationships,entity_recall_metric
0,"Kendrick Lamar’s Pulitzer Prize win is the latest sign of the growing recognition of hip-hop—this time from one of America’s highest-profile cultural institutions. The rapper,...","[{'entity_name': 'KENDRICK LAMAR', 'entity_type': 'PERSON', 'description': ""A renowned rapper who won the Pulitzer Prize for his album 'DAMN.'. He is the first non-classical or jazz...","[{'src_id': 'KENDRICK LAMAR', 'tgt_id': 'PULITZER PRIZE', 'description': ""Kendrick Lamar won the Pulitzer Prize for music for his album 'DAMN.'. This win is significant as it...","[{'entity_name': 'KENDRICK LAMAR', 'entity_type': 'PERSON', 'description': ""A prominent hip-hop artist who won the Pulitzer Prize for his album 'DAMN.'. He is the first non-classical or...","[{'src_id': 'KENDRICK LAMAR', 'tgt_id': 'PULITZER PRIZE', 'description': ""Kendrick Lamar won the Pulitzer Prize for music for his album 'DAMN.'. This relationship signifies the recognition of...",✔️ [1.0]
1,Cramer Remix: The fall of cloud stocks could be a worrisome sign for tech 11 Hours Ago Jim Cramer deciphers the collapse of the “cloud...,"[{'entity_name': 'CRAMER', 'entity_type': 'PERSON', 'description': ""Jim Cramer, a well-known financial commentator and host of 'Mad Money' on CNBC, who is analyzing the fall of cloud...","[{'src_id': 'CRAMER', 'tgt_id': 'CLOUD STOCKS', 'description': 'Jim Cramer is analyzing the fall of cloud stocks, providing insights into what this decline could mean for the...","[{'entity_name': 'CRAMER', 'entity_type': 'PERSON', 'description': ""Jim Cramer, a well-known financial commentator and host of 'Mad Money' on CNBC, who is analyzing the fall of cloud...","[{'src_id': 'CRAMER', 'tgt_id': 'CLOUD STOCKS', 'description': 'Jim Cramer is analyzing the fall of cloud stocks and its implications.', 'weight': 0.9, 'order': 1}, {'src_id': 'CLOUD STOCKS',...",✔️ [1.0]
2,* Mulberry manufactures 55 pct of leather goods in UK * Import cost of raw materials has gone up after Brexit vote * Brand fully...,"[{'entity_name': 'MULBERRY', 'entity_type': 'ORGANIZATION', 'description': 'A handbag maker committed to manufacturing in Britain, facing higher leather import costs post-Brexit. The company has two manufacturing plants...","[{'src_id': 'MULBERRY', 'tgt_id': 'UK', 'description': 'Mulberry manufactures 55% of its leather goods in the UK and remains committed to the country despite post-Brexit challenges.', 'weight':...","[{'entity_name': 'MULBERRY', 'entity_type': 'ORGANIZATION', 'description': 'A handbag maker committed to manufacturing in Britain, facing higher leather import costs post-Brexit. It produces 55% of its leather...","[{'src_id': 'MULBERRY', 'tgt_id': 'UK', 'description': 'Mulberry manufactures 55% of its leather goods in the UK and considers the UK its biggest market.', 'weight': 0.9, 'order':...",✔️ [0.9090909090909091]
3,"COLUMBIA, Md., April 25, 2018 (GLOBE NEWSWIRE) -- W. R. Grace & Co. (NYSE:GRA) today announced that it has declared a quarterly cash dividend of...","[{'entity_name': 'W. R. GRACE & CO.', 'entity_type': 'ORGANIZATION', 'description': 'A leading global supplier of catalysts and engineered materials, with two industry-leading business segments: Catalysts Technologies...","[{'src_id': 'W. R. GRACE & CO.', 'tgt_id': 'NYSE', 'description': 'W. R. Grace & Co. is listed on the New York Stock Exchange under the symbol...","[{'entity_name': 'W. R. GRACE & CO.', 'entity_type': 'ORGANIZATION', 'description': 'A leading global supplier of catalysts and engineered materials, with two industry-leading business segments: Catalysts Technologies...","[{'src_id': 'W. R. GRACE & CO.', 'tgt_id': 'RICH BADMINGTON', 'description': 'Rich Badmington is the Media Relations contact for W. R. Grace & Co., responsible for...",✔️ [0.5]
4,"Top marketer joins company to lead global marketing and brand strategies FRAMINGHAM, Mass. & DUBLIN--(BUSINESS WIRE)-- Globoforce ® , a leading provider of human applications,...","[{'entity_name': 'MAGGIE FOX', 'entity_type': 'PERSON', 'description': ""A seasoned digital marketing executive who has served as CMO of Aeroplan and senior vice president of digital at...","[{'src_id': 'MAGGIE FOX', 'tgt_id': 'GLOBOFORCE', 'description': ""Maggie Fox has joined Globoforce as its new chief marketing officer, leading global marketing efforts and enhancing the company's...","[{'entity_name': 'MAGGIE FOX', 'entity_type': 'PERSON', 'description': ""A seasoned digital marketing executive who has served as CMO of Aeroplan and senior vice president of digital at...","[{'src_id': 'MAGGIE FOX', 'tgt_id': 'GLOBOFORCE', 'description': 'Maggie Fox joins Globoforce as the chief marketing officer, leading global marketing efforts and driving brand strategies.', 'weight': 1.0,...",✔️ [0.7777777777777778]


DEBUG:nano-graphrag:Entities: 7 | Missed Entities: 0 | Total Entities: 7
DEBUG:nano-graphrag:Entities: 5 | Missed Entities: 0 | Total Entities: 5
DEBUG:nano-graphrag:Entities: 10 | Missed Entities: 0 | Total Entities: 10
DEBUG:nano-graphrag:Entities: 7 | Missed Entities: 0 | Total Entities: 7
DEBUG:nano-graphrag:Entities: 8 | Missed Entities: 0 | Total Entities: 8
DEBUG:nano-graphrag:Relationships: 7 | Missed Relationships: 3 | Total Relationships: 10
DEBUG:nano-graphrag:Relationships: 4 | Missed Relationships: 7 | Total Relationships: 11
DEBUG:nano-graphrag:Relationships: 9 | Missed Relationships: 9 | Total Relationships: 18
DEBUG:nano-graphrag:Relationships: 6 | Missed Relationships: 5 | Total Relationships: 11
DEBUG:nano-graphrag:Relationships: 7 | Missed Relationships: 8 | Total Relationships: 15
DEBUG:nano-graphrag:Direct Relationships: 4 | Second-order: 6 | Third-order: 0 | Total Relationships: 10
DEBUG:nano-graphrag:Direct Relationships: 9 | Second-order: 2 | Third-order: 0 | To

Unnamed: 0,input_text,example_entities,example_relationships,pred_entities,pred_relationships,relationship_similarity_metric
0,"Kendrick Lamar’s Pulitzer Prize win is the latest sign of the growing recognition of hip-hop—this time from one of America’s highest-profile cultural institutions. The rapper,...","[{'entity_name': 'KENDRICK LAMAR', 'entity_type': 'PERSON', 'description': ""A renowned rapper who won the Pulitzer Prize for his album 'DAMN.'. He is the first non-classical or jazz...","[{'src_id': 'KENDRICK LAMAR', 'tgt_id': 'PULITZER PRIZE', 'description': ""Kendrick Lamar won the Pulitzer Prize for music for his album 'DAMN.'. This win is significant as it...","[{'entity_name': 'KENDRICK LAMAR', 'entity_type': 'PERSON', 'description': ""A prominent hip-hop artist who won the Pulitzer Prize for his album 'DAMN.'. He is the first non-classical or...","[{'src_id': 'KENDRICK LAMAR', 'tgt_id': 'PULITZER PRIZE', 'description': ""Kendrick Lamar won the Pulitzer Prize for music for his album 'DAMN.'. This relationship signifies the recognition of...",✔️ [0.51]
1,Cramer Remix: The fall of cloud stocks could be a worrisome sign for tech 11 Hours Ago Jim Cramer deciphers the collapse of the “cloud...,"[{'entity_name': 'CRAMER', 'entity_type': 'PERSON', 'description': ""Jim Cramer, a well-known financial commentator and host of 'Mad Money' on CNBC, who is analyzing the fall of cloud...","[{'src_id': 'CRAMER', 'tgt_id': 'CLOUD STOCKS', 'description': 'Jim Cramer is analyzing the fall of cloud stocks, providing insights into what this decline could mean for the...","[{'entity_name': 'CRAMER', 'entity_type': 'PERSON', 'description': ""Jim Cramer, a well-known financial commentator and host of 'Mad Money' on CNBC, who is analyzing the fall of cloud...","[{'src_id': 'CRAMER', 'tgt_id': 'CLOUD STOCKS', 'description': 'Jim Cramer is analyzing the fall of cloud stocks and its implications.', 'weight': 0.9, 'order': 1}, {'src_id': 'CLOUD STOCKS',...",✔️ [0.56]
2,* Mulberry manufactures 55 pct of leather goods in UK * Import cost of raw materials has gone up after Brexit vote * Brand fully...,"[{'entity_name': 'MULBERRY', 'entity_type': 'ORGANIZATION', 'description': 'A handbag maker committed to manufacturing in Britain, facing higher leather import costs post-Brexit. The company has two manufacturing plants...","[{'src_id': 'MULBERRY', 'tgt_id': 'UK', 'description': 'Mulberry manufactures 55% of its leather goods in the UK and remains committed to the country despite post-Brexit challenges.', 'weight':...","[{'entity_name': 'MULBERRY', 'entity_type': 'ORGANIZATION', 'description': 'A handbag maker committed to manufacturing in Britain, facing higher leather import costs post-Brexit. It produces 55% of its leather...","[{'src_id': 'MULBERRY', 'tgt_id': 'UK', 'description': 'Mulberry manufactures 55% of its leather goods in the UK and considers the UK its biggest market.', 'weight': 0.9, 'order':...",✔️ [0.28888888888888886]
3,"COLUMBIA, Md., April 25, 2018 (GLOBE NEWSWIRE) -- W. R. Grace & Co. (NYSE:GRA) today announced that it has declared a quarterly cash dividend of...","[{'entity_name': 'W. R. GRACE & CO.', 'entity_type': 'ORGANIZATION', 'description': 'A leading global supplier of catalysts and engineered materials, with two industry-leading business segments: Catalysts Technologies...","[{'src_id': 'W. R. GRACE & CO.', 'tgt_id': 'NYSE', 'description': 'W. R. Grace & Co. is listed on the New York Stock Exchange under the symbol...","[{'entity_name': 'W. R. GRACE & CO.', 'entity_type': 'ORGANIZATION', 'description': 'A leading global supplier of catalysts and engineered materials, with two industry-leading business segments: Catalysts Technologies...","[{'src_id': 'W. R. GRACE & CO.', 'tgt_id': 'RICH BADMINGTON', 'description': 'Rich Badmington is the Media Relations contact for W. R. Grace & Co., responsible for...",✔️ [0.15454545454545457]
4,"Top marketer joins company to lead global marketing and brand strategies FRAMINGHAM, Mass. & DUBLIN--(BUSINESS WIRE)-- Globoforce ® , a leading provider of human applications,...","[{'entity_name': 'MAGGIE FOX', 'entity_type': 'PERSON', 'description': ""A seasoned digital marketing executive who has served as CMO of Aeroplan and senior vice president of digital at...","[{'src_id': 'MAGGIE FOX', 'tgt_id': 'GLOBOFORCE', 'description': ""Maggie Fox has joined Globoforce as its new chief marketing officer, leading global marketing efforts and enhancing the company's...","[{'entity_name': 'MAGGIE FOX', 'entity_type': 'PERSON', 'description': ""A seasoned digital marketing executive who has served as CMO of Aeroplan and senior vice president of digital at...","[{'src_id': 'MAGGIE FOX', 'tgt_id': 'GLOBOFORCE', 'description': 'Maggie Fox joins Globoforce as the chief marketing officer, leading global marketing efforts and driving brand strategies.', 'weight': 1.0,...",✔️ [0.38076923076923064]


In [16]:
# Save the random-search-optimized program to `entity_relationship_rs_path`
# (the "..._rs.json" file under WORKING_DIR defined in the setup cell).
rs_model.save(entity_relationship_rs_path)

[('extractor', Predict(StringSignature(input_text, entity_types -> rationale, entities, relationships
    instructions='Signature for extracting both entities and relationships from input text.'
    input_text = Field(annotation=str required=True json_schema_extra={'desc': 'The text to extract entities and relationships from.', '__dspy_field_type': 'input', 'prefix': 'Input Text:'})
    entity_types = Field(annotation=str required=True json_schema_extra={'__dspy_field_type': 'input', 'prefix': 'Entity Types:', 'desc': '${entity_types}'})
    rationale = Field(annotation=str required=True json_schema_extra={'prefix': "Reasoning: Let's think step by step in order to", 'desc': '${produce the output fields}. We ...', '__dspy_field_type': 'output'})
    entities = Field(annotation=str required=True json_schema_extra={'desc': '\n        Format:\n        {\n            "entities": [\n                {\n                    "entity_name": "ENTITY NAME",\n                    "entity_type": "ENTI

In [13]:
# Optimize the extraction program with MIPROv2, using the same LM both to
# propose prompts and to run the task, and entity recall as the objective.
#
# NOTE(review): the recorded output of this cell ends with
# `AssertionError: No input variables found in the example`, raised after the
# proposer selected its tip. The cause is not visible from this cell --
# TODO investigate before relying on `miprov2_model` downstream.

# Options forwarded to the evaluator used inside `compile` (no result table).
kwargs = {
    "num_threads": os.cpu_count(),
    "display_progress": True,
    "display_table": 0,
}

optimizer = MIPROv2(
    metric=entity_recall_metric,
    prompt_model=lm,
    task_model=lm,
    num_candidates=4,
    init_temperature=1.0,
    verbose=True,
)

# Only the first 4 training examples are used here; the full validation set
# is passed as `valset`.
miprov2_model = optimizer.compile(
    model,
    trainset=trainset[:4],
    valset=valset,
    num_batches=10,
    max_bootstrapped_demos=4,
    max_labeled_demos=5,
    eval_kwargs=kwargs,
    requires_permission_to_run=False,
)
miprov2_model


Please be advised that based on the parameters you have set, the maximum number of LM calls is projected as follows:


[93m- Prompt Model: [94m[1m10[0m[93m data summarizer calls + [94m[1m4[0m[93m * [94m[1m2[0m[93m lm calls in program + ([94m[1m3[0m[93m) lm calls in program aware proposer = [94m[1m21[0m[93m prompt model calls[0m
[93m- Task Model: [94m[1m25[0m[93m examples in minibatch * [94m[1m10[0m[93m batches + [94m[1m4[0m[93m examples in train set * [94m[1m1[0m[93m full evals = [94m[1m254[0m[93m task model calls[0m

[93m[1mEstimated Cost Calculation:[0m

[93mTotal Cost = (Number of calls to task model * (Avg Input Token Length per Call * Task Model Price per Input Token + Avg Output Token Length per Call * Task Model Price per Output Token) 
            + (Number of calls to prompt model * (Avg Input Token Length per Call * Task Prompt Price per Input Token + Avg Output Token Length per Call * Prompt Model Price per Output Token).[0m


INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"


summary: Prediction(
    summary='The dataset comprises news articles covering diverse topics such as political events, economic data, and corporate news, with each example detailing entities and their relationships. This structured content, presented in formal journalistic syntax, is highly relevant to current events and suitable for training models in entity recognition, relationship extraction, and summarization tasks.'
)
DATA SUMMARY: The dataset comprises news articles covering diverse topics such as political events, economic data, and corporate news, with each example detailing entities and their relationships. This structured content, presented in formal journalistic syntax, is highly relevant to current events and suitable for training models in entity recognition, relationship extraction, and summarization tasks.


  0%|          | 0/4 [00:00<?, ?it/s]INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
DEBUG:nano-graphrag:Entities: 29 | Missed Entities: 0 | Total Entities: 29
DEBUG:nano-graphrag:Relationships: 19 | Missed Relationships: 7 | Total Relationships: 26
DEBUG:nano-graphrag:Direct Relationships: 26 | Second-order: 0 | Third-order: 0 | Total Relationships: 26
 25%|██▌       | 1/4 [03:32<10:37, 212.60s/it]INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
DEBUG:nano-graphrag:Entities: 6 | Missed Entities: 0 | Total Entities: 6
DEBUG:nano-graphrag:Relationships: 5 | Missed Relationships: 0 | Total Relationships: 5
DEBUG:nano-graphrag:Direct Relationships: 5 | Second-order: 0 | Third-order: 0 | Total Relationships: 5
 50%|█████     | 2/4 

Bootstrapped 4 full traces after 4 examples in round 0.


  0%|          | 0/4 [00:00<?, ?it/s]INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
DEBUG:nano-graphrag:Entities: 17 | Missed Entities: 0 | Total Entities: 17
DEBUG:nano-graphrag:Relationships: 12 | Missed Relationships: 8 | Total Relationships: 20
DEBUG:nano-graphrag:Direct Relationships: 17 | Second-order: 3 | Third-order: 0 | Total Relationships: 20
 25%|██▌       | 1/4 [02:32<07:36, 152.01s/it]INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.deepseek.com/chat/completions "HTTP/1.1 200 OK"
DEBUG:nano-graphrag:Entities: 29 | Missed Entities: 0 | Total Entities: 29
DEBUG:nano-graphrag:Relationships: 18 | Missed Relationships: 10 | Total Relationships: 28
DEBUG:nano-graphrag:Direct Relationships: 28 | Second-order: 0 | Third-order: 0 | Total Relationships: 28
DEBUG:nano-gra

Bootstrapped 4 full traces after 4 examples in round 0.
Using a randomly generated configuration for our grounded proposer.
Selected tip: description





AssertionError: No input variables found in the example

In [14]:
# Debugging aid: show the most recent interaction (n=1) recorded by `lm`,
# to see the exact prompt that was sent.
lm.inspect_history(n=1)




Signature for combined self-reflection on extracted entities and relationships.
Self-reflection is on the completeness and quality of both the extracted entities and relationships.

---

Follow the following format.

Input Text: The original input text.

Entity Types: ${entity_types}

Entities:
List of extracted entities.
        Format:
        {
            "entities": [
                {
                    "entity_name": "ENTITY NAME",
                    "entity_type": "ENTITY TYPE",
                    "description": "Detailed description",
                    "importance_score": "Importance score of the entity. Should be between 0 and 1 with 1 being the most important."
                }
            ]
        }

Relationships:
List of extracted relationships.
        Format:
        {
            "relationships": [
                {
                    "src_id": "SOURCE ENTITY",
                    "tgt_id": "TARGET ENTITY",
                    "description": "Detailed descri

'\n\n\nSignature for combined self-reflection on extracted entities and relationships.\nSelf-reflection is on the completeness and quality of both the extracted entities and relationships.\n\n---\n\nFollow the following format.\n\nInput Text: The original input text.\n\nEntity Types: ${entity_types}\n\nEntities:\nList of extracted entities.\n        Format:\n        {\n            "entities": [\n                {\n                    "entity_name": "ENTITY NAME",\n                    "entity_type": "ENTITY TYPE",\n                    "description": "Detailed description",\n                    "importance_score": "Importance score of the entity. Should be between 0 and 1 with 1 being the most important."\n                }\n            ]\n        }\n\nRelationships:\nList of extracted relationships.\n        Format:\n        {\n            "relationships": [\n                {\n                    "src_id": "SOURCE ENTITY",\n                    "tgt_id": "TARGET ENTITY",\n              

In [None]:
# Evaluate the MIPROv2-optimized program (`miprov2_model`) on the dev set,
# once per metric. The aggregate score of each run is kept in
# `miprov2_scores` (keyed by the metric function's name) and shown as the
# cell's final value instead of being discarded.
#
# NOTE(review): this cell has no execution count, and `miprov2_model` is
# assigned by the MIPROv2 compile cell, whose recorded run raised an
# AssertionError -- confirm that cell succeeds before running this one.
metrics = [entity_recall_metric, relationship_similarity_metric]
miprov2_scores = {}
for metric in metrics:
    evaluate = Evaluate(
        devset=devset,
        metric=metric,
        num_threads=os.cpu_count(),
        display_progress=True,
        display_table=5,  # render the first 5 per-example rows
    )
    # With the default return flags, calling the evaluator returns the
    # aggregate score for this metric -- TODO confirm for the installed DSPy version.
    miprov2_scores[metric.__name__] = evaluate(miprov2_model)
miprov2_scores