In [1]:
# Install every third-party package the notebook uses into the kernel's environment.
# NOTE(review): no versions are pinned. Later cells rely on `python -m graphrag.index --init`,
# `python -m graphrag.query` and `graphrag.query.structured_search.global_search.reduce_system_prompt`;
# confirm these still exist in whatever graphrag release gets installed, or pin a known-good version.
%pip install openai graphrag pandas requests python-dotenv langchain numpy tiktoken matplotlib scikit-learn pyyaml pydantic instructor
# Wipe the pip log from the cell output once installation has finished.
from IPython.display import clear_output ; clear_output()

In [3]:
from dotenv import load_dotenv
import os
load_dotenv()

# Azure OpenAI is used only when an Azure endpoint is configured AND no plain
# OpenAI key is present; in every other case the public OpenAI API is used.
is_azure = bool(os.getenv("AZURE_OPENAI_ENDPOINT", default="")) and not os.getenv("OPENAI_API_KEY", default="")

# Model (or deployment) names; both can be overridden through the environment.
GPT_4_O_MODEL_NAME = os.getenv("GPT_4_O_MODEL_NAME", default="gpt-4o")
TEXT_EMBEDDING_3_LARGE_MODEL_NAME = os.getenv("TEXT_EMBEDDING_3_LARGE_MODEL_NAME", default="text-embedding-3-large")

# `oai` is the single client object every later cell talks to.
if not is_azure:
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
  from openai import OpenAI
  oai = OpenAI(api_key=OPENAI_API_KEY)
else:
  AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
  AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
  AZURE_OPENAI_API_VERSION = "2024-05-01-preview"
  from openai import AzureOpenAI
  oai = AzureOpenAI(
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    api_key=AZURE_OPENAI_API_KEY,
    api_version=AZURE_OPENAI_API_VERSION,
  )

In [4]:
import requests
import os

# Local cache directory; exist_ok avoids the check-then-create race.
os.makedirs('data', exist_ok=True)

if not os.path.exists('data/french_revolution.md'):
  # Fetch the article through the r.jina.ai proxy and keep everything before
  # the "See also" section.
  response = requests.get(
    "https://r.jina.ai/https://en.wikipedia.org/wiki/French_Revolution",
    timeout=60,  # never hang the notebook on a stalled connection
  )
  # Fail loudly instead of caching an HTTP error page as if it were the article.
  response.raise_for_status()
  french_revolution = response.text.split('\nSee also')[0]
  # The article contains non-ASCII characters, so the encoding is fixed
  # explicitly rather than left to the platform default.
  with open('data/french_revolution.md', 'w', encoding='utf-8') as f:
    f.write(french_revolution)
else:
  with open('data/french_revolution.md', 'r', encoding='utf-8') as f:
    french_revolution = f.read()

# Quick sanity check: show the first few header lines of the document.
print(french_revolution[:123])

Title: French Revolution

URL Source: https://en.wikipedia.org/wiki/French_Revolution

Published Time: 2001-10-18T00:19:10Z


In [5]:
from langchain.text_splitter import MarkdownTextSplitter
import pandas as pd

import warnings

# Build the (Topic, Text, Embedding) table once and cache it as parquet;
# later runs just read the cache back.
if not os.path.exists('data/embeddings.parquet'):
  splitter = MarkdownTextSplitter(chunk_size=300, chunk_overlap=100)
  chunks = splitter.split_text(french_revolution)

  # A single request embeds every chunk; the i-th result is paired with the i-th chunk below.
  chunk_embeddings = oai.embeddings.create(
    input=chunks,
    model=TEXT_EMBEDDING_3_LARGE_MODEL_NAME
  )

  records = []
  for i, chunk in enumerate(chunks):
    # Reset on every iteration: if the call below fails, the fallback must be
    # this chunk's own prefix, not the previous chunk's topic (and not a NameError
    # when the very first call fails).
    topic = None
    try:
      topic = oai.chat.completions.create(
        model=GPT_4_O_MODEL_NAME,
        messages=[
          {
            "role": "system",
            "content": ("Read the user-provided text carefully and output its topic as a short sentence. "
                        "For example: 'Key events in the life of George Washington', 'Inflation in the Weimar republic'. "
                        "Do not add any additional text such as punctuation, markup, or quoting. Only output the topic.")},
          {"role": "user", "content": chunk}
        ],
        max_tokens=23,
        temperature=0.5,
      ).choices[0].message.content
    except Exception as exc:
      # Topic labelling is best-effort, but a failure should be visible rather than silent.
      warnings.warn(f"Topic generation failed for chunk {i}; falling back to the chunk prefix: {exc!r}")
    records.append({
      'Topic': topic or chunk[:23],  # also covers an empty/None model reply
      'Text': chunk,
      'Embedding': chunk_embeddings.data[i].embedding,
    })

  # Build the frame in one go instead of appending row by row.
  embeddings = pd.DataFrame(records, columns=['Topic', 'Text', 'Embedding'])
  embeddings.to_parquet('data/embeddings.parquet')
else:
  embeddings = pd.read_parquet('data/embeddings.parquet')

embeddings

Unnamed: 0,Topic,Text,Embedding
0,The French Revolution,Title: French Revolution\n\nURL Source: https:...,"[-0.003698753658682108, -0.019621584564447403,..."
1,French Revolution,Markdown Content:\nJump to content\nMain menu\...,"[-0.01978607289493084, 0.009914387948811054, -..."
2,The French Revolution,"Tools\nFrom Wikipedia, the free encyclopedia\n...","[-0.032426539808511734, -0.009258555248379707,..."
3,The Storming of the Bastille,"The Storming of the Bastille, 14 July 1789","[0.006268054712563753, -0.0037634416949003935,..."
4,The French Revolution and its outcomes,"Date\t5 May 1789 – 9 November 1799\n(10 years,...","[-0.014528979547321796, 0.0037560712080448866,..."
...,...,...,...
465,Debate on the causes of the French Revolution,Alfred Cobban challenged Jacobin-Marxist socia...,"[-0.01219131052494049, -0.012063764035701752, ..."
466,Interpretation of the 1964 Revolution as a pol...,Revolution (1964). He argued the Revolution wa...,"[-0.0006750492611899972, 0.020813480019569397,..."
467,Interpretation of political decisions during t...,"In their 1965 work, La Revolution française, F...","[0.01703598164021969, 0.012475780211389065, -0..."
468,Historiography of the Revolution and the decli...,"From the 1990s, Western scholars largely aband...","[0.00682360865175724, 0.007321859709918499, -0..."


In [6]:
import numpy as np
import tiktoken

def cosine_similarity(vector1, vector2):
  """Return the cosine of the angle between two 1-D vectors.

  Args:
    vector1: Sequence or array of numbers.
    vector2: Sequence or array of numbers with the same length as ``vector1``.

  Returns:
    The dot product divided by the product of the Euclidean norms, or ``0.0``
    when either vector has zero norm (the cosine is undefined there; returning
    0.0 avoids a NaN/inf that would poison later sorting and thresholding).
  """
  denominator = np.linalg.norm(vector1) * np.linalg.norm(vector2)
  if denominator == 0:
    return 0.0
  return np.dot(vector1, vector2) / denominator

# Tokenizer used below to count tokens when trimming search results to a token budget.
# NOTE(review): the encoding is looked up for the literal 'gpt-4o' rather than
# GPT_4_O_MODEL_NAME, so it does not follow an overridden model/deployment name — confirm intended.
tokenizer = tiktoken.encoding_for_model('gpt-4o')

def embeddings_search(query, max_tokens=10000, k=100, min_similarity=0.2):
  """Return the stored chunks most similar to ``query``, within a token budget.

  Args:
    query: Text to search for; it is embedded with the same embedding model
      as the stored chunks.
    max_tokens: Upper bound on the summed token count of the returned texts.
    k: Maximum number of candidate rows kept after ranking.
    min_similarity: Rows with a cosine similarity below this are discarded.

  Returns:
    A DataFrame with the columns of ``embeddings`` plus ``Similarity`` and
    ``Tokens``, ordered from most to least similar. Lowest-ranked rows are
    dropped until the total of ``Tokens`` is at most ``max_tokens``.
  """
  query_embedding = oai.embeddings.create(
    input=[query],
    model=TEXT_EMBEDDING_3_LARGE_MODEL_NAME
  ).data[0].embedding

  similarity = embeddings['Embedding'].apply(lambda emb: cosine_similarity(emb, query_embedding))
  ranked = (
    embeddings
    .assign(Similarity=similarity)  # assign() returns a new frame; the global table is untouched
    .sort_values(by='Similarity', ascending=False)
    .head(k)
  )

  # Explicit copy so that adding a column below does not write into a slice
  # of `ranked` (which triggers pandas' SettingWithCopyWarning).
  results = ranked[ranked['Similarity'] >= min_similarity].copy()
  results['Tokens'] = results['Text'].apply(lambda txt: len(tokenizer.encode(txt)))

  # Token counts are non-negative, so the running total never decreases and this
  # mask selects exactly the longest best-first prefix that fits the budget.
  return results[results['Tokens'].cumsum() <= max_tokens]

In [7]:
from graphrag.query.structured_search.global_search.reduce_system_prompt import REDUCE_SYSTEM_PROMPT as SYSTEM_PROMPT
import re

# Default answer-format instruction; it is substituted into the system prompt as `response_type`.
DEFAULT_RESPONSE_TYPE = 'Summarize and explain in 1-2 paragraphs with bullet points using at most 300 tokens'
# Token budget for the retrieved context passed to the model.
DEFAULT_MAX_CONTEXT_TOKENS = 10000

def remove_data(text):
    """Strip ``[Data: ...]`` citation markers from a generated answer.

    Spaces or tabs directly in front of a marker are removed together with it,
    so "executions [Data: Reports (1)]." becomes "executions." instead of
    leaving a dangling "executions ." behind.

    Args:
        text: The answer text, possibly containing ``[Data: ...]`` markers.

    Returns:
        The text without markers, with leading/trailing whitespace trimmed.
    """
    # `[ \t]*` rather than `\s*` so that line breaks in front of a marker survive.
    return re.sub(r'[ \t]*\[Data:.*?\]', '', text).strip()

def ask_embeddings(query, response_type=DEFAULT_RESPONSE_TYPE):
  """Answer ``query`` using context retrieved by embedding similarity.

  The matching chunks are joined into one context string, placed into the
  system prompt together with ``response_type``, and the chat model's reply
  is returned with its ``[Data: ...]`` markers removed.

  Args:
    query: The user's question.
    response_type: Free-text description of the desired answer format.

  Returns:
    The model's answer as a string.
  """
  context_rows = embeddings_search(query, max_tokens=DEFAULT_MAX_CONTEXT_TOKENS)
  report_data = "---\n---\n".join(context_rows['Text'].tolist())

  system_message = {
    "role": "system",
    "content": SYSTEM_PROMPT.format(
      response_type=response_type,
      report_data=report_data,
    ),
  }
  user_message = {"role": "user", "content": query}

  completion = oai.chat.completions.create(
    model=GPT_4_O_MODEL_NAME,
    messages=[system_message, user_message],
    max_tokens=4000,
    temperature=0.5,
  )
  return remove_data(completion.choices[0].message.content)

In [8]:
from IPython.display import Markdown

# Embedding-based answer to an entity-focused question, rendered as Markdown.
result = ask_embeddings('Who was Robespierre and what was his role in the French revolution?')

Markdown(result)

### Maximilien Robespierre and His Role in the French Revolution

Maximilien Robespierre was a significant figure in the French Revolution, known for his radical and influential role.

- **Political Influence**: Robespierre was a leading member of the Jacobin club and a key figure in the National Convention. He opposed the criteria for "active citizens," gaining substantial support among the Parisian populace .
- **Reign of Terror**: He played a pivotal role during the Reign of Terror, a period marked by mass executions of perceived enemies of the revolution. Robespierre was instrumental in the establishment of the Committee of Public Safety, which oversaw these executions .
- **Radical Reforms**: He proposed and supported radical reforms, including universal male suffrage. He also influenced the drafting of a new Constitution in 1793, which was ratified but not implemented due to the ongoing conflict .
- **Downfall and Execution**: Robespierre's dominance ended with his arrest and execution on 28 July 1794, marking the end of the Reign of Terror. His death was followed by a wave of revenge killings against his supporters .

Robespierre's legacy is complex; while he was a champion of the revolution's radical phase, his methods and the resulting bloodshed have made him a controversial historical figure.

In [9]:
# Embedding-based answer to a whole-document question, rendered as Markdown.
result = ask_embeddings('Timeline of the French revolution')

Markdown(result)

### Timeline of the French Revolution

The French Revolution, spanning from May 5, 1789, to November 9, 1799, was a decade-long period marked by significant political and social upheaval in France. Below is a summary of key events and outcomes:

- **May 1789**: Estates General convened, leading to the formation of the National Assembly.
- **July 14, 1789**: Storming of the Bastille, a symbolic event that triggered the abolition of feudalism.
- **August 1792**: Insurrection of 10 August led to the fall of the monarchy and the establishment of the French First Republic in September .
- **January 1793**: Execution of Louis XVI, followed by the Reign of Terror where approximately 16,000 people were executed .
- **July 1794**: End of the Reign of Terror marked by the execution of Robespierre.
- **1795**: Establishment of the Directory, a five-member committee that governed France.
- **November 1799**: The coup of 18 Brumaire led by Napoleon Bonaparte, resulting in the formation of the French Consulate and the end of the Revolutionary period .

### Key Outcomes

- **Abolition of the Ancien Régime**: End of feudal privileges and establishment of a constitutional monarchy.
- **Proclamation of the French First Republic**: Shift from monarchy to republic governance.
- **Reign of Terror**: Period of extreme political repression and mass executions.
- **Rise of Napoleon Bonaparte**: Culminated in the establishment of the Consulate, marking the end of the French Revolution .

These events collectively transformed France from a feudal society under absolute monarchy to a republic advocating principles of liberty, equality, and fraternity.

In [10]:
import yaml

# Create the graphrag project skeleton once; skipped when data/graphrag already exists.
if not os.path.exists('data/graphrag'):
  !python -m graphrag.index --init --root data/graphrag

# Load the settings file and point it at the same models/credentials as the `oai` client.
# NOTE(review): yaml.load with FullLoader is used on a locally generated file; yaml.safe_load
# would be the more defensive choice if this file could ever come from elsewhere.
with open('data/graphrag/settings.yaml', 'r') as f:
  settings_yaml = yaml.load(f, Loader=yaml.FullLoader)
settings_yaml['llm']['model'] = GPT_4_O_MODEL_NAME
# NOTE(review): the API key is written in plain text into data/graphrag/settings.yaml by the
# dump below — keep that file out of version control.
settings_yaml['llm']['api_key'] = AZURE_OPENAI_API_KEY if is_azure else OPENAI_API_KEY
settings_yaml['llm']['type'] = 'azure_openai_chat' if is_azure else 'openai_chat'
settings_yaml['embeddings']['llm']['api_key'] = AZURE_OPENAI_API_KEY if is_azure else OPENAI_API_KEY
settings_yaml['embeddings']['llm']['type'] = 'azure_openai_embedding' if is_azure else 'openai_embedding'
settings_yaml['embeddings']['llm']['model'] = TEXT_EMBEDDING_3_LARGE_MODEL_NAME
# Azure additionally needs the API version, endpoint and deployment names; the model names
# are reused as deployment names here.
if is_azure:
  settings_yaml['llm']['api_version'] = AZURE_OPENAI_API_VERSION
  settings_yaml['llm']['deployment_name'] = GPT_4_O_MODEL_NAME
  settings_yaml['llm']['api_base'] = AZURE_OPENAI_ENDPOINT
  settings_yaml['embeddings']['llm']['api_version'] = AZURE_OPENAI_API_VERSION
  settings_yaml['embeddings']['llm']['deployment_name'] = TEXT_EMBEDDING_3_LARGE_MODEL_NAME
  settings_yaml['embeddings']['llm']['api_base'] = AZURE_OPENAI_ENDPOINT

# Write the modified settings back over the original file.
with open('data/graphrag/settings.yaml', 'w') as f:
  yaml.dump(settings_yaml, f)

# Copy the article into the input folder and run the indexer.
# NOTE(review): indexing is keyed on the input directory not existing yet, so a failed or
# interrupted index run is not retried on re-execution unless data/graphrag/input is removed.
if not os.path.exists('data/graphrag/input'):
  os.makedirs('data/graphrag/input')
  !cp data/french_revolution.md data/graphrag/input/french_revolution.txt
  !python -m graphrag.index --root ./data/graphrag

In [23]:
import subprocess

def ask_graph(query, response_type=DEFAULT_RESPONSE_TYPE):
  """Answer ``query`` with graphrag's global search, run as a subprocess.

  Args:
    query: The user's question.
    response_type: Free-text description of the desired answer format,
      forwarded through ``--response_type``.

  Returns:
    The text following the ``Search Response: `` marker in the command's
    output, with ``[Data: ...]`` markers removed.

  Raises:
    subprocess.CalledProcessError: If the graphrag command exits non-zero.
    RuntimeError: If the output does not contain the ``Search Response: `` marker.
  """
  import sys  # local import: only needed to locate the running interpreter

  marker = 'Search Response: '
  env = os.environ.copy() | {
    'GRAPHRAG_GLOBAL_SEARCH_MAX_TOKENS': str(DEFAULT_MAX_CONTEXT_TOKENS),
  }
  command = [
    # Use the kernel's own interpreter: that is where %pip installed graphrag,
    # whereas a bare 'python' resolves to whatever happens to be first on PATH.
    sys.executable, '-m', 'graphrag.query',
    '--root', './data/graphrag',
    '--method', 'global',
    '--response_type', response_type,
    query,
  ]
  output = subprocess.check_output(command, universal_newlines=True, env=env)

  # Everything after the first marker is the answer.
  _, found, answer = output.partition(marker)
  if not found:
    raise RuntimeError(f"graphrag.query output did not contain {marker!r}; output was:\n{output}")
  return remove_data(answer)

In [24]:
from IPython.display import Markdown

# Graph-based answer to the same entity-focused question, for side-by-side comparison.
result = ask_graph('Who was Robespierre and what was his role in the French revolution?')

Markdown(result)

### Maximilien Robespierre: Key Figure in the French Revolution

Maximilien Robespierre was a central and controversial figure during the French Revolution, known for his radical leadership and significant influence on the political landscape of the time. His actions and policies had profound impacts on the course of the revolution and the atmosphere in France.

#### Key Roles and Actions:
- **Leadership in the Reign of Terror**:
  - Robespierre played a crucial role in the Reign of Terror, a period marked by extreme political repression and mass executions, contributing to an atmosphere of fear and instability in Paris .
  - His leadership during this period led to significant political conflict and unrest, culminating in his execution on 28 July 1794, which marked the end of the Reign of Terror .

- **Advocacy for the Third Estate**:
  - He was deeply involved in advocating for the Third Estate, organizing meetings, petitions, and literature to support their cause, which underscored the growing influence of the common people in the political process .

- **Political Factions and Influence**:
  - Robespierre led the Montagnards, a radical political faction, alongside Georges Danton and Jean-Paul Marat, significantly influencing the course of the revolution .
  - He was associated with several political groups, including the Jacobins, Cordeliers, and the Society of Thirty, highlighting his extensive involvement in revolutionary activities .

- **Controversial Policies and Actions**:
  - He introduced the Cult of the Supreme Being, a revolutionary cult, and organized a lavish festival, which faced opposition and ridicule, contributing to his downfall .
  - Robespierre was involved in the arrest and execution of other key figures, such as Danton and Camille Desmoulins, reflecting his significant influence and the internal conflicts within the revolutionary factions .

Robespierre's radical stance and actions made him a pivotal yet divisive figure in the French Revolution, shaping its trajectory and leaving a lasting legacy on French history.

In [13]:
# Graph-based answer to the same whole-document question, for side-by-side comparison.
result = ask_graph('Timeline of the French revolution')

Markdown(result)

# Timeline of the French Revolution

The French Revolution was a period of radical social and political upheaval in France from 1789 to 1799. Here are the key events:

## 1789
- **May 5**: Estates General convened, marking the beginning of the revolution .
- **June 17**: National Assembly formed by the Third Estate.
- **July 14**: Storming of the Bastille, a pivotal event symbolizing the uprising against the monarchy .
- **August 26**: Declaration of the Rights of Man and of the Citizen adopted .

## 1790
- **July 14**: Fête de la Fédération celebrated to commemorate national unity .

## 1791
- **June 20-21**: Flight to Varennes, Louis XVI's failed attempt to escape Paris.
- **September 3**: Constitution of 1791 adopted, establishing a constitutional monarchy.

## 1792
- **April 20**: France declares war on Austria, beginning the French Revolutionary Wars .
- **August 10**: Insurrection of 10 August 1792, leading to the fall of the monarchy .
- **September 21**: Proclamation of the French First Republic .

## 1793
- **January 21**: Execution of Louis XVI .
- **September 5**: Beginning of the Reign of Terror, characterized by mass executions .

## 1794
- **July 28**: Execution of Robespierre, marking the end of the Reign of Terror.

## 1795
- **August 22**: Constitution of 1795 adopted, establishing the Directory.

## 1799
- **November 9**: Coup of 18 Brumaire, leading to the establishment of the French Consulate and marking the end of the French Revolution .

The French Revolution profoundly impacted France and the world, leading to the abolition of feudalism, the rise of democratic ideals, and significant political and social changes .

In [30]:
from pydantic import BaseModel, Field
from typing import Literal
import instructor
import json

# Evaluation set: each entry pairs a question with the answer format requested from both
# pipelines. The evaluation loop later adds 'evaluation_best_answer' and
# 'evaluation_explanation' lists to these same dicts.
QUESTIONS = [
  {
    'question': 'When did the French Revolution officially begin and end, and how long did it last?',
    'response_type': 'One short sentence (max 23 tokens)',
  },
  {
    'question': 'What were the key outcomes of the French Revolution ?',
    'response_type': 'List of 7-10 bullet points',  },
  {
    'question': 'How did the financial and political crisis contribute to the calling of the Estates-General in 1789?',
    'response_type': DEFAULT_RESPONSE_TYPE,
  },
  {
    'question': 'What role did the Enlightenment and previous revolutions play in shaping the French Revolution?',
    'response_type': DEFAULT_RESPONSE_TYPE,
  },
  {
    'question': 'Analyze how the various social classes in France were affected by the Revolution and the policies implemented, such as the Civil Constitution of the Clergy and the abolition of feudal dues.',
    'response_type': DEFAULT_RESPONSE_TYPE,
  },
]

# Structured verdict requested from the judge model via `response_model=EvalAnswers`.
# (Documented with comments on purpose: no docstring is added so the model definition
# itself stays exactly as it was.)
class EvalAnswers(BaseModel):
  # 1 or 2: which of the two submitted answers the judge prefers.
  best_answer: Literal[1, 2] = Field(..., description="The index of the best answer, evaluated for accuracy and relevance")
  # The judge's short justification for that choice.
  explanation: str = Field(..., description="Short explanation for the choice of the best answer (max 100 tokens)")

# Wrap the client so chat completions can be parsed straight into a pydantic model.
aoaix = instructor.from_openai(oai)

# Every question is answered by both pipelines and judged five times; the verdicts
# and explanations are accumulated on the question dict itself.
# NOTE(review): the graph answer is always submitted as answer 1 and the embeddings
# answer as answer 2 — consider whether a fixed order could bias the judge.
for question in QUESTIONS:
  for i in range(5):
    answer_graph = ask_graph(question['question'], question['response_type'])
    answer_embeddings = ask_embeddings(question['question'], question['response_type'])
    evaluation = aoaix.chat.completions.create(
      response_model=EvalAnswers,
      model=GPT_4_O_MODEL_NAME,
      messages=[
        {
          "role": "system",
          "content": ("Evaluate the two answers below based on accuracy and relevance to the question. "
                      "Select the index of the best answer (1 or 2).")
        },
        {
          "role": "user",
          "content": json.dumps({
            'question': question['question'],
            'answers': {1: answer_graph, 2: answer_embeddings},
          }),
        },
      ],
      max_tokens=150,
      temperature=0.5,
    )
    # setdefault creates the list on first use, then the verdict is appended.
    question.setdefault('evaluation_best_answer', []).append(
      'graph' if evaluation.best_answer == 1 else 'embeddings'
    )
    question.setdefault('evaluation_explanation', []).append(evaluation.explanation)

# Report, per question, the sequence of winners and the judge's explanations,
# followed by a blank separator line.
for question in QUESTIONS:
  print(
    f"Question: {question['question']}",
    f"Best Answer: {', '.join(question['evaluation_best_answer'])}",
    f"Explanations: {' ; '.join(question['evaluation_explanation'])}",
    "",
    sep="\n",
  )

Question: When did the French Revolution officially begin and end, and how long did it last?
Best Answer: embeddings, embeddings, embeddings, embeddings, embeddings
Explanations: Answer 2 is more accurate as it provides a precise duration of the French Revolution, including the months and days. ; Answer 2 provides both the start and end dates of the French Revolution and specifies the duration more precisely as 10 years, 6 months, and 4 days, making it more accurate and relevant. ; Answer 2 provides a more precise duration of the French Revolution, which is more accurate and relevant to the question. ; Answer 2 provides the exact duration of the French Revolution, which is more precise and relevant. ; Answer 2 is more accurate as it provides the precise duration of the French Revolution, including the months and days.

Question: What were the key outcomes of the French Revolution ?
Best Answer: graph, graph, graph, graph, graph
Explanations: Answer 1 is more comprehensive and accuratel