In [1]:
# Install the packages this notebook imports.
# NOTE(review): no versions are pinned, so a later re-run may resolve to different releases.
%pip install openai graphrag pandas requests python-dotenv langchain numpy tiktoken matplotlib scikit-learn pyyaml pydantic instructor
# Wipe the installer's log from the cell output.
from IPython.display import clear_output ; clear_output()

In [2]:
from dotenv import load_dotenv
import os
load_dotenv()

# Azure OpenAI is used only when an Azure endpoint is configured AND no plain
# OpenAI key is present; in every other case the regular OpenAI client is built.
is_azure = bool(os.getenv("AZURE_OPENAI_ENDPOINT", "")) and not os.getenv("OPENAI_API_KEY", "")

# Model (or deployment) names, overridable through the environment.
GPT_4_O_MODEL_NAME = os.environ.get("GPT_4_O_MODEL_NAME", "gpt-4o")
TEXT_EMBEDDING_3_LARGE_MODEL_NAME = os.environ.get("TEXT_EMBEDDING_3_LARGE_MODEL_NAME", "text-embedding-3-large")

# Build the shared client `oai`. Each branch defines only its own credential
# globals, so later cells must check `is_azure` before reading them.
if not is_azure:
  OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
  from openai import OpenAI
  oai = OpenAI(api_key=OPENAI_API_KEY)
else:
  AZURE_OPENAI_ENDPOINT = os.environ.get("AZURE_OPENAI_ENDPOINT")
  AZURE_OPENAI_API_KEY = os.environ.get("AZURE_OPENAI_API_KEY")
  AZURE_OPENAI_API_VERSION = "2024-05-01-preview"
  from openai import AzureOpenAI
  oai = AzureOpenAI(
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    api_key=AZURE_OPENAI_API_KEY,
    api_version=AZURE_OPENAI_API_VERSION,
  )

In [3]:
import requests
import os

# Make sure the local cache directory exists (no-op when it is already there).
os.makedirs('data', exist_ok=True)

# Download the article once (as Markdown, through the r.jina.ai proxy URL) and
# cache it on disk; later runs read the cached copy instead of hitting the network.
if not os.path.exists('data/french_revolution.md'):
  response = requests.get(
    "https://r.jina.ai/https://en.wikipedia.org/wiki/French_Revolution",
    timeout=60,  # do not hang the notebook forever on a stalled connection
  )
  # Fail loudly on HTTP errors so an error page is never cached as the article.
  response.raise_for_status()
  # Keep only the part of the page that precedes the "See also" section.
  french_revolution = response.text.split('\nSee also')[0]
  # Explicit UTF-8: the platform default encoding may be unable to encode the text.
  with open('data/french_revolution.md', 'w', encoding='utf-8') as f:
    f.write(french_revolution)
else:
  with open('data/french_revolution.md', 'r', encoding='utf-8') as f:
    french_revolution = f.read()

# Show the first characters as a quick sanity check of what was loaded.
print(french_revolution[:123])

Title: French Revolution

URL Source: https://en.wikipedia.org/wiki/French_Revolution

Published Time: 2001-10-18T00:19:10Z


In [4]:
from langchain.text_splitter import MarkdownTextSplitter
import pandas as pd

# Build the embeddings table once and cache it as parquet; later runs load the cache.
# Columns: 'Topic' (short LLM-generated label), 'Text' (the chunk), 'Embedding' (its vector).
if not os.path.exists('data/embeddings.parquet'):
  splitter = MarkdownTextSplitter(chunk_size=500, chunk_overlap=250)
  chunks = splitter.split_text(french_revolution)

  # Embed all chunks in a single request; data[i] is used as the vector of chunks[i].
  chunk_embeddings = oai.embeddings.create(
    input=chunks,
    model=TEXT_EMBEDDING_3_LARGE_MODEL_NAME
  )

  # Collect rows in a list and build the frame once instead of growing it row by row.
  rows = []
  for i, chunk in enumerate(chunks):
    # Reset per chunk: otherwise a failed call would silently reuse the previous
    # chunk's topic (or raise NameError when the very first call fails).
    topic = None
    try:
      topic = oai.chat.completions.create(
        model=GPT_4_O_MODEL_NAME,
        messages=[
          {
            "role": "system",
            "content": ("Read the user-provided text carefully and output its topic as a short sentence. "
                        "For example: 'Key events in the life of George Washington', 'Inflation in the Weimar republic'. "
                        "Do not add any additional text such as punctuation, markup, or quoting. Only output the topic.")},
          {"role": "user", "content": chunk}
        ],
        max_tokens=23,
        temperature=0.5,
      ).choices[0].message.content
    except Exception as error:
      # Best effort: a failed topic call must not abort the whole run, but report it.
      print(f'Topic generation failed for chunk {i} ({error!r}); using the chunk prefix instead')
    # Fall back to the first characters of the chunk when no topic was produced.
    rows.append([topic or chunk[:23], chunk, chunk_embeddings.data[i].embedding])

  embeddings = pd.DataFrame(rows, columns=['Topic', 'Text', 'Embedding'])
  embeddings.to_parquet('data/embeddings.parquet')
else:
  embeddings = pd.read_parquet('data/embeddings.parquet')

embeddings

Unnamed: 0,Topic,Text,Embedding
0,The French Revolution,Title: French Revolution\n\nURL Source: https:...,"[-0.024158839136362076, 0.0018313959008082747,..."
1,The French Revolution,"The Storming of the Bastille, 14 July 1789\n\n...","[-0.005255864467471838, 0.0037901357281953096,..."
2,The French Revolution,The French Revolution[a] was a period of polit...,"[-0.027185581624507904, -0.016875529661774635,..."
3,Causes and early events of the French Revolution,Its causes are generally agreed to be a combin...,"[-0.020989766344428062, -0.024809090420603752,..."
4,Key events during the French Revolution,which was converted into a National Assembly i...,"[-0.009496822953224182, -0.017249926924705505,..."
...,...,...,...
296,Alphonse Aulard and his contributions to the s...,evidence.[266][267] Alphonse Aulard (1849–1928...,"[-0.00028573774034157395, 0.02177402749657631,..."
297,Marxist socio-economic analysis of the French ...,Socio-economic analysis and a focus on the exp...,"[-0.0038298999425023794, 0.006939169950783253,..."
298,Alfred Cobban's critique of Jacobin-Marxist in...,Alfred Cobban challenged Jacobin-Marxist socia...,"[0.004117575474083424, 0.00744161382317543, -0..."
299,Interpretations of the French Revolution by Fr...,"In their 1965 work, La Revolution française, F...","[0.017132386565208435, 0.012499384582042694, -..."


In [5]:
import numpy as np
import tiktoken

def cosine_similarity(vector1, vector2):
  """Return the cosine of the angle between two equally sized vectors.

  Args:
    vector1: First vector (any 1-D sequence accepted by numpy).
    vector2: Second vector, same length as `vector1`.

  Returns:
    The dot product divided by the product of the Euclidean norms, or 0.0 when
    either vector has zero norm (the ratio is undefined there; returning 0.0
    avoids a division by zero and a NaN that would propagate into rankings).
  """
  norm_product = np.linalg.norm(vector1) * np.linalg.norm(vector2)
  if norm_product == 0:
    return 0.0
  return np.dot(vector1, vector2) / norm_product

# Tokenizer used by embeddings_search to count the tokens of each retrieved chunk.
# The model name is hard-coded to 'gpt-4o' here rather than read from GPT_4_O_MODEL_NAME.
tokenizer = tiktoken.encoding_for_model('gpt-4o')

def embeddings_search(query, max_tokens=10000, k=100, min_similarity=0.2):
  """Retrieve the chunks most similar to `query` within a token budget.

  Args:
    query: Free-text question to embed and search for.
    max_tokens: Upper bound on the summed token count of the returned chunks.
    k: Maximum number of top-ranked chunks considered.
    min_similarity: Chunks with a lower cosine similarity are discarded.

  Returns:
    A DataFrame with the columns of the global `embeddings` table plus
    'Similarity' and 'Tokens', sorted by descending similarity. The global
    table itself is left untouched.
  """
  query_embedding = oai.embeddings.create(
    input=[query],
    model=TEXT_EMBEDDING_3_LARGE_MODEL_NAME
  ).data[0].embedding

  similarity = embeddings['Embedding'].apply(lambda emb: cosine_similarity(emb, query_embedding))
  ranked = (
    embeddings
    .assign(Similarity=similarity)  # returns a new frame, `embeddings` is not mutated
    .sort_values(by='Similarity', ascending=False)
    .head(k)
  )
  # Explicit copy so the column assignment below never writes into a slice
  # of another frame (avoids pandas' SettingWithCopyWarning).
  results = ranked[ranked['Similarity'] >= min_similarity].copy()
  results['Tokens'] = results['Text'].apply(lambda txt: len(tokenizer.encode(txt)))

  # Keep the longest best-ranked prefix that fits the budget. Token counts are
  # non-negative, so the running total never decreases and this equals dropping
  # rows from the end until the sum fits.
  return results[results['Tokens'].cumsum() <= max_tokens]

In [6]:
from graphrag.query.structured_search.global_search.reduce_system_prompt import REDUCE_SYSTEM_PROMPT as SYSTEM_PROMPT
import re

# Answer-format instruction used as the default `response_type` of ask_embeddings and ask_graph.
DEFAULT_RESPONSE_TYPE = 'Summarize and explain in 1-2 paragraphs with bullet points using at most 300 tokens'
# Context budget in tokens: passed to embeddings_search as max_tokens and handed
# to the GraphRAG query process through an environment variable in ask_graph.
DEFAULT_MAX_CONTEXT_TOKENS = 10000

def remove_data(text):
  """Strip `[Data: ...]` citation markers from a generated answer.

  Spaces or tabs directly in front of a marker are removed with it, so
  "Terror [Data: Reports (1)]." becomes "Terror." rather than "Terror .".
  Line breaks are left alone to keep the Markdown layout intact.

  Args:
    text: Answer text that may contain citation markers.

  Returns:
    The text without markers and without leading/trailing whitespace.
  """
  return re.sub(r'[ \t]*\[Data:.*?\]', '', text).strip()

def ask_embeddings(query, response_type=DEFAULT_RESPONSE_TYPE):
  """Answer `query` using embeddings retrieval followed by one chat completion.

  The chunks returned by embeddings_search are joined into a single context
  string and inserted, together with `response_type`, into SYSTEM_PROMPT
  (GraphRAG's REDUCE_SYSTEM_PROMPT, imported in this cell). The model's reply
  is returned with its `[Data: ...]` markers stripped by remove_data.

  Args:
    query: The user's question.
    response_type: Instruction describing the desired answer format.

  Returns:
    The cleaned answer text.
  """
  context_rows = embeddings_search(query, max_tokens=DEFAULT_MAX_CONTEXT_TOKENS)
  context_text = "---\n---\n".join(context_rows['Text'].tolist())

  system_message = {
    "role": "system",
    "content": SYSTEM_PROMPT.format(response_type=response_type, report_data=context_text),
  }
  user_message = {"role": "user", "content": query}

  completion = oai.chat.completions.create(
    model=GPT_4_O_MODEL_NAME,
    messages=[system_message, user_message],
    max_tokens=4000,
    temperature=0.5,
  )
  answer = completion.choices[0].message.content
  return remove_data(answer)

In [7]:
from IPython.display import Markdown

# Ask the embeddings-based pipeline a biographical question and render the answer as Markdown.
result = ask_embeddings('Who was Robespierre and what was his role in the French revolution?')

Markdown(result)

### Maximilien Robespierre: Role in the French Revolution

Maximilien Robespierre was a central figure in the French Revolution, known for his radical policies and leadership during the Reign of Terror. His influence and actions had significant impacts on the course of the Revolution.

#### Key Points:

- **Political Leadership**: Robespierre was a leading member of the radical Montagnards faction and played a crucial role in the Committee of Public Safety, which became the de facto executive government during the Reign of Terror .
  
- **Policies and Actions**:
  - **Reign of Terror**: Under his influence, the Law of 22 Prairial was enacted, which expedited the execution of "enemies of the people" without allowing them to defend themselves. This led to a significant increase in executions .
  - **Universal Male Suffrage**: He supported radical reforms, including universal male suffrage, which were included in the Constitution of 1793, although these reforms were suspended following political unrest .
  - **Opposition to Other Factions**: Robespierre was instrumental in the arrests and executions of other revolutionary leaders such as Georges Danton and Jacques Hébert, whom he saw as threats to the stability of the regime .

- **Downfall and Execution**: Robespierre's refusal to name alleged conspirators against the Republic and his increasingly dictatorial behavior led to his arrest by the National Convention. He was executed on 28 July 1794, marking the end of the Reign of Terror .

Robespierre's legacy is complex, with some viewing him as a defender of revolutionary ideals and others as a tyrant whose actions led to widespread fear and violence. His tenure significantly shaped the trajectory of the French Revolution.

In [8]:
# Ask the embeddings-based pipeline for a timeline and render the answer as Markdown.
result = ask_embeddings('Timeline of the French revolution')

Markdown(result)

### Timeline of the French Revolution

The French Revolution, spanning from 1789 to 1799, was a period of profound political and social upheaval in France. Here are the key events:

- **1789:**
  - **May 5:** Estates General convened to address the financial crisis.
  - **June 17:** Formation of the National Assembly.
  - **July 14:** Storming of the Bastille, marking the revolution's symbolic beginning.
  - **August 26:** Declaration of the Rights of Man and of the Citizen.

- **1791:**
  - **September:** The Constitution of 1791 is adopted, establishing a constitutional monarchy .

- **1792:**
  - **April:** Outbreak of the French Revolutionary Wars.
  - **August 10:** Insurrection leading to the fall of the monarchy.
  - **September 22:** Proclamation of the French First Republic .

- **1793:**
  - **January 21:** Execution of Louis XVI.
  - **June:** Start of the Reign of Terror under the Committee of Public Safety.
  - **July:** Execution of Marie Antoinette .

- **1794:**
  - **July 28:** Execution of Robespierre, marking the end of the Reign of Terror.

- **1795:**
  - **August:** Establishment of the Directory, replacing the National Convention.

- **1799:**
  - **November 9:** Coup of 18 Brumaire, leading to the rise of Napoleon Bonaparte and the establishment of the French Consulate, marking the end of the French Revolution .

The Revolution led to significant changes including the abolition of feudalism, the establishment of a republic, and the spread of democratic ideals.

In [9]:
import yaml

if not os.path.exists('data/graphrag'):
  !python -m graphrag.index --init --root data/graphrag

with open('data/graphrag/settings.yaml', 'r') as f:
  settings_yaml = yaml.load(f, Loader=yaml.FullLoader)
settings_yaml['llm']['model'] = GPT_4_O_MODEL_NAME
settings_yaml['llm']['api_key'] = AZURE_OPENAI_API_KEY if is_azure else OPENAI_API_KEY
settings_yaml['llm']['type'] = 'azure_openai_chat' if is_azure else 'openai_chat'
settings_yaml['embeddings']['llm']['api_key'] = AZURE_OPENAI_API_KEY if is_azure else OPENAI_API_KEY
settings_yaml['embeddings']['llm']['type'] = 'azure_openai_embedding' if is_azure else 'openai_embedding'
settings_yaml['embeddings']['llm']['model'] = TEXT_EMBEDDING_3_LARGE_MODEL_NAME
if is_azure:
  settings_yaml['llm']['api_version'] = AZURE_OPENAI_API_VERSION
  settings_yaml['llm']['deployment_name'] = GPT_4_O_MODEL_NAME
  settings_yaml['llm']['api_base'] = AZURE_OPENAI_ENDPOINT
  settings_yaml['embeddings']['llm']['api_version'] = AZURE_OPENAI_API_VERSION
  settings_yaml['embeddings']['llm']['deployment_name'] = TEXT_EMBEDDING_3_LARGE_MODEL_NAME
  settings_yaml['embeddings']['llm']['api_base'] = AZURE_OPENAI_ENDPOINT

with open('data/graphrag/settings.yaml', 'w') as f:
  yaml.dump(settings_yaml, f)

if not os.path.exists('data/graphrag/input'):
  os.makedirs('data/graphrag/input')
  !cp data/french_revolution.md data/graphrag/input/french_revolution.txt
  !python -m graphrag.index --root ./data/graphrag

[2KInitializing project at data/graphrag
[2K🚀 [32mReading settings from data/graphrag/settings.yaml[0m
[2K⠸ GraphRAG Indexer 
[2K[1A[2K⠸ GraphRAG Indexer les loaded (0 filtered) [90m━━━━━━[0m [35m100%[0m [36m0:00:…[0m [33m0:00:…[0m
├── Loading Input (text) - 1 files loaded (0 filtered) [90m━━━━━━[0m [35m100%[0m [36m0:00:…[0m [33m0:00:…[0m
[2K[1A[2K[1A[2K⠸ GraphRAG Indexer 
├── Loading Input (text) - 1 files loaded (0 filtered) [90m━━━━━━[0m [35m100%[0m [36m0:00:…[0m [33m0:00:…[0m
[2K[1A[2K[1A[2K⠸ GraphRAG Indexer 
├── Loading Input (text) - 1 files loaded (0 filtered) [90m━━━━━━[0m [35m100%[0m [36m0:00:…[0m [33m0:00:…[0m
[2K[1A[2K[1A[2K⠸ GraphRAG Indexer 
├── Loading Input (text) - 1 files loaded (0 filtered) [90m━━━━━━[0m [35m100%[0m [36m0:00:…[0m [33m0:00:…[0m
[2K[1A[2K[1A[2K⠹ GraphRAG Indexer 
├── Loading Input (text) - 1 files loaded (0 filtered) [90m━━━━━━[0m [35m100%[0m [36m0:00:…[0m [33m0:00:…[0m
└── cre

In [10]:
import subprocess

def ask_graph(query, response_type=DEFAULT_RESPONSE_TYPE):
  """Answer `query` by running the GraphRAG query CLI (local search) in a subprocess.

  Args:
    query: The user's question.
    response_type: Instruction describing the desired answer format, passed
      to the CLI via --response_type.

  Returns:
    The text printed after 'Search Response: ', with `[Data: ...]` markers
    stripped by remove_data.

  Raises:
    subprocess.CalledProcessError: If the CLI exits with a non-zero status.
    RuntimeError: If the CLI output contains no 'Search Response: ' marker.
  """
  import sys  # local import: only needed to locate the kernel's interpreter

  token_budget = str(DEFAULT_MAX_CONTEXT_TOKENS)
  env = os.environ.copy() | {
    'GRAPHRAG_GLOBAL_SEARCH_MAX_TOKENS': token_budget,
    # The command below uses --method local, so the budget is set for local
    # search as well. NOTE(review): variable name taken from GraphRAG's
    # env-var configuration docs — confirm for the installed version.
    'GRAPHRAG_LOCAL_SEARCH_MAX_TOKENS': token_budget,
  }
  command = [
    # Run the interpreter of this kernel (where graphrag was installed) rather
    # than whatever `python` happens to be first on PATH.
    sys.executable, '-m', 'graphrag.query',
    '--root', './data/graphrag',
    '--method', 'local',
    '--response_type', response_type,
    query,
  ]
  output = subprocess.check_output(command, universal_newlines=True, env=env)

  # Everything after the first marker is the answer; the answer itself may
  # legitimately contain the marker text again, so split only once.
  marker = 'Search Response: '
  _, found, answer = output.partition(marker)
  if not found:
    raise RuntimeError(f'graphrag.query output has no {marker!r} section: {output[-500:]!r}')
  return remove_data(answer)

In [11]:
from IPython.display import Markdown

# Ask the GraphRAG pipeline the same biographical question and render the answer as Markdown.
result = ask_graph('Who was Robespierre and what was his role in the French revolution?')

Markdown(result)

[2024-07-09T11:26:06Z WARN  lance::dataset] No existing dataset at /workspaces/battle-of-the-semantics/lancedb/description_embedding.lance, it will be created


### Robespierre and His Role in the French Revolution

**Maximilien Robespierre** was a prominent and controversial political figure during the French Revolution, known for his significant influence and radical policies. His role can be summarized as follows:

- **Advocate for the Third Estate**: Robespierre was deeply involved in advocating for the Third Estate, organizing meetings, petitions, and literature to support their cause .
- **Leadership in the Legislative Assembly**: He proposed a motion to bar existing deputies from elections, gaining substantial support in political clubs .
- **Cult of the Supreme Being**: Robespierre introduced this revolutionary cult and led a lavish festival on 8 June, which faced opposition and ridicule, contributing to his downfall .
- **Montagnards Leader**: As a leading figure of the Montagnards, a radical political faction, Robespierre played a crucial role in revolutionary activities alongside Georges Danton and Jean-Paul Marat .
- **Reign of Terror**: His leadership during this period was marked by extreme political repression and mass executions, leading to accusations of dictatorship and his eventual execution on 28 July 1794 .

Robespierre's legacy remains complex, as he was both a champion of revolutionary ideals and a figure associated with the excesses of the Reign of Terror.

In [12]:
# Ask the GraphRAG pipeline for a timeline and render the answer as Markdown.
result = ask_graph('Timeline of the French revolution')

Markdown(result)

# Timeline of the French Revolution

The French Revolution was a period of radical social and political upheaval in France from 1789 to 1799. Here are the key events:

- **1789**
  - **May 5**: The Estates General convenes, marking the beginning of the revolution .
  - **July 14**: The Storming of the Bastille, a pivotal event symbolizing the uprising against the monarchy .
  - **August**: The National Assembly adopts the Declaration of the Rights of Man and of the Citizen .
  - **October**: The March to Versailles by Republican Women .

- **1792**
  - **April**: The French Revolutionary Wars begin .
  - **August 10**: The Insurrection of 10 August leads to the fall of the monarchy .
  - **September**: Proclamation of the French First Republic .

- **1793**
  - **January 21**: Execution of Louis XVI .
  - **September**: The Reign of Terror begins, characterized by mass executions and political repression .

- **1794**
  - **July 28**: Execution of Robespierre, marking the end of the Reign of Terror .

- **1799**
  - **November 9**: The Coup of 18 Brumaire leads to the establishment of the French Consulate, ending the revolution .

The French Revolution had profound impacts on France and Europe, including the abolition of feudalism, the spread of democratic ideals, and significant political and social changes .

In [13]:
# Evaluation set. Each entry holds the question text, the `response_type`
# instruction given to both pipelines, and a reference 'answer' (filled in only
# for the first question; empty for the others).
QUESTIONS = [
  {
    'question': 'When did the French Revolution officially begin and end, and how long did it last?',
    'response_type': 'One short sentence (max 23 tokens)',
    'answer': 'The French Revolution began on 5 May 1789 and ended on 9 November 1799, lasting 10 years, 6 months, and 4 days.'
  },
  {
    'question': 'What were the key outcomes of the French Revolution ?',
    'response_type': 'List of 7-10 bullet points',
    'answer': ''
  },
  {
    'question': 'How did the financial and political crisis contribute to the calling of the Estates-General in 1789?',
    'response_type': DEFAULT_RESPONSE_TYPE,
    'answer': ''
  },
  {
    'question': 'What role did the Enlightenment and previous revolutions play in shaping the French Revolution?',
    'response_type': DEFAULT_RESPONSE_TYPE,
    'answer': ''
  },
  {
    'question': 'Analyze how the various social classes in France were affected by the Revolution and the policies implemented, such as the Civil Constitution of the Clergy and the abolition of feudal dues.',
    'response_type': DEFAULT_RESPONSE_TYPE,
    'answer': ''
  },
]

# Run every question through both pipelines and store the answers on the entry
# itself under 'answer_graph' and 'answer_embeddings'. Nothing is cached, so
# re-running this cell repeats all the calls.
for question in QUESTIONS:
  question['answer_graph'] = ask_graph(question['question'], question['response_type'])
  question['answer_embeddings'] = ask_embeddings(question['question'], question['response_type'])

In [14]:
from pprint import pprint
# Pretty-print each question together with the answers collected from both pipelines.
pprint(QUESTIONS)

[{'answer': 'The French Revolution began on 5 May 1789 and ended on 9 November '
            '1799, lasting 10 years, 6 months, and 4 days.',
  'answer_embeddings': 'The French Revolution officially began on 5 May 1789 '
                       'and ended on 9 November 1799, lasting 10 years, 6 '
                       'months, and 4 days .',
  'answer_graph': 'The French Revolution began on 5 May 1789 and ended on 9 '
                  'November 1799, lasting 10 years, 6 months, and 4 days .',
  'question': 'When did the French Revolution officially begin and end, and '
              'how long did it last?',
  'response_type': 'One short sentence (max 23 tokens)'},
 {'answer': '',
  'answer_embeddings': '### Key Outcomes of the French Revolution\n'
                       '\n'
                       '- **Abolition of Feudalism**: The French Revolution '
                       'led to the abolition of feudalism, ending the '
                       'privileges of the aristocracy and the C