In [26]:
# Install the notebook's dependencies with the %pip line magic.
# NOTE(review): no versions are pinned, so a later re-run may pull different releases.
%pip install openai graphrag pandas requests python-dotenv langchain numpy tiktoken matplotlib scikit-learn pyyaml pydantic instructor
# Wipe the installer log from the cell output once the install has finished.
from IPython.display import clear_output ; clear_output()

In [27]:
from dotenv import load_dotenv
import os
# Load environment variables from a .env file (if any) before reading them below.
load_dotenv()

# Azure is selected only when an Azure endpoint is configured AND no OPENAI_API_KEY
# is present; whenever OPENAI_API_KEY is set, the plain OpenAI client is used.
is_azure = (
  os.getenv("AZURE_OPENAI_ENDPOINT", default="") != "" and
  os.getenv("OPENAI_API_KEY", default="") == ""
)

# Model names, overridable through environment variables of the same name.
GPT_4_O_MODEL_NAME = os.getenv("GPT_4_O_MODEL_NAME", default="gpt-4o")
TEXT_EMBEDDING_3_LARGE_MODEL_NAME = os.getenv("TEXT_EMBEDDING_3_LARGE_MODEL_NAME", default="text-embedding-3-large")

# `oai` is the client object used by the rest of the notebook for chat and embeddings.
if is_azure:
  AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
  AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
  AZURE_OPENAI_API_VERSION = "2024-05-01-preview"
  from openai import AzureOpenAI
  oai = AzureOpenAI(azure_endpoint=AZURE_OPENAI_ENDPOINT, api_key=AZURE_OPENAI_API_KEY, api_version=AZURE_OPENAI_API_VERSION)
else:
  # Note: the AZURE_* names above are not defined on this branch (and OPENAI_API_KEY
  # is not defined on the Azure branch), so later code must test `is_azure` first.
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
  from openai import OpenAI
  oai = OpenAI(api_key=OPENAI_API_KEY)

In [28]:
import requests
import os

# Fetch the source article once and cache it under data/; later runs read the cache.
os.makedirs('data', exist_ok=True)

if not os.path.exists('data/french_revolution.md'):
  # A timeout keeps the cell from hanging indefinitely, and raise_for_status()
  # prevents an HTTP error page from being written to the cache and reused forever.
  response = requests.get(
    "https://r.jina.ai/https://en.wikipedia.org/wiki/French_Revolution",
    timeout=60,
  )
  response.raise_for_status()
  # Keep only the text that precedes the first "See also" line.
  french_revolution = response.text.split('\nSee also')[0]
  # Explicit UTF-8 so the cache does not depend on the platform's default encoding.
  with open('data/french_revolution.md', 'w', encoding='utf-8') as f:
    f.write(french_revolution)
else:
  with open('data/french_revolution.md', 'r', encoding='utf-8') as f:
    french_revolution = f.read()

# Quick sanity check: show the beginning of the document.
print(french_revolution[:123])

Title: French Revolution

URL Source: https://en.wikipedia.org/wiki/French_Revolution

Published Time: 2001-10-18T00:19:10Z


In [29]:
from langchain.text_splitter import MarkdownTextSplitter
import pandas as pd

# Build (or load from cache) a table with one row per text chunk:
#   Topic     - short model-generated topic (falls back to the first 23 characters of the chunk)
#   Text      - the chunk itself
#   Embedding - the embedding vector returned for the chunk
if not os.path.exists('data/embeddings.parquet'):
  splitter = MarkdownTextSplitter(chunk_size=500, chunk_overlap=250)
  chunks = splitter.split_text(french_revolution)

  # All chunks are embedded in a single request; data[i] is paired with chunks[i] below.
  chunk_embeddings = oai.embeddings.create(
    input=chunks,
    model=TEXT_EMBEDDING_3_LARGE_MODEL_NAME
  )

  rows = []
  for i, chunk in enumerate(chunks):
    # Reset for every chunk: otherwise a failed request would either reuse the previous
    # chunk's topic or, on the very first chunk, leave `topic` undefined (NameError).
    topic = None
    try:
      topic = oai.chat.completions.create(
        model=GPT_4_O_MODEL_NAME,
        messages=[
          {
            "role": "system",
            "content": ("Read the user-provided text carefully and output its topic as a short sentence. "
                        "For example: 'Key events in the life of George Washington', 'Inflation in the Weimar republic'. "
                        "Do not add any additional text such as punctuation, markup, or quoting. Only output the topic.")},
          {"role": "user", "content": chunk}
        ],
        max_tokens=23,
        temperature=0.5,
      ).choices[0].message.content
    except Exception as exc:
      # Topic generation stays best-effort, but the failure is reported instead of hidden.
      print(f"Topic generation failed for chunk {i}, using the chunk prefix instead: {exc!r}")
    rows.append([topic or chunk[:23], chunk, chunk_embeddings.data[i].embedding])

  # Build the frame once instead of appending to it row by row.
  embeddings = pd.DataFrame(rows, columns=['Topic', 'Text', 'Embedding'])
  embeddings.to_parquet('data/embeddings.parquet')
else:
  embeddings = pd.read_parquet('data/embeddings.parquet')

embeddings

Unnamed: 0,Topic,Text,Embedding
0,French Revolution,Title: French Revolution\n\nURL Source: https:...,"[-0.02485630474984646, 0.0026923343539237976, ..."
1,Key events of the French Revolution,"The Storming of the Bastille, 14 July 1789\n\n...","[-0.005255864467471838, 0.0037901357281953096,..."
2,The French Revolution and its impact on modern...,The French Revolution[a] was a period of polit...,"[-0.027185581624507904, -0.016875529661774635,..."
3,Causes and early events of the French Revolution,Its causes are generally agreed to be a combin...,"[-0.020989766344428062, -0.024809090420603752,..."
4,Key events in the early stages of the French R...,which was converted into a National Assembly i...,"[-0.009496822953224182, -0.017249926924705505,..."
...,...,...,...
296,Alphonse Aulard's contributions to the study o...,evidence.[266][267] Alphonse Aulard (1849–1928...,"[-0.0005632034735754132, 0.021342121064662933,..."
297,Marxist socio-economic analysis of the French ...,Socio-economic analysis and a focus on the exp...,"[-0.003857595380395651, 0.007019456941634417, ..."
298,Alfred Cobban's critique of Jacobin-Marxist in...,Alfred Cobban challenged Jacobin-Marxist socia...,"[0.004117575474083424, 0.00744161382317543, -0..."
299,Political decisions and radicalization during ...,"In their 1965 work, La Revolution française, F...","[0.017132386565208435, 0.012499384582042694, -..."


In [30]:
import numpy as np
import tiktoken

def cosine_similarity(vector1, vector2):
  """Return the cosine similarity of two vectors.

  The value is the dot product of the inputs divided by the product of their
  Euclidean norms. Zero-length vectors are not special-cased.
  """
  magnitudes = np.linalg.norm(vector1) * np.linalg.norm(vector2)
  return np.dot(vector1, vector2) / magnitudes

# Tokenizer used by embeddings_search to count tokens per chunk. The model name is
# hard-coded to 'gpt-4o' rather than taken from GPT_4_O_MODEL_NAME.
# NOTE(review): presumably because that variable can be overridden with a name
# tiktoken does not recognise — confirm.
tokenizer = tiktoken.encoding_for_model('gpt-4o')

def embeddings_search(query, max_tokens=10000, k=100, min_similarity=0.2):
  """Return the chunks most similar to `query`, limited by a token budget.

  Args:
    query: Text to search for; it is embedded with the same model as the chunks.
    max_tokens: Upper bound on the summed token count of the returned chunks.
    k: Maximum number of candidates kept after ranking by similarity.
    min_similarity: Candidates with a lower cosine similarity are dropped.

  Returns:
    A DataFrame with the columns of `embeddings` plus 'Similarity' and 'Tokens',
    sorted by descending similarity. The least similar rows are removed until
    the total of 'Tokens' does not exceed `max_tokens`.
  """
  query_embedding = oai.embeddings.create(
    input=[query],
    model=TEXT_EMBEDDING_3_LARGE_MODEL_NAME
  ).data[0].embedding
  # Work on a copy so the module-level `embeddings` frame is left untouched.
  results = embeddings.copy()
  results['Similarity'] = results['Embedding'].apply(lambda x: cosine_similarity(x, query_embedding))
  results = results.sort_values(by='Similarity', ascending=False).head(k)
  # .copy() makes the filtered frame independent, so adding the 'Tokens' column below
  # is a plain assignment rather than a write to a slice (SettingWithCopyWarning).
  results = results[results['Similarity'] >= min_similarity].copy()
  results['Tokens'] = results['Text'].apply(lambda txt: len(tokenizer.encode(txt))).astype('int64')
  # Token counts are non-negative, so the running total never decreases and this mask
  # selects the longest prefix that fits the budget — the same rows that popping the
  # last row until the sum fits would keep, without re-summing on every iteration.
  return results[results['Tokens'].cumsum() <= max_tokens]

In [40]:
from graphrag.query.structured_search.global_search.reduce_system_prompt import REDUCE_SYSTEM_PROMPT as SYSTEM_PROMPT
import re

# Default answer-format instruction passed as `response_type` to both pipelines.
DEFAULT_RESPONSE_TYPE = 'Summarize and explain in 1-2 paragraphs with bullet points using at most 300 tokens'
# Context budget (in tokens): used as max_tokens for embeddings_search and exported
# as GRAPHRAG_GLOBAL_SEARCH_MAX_TOKENS for the GraphRAG query.
DEFAULT_MAX_CONTEXT_TOKENS = 10000

def remove_data(text):
    """Strip '[Data: ...]' citation markers from a generated answer.

    Spaces or tabs directly in front of a marker are removed together with it, so
    'Terror [Data: Reports (1)].' becomes 'Terror.' instead of 'Terror .'.
    Line breaks are left alone, and the result is stripped of leading and
    trailing whitespace.
    """
    return re.sub(r'[ \t]*\[Data:.*?\]', '', text).strip()

def ask_embeddings(query, response_type=DEFAULT_RESPONSE_TYPE):
  """Answer `query` from the chunks retrieved by embeddings_search.

  The retrieved chunk texts are joined and inserted, together with
  `response_type`, into SYSTEM_PROMPT (GraphRAG's reduce prompt); the chat model
  then answers the query and '[Data: ...]' markers are removed from its reply.
  """
  hits = embeddings_search(query, max_tokens=DEFAULT_MAX_CONTEXT_TOKENS)
  context = "---\n---\n".join(hits['Text'].tolist())

  system_message = {
    "role": "system",
    "content": SYSTEM_PROMPT.format(response_type=response_type, report_data=context),
  }
  user_message = {"role": "user", "content": query}

  completion = oai.chat.completions.create(
    model=GPT_4_O_MODEL_NAME,
    messages=[system_message, user_message],
    max_tokens=4000,
    temperature=0.5,
  )
  answer = completion.choices[0].message.content
  return remove_data(answer)

In [32]:
from IPython.display import Markdown

# Ask the embeddings-based pipeline a factual question and render the answer as Markdown.
result = ask_embeddings('Who was Robespierre and what was his role in the French revolution?')

Markdown(result)

### Maximilien Robespierre and His Role in the French Revolution

Maximilien Robespierre was a prominent and controversial figure in the French Revolution, known for his radical views and leadership during the Reign of Terror. Here are the key points summarizing his role:

- **Political Leadership**: Robespierre was a leading member of the radical Montagnards faction and a central figure in the Committee of Public Safety, which effectively governed France during the Reign of Terror .
  
- **Reign of Terror**: Under his influence, the Committee of Public Safety initiated the Reign of Terror, a period marked by mass executions of perceived enemies of the revolution. Approximately 16,600 people were executed, with many more dying in prison or awaiting trial .

- **Legislative Actions**: Robespierre played a significant role in passing laws that intensified the Terror, such as the Law of 22 Prairial, which expedited the trial and execution of suspected enemies .

- **Downfall and Execution**: His increasing paranoia and accusations against fellow revolutionaries led to his downfall. On 26 July 1794, he accused unnamed members of the Convention of conspiracy, leading to his arrest and execution on 28 July 1794 .

### Key Events and Implications

- **Conflict with Other Factions**: Robespierre's refusal to name the alleged conspirators in the Convention created confusion and fear, which his opponents exploited to build a coalition against him .
  
- **Political and Social Impact**: His leadership during the Reign of Terror left a lasting impact on France, with significant political purges and the execution of many revolutionaries, including Georges Danton and Jean-Paul Marat .

Robespierre's legacy is complex; he is both revered for his commitment to revolutionary ideals and reviled for the extreme measures he endorsed to achieve them.

In [33]:
# Same pipeline, this time with a broad summarisation-style query.
result = ask_embeddings('Timeline of the French revolution')

Markdown(result)

### Timeline of the French Revolution

The French Revolution was a significant period of political and societal change in France, spanning from 1789 to 1799. Below are the key events and their implications:

- **May 5, 1789**: The Estates General convened, marking the start of the Revolution.
- **June 1789**: The Estates General transformed into the National Assembly.
- **July 14, 1789**: The Storming of the Bastille, a pivotal event symbolizing the uprising against the Ancien Régime.
- **August 1789**: The National Assembly abolished feudalism and declared the Rights of Man and of the Citizen .
- **September 1792**: The monarchy was abolished, and the French First Republic was proclaimed .
- **January 21, 1793**: Execution of Louis XVI, leading to increased opposition from conservative European powers .
- **1793-1794**: The Reign of Terror, during which approximately 16,000 people were executed .
- **July 28, 1794**: The execution of Robespierre marked the end of the Reign of Terror .
- **1795**: The Directory replaced the National Convention, signifying a more conservative phase of the Revolution .
- **November 9, 1799**: The coup of 18 Brumaire led by Napoleon Bonaparte established the French Consulate, effectively ending the Revolutionary period .

These events collectively led to the abolition of the Ancien Régime, the establishment of a constitutional monarchy, and eventually the formation of the French Consulate, setting the stage for modern French political and social structures.

In [34]:
import yaml

if not os.path.exists('data/graphrag'):
  !python -m graphrag.index --init --root data/graphrag

with open('data/graphrag/settings.yaml', 'r') as f:
  settings_yaml = yaml.load(f, Loader=yaml.FullLoader)
settings_yaml['llm']['model'] = GPT_4_O_MODEL_NAME
settings_yaml['llm']['api_key'] = AZURE_OPENAI_API_KEY if is_azure else OPENAI_API_KEY
settings_yaml['llm']['type'] = 'azure_openai_chat' if is_azure else 'openai_chat'
settings_yaml['embeddings']['llm']['api_key'] = AZURE_OPENAI_API_KEY if is_azure else OPENAI_API_KEY
settings_yaml['embeddings']['llm']['type'] = 'azure_openai_embedding' if is_azure else 'openai_embedding'
settings_yaml['embeddings']['llm']['model'] = TEXT_EMBEDDING_3_LARGE_MODEL_NAME
if is_azure:
  settings_yaml['llm']['api_version'] = AZURE_OPENAI_API_VERSION
  settings_yaml['llm']['deployment_name'] = GPT_4_O_MODEL_NAME
  settings_yaml['llm']['api_base'] = AZURE_OPENAI_ENDPOINT
  settings_yaml['embeddings']['llm']['api_version'] = AZURE_OPENAI_API_VERSION
  settings_yaml['embeddings']['llm']['deployment_name'] = TEXT_EMBEDDING_3_LARGE_MODEL_NAME
  settings_yaml['embeddings']['llm']['api_base'] = AZURE_OPENAI_ENDPOINT

with open('data/graphrag/settings.yaml', 'w') as f:
  yaml.dump(settings_yaml, f)

if not os.path.exists('data/graphrag/input'):
  os.makedirs('data/graphrag/input')
  !cp data/french_revolution.md data/graphrag/input/french_revolution.txt
  !python -m graphrag.index --root ./data/graphrag

In [35]:
import subprocess

def ask_graph(query, response_type=DEFAULT_RESPONSE_TYPE):
  """Answer `query` by running the GraphRAG query CLI on ./data/graphrag.

  Args:
    query: The question to ask.
    response_type: Free-text description of the desired answer format.

  Returns:
    The text printed after 'Search Response: ', with '[Data: ...]' markers removed.

  Raises:
    subprocess.CalledProcessError: If the CLI exits with a non-zero status.
    IndexError: If the CLI output does not contain 'Search Response: '.
  """
  import sys  # local import: only needed to locate the kernel's interpreter

  # NOTE(review): the variable below is the *global* search token budget, while the
  # query runs with --method local — confirm this is the intended setting.
  env = os.environ.copy() | {
    'GRAPHRAG_GLOBAL_SEARCH_MAX_TOKENS': str(DEFAULT_MAX_CONTEXT_TOKENS),
  }
  command = [
    # sys.executable rather than 'python': the latter may resolve to an interpreter
    # other than the one the kernel (and its installed packages) runs on.
    sys.executable, '-m', 'graphrag.query',
    '--root', './data/graphrag',
    '--method', 'local',
    '--response_type', response_type,
    query,
  ]
  output = subprocess.check_output(command, universal_newlines=True, env=env)

  # Split on the FIRST marker only, so an answer that itself contains the marker text
  # is not truncated, and fail with a readable message when the marker is absent.
  marker = 'Search Response: '
  _, found, answer = output.partition(marker)
  if not found:
    raise IndexError(f"graphrag.query output does not contain {marker!r}; output was:\n{output[-500:]}")
  return remove_data(answer)

In [36]:
from IPython.display import Markdown

# Same question as asked of the embeddings pipeline, now answered through GraphRAG.
result = ask_graph('Who was Robespierre and what was his role in the French revolution?')

Markdown(result)

# Maximilien Robespierre and His Role in the French Revolution

## Overview
Maximilien Robespierre was a central figure in the French Revolution, known for his radical views and significant influence within the revolutionary government. His actions and policies were pivotal during the Reign of Terror, a period marked by extreme political repression and mass executions.

## Key Roles and Actions
- **Leadership and Influence**:
  - Robespierre was a leading member of the Committee of Public Safety and a prominent leader within the Jacobins and the Montagnard faction .
  - He opposed property qualifications for voting and standing for office, advocating for broader democratic participation .

- **Reign of Terror**:
  - Robespierre's policies were central to the Reign of Terror, which saw mass executions of perceived enemies of the revolution, including notable figures such as Georges Danton and Hébert .
  - His radical stance and actions eventually led to his downfall, culminating in his arrest and execution on 28 July 1794 .

- **Cult of the Supreme Being**:
  - Robespierre led the establishment of the Cult of the Supreme Being, a revolutionary cult that faced ridicule and opposition .

- **Political Conflicts**:
  - He was involved in significant political conflicts, including opposition to the refractory clergy and the Convention's policies .

Robespierre's influence and radical policies left a lasting impact on the French Revolution, highlighting the intense political dynamics and ideological battles of the period.


In [37]:
# Broad summarisation-style query through GraphRAG, for comparison with the embeddings answer.
result = ask_graph('Timeline of the French revolution')

Markdown(result)

### Timeline of the French Revolution

The French Revolution was a decade-long period of significant political and social upheaval in France, marked by several key events and transitions:

- **1789**: The Revolution began with the Estates General convening in May, followed by the Storming of the Bastille on July 14, symbolizing the uprising against the monarchy .
- **1791**: The Constitution of 1791 was adopted, establishing a constitutional monarchy and marking a shift in political power .
- **1792**: The monarchy was abolished, and the French First Republic was proclaimed in September .
- **1793**: King Louis XVI was executed in January, leading to the Reign of Terror, a period of extreme political repression and mass executions .
- **1794**: The Reign of Terror ended with the execution of Robespierre in July .
- **1795**: The Directory was established, marking a period of relative stability but also corruption and inefficiency .
- **1799**: The Revolution concluded with the coup of 18 Brumaire in November, leading to the establishment of the French Consulate under Napoleon Bonaparte .

These events collectively reshaped France's political landscape, leading to the rise of republicanism and the eventual establishment of the French Consulate.


In [41]:
# Questions used to compare the two pipelines side by side.
# Each entry holds the question, the answer format to request, and an 'answer' field
# (presumably a hand-written reference answer; empty where none is provided — confirm).
QUESTIONS = [
  {
    'question': 'When did the French Revolution officially begin and end, and how long did it last?',
    'response_type': 'One short sentence (max 23 tokens)',
    'answer': 'The French Revolution began on 5 May 1789 and ended on 9 November 1799, lasting 10 years, 6 months, and 4 days.'
  },
  {
    'question': 'What were the key outcomes of the French Revolution ?',
    'response_type': 'List of 7-10 bullet points',
    'answer': ''
  },
  {
    'question': 'How did the financial and political crisis contribute to the calling of the Estates-General in 1789?',
    'response_type': DEFAULT_RESPONSE_TYPE,
    'answer': ''
  },
  {
    'question': 'What role did the Enlightenment and previous revolutions play in shaping the French Revolution?',
    'response_type': DEFAULT_RESPONSE_TYPE,
    'answer': ''
  },
  {
    'question': 'Analyze how the various social classes in France were affected by the Revolution and the policies implemented, such as the Civil Constitution of the Clergy and the abolition of feudal dues.',
    'response_type': DEFAULT_RESPONSE_TYPE,
    'answer': ''
  },
]

# Run every question through both pipelines; the answers are stored in place on each
# entry under 'answer_graph' and 'answer_embeddings'.
for question in QUESTIONS:
  question['answer_graph'] = ask_graph(question['question'], question['response_type'])
  question['answer_embeddings'] = ask_embeddings(question['question'], question['response_type'])

In [43]:
# Pretty-print the question entries, now including both pipelines' answers.
from pprint import pprint
pprint(QUESTIONS)

[{'answer': 'The French Revolution began on 5 May 1789 and ended on 9 November '
            '1799, lasting 10 years, 6 months, and 4 days.',
  'answer_embeddings': 'The French Revolution began on 5 May 1789 and ended on '
                       '9 November 1799, lasting 10 years, 6 months, and 4 '
                       'days .',
  'answer_graph': 'The French Revolution began on May 5, 1789, and ended on '
                  'November 9, 1799, lasting 10 years, 6 months, and 4 days .',
  'question': 'When did the French Revolution officially begin and end, and '
              'how long did it last?',
  'response_type': 'One short sentence (max 23 tokens)'},
 {'answer': '',
  'answer_embeddings': '### Key Outcomes of the French Revolution\n'
                       '\n'
                       '- **Abolition of Feudalism**: The revolution led to '
                       'the suppression of the feudal system, the emancipation '
                       'of the individual, greater division of