In [1]:
%pip install openai graphrag pandas requests python-dotenv langchain numpy tiktoken matplotlib scikit-learn pyyaml pydantic instructor
from IPython.display import clear_output ; clear_output()

In [5]:
from dotenv import load_dotenv
import os
load_dotenv()

# Model / deployment names; each one can be overridden by an environment variable of the same name.
GPT_4_O_MODEL_NAME = os.getenv("GPT_4_O_MODEL_NAME", "gpt-4o")
TEXT_EMBEDDING_3_LARGE_MODEL_NAME = os.getenv("TEXT_EMBEDDING_3_LARGE_MODEL_NAME", "text-embedding-3-large")

# Azure OpenAI is selected only when an Azure endpoint is configured AND no plain
# OPENAI_API_KEY is present; in every other case the regular OpenAI client is used.
is_azure = bool(os.getenv("AZURE_OPENAI_ENDPOINT", "")) and not os.getenv("OPENAI_API_KEY", "")

# `oai` is the single client object the rest of the notebook talks to.
if not is_azure:
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
  from openai import OpenAI
  oai = OpenAI(api_key=OPENAI_API_KEY)
else:
  AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
  AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
  AZURE_OPENAI_API_VERSION = "2024-05-01-preview"
  from openai import AzureOpenAI
  oai = AzureOpenAI(
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    api_key=AZURE_OPENAI_API_KEY,
    api_version=AZURE_OPENAI_API_VERSION,
  )

In [6]:
import requests
import os

# Download the source text once and cache it under data/ so later runs read the local copy.
os.makedirs('data', exist_ok=True)

if not os.path.exists('data/french_revolution.txt'):
  response = requests.get("https://www.gutenberg.org/cache/epub/71688/pg71688.txt", timeout=60)
  # Fail loudly on an HTTP error instead of caching an error page as if it were the book.
  response.raise_for_status()
  french_revolution = response.text
  # Explicit UTF-8: the text contains non-ASCII characters, and the platform default
  # encoding is not guaranteed to be able to represent them.
  with open('data/french_revolution.txt', 'w', encoding='utf-8') as f:
    f.write(french_revolution)
else:
  with open('data/french_revolution.txt', 'r', encoding='utf-8') as f:
    french_revolution = f.read()

print(french_revolution[:123])

The Project Gutenberg eBook of The French Revolution 1789-1795
    
This ebook is for the use of anyone anywhere in the Un


In [7]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
import pandas as pd

# Build (or load from cache) the table of text chunks, their generated topics and embeddings.
if not os.path.exists('data/embeddings.parquet'):
  splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=500)

  chunks = splitter.split_text(french_revolution)
  # All chunks are embedded in one request; data[i] is read as the embedding of chunks[i].
  chunk_embeddings = oai.embeddings.create(
    input=chunks,
    model=TEXT_EMBEDDING_3_LARGE_MODEL_NAME
  )
  rows = []
  for i, chunk in enumerate(chunks):
    # Reset for every chunk: otherwise a failed request would either raise NameError
    # (first chunk) or silently reuse the previous chunk's topic for this one.
    topic = None
    try:
      topic = oai.chat.completions.create(
        model=GPT_4_O_MODEL_NAME,
        messages=[
          {
            "role": "system",
            "content": ("Read the user-provided text carefully and output its topic as a short sentence. "
                        "For example: 'Key events in the life of George Washington', 'Inflation in the Weimar republic'. "
                        "Do not add any additional text such as punctuation, markup, or quoting. Only output the topic.")},
          {"role": "user", "content": chunk}
        ],
        max_tokens=23,
        temperature=0.5,
      ).choices[0].message.content
    except Exception as error:
      # Topic labelling is best-effort: report the failure and fall back to the chunk prefix below.
      print(f'Topic generation failed for chunk {i}: {error!r}')
    rows.append({
      'Topic': topic or chunk[:23],
      'Text': chunk,
      'Embedding': chunk_embeddings.data[i].embedding,
    })
  # Build the frame once from the collected records instead of growing it row by row.
  embeddings = pd.DataFrame(rows, columns=['Topic', 'Text', 'Embedding'])
  embeddings.to_parquet('data/embeddings.parquet')
else:
  embeddings = pd.read_parquet('data/embeddings.parquet')

embeddings

Unnamed: 0,Topic,Text,Embedding
0,The French Revolution 1789-1795,﻿The Project Gutenberg eBook of The French Rev...,"[-0.01553737185895443, -0.00945056602358818, -..."
1,The French Revolution 1789-1795,Title: The French Revolution 1789-1795\n\nAuth...,"[-0.000799534609541297, -0.003513576230034232,..."
2,Historical works for schools on ancient Roman ...,Transcriber’s Notes: Italic text is enclosed i...,"[0.009207612834870815, 0.006131153088063002, -..."
3,Key periods in Roman and Greek history,"=THE GRACCHI, MARIUS, AND SULLA.= By A. H. BEE...","[0.008042179979383945, 0.028144072741270065, -..."
4,List of historical books and their topics,"=ROME AND CARTHAGE, the PUNIC WARS.= By R. BOS...","[0.0033951664809137583, 0.03002190962433815, -..."
...,...,...,...
1101,Donation information for the Project Gutenberg...,The Foundation’s business office is located at...,"[-0.009449628181755543, 0.013315385207533836, ..."
1102,Project Gutenberg's reliance on public support...,Project Gutenberg™ depends upon and cannot sur...,"[-0.021214725449681282, -0.008037537336349487,..."
1103,Donation policies and guidelines for Project G...,While we cannot and do not solicit contributio...,"[-0.004949253983795643, -0.024125445634126663,..."
1104,Donation methods and general information about...,Please check the Project Gutenberg web pages f...,"[-0.009324702434241772, -0.00771912420168519, ..."


In [8]:
import numpy as np
import tiktoken

def cosine_similarity(vector1, vector2):
  """Return the cosine of the angle between two equal-length numeric vectors.

  Args:
    vector1: sequence or array of numbers.
    vector2: sequence or array of numbers, same length as ``vector1``.

  Returns:
    ``dot(vector1, vector2) / (|vector1| * |vector2|)``, or ``0.0`` when either
    vector has zero norm (the similarity is undefined there; returning 0.0 avoids
    a division by zero that would otherwise yield NaN and a RuntimeWarning).
  """
  norm_product = np.linalg.norm(vector1) * np.linalg.norm(vector2)
  if norm_product == 0:
    return 0.0
  return np.dot(vector1, vector2) / norm_product

# Tokenizer used by embeddings_search to count the tokens of each retrieved chunk.
# NOTE(review): the encoding is looked up for the literal name 'gpt-4o', not for
# GPT_4_O_MODEL_NAME, so it does not follow an overridden model/deployment name.
tokenizer = tiktoken.encoding_for_model('gpt-4o')

def embeddings_search(query, max_tokens=10000, k=100, min_similarity=0.2):
  """Retrieve the chunks most similar to ``query`` within a token budget.

  Args:
    query: free-text question; it is embedded with TEXT_EMBEDDING_3_LARGE_MODEL_NAME.
    max_tokens: upper bound on the summed token count of the returned chunks.
    k: maximum number of top-ranked chunks considered.
    min_similarity: chunks with a lower cosine similarity are discarded.

  Returns:
    A copy of the matching rows of ``embeddings``, sorted by descending similarity,
    with two extra columns: ``Similarity`` and ``Tokens``. Rows are dropped from the
    low-similarity end until the total of ``Tokens`` is at most ``max_tokens``.
  """
  query_embedding = oai.embeddings.create(
    input=[query],
    model=TEXT_EMBEDDING_3_LARGE_MODEL_NAME
  ).data[0].embedding
  results = embeddings.copy()
  results['Similarity'] = results['Embedding'].apply(lambda x: cosine_similarity(x, query_embedding))
  results = results.sort_values(by='Similarity', ascending=False).head(k)
  # .copy() makes the filtered frame independent, so adding the 'Tokens' column
  # below cannot trigger pandas' SettingWithCopyWarning.
  results = results[results['Similarity'] >= min_similarity].copy()
  results['Tokens'] = results['Text'].apply(lambda txt: len(tokenizer.encode(txt)))
  # Token counts are non-negative, so the running total never decreases: keeping the
  # rows whose running total fits the budget selects exactly the longest prefix that
  # fits, without re-summing the column once per dropped row.
  running_total = results['Tokens'].astype('int64').cumsum()
  return results[running_total <= max_tokens]

In [9]:
from graphrag.query.structured_search.global_search.reduce_system_prompt import REDUCE_SYSTEM_PROMPT as SYSTEM_PROMPT
import re

# Default answer-format instruction; used as the default `response_type` of the ask_* helpers.
DEFAULT_RESPONSE_TYPE = 'Summarize and explain in 1-2 paragraphs with bullet points using at most 300 tokens'
# Token budget for the retrieved context handed to the model.
DEFAULT_MAX_CONTEXT_TOKENS = 10000

def remove_data(text):
    """Strip ``[Data: ...]`` citation markers from a model response.

    Spaces/tabs immediately before a marker are removed together with it, so that
    "fact [Data: Reports (1)]." becomes "fact." rather than "fact ." and a marker
    in the middle of a sentence does not leave a double space behind.

    Args:
        text: response text that may contain ``[Data: ...]`` markers.

    Returns:
        The text without the markers, with leading/trailing whitespace stripped.
    """
    return re.sub(r'[ \t]*\[Data:.*?\]', '', text).strip()

def ask_embeddings(query, response_type=DEFAULT_RESPONSE_TYPE):
  """Answer ``query`` from the chunks retrieved by embedding similarity.

  The retrieved chunk texts are joined into one context string and inserted,
  together with ``response_type``, into SYSTEM_PROMPT; the query itself is sent
  as the user message.

  Args:
    query: the question to answer.
    response_type: answer-format instruction inserted into the system prompt.

  Returns:
    The model's answer with ``[Data: ...]`` markers removed by ``remove_data``.
  """
  hits = embeddings_search(query, max_tokens=DEFAULT_MAX_CONTEXT_TOKENS)
  context = "---\n---\n".join(hits['Text'].tolist())

  system_message = {
    "role": "system",
    "content": SYSTEM_PROMPT.format(response_type=response_type, report_data=context),
  }
  user_message = {"role": "user", "content": query}

  completion = oai.chat.completions.create(
    model=GPT_4_O_MODEL_NAME,
    messages=[system_message, user_message],
    max_tokens=4000,
    temperature=0.5,
  )
  answer = completion.choices[0].message.content
  return remove_data(answer)

In [10]:
from IPython.display import Markdown

# Demo: answer a biographical question with the embeddings pipeline and render it as Markdown.
result = ask_embeddings('Who was Robespierre and what was his role in the French revolution?')

Markdown(result)

### Maximilien Robespierre: Key Figure in the French Revolution

Maximilien Robespierre was a prominent leader during the French Revolution, known for his influential role in the radical phase of the revolution and his association with the Reign of Terror.

#### Background and Ideology
- **Early Life:** Robespierre was a lawyer from Arras, influenced by Rousseau's teachings, which led him to resign from a judicial post to avoid condemning people to death .
- **Beliefs:** He was deeply committed to the principles of liberty, equality, and fraternity, although his commitment to individual liberty waned as he gained power .
- **Rousseau's Influence:** Robespierre's ideals were rooted in Rousseau's concepts, advocating for a society without class divisions and focused on moral and civic virtue .

#### Political Career and Actions
- **Constituent Assembly:** Robespierre was a member of the Constituent Assembly and later became a leading figure in the Jacobins, a radical political club .
- **Committee of Public Safety:** He became a dominant member of the Committee of Public Safety, where he played a crucial role in orchestrating the Reign of Terror, systematically using the guillotine to eliminate perceived enemies of the revolution .
- **The Terror:** Robespierre reduced the Terror to a system, using it as a tool to maintain control and suppress opposition. He was responsible for numerous executions, including those of the Hébertists and Dantonists .

#### Downfall and Legacy
- **Fall from Power:** Robespierre's increasing authoritarianism and the establishment of the Cult of the Supreme Being alienated many, leading to his arrest and execution in 1794 .
- **Impact:** His rule left a controversial legacy, marked by his initial advocacy for democratic principles and subsequent descent into tyranny .

Robespierre remains a complex figure, embodying both the revolutionary zeal for change and the dangers of radical authoritarianism.

In [11]:
# Demo: ask the embeddings pipeline for a timeline and render the answer as Markdown.
result = ask_embeddings('Timeline of the French revolution')

Markdown(result)

### Timeline of the French Revolution

The French Revolution was a period of significant social and political change in France that lasted from 1789 until 1799. Here is a concise timeline of key events:

#### 1789
- **May 5**: Meeting of the States General.
- **June 17**: Adoption of the title of National Assembly.
- **June 20**: Tennis Court Oath.
- **June 23**: The King commands the separation of the Orders.
- **July 14**: Capture of the Bastille.
- **August 4**: Abolition of feudal rights.
- **October 6**: The King brought to Paris.

#### 1790
- **July 14**: Feast of the Federation.
- **November 27**: Oath imposed on the Clergy.

#### 1791
- **April 2**: Death of Mirabeau.
- **June 20**: The Flight to Varennes.
- **July 17**: The Massacre of the Champ de Mars.
- **August 27**: Declaration of Pilnitz.
- **September 30**: End of the Constituent Assembly.
- **October 1**: Meeting of the Legislative Assembly.

#### 1792
- **April 20**: Declaration of War against the King of Hungary and Bohemia.
- **June 13**: Dismissal of the Girondist Ministers.
- **June 20**: The King mobbed in the Tuileries.
- **July 26**: The Duke of Brunswick’s Manifesto.
- **August 10**: Overthrow of the Monarchy.
- **September 2–7**: The September Massacres.
- **September 21**: Meeting of the Convention.
- **September 22**: Proclamation of the Republic.
- **November 6**: Victory of Jemmapes.

#### 1793
- **January 21**: Execution of the King.
- **February 1**: Declaration of War against England and Holland.
- **March 9**: Establishment of the Revolutionary Court.
- **June 2**: Expulsion of the Girondists.
- **October 16**: Execution of the Queen.
- **October 31**: Execution of the Girondists.

#### 1794
- **March 24**: Execution of the Hébertists.
- **April 5**: Execution of the Dantonists.
- **June 26**: Victory of Fleurus.
- **July 28**: Execution of the Robespierrists.
- **November 12**: Jacobin Club closed.

#### 1795
- **October 5**: Insurrection of the Middle Classes against the Convention.
- **October 26**: Meeting of the New Legislature .

In [12]:
import yaml

if not os.path.exists('data/graphrag'):
  !python -m graphrag.index --init --root data/graphrag

with open('data/graphrag/settings.yaml', 'r') as f:
  settings_yaml = yaml.load(f, Loader=yaml.FullLoader)
settings_yaml['llm']['model'] = GPT_4_O_MODEL_NAME
settings_yaml['llm']['api_key'] = AZURE_OPENAI_API_KEY if is_azure else OPENAI_API_KEY
settings_yaml['llm']['type'] = 'azure_openai_chat' if is_azure else 'openai_chat'
settings_yaml['embeddings']['llm']['api_key'] = AZURE_OPENAI_API_KEY if is_azure else OPENAI_API_KEY
settings_yaml['embeddings']['llm']['type'] = 'azure_openai_embedding' if is_azure else 'openai_embedding'
settings_yaml['embeddings']['llm']['model'] = TEXT_EMBEDDING_3_LARGE_MODEL_NAME
if is_azure:
  settings_yaml['llm']['api_version'] = AZURE_OPENAI_API_VERSION
  settings_yaml['llm']['deployment_name'] = GPT_4_O_MODEL_NAME
  settings_yaml['llm']['api_base'] = AZURE_OPENAI_ENDPOINT
  settings_yaml['embeddings']['llm']['api_version'] = AZURE_OPENAI_API_VERSION
  settings_yaml['embeddings']['llm']['deployment_name'] = TEXT_EMBEDDING_3_LARGE_MODEL_NAME
  settings_yaml['embeddings']['llm']['api_base'] = AZURE_OPENAI_ENDPOINT

with open('data/graphrag/settings.yaml', 'w') as f:
  yaml.dump(settings_yaml, f)

if not os.path.exists('data/graphrag/input'):
  os.makedirs('data/graphrag/input')
  !cp data/french_revolution.txt data/graphrag/input/french_revolution.txt
  !python -m graphrag.index --root ./data/graphrag

[2KInitializing project at data/graphrag
[2K🚀 [32mReading settings from data/graphrag/settings.yaml[0m
[2K⠸ GraphRAG Indexer 
[2K[1A[2K⠸ GraphRAG Indexer les loaded (0 filtered) [90m━━━━━━[0m [35m100%[0m [36m0:00:…[0m [33m0:00:…[0m
├── Loading Input (text) - 1 files loaded (0 filtered) [90m━━━━━━[0m [35m100%[0m [36m0:00:…[0m [33m0:00:…[0m
[2K[1A[2K[1A[2K⠼ GraphRAG Indexer 
├── Loading Input (text) - 1 files loaded (0 filtered) [90m━━━━━━[0m [35m100%[0m [36m0:00:…[0m [33m0:00:…[0m
[2K[1A[2K[1A[2K⠼ GraphRAG Indexer 
├── Loading Input (text) - 1 files loaded (0 filtered) [90m━━━━━━[0m [35m100%[0m [36m0:00:…[0m [33m0:00:…[0m
[2K[1A[2K[1A[2K⠼ GraphRAG Indexer 
├── Loading Input (text) - 1 files loaded (0 filtered) [90m━━━━━━[0m [35m100%[0m [36m0:00:…[0m [33m0:00:…[0m
[2K[1A[2K[1A[2K⠹ GraphRAG Indexer 
├── Loading Input (text) - 1 files loaded (0 filtered) [90m━━━━━━[0m [35m100%[0m [36m0:00:…[0m [33m0:00:…[0m
└── cre

In [13]:
import subprocess

def ask_graph(query, response_type=DEFAULT_RESPONSE_TYPE):
  """Answer ``query`` by running the GraphRAG query CLI (local search) in a subprocess.

  Args:
    query: the question to answer.
    response_type: answer-format instruction passed through ``--response_type``.

  Returns:
    The text printed after the 'Search Response: ' marker, with ``[Data: ...]``
    markers removed by ``remove_data``.

  Raises:
    subprocess.CalledProcessError: if the CLI exits with a non-zero status.
    RuntimeError: if the CLI output does not contain the 'Search Response: ' marker.
  """
  import sys  # local import: only needed to locate the running interpreter

  marker = 'Search Response: '
  budget = str(DEFAULT_MAX_CONTEXT_TOKENS)
  env = os.environ.copy() | {
    'GRAPHRAG_GLOBAL_SEARCH_MAX_TOKENS': budget,
    # The query below runs with --method local, so the local-search budget is set too.
    # NOTE(review): variable name taken from the GraphRAG env-var documentation; confirm
    # against the installed graphrag version.
    'GRAPHRAG_LOCAL_SEARCH_MAX_TOKENS': budget,
  }
  command = [
    # sys.executable guarantees the subprocess uses the same interpreter (and therefore
    # the same installed packages) as the notebook kernel, unlike a bare 'python'.
    sys.executable, '-m', 'graphrag.query',
    '--root', './data/graphrag',
    '--method', 'local',
    '--response_type', response_type,
    query,
  ]
  output = subprocess.check_output(command, universal_newlines=True, env=env)
  # partition keeps everything after the first marker, so an answer that happens to
  # contain the marker text itself is not truncated.
  _, found, answer = output.partition(marker)
  if not found:
    raise RuntimeError(f'graphrag.query output did not contain {marker!r}:\n{output}')
  return remove_data(answer)

In [15]:
from IPython.display import Markdown

# Demo: the same biographical question as before, answered through the GraphRAG pipeline.
result = ask_graph('Who was Robespierre and what was his role in the French revolution?')

Markdown(result)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  entity_df["community"] = entity_df["community"].fillna(-1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  entity_df["community"] = entity_df["community"].astype(int)


### Maximilien Robespierre: Key Figure in the French Revolution

**Overview:**
Maximilien Robespierre was a French lawyer and statesman who became one of the most influential figures of the French Revolution. He is particularly known for his pivotal role in the Reign of Terror and his leadership among the Jacobins .

**Key Roles and Actions:**
- **Committee of Public Safety:** Robespierre was a leading member, using his position to consolidate power and control over the revolutionary government .
- **Reign of Terror:** He played a significant role in establishing the Terror as a systematic policy, using the guillotine to eliminate his enemies and reappointing judges to convert the court into his special instrument .
- **Opposition to Rivals:** Robespierre was known for his hostility towards the Girondists, Hébertists, and Dantonists, contributing to their downfall .
- **Worship of the Supreme Being:** He instituted this as part of his vision for societal regeneration, reflecting his belief in Rousseau's teachings .

**Legacy:**
Robespierre's legacy is complex, marked by his integrity and talk of virtue, but also by his ambition and the cruelty of his policies. His eventual downfall led to significant changes in the revolutionary government and the end of the Reign of Terror .

In [16]:
# Demo: the same timeline question as before, answered through the GraphRAG pipeline.
result = ask_graph('Timeline of the French revolution')

Markdown(result)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  entity_df["community"] = entity_df["community"].fillna(-1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  entity_df["community"] = entity_df["community"].astype(int)


### Timeline of the French Revolution

The French Revolution, spanning from 1789 to 1799, was a period of radical social and political upheaval in France. Here are some key events:

- **1789**: The French Revolution begins with the convening of the States-General and the storming of the Bastille. This year also saw significant financial decisions, including the abolition of tithes .
- **1790**: The establishment of the Jacobin clubs and the dissolution of the ministry due to attacks by the Jacobins of Paris .
- **1791**: Continued expansion of the Jacobin clubs across France .
- **1792**: The Insurrection of August 10 leads to the overthrow of the monarchy, and the September Massacres occur, marking a period of extreme violence .
- **1793**: The fall of the Girondists and the rise of the Commune of Paris, which significantly influences the political landscape .
- **1795**: The French Revolution concludes with the establishment of the Directory, marking the end of the revolutionary period .

These events highlight the profound changes and tumultuous nature of the French Revolution, which reshaped France's political and social structures.

In [17]:
# Evaluation set. Each entry holds the question, the answer-format instruction passed
# as `response_type`, and a reference answer ('answer'); only the first entry has a
# non-empty reference answer.
QUESTIONS = [
  {
    'question': 'When did the French Revolution officially begin and end, and how long did it last?',
    'response_type': 'One short sentence (max 23 tokens)',
    'answer': 'The French Revolution began on 5 May 1789 and ended on 9 November 1799, lasting 10 years, 6 months, and 4 days.'
  },
  {
    'question': 'What were the key outcomes of the French Revolution ?',
    'response_type': 'List of 7-10 bullet points',
    'answer': ''
  },
  {
    'question': 'How did the financial and political crisis contribute to the calling of the Estates-General in 1789?',
    'response_type': DEFAULT_RESPONSE_TYPE,
    'answer': ''
  },
  {
    'question': 'What role did the Enlightenment and previous revolutions play in shaping the French Revolution?',
    'response_type': DEFAULT_RESPONSE_TYPE,
    'answer': ''
  },
  {
    'question': 'Analyze how the various social classes in France were affected by the Revolution and the policies implemented, such as the Civil Constitution of the Clergy and the abolition of feudal dues.',
    'response_type': DEFAULT_RESPONSE_TYPE,
    'answer': ''
  },
]

# Answer every question with both pipelines and store the results on the entry itself
# under 'answer_graph' and 'answer_embeddings' (the dicts in QUESTIONS are mutated in place).
for question in QUESTIONS:
  question['answer_graph'] = ask_graph(question['question'], question['response_type'])
  question['answer_embeddings'] = ask_embeddings(question['question'], question['response_type'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  entity_df["community"] = entity_df["community"].fillna(-1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  entity_df["community"] = entity_df["community"].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  entity_df["community"] = entity_df["community"].fillna(-1)
A value is trying to be set

In [18]:
from pprint import pprint
# Dump every entry with the answers collected from both pipelines for side-by-side reading.
pprint(QUESTIONS)

[{'answer': 'The French Revolution began on 5 May 1789 and ended on 9 November '
            '1799, lasting 10 years, 6 months, and 4 days.',
  'answer_embeddings': 'The French Revolution began on May 5, 1789, and ended '
                       'on October 26, 1795, lasting over six years .',
  'answer_graph': 'The French Revolution began in 1789 and ended in 1799, '
                  'lasting 10 years .',
  'question': 'When did the French Revolution officially begin and end, and '
              'how long did it last?',
  'response_type': 'One short sentence (max 23 tokens)'},
 {'answer': '',
  'answer_embeddings': '### Key Outcomes of the French Revolution\n'
                       '\n'
                       '- **Abolition of Feudal Privileges and Serfdom**:\n'
                       '  - The revolution led to the abolition of feudal '
                       'privileges, including the exclusive rights of '
                       'seigneurs to hunt and keep rabbits and pigeons, and '