# Dependencies

In [17]:
!pip install cheshire_cat_api requests

Collecting requests
  Using cached requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)
Collecting charset-normalizer<4,>=2 (from requests)
  Using cached charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (35 kB)
Collecting idna<4,>=2.5 (from requests)
  Using cached idna-3.10-py3-none-any.whl.metadata (10 kB)
Collecting certifi>=2017.4.17 (from requests)
  Using cached certifi-2025.1.31-py3-none-any.whl.metadata (2.5 kB)
Using cached requests-2.32.3-py3-none-any.whl (64 kB)
Using cached certifi-2025.1.31-py3-none-any.whl (166 kB)
Using cached charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (145 kB)
Using cached idna-3.10-py3-none-any.whl (70 kB)
Installing collected packages: idna, charset-normalizer, certifi, requests
Successfully installed certifi-2025.1.31 charset-normalizer-3.4.1 idna-3.10 requests-2.32.3


# Code

In [1]:
import re

def split_and_save(file_path, output_folder, delimiter, prefix, allowed):
    # Open and read the content of the source file
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    content = clean_text(allowed, content)

    pattern = r'(' + delimiter + r')'
    articles = re.split(pattern, content)

    # The first item in 'articles' might not start with "Art. X" and can be an introduction or preface
    # If it doesn't match our pattern, we'll ignore it in file writing
    if not re.match(pattern, articles[0]):
        articles = articles[1:]

    # Iterate over the articles to write them to separate files
    for i in range(1, len(articles), 2):
        article_number = articles[i].replace('\n', ' ').replace(':', ' ').split(" ")[0]  # Extract the number X from "Art. X"
        file_name = f'art-{article_number}.txt'  # Create the file name
        # Write the content to the file
        with open(f"{output_folder}/{prefix}_" + file_name, 'w', encoding='utf-8') as file:
            file.write(f"{delimiter}" + articles[i])


def save_result(res, file_path):
  with open(file_path, 'w', encoding='utf-8') as file:
    file.write(res[0])


def split_files_in_folder(folder_path, output_folder, delimiter, allowed):

  import os
  files = os.listdir(folder_path)

  for file_name in files:
    if file_name.endswith(".txt"):
      file_path = os.path.join(folder_path, file_name)
      split_and_save(file_path, output_folder, delimiter, file_name[:-4], allowed)

def clean_text(allowed, content):
  if not allowed:
    return content
  return "\n".join([x for x in content.split("\n") if any([x.startswith(y) for y in allowed])])

In [2]:
#split_and_save("memory/crossjustice/dutch/dutch_ccp.txt", "memory/crossjustice/dutch", "Artikel ", "dutch_ccp", [])
#split_and_save("memory/crossjustice/dir_2010_64.txt", "memory/crossjustice/directive", "Article ", "dir_2010_64", [])

In [59]:
import time
import json
import cheshire_cat_api as ccat

WEBSITE = "localhost"
PORT = 1865
KEY = "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"
content = ""

def on_open():
    # This is triggered when the connection is opened
    print("Connection opened!")

def on_message(message: str):
    # This is triggered when a new message arrives
    # and grabs the message
    # print(message)
    global content

    response = json.loads(message)
    if response["type"] != "chat_token":
      print(response)
    if response["type"] == "chat":
      content = response

def on_error(exception: Exception):
    # This is triggered when a WebSocket error is raised
    global content
    print(str(exception))
    content = str(exception)

def on_close(status_code: int, message: str):
    # This is triggered when the connection is closed
    print(f"Connection closed!")



def connect(user_id):
  # Connection settings with default values
  config = ccat.Config(
      base_url=WEBSITE,
      port=PORT,
      user_id=user_id,
      auth_key="",
      secure_connection=False
  )

  # Cat Client
  cat_client = ccat.CatClient(
      config=config,
      on_open=on_open,
      on_close=on_close,
      on_message=on_message,
      on_error=on_error
  )

  # Connect to the WebSocket API
  cat_client.connect_ws()

  while not cat_client.is_ws_connected:
      time.sleep(1)

  return cat_client

def disconnect(cat_client):
  cat_client.close()


def ask_gpt(cat_client, prompt):
  global content

  content = ""
  cat_client.send(message=prompt)

  while content == "":
    True

  return content


def prompt(user_id, prompt):
  client = connect(user_id)
  res = ask_gpt(client, prompt)
  disconnect(client)
  return res["content"], [( x["metadata"]["source"], x["score"], x["page_content"]) for x in res["why"]["memory"]["declarative"]]


def print_response(res):
  print("Output:\n")
  print(res[0])
  print("\n\nMemories:\n\n")
  for x in res[1]:
    print(f"{x[0]} - {x[1]}\n{x[2]}\n\n")


def clean_history(user):
  import requests

  url = f"http://{WEBSITE}:{PORT}/memory/conversation_history/"

  payload = {}
  headers = {
    'Accept': 'application/json',
    'user_id': user
  }

  response = requests.request("DELETE", url, headers=headers, data=payload)
  print(response.text)

def get_history(user):
  import requests

  url = f"http://{WEBSITE}:{PORT}/memory/conversation_history/"

  payload = {}
  headers = {
    'Accept': 'application/json',
    'user_id': user
  }

  response = requests.request("GET", url, headers=headers, data=payload)
  print(response.text)

def upload_memory(user, chunk_size, file_name, file_path):
  import http.client
  import mimetypes
  from codecs import encode
  import time

  conn = http.client.HTTPConnection(WEBSITE, PORT)
  dataList = []
  boundary = 'boundary'
  dataList.append(encode('--' + boundary))
  dataList.append(encode('Content-Disposition: form-data; name=file; filename={0}'.format(file_name)))

  fileType = mimetypes.guess_type(file_path)[0] or 'application/octet-stream'
  dataList.append(encode('Content-Type: {}'.format(fileType)))
  dataList.append(encode(''))

  with open(file_path, 'rb') as f:
    dataList.append(f.read())
  dataList.append(encode('--' + boundary))
  dataList.append(encode('Content-Disposition: form-data; name=chunk_size;'))

  dataList.append(encode('Content-Type: {}'.format('text/plain')))
  dataList.append(encode(''))

  dataList.append(encode(str(chunk_size)))
  dataList.append(encode('--' + boundary))
  dataList.append(encode('Content-Disposition: form-data; name=chunk_overlap;'))

  dataList.append(encode('Content-Type: {}'.format('text/plain')))
  dataList.append(encode(''))

  dataList.append(encode("100"))
  dataList.append(encode('--'+boundary+'--'))
  dataList.append(encode(''))
  body = b'\r\n'.join(dataList)
  payload = body
  headers = {
    'Content-type': 'multipart/form-data; boundary={}'.format(boundary),
    'Accept': 'application/json',
    'user_id': user
  }
  conn.request("POST", "/rabbithole/", payload, headers)
  res = conn.getresponse()
  data = res.read()
  print(data.decode("utf-8"))

def upload_all_memories_folder(user, chunk_size, folder):
  import os
  files = os.listdir(folder)

  for file_name in files:
    if file_name.endswith(".txt"):
      file_path = os.path.join(folder, file_name)
      upload_memory(user, chunk_size, file_name, file_path)
      time.sleep(1)


def summarise_in_folder(user, folder, target_folder):
  import os
  files = os.listdir(folder)

  for file_name in files:
    if file_name.endswith(".txt"):
      file_path = os.path.join(folder, file_name)
      prompt_summary_r = prompt_summary(file_path)
      clean_history(user)
      summary = prompt(user, prompt_summary_r)
      save_result(summary, f"{target_folder}/{file_name}")


def set_declarative_memories(n, t):
  import http.client
  import json

  conn = http.client.HTTPConnection(WEBSITE, PORT)

  payload = json.dumps({
    "language": "English",
    "only_local_responses": False,
    "prompt_prefix": "",
    "disable_episodic_memories": True,
    "disable_declarative_memories": False,
    "disable_procedural_memories": True,
    "number_of_declarative_items": n,
    "number_of_episodic_items": 0,
    "declarative_threshold": t,
    "episodic_threshold": 2.0,
    "legacy": True
  })

  headers = {
    'Content-Type': 'application/json',
    'Accept': 'application/json'
  }
  conn.request("PUT", "/plugins/settings/cc_prompt_settings", payload, headers)
  res = conn.getresponse()
  data = res.read()
  print(data.decode("utf-8"))


def set_model_temperature(t):
  import http.client
  import json

  conn = http.client.HTTPConnection(WEBSITE, PORT)
  payload = json.dumps({
    "openai_api_key": KEY,
    "model_name": "gpt-4o-2024-11-20",
    "temperature": t,
    "streaming": True
  })
  headers = {
    'Content-Type': 'application/json',
    'Accept': 'application/json'
  }
  conn.request("PUT", "/llm/settings/LLMOpenAIChatConfig", payload, headers)
  res = conn.getresponse()
  data = res.read()
  print(data.decode("utf-8"))

def set_embedder():
  import http.client
  import json

  conn = http.client.HTTPConnection(WEBSITE, PORT)
  payload = json.dumps({
    "model": "text-embedding-ada-002",
    "openai_api_key": KEY
  })
  headers = {
    'Content-Type': 'application/json',
    'Accept': 'application/json'
  }
  conn.request("PUT", "/embedder/settings/EmbedderOpenAIConfig", payload, headers)
  res = conn.getresponse()
  data = res.read()
  print(data.decode("utf-8"))

# Data

In [18]:
prolog_tree_dutch = """
  directive_2010_64_nl - art32a_1

  Article 32a(1) Dutch Code of Criminal Procedure
  Option: essentialDocument

  Explanation:

  has_right(right_to_translation, nl, art32a_1, mario, essentialDocument)
  has_right(art32a_1, mario, right_to_translation, essentialDocument)
  person_status(mario, suspect) [FACT]
  person_request_submitted(mario, essential_document) [FACT]
  Auxiliaries:

  art1_4 - cost - state

  Article 1(4) The Criminal Cases Fees Act
  Explanation:

  auxiliary_right(art1_4, art32a_1, mario, cost, state)
  auxiliary_right(art1_4, mario, cost, state)
"""

prolog_tree_directive = """
  directive_2010_64 - art3_1

  Article 3
  Option: essentialDocument

  Explanation:

  has_right(right_to_translation, dir, art3_1, mario, essentialDocument)
    has_right(art3_1, mario, right_to_translation, essentialDocument)
      proceeding_language(mario, dutch) [FACT]
      essential_document(art3_3, mario, documents)
        authority_decision(mario, essential_document) [FACT]
      not(person_understands(mario, dutch))

  Auxiliaries:

  art4 - cost - state

  Article 4
  Explanation:

  auxiliary_right(art4, art3_1, mario, cost, state)
      auxiliary_right(art4, mario, cost, state)

  Properties:

  art3_7 - form - oral

  Article 3.7
  Explanation:

  right_property(art3_7, art3_1, mario, form, oral)
      right_property(art3_7, mario, form, oral)
          not(proceeding_event(mario, prejudice_fairness))
"""

# Prompts

In [19]:
def prompt_0():
    return """
        You have been provided a Prolog inference tree using a legal norm in a specific case (Prolog Tree).
        Provide the following info according to the given structure:
        
        Summary: simplified text of the legal norm. Use everyday langaguage with a serious register;
        What Rights do You Have: all main rights referenced by a corresponding positive Prolog statement;
        Why do You Have Them: inference steps and reasoning that led to the the rights. Use all the Prolog terms in the explanation explicitly referencing the original Prolog when needed.
        
        Use enumerations in the 'What Rights do You Have' and 'Why do You Have Them' sections if needed.
    """

def prompt_1(res_dir, res_national):

  _prompt_1_1 = """
    You have received two legal sources. Compare them according to the following steps:
    1 - Make a summary of the legal case referencing the Prolog facts in the 'Why do You Have Them' section. Make one summary for each legal source;
    2 - Compare which Prolog facts are in common between the two legal sources, and which are the source of differences;
    3 - analyse consequences deriving from step 1 and 2. Explain your reasoning;
    4 - Which legal source favours the individual?

    THE ANALYSIS MUST FOCUS ON THE CASE AND NOT ON THE GENERAL LEGAL SOURCES
  """

  def _memories(res, n):
    return  "\n\n".join([x[2] for x in res[1][:n]])

  return f"""
    Legal Source:

    {res_dir[0]}

    Legal Source:

    {res_national[0]}

    Context:

    {_memories(res_dir, 5)}

    {_memories(res_national, 5)}


  """ + _prompt_1_1


# Upload Memories (skip if already uploaded)

In [38]:
set_embedder()

{"name":"EmbedderOpenAIConfig","value":{"model":"text-embedding-ada-002","openai_api_key":"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"}}


In [39]:
upload_all_memories_folder("directive-art", 4096, "memory/crossjustice/directive")

{"filename":"dir_2010_64_art-10.txt","content_type":"text/plain","info":"File is being ingested asynchronously"}
{"filename":"dir_2010_64_art-7.txt","content_type":"text/plain","info":"File is being ingested asynchronously"}
{"filename":"dir_2010_64_art-8.txt","content_type":"text/plain","info":"File is being ingested asynchronously"}
{"filename":"dir_2010_64_art-6.txt","content_type":"text/plain","info":"File is being ingested asynchronously"}
{"filename":"dir_2010_64_art-3.txt","content_type":"text/plain","info":"File is being ingested asynchronously"}
{"filename":"dir_2010_64_art-11.txt","content_type":"text/plain","info":"File is being ingested asynchronously"}
{"filename":"dir_2010_64_art-9.txt","content_type":"text/plain","info":"File is being ingested asynchronously"}
{"filename":"dir_2010_64_art-4.txt","content_type":"text/plain","info":"File is being ingested asynchronously"}
{"filename":"dir_2010_64_art-1.txt","content_type":"text/plain","info":"File is being ingested asynchr

In [60]:
upload_all_memories_folder("dutch-artt", 4096, "memory/crossjustice/dutch")

{"filename":"dutch_ccp_art-187d.txt","content_type":"text/plain","info":"File is being ingested asynchronously"}
{"filename":"dutch_ccp_art-183.txt","content_type":"text/plain","info":"File is being ingested asynchronously"}
{"filename":"dutch_ccp_art-5.txt","content_type":"text/plain","info":"File is being ingested asynchronously"}
{"filename":"dutch_ccp_art-11.txt","content_type":"text/plain","info":"File is being ingested asynchronously"}
{"detail":{"error":"MIME type None not supported. Admitted types: application/pdf - text/plain - text/markdown - text/html"}}
{"filename":"dutch_ccp_art-187b.txt","content_type":"text/plain","info":"File is being ingested asynchronously"}
{"filename":"dutch_ccp_art-12a.txt","content_type":"text/plain","info":"File is being ingested asynchronously"}
{"filename":"dutch_ccp_art-30.txt","content_type":"text/plain","info":"File is being ingested asynchronously"}
{"filename":"dutch_ccp_art-12f.txt","content_type":"text/plain","info":"File is being ingest

# Task 1

In [61]:
clean_history("directive-art")
clean_history("dutch-artt")

set_model_temperature(0.0)
set_declarative_memories(10, 0.4)
dutch = prompt("dutch-artt", f"Prolog Tree: \n\n {prolog_tree_dutch} \n\n {prompt_0()}")
directive = prompt("directive-art", f"Prolog Tree: \n\n {prolog_tree_directive} \n\n {prompt_0()}")

{"detail":"No conversation history found for the user directive-art"}
{"deleted":true}
{"name":"LLMOpenAIChatConfig","value":{"openai_api_key":"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","model_name":"gpt-4o-2024-11-20","temperature":0.0,"streaming":true}}
{"name":"cc_prompt_settings","value":{"language":"English","only_local_responses":false,"prompt_prefix":"","disable_episodic_memories":true,"disable_declarative_memories":false,"disable_procedural_memories":true,"number_of_declarative_items":10,"number_of_episodic_items":0,"declarative_threshold":0.4,"episodic_threshold":2.0,"legacy_mode":true,"legacy":true}}
Connection opened!
{'type': 'chat', 'user_id': 'dutch-artt', 'content': "### Summary:\nIf you are a suspect in a criminal case and you do not understand Dutch well enough, you can ask for important legal documents to be translated into a language you underst

In [62]:
print_response(dutch)

Output:

### Summary:
If you are a suspect in a criminal case and you do not understand Dutch well enough, you can ask for important legal documents to be translated into a language you understand. You must make this request in writing, explain why you need the translation, and specify which documents you want translated.

### What Rights do You Have:
1. **Right to Translation**: You have the right to request translations of essential legal documents into a language you understand (as per `art32a_1`).
2. **Right to Cost Coverage**: The state is responsible for covering the costs of these translations (as per `art1_4`).

### Why do You Have Them:
1. **Right to Translation**:
   - The Prolog statement `has_right(right_to_translation, nl, art32a_1, mario, essentialDocument)` confirms that Mario, as a suspect, has the right to translation of essential documents under Article 32a(1) of the Dutch Code of Criminal Procedure.
   - This right is triggered because Mario is identified as a suspec

In [63]:
print_response(directive)

Output:

### Summary:
If you are involved in criminal proceedings and do not understand the language being used, you have the right to receive translations of important documents. These translations must be provided in a timely manner and be of good quality to ensure fairness in the proceedings. In some cases, an oral translation or summary may be given instead of a written one, as long as it does not harm the fairness of the case. The costs of these translations are covered by the state, regardless of the outcome of the case.

### What Rights do You Have:
1. **Right to Translation of Essential Documents**: You have the right to receive translations of documents that are critical for your defense and for ensuring fairness in the proceedings (Article 3.1).
2. **Right to Oral Translation or Summary**: In certain cases, you may receive an oral translation or summary instead of a written one, provided it does not harm the fairness of the proceedings (Article 3.7).
3. **Right to Cost-Free T

# Task 2

In [64]:
clean_history("clean")
set_model_temperature(0.0)
set_declarative_memories(10, 0.4)
comparison = prompt("clean", prompt_1(directive, dutch))

{"detail":"No conversation history found for the user clean"}
{"name":"LLMOpenAIChatConfig","value":{"openai_api_key":"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","model_name":"gpt-4o-2024-11-20","temperature":0.0,"streaming":true}}
{"name":"cc_prompt_settings","value":{"language":"English","only_local_responses":false,"prompt_prefix":"","disable_episodic_memories":true,"disable_declarative_memories":false,"disable_procedural_memories":true,"number_of_declarative_items":10,"number_of_episodic_items":0,"declarative_threshold":0.4,"episodic_threshold":2.0,"legacy_mode":true,"legacy":true}}
Connection opened!
{'type': 'chat', 'user_id': 'clean', 'content': "### Step 1: Summary of the Legal Case Referencing the Prolog Facts\n\n#### Legal Source 1:\n- **Summary**: Mario, a suspect in criminal proceedings, does not understand Dutch, the language of the proceedings (`not(p

In [65]:
print_response(comparison)

Output:

### Step 1: Summary of the Legal Case Referencing the Prolog Facts

#### Legal Source 1:
- **Summary**: Mario, a suspect in criminal proceedings, does not understand Dutch, the language of the proceedings (`not(person_understands(mario, dutch))`). As a result, he has the right to receive translations of essential documents to ensure fairness and his ability to defend himself (`has_right(right_to_translation, dir, art3_1, mario, essentialDocument)`). Essential documents include decisions depriving liberty, charges, indictments, and judgments, among others. Oral translations or summaries may be provided instead of written translations if they do not harm the fairness of the proceedings (`right_property(art3_7, art3_1, mario, form, oral)`). The state is responsible for covering the costs of these translations, regardless of the case's outcome (`auxiliary_right(art4, art3_1, mario, cost, state)`).

#### Legal Source 2:
- **Summary**: Mario, as a suspect (`person_status(mario, susp