# Elephant-L4 experiments notebook

In [31]:
import ast
import datetime
import json
import os
import time

In [2]:
# Groq API key. Read from the environment so the secret never has to be typed
# into (or committed with) the notebook; falls back to an empty string, which
# was the previous hard-coded value.
API_KEY = os.environ.get('GROQ_API_KEY', '')

In [4]:
# We use Langchain's API to connect to Groq
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq

In [89]:
# We set up the answer Pydantic class; it is handed to `with_structured_output`
# so the model's reply is parsed into these typed fields.
# We get much better returns this way
from langchain_core.pydantic_v1 import BaseModel, Field


# Structured answer requested from the model for a single legal provision.
# NOTE(review): documented with `#` comments rather than a class docstring on
# purpose - a docstring may be included in the schema sent to the model and
# would then change the prompt. Confirm before adding one.
class L4Scenario(BaseModel):
    # The `description` strings below are runtime text (part of the schema),
    # not comments - edit them with care.
    # Fact pattern in which the provision could apply.
    scenario: str = Field(description='Rich and realistic factual matrix in which this provision might come up.')
    # Result of applying the provision to a fact pattern.
    outcome: str = Field(description='Realistic outcome when applying the provision.')
    # Fact pattern where interpreting the provision is difficult.
    edge_case: str = Field(description='Edge case of a rich and realistic factual matrix where interpretation of the provision might be hairy.')

In [90]:
# Chat model served by Groq; temperature=0 asks for the least random sampling.
model = ChatGroq(
    model_name="llama3-70b-8192",
    groq_api_key=API_KEY,
    temperature=0,
)
# Same model, but replies are parsed into the L4Scenario schema.
structured_llm = model.with_structured_output(L4Scenario)

In [91]:
# Test-run: build the prompt -> structured-output chain.
# The system prompt is assembled line by line; each line ends with a newline.
# (Author's note: the last line of the system prompt can be taken out to be brief.)
system = (
    "You are a helpful, experienced, expert, legal academic, with a lot of life experience.\n"
    "You will be shown a legal provision.\n"
    "Be as detailed, comprehensive as possible.\n"
    "Return the L4Scenario only.\n"
)
# The human turn is just the `text` variable supplied at invoke time.
human = "{text}"
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", human),
    ]
)

# Pipe the rendered prompt into the structured-output model.
chain = prompt | structured_llm

In [15]:
# Load all acts to find the latest one.
# We use the "no-content" version of the legislation data (per the original
# author: the compressed variant, chosen for faster load times).
# The file is JSON Lines: one JSON document per line. The encoding is pinned to
# UTF-8 so the result does not depend on the platform default, and blank lines
# (e.g. a trailing newline at end of file) are skipped instead of crashing
# json.loads.
with open('2024-05-17-acts_no-content.jl', 'r', encoding='utf-8') as f:
    acts = [json.loads(line) for line in f if line.strip()]

In [23]:
# Find the most recently dated act whose status is 'Current'.
# The first key of each record looks like 'ASA2007-/Date(1680278400000)/',
# where the number in parentheses is a Unix timestamp in milliseconds.
latest_date = 0
latest_act = None
dates = []  # not used by this cell; kept in case other cells still reference it
for act in acts:
    key = next(iter(act))
    if act[key]['doc_status'] != 'Current':
        continue
    # 'ASA2007-/Date(1680278400000)/' -> 'Date(1680278400000)' -> 1680278400000
    date = int(key.split('/')[1].split('(')[1].replace(')', ''))
    if date > latest_date:
        latest_date = date
        latest_act = key

# Fail with a clear message instead of a NameError on `latest_act` below.
if latest_act is None:
    raise ValueError("No act with doc_status == 'Current' was found in `acts`")

# NOTE: fromtimestamp() converts using the machine's local timezone, so the
# printed calendar date can differ by a day between machines.
latest_date_plaintext = datetime.datetime.fromtimestamp(latest_date / 1000).strftime('%Y-%m-%d')
print(latest_act, latest_date_plaintext)

TCA2005-/Date(1715270400000)/ 2024-05-10


In [24]:
# Load the JSON file for the act identified above (TCA2005).
# The encoding is pinned to UTF-8 so loading does not depend on the platform
# default.
with open('TCA2005-Date(1715270400000)_alt.json', 'r', encoding='utf-8') as f:
    test_act = json.load(f)

In [43]:
# Inspect the loaded act; the notebook echoes the whole dict as the cell output.
test_act

{'provisions': [{'id': 'No ID',
   'number': '1',
   'headers': [],
   'text': ['1.\xa0\xa0This Act is the Trust Companies Act 2005.'],
   'amendments': [],
   'nested_provisions': [],
   'context': {'part': {'id': 'P11-',
     'number': 'PART 1',
     'header': 'PRELIMINARY'},
    'division': None,
    'section': None}},
  {'id': 'No ID',
   'number': '2',
   'headers': [],
   'text': ['2.\xa0\xa0In this Act, unless the context otherwise requires\xa0—“advocate and solicitor” means an advocate and solicitor of the Supreme Court or a foreign lawyer as defined in section\xa02(1) of the Legal Profession Act\xa01966;“Authority” means the Monetary Authority of Singapore established under the Monetary Authority of Singapore Act\xa01970;“book” includes any record, register, account, deed, writing and information, however compiled, recorded or stored, whether in written or printed form or on microfilm or in any other electronic form or otherwise;“capital markets products” has the meaning given

In [63]:
# Human-message template; the literal token REPLACE is swapped for the
# provision text with str.replace() before the message is sent.
human_message_template = "This is the provision: REPLACE"

In [92]:
# Accumulates one dict per processed provision, with keys 'num', 'text' and
# 'return' (filled in by the processing loop).
answers = []

In [93]:
def invoke(prompt):
    """Run the prompt -> structured-output chain on one human message.

    Args:
        prompt: Text passed to the chain as its ``text`` input variable.

    Returns:
        Whatever ``chain.invoke`` returns for that input.
    """
    chain_input = {"text": prompt}
    return chain.invoke(chain_input)

In [94]:
# Ask the model for an L4Scenario for every provision of the act and collect
# the results in `answers`.
MAX_ATTEMPTS = 2        # one initial call plus one retry
RATE_LIMIT_PAUSE_S = 5  # pause between provisions so we don't hit the rate limit

for n, provision in enumerate(test_act['provisions']):
    print(f'Processing provision no. {n}')
    num = provision['number']
    # Only the first entry of the provision's text list is used.
    text = provision['text'][0]
    # Deliberately NOT named `prompt`: that global holds the ChatPromptTemplate
    # the chain is built from, and rebinding it to a string here would break a
    # later re-run of the chain-building cell.
    provision_prompt = human_message_template.replace('REPLACE', text)

    answer = None
    for _attempt in range(MAX_ATTEMPTS):
        try:
            answer = invoke(provision_prompt)
            break
        except Exception as e:  # best effort: report the error and try again
            print(e)
    else:
        # Every attempt raised; record the provision with no answer.
        print('Failed to get answer for provision', num)

    print(num, ' - ', answer)
    answers.append({
        'num': num,
        'text': text,
        'return': answer
    })
    time.sleep(RATE_LIMIT_PAUSE_S)

Processing provision no. 0
1  -  scenario='A company is established under the Trust Companies Act 2005.' outcome='The company is a trust company.' edge_case='The company is not a trust company, but it is established under the Trust Companies Act 2005.'
Processing provision no. 1
2  -  scenario='A foreign lawyer is trying to understand the definition of an advocate and solicitor in the context of the Monetary Authority of Singapore Act.' outcome='The lawyer understands that an advocate and solicitor refers to an advocate and solicitor of the Supreme Court or a foreign lawyer as defined in section 2(1) of the Legal Profession Act 1966.' edge_case='A foreign lawyer is trying to understand the definition of an advocate and solicitor in the context of the Monetary Authority of Singapore Act.'
Processing provision no. 2
3  -  scenario='A foreign company wants to establish a trust business in Singapore.' outcome='The company will be guilty of an offence and liable to a fine and/or imprisonmen

In [60]:
# # save answers as jsonlines file
# # This is the first run
# with open('tca2015-l4_scenario_test-simple.jl', 'w') as f:
#     for answer in answers:
#         pydantic_class = answer['return']
#         # convert pydantic class to tuple
#         tup = (pydantic_class.scenario, pydantic_class.outcome, pydantic_class.edge_case)
#         answer['return'] = tup
#         f.write(json.dumps(answer))

In [95]:
# Save answers as a JSON Lines file: one JSON object per line.
# This is the second run where I changed the prompts to get it to provide a richer set of facts
# No discernible difference despite the changes
# NOTE(review): the file name says 'tca2015' although the act loaded is TCA2005;
# left unchanged so anything already pointing at this file keeps working - confirm.
with open('tca2015-l4_scenario_test-comprehensive.jl', 'w') as f:
    for answer in answers:
        result = answer['return']
        # Convert the pydantic object to a tuple (serialised as a JSON array).
        # A failed provision has `None` here and is written as null; a value
        # that is already a plain tuple/list is passed through unchanged.
        if result is not None and hasattr(result, 'scenario'):
            result = (result.scenario, result.outcome, result.edge_case)
        # Build a new record instead of mutating `answers`, so this cell can be
        # re-run safely.
        record = {**answer, 'return': result}
        # The trailing newline is what makes the file valid JSON Lines.
        f.write(json.dumps(record) + '\n')