In [1]:
import os
import json

import rag_functions as rag
import llm_functions as llm

#### Choose documents

In [2]:
# Folder holding the source judgment PDFs (machine-specific absolute path).
data_path = "/home/xinmeng/cpib/data"

# Map an integer document id to the full path of each PDF. The same ids are
# used later as keys of `info` and as indices into `collections`.
files = dict(enumerate(
    os.path.join(data_path, pdf_name)
    for pdf_name in ("2022_SGHC_5.pdf", "2017_SGHC_225.pdf")
))

#### Generate document information

In [3]:
# Initialise the rag helper module (what this sets up is defined in rag_functions).
rag.initiate()

# Both helpers are called with generate_new=False — presumably this reuses
# previously generated results instead of recomputing; confirm in rag_functions.
raw_elements = rag.get_raw_elements(files, generate_new=False)
info = rag.get_info(files, raw_elements, generate_new=False)

# Show the document ids that `info` is keyed by.
info.keys()

dict_keys([0, 1])

#### Convert information to embeddings

In [4]:
# Location of the embedding model (machine-specific absolute path).
embedding_model_path = "/home/xinmeng/cpib/models/gte-large"

# Embedding callable handed to rag.get_collections below.
generateEmbeddings = rag.MyEmbeddingFunction(embedding_model_path=embedding_model_path)

# Collections indexed by document id; with generate_new=False the recorded run
# reports stored collections ("data_0", "data_1") being retrieved.
collections = rag.get_collections(generateEmbeddings, info, generate_new=False)

# Inspection calls, left disabled:
# collections[0].peek()      # returns a list of the first 10 items in the collection
# collections[0].count()     # returns the number of items in the collection

Stored collection retrieved: "data_0"
Stored collection retrieved: "data_1"


In [5]:
# Second embedding function, built with query=True; this is the one passed to
# rag.get_results and to the llm.generate_* calls in the cells that follow.
generateQueryEmbeddings = rag.MyEmbeddingFunction(embedding_model_path=embedding_model_path, query=True)

#### Initiate LLM 

In [6]:
# Location of the LLM checkpoint (machine-specific absolute path).
mistral_7b_orca_model_path = "/home/xinmeng/cpib/models/Mistral-7B-OpenOrca-GPTQ"

# Initialise the LLM inside llm_functions; nothing is returned here, so the
# later llm.generate_* calls presumably rely on state kept by that module.
llm.initiate_mistral(mistral_7b_orca_model_path)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
CUDA extension not installed.
CUDA extension not installed.


#### Test Q&A

In [7]:
# Smoke test of the retrieve-then-answer flow on document 0.
query = "who are the accused"

# Retrieve from document 0's collection (the trailing 5 is presumably the number
# of hits requested — confirm in rag.get_results), convert the hits using that
# document's contexts, then have the LLM answer the query from them.
results = rag.get_results(generateQueryEmbeddings, collections[0], query, 5)
relevant_results = rag.convert_results(results, info[0]["contexts"])
ans = llm.generate_ans(query, relevant_results)
llm.get_ans(ans)



'The appellant and Yong are the accused.'

#### Information received from Case Info

In [7]:
# People named in the case; each entry is later used as a key of `details`.
subjects = [
    'Derek Gordon Pigg, the appellant',
    'Yong Hock Guan Dennis',
    'Ong Eng Kee',
]

# Per-subject results, filled in by the extraction cells further down.
details = dict()

# Case-level fields; every one starts out unset (None).
other_details = dict.fromkeys(
    ('Date of charge', 'Period of crime', 'Summary of crime')
)

#### Prompt examples

In [8]:
# Load the few-shot prompt examples used by the llm.generate_* calls below.
# The encoding is pinned to UTF-8 (the standard encoding for JSON text);
# without it open() falls back to the platform's locale encoding.
with open('prompt_examples.json', 'r', encoding='utf-8') as f:
    prompt_examples = json.load(f)

In [9]:
# List the example names available in the loaded JSON.
prompt_examples.keys()

dict_keys(['example_case', 'example_case_short_1', 'example_case_short_2', 'example_case_short_3', 'example_david', 'example_emma', 'example_deets_david', 'example_deets_emma', 'example_crime_david', 'example_crime_emma', 'example_testimony_david', 'example_testimony_emma'])

In [10]:
# Bind each few-shot example to a module-level name identical to its JSON key.
(
    example_case,
    example_david,
    example_emma,
    example_deets_david,
    example_deets_emma,
    example_crime_david,
    example_crime_emma,
    example_testimony_david,
    example_testimony_emma,
) = (
    prompt_examples[example_key]
    for example_key in (
        'example_case',
        'example_david',
        'example_emma',
        'example_deets_david',
        'example_deets_emma',
        'example_crime_david',
        'example_crime_emma',
        'example_testimony_david',
        'example_testimony_emma',
    )
)

In [11]:
# Free-text retrieval query about the first subject. The backslash continuations
# sit inside the literal, so the indentation of the following lines is part of
# the query string exactly as written.
query = f"Details of {subjects[0]}. \
          nickname, job title, role, company, organisation? \
          How many charges? Form of gratification. Is subject guilty"

# Retrieve from document 0's collection (8 is passed as the last argument) and
# convert the hits to text using that document's contexts.
results = rag.get_results(generateQueryEmbeddings, collections[0], query, 8)
retrieved_text = rag.convert_results(results, info[0]["contexts"])

# Print every line of the retrieved text followed by a space and a blank line.
print(*retrieved_text.split('\n'), sep=' \n\n', end=' \n\n')

  

Footnote 1 of Public Prosecutor v Derek Gordon Pigg [2020] SGDC 278 (“GD”). 6 ROP at pp 8–15. ROP at pp 39, 541–542 and 1988 (NE dated 10 April 2018 at p 11 lines 4–11; NE dated 22 October 2018 at p 102 line 19 to p 103 line 10; Prosecution’s Closing Submissions dated 14 August 2019 (“PCS”) at para 28(a)). ROP at pp 40, 541 and 1988 (NE dated 10 April 2018 at p 12 lines 10–23; NE dated 22 October 2018 at p 102 lines 19–23; PCS at para 28(b)).  

8 The District Judge accepted Yong’s evidence and convicted the appellant. His key findings are as follows (see Public Prosecutor v Derek Gordon Pigg [2020] SGDC 278 (“GD”) at [28]–[36], [37]–[55] and [61]–[62]):  

Wong Hin Pkin Wendell and Andrew Chua Ruiming (Drew & Napier LLC) for the appellant and applicant; David Koh and Janice See (Attorney-General’s Chambers) for the respondent.  

appellant’s appeal against his conviction for all eight charges under s 6(a) of the PCA.  

45 Appellant’s Submissions at para 8. Appellant’s Submissions

In [None]:
# First pass per subject: generate the basic particulars with the "deets"
# few-shot examples and store the parsed result under the subject's name.
for s in subjects:
    deets = llm.generate_details(generateQueryEmbeddings, collections[0], info[0]["contexts"], s, 
                                 example_case, example_david, example_deets_david, 
                                 example_case, example_emma, example_deets_emma, stream=False)
    details[s] = llm.get_details(deets, s)
# Display everything collected so far.
details

In [None]:
# Second pass per subject: crime-related fields, merged into the entries created
# by the previous cell (details[s] must already exist for .update to work).
# NOTE(review): step_1() further down performs this pass with
# llm.generate_details(...) + llm.get_crime(...), whereas this cell uses
# llm.generate_crime(...) + llm.get_details(...) — confirm which pairing is intended.
for s in subjects:
    crime = llm.generate_crime(generateQueryEmbeddings, collections[0], info[0]["contexts"], s, 
                                 example_case, example_david, example_crime_david, 
                                 example_case, example_emma, example_crime_emma, stream=False)
    details[s].update(llm.get_details(crime, s))
# Display everything collected so far.
details

In [13]:
# Third pass per subject: generate the testimony text with the testimony
# few-shot examples and store it under the 'Testimony' key of that subject's
# existing entry.
for s in subjects:
    testimony = llm.generate_testimony(generateQueryEmbeddings, collections[0], info[0]["contexts"], s, 
                                       example_case, example_david, example_testimony_david, 
                                       example_case, example_emma, example_testimony_emma, stream=False)
    details[s]['Testimony'] = llm.get_testimony(testimony, s)
# Display everything collected so far.
details

{'Derek Gordon Pigg': {'Nick': 'Derek',
  'Occupation': 'Manager',
  'Organisation': 'Transocean Eastern Pte Ltd',
  'Company nick': 'Transocean',
  'Number of charges': '8',
  'Role': 'Receiver',
  'Form of gratification': 'Money',
  'Charge': 'NA',
  'Admission to guilt': 'Yes',
  'Testimony': 'NA.'},
 'Yong Hock Guan Dennis': {'Nick': 'Yong',
  'Occupation': 'Senior sales manager',
  'Organisation': 'Mid-Continent Tubular Pte Ltd',
  'Company nick': 'MCT',
  'Number of charges': '15',
  'Role': 'Giver',
  'Form of gratification': 'Money',
  'Charge': 's 6(a) and 6(b) of the Prevention of Corruption Act',
  'Admission to guilt': 'Yes',
  'Testimony': 'According to the testification by Yong Hock Guan Dennis, the amount of bribe that was paid or received was not fixed but generally set aside 1 to 2% of the total price.'},
 'Ong Eng Kee': {'Nick': 'Ong',
  'Occupation': 'Managing Director',
  'Organisation': 'MCT',
  'Company nick': 'MCT',
  'Number of charges': '15',
  'Role': 'Giver',

In [11]:
# Hard-coded per-subject fields (no 'Testimony' key); presumably pasted from an
# earlier extraction run so later cells need not repeat the LLM calls.
details = {
    "Derek Gordon Pigg": {
        "Nick": "Derek",
        "Occupation": "Manager",
        "Organisation": "Transocean Eastern Pte Ltd",
        "Company nick": "Transocean",
        "Number of charges": "8",
        "Role": "Receiver",
        "Form of gratification": "Money",
        "Charge": "NA",
        "Admission to guilt": "Yes",
    },
    "Yong Hock Guan Dennis": {
        "Nick": "Yong",
        "Occupation": "Senior sales manager",
        "Organisation": "Mid-Continent Tubular Pte Ltd",
        "Company nick": "MCT",
        "Number of charges": "15",
        "Role": "Giver",
        "Form of gratification": "Money",
        "Charge": "s 6(a) and 6(b) of the Prevention of Corruption Act",
        "Admission to guilt": "Yes",
    },
    "Ong Eng Kee": {
        "Nick": "Ong",
        "Occupation": "Managing Director",
        "Organisation": "MCT",
        "Company nick": "MCT",
        "Number of charges": "15",
        "Role": "Giver",
        "Form of gratification": "Money",
        "Charge": "Section 6(b) of the Prevention of Corruption Act",
        "Admission to guilt": "Yes",
    },
}

In [14]:
# Ask for each subject's passport-style particulars and print the parsed answer
# (in the recorded run the context contained no age, gender or nationality).
for s in subjects:
    passport = llm.generate_passport(generateQueryEmbeddings, collections[0], info[0]["contexts"], s, stream=True)
    print(llm.get_passport(passport, s))
# Storing the result is disabled. NOTE(review): the disabled line passes
# `testimony` rather than `passport` — fix before re-enabling.
#     details[s]['Passport'] = llm.get_passport(testimony, s)
# details

Based on the provided context, the age, gender, and nationality of the subject are not available. Therefore, the answer is: <NA>
Based on the provided context, the age, gender, and nationality of the subject are not available.
Based on the provided context, the age, gender, and nationality of the subject are not available. Therefore, the answer is: <NA>


In [22]:
# Generate a case summary from document 0, passing in the per-subject `details`
# gathered so far, and store the parsed text as the 'Summary of crime' field.
summary = llm.generate_summary(generateQueryEmbeddings, collections[0], info[0]["contexts"], details, stream=True)
other_details['Summary of crime'] = llm.get_summary(summary)
# Display the case-level fields.
other_details

{'Date of charge': None,
 'Period of crime': None,
 'Summary of crime': 'In this case, Yong Hock Guan Dennis was charged with corruption and criminal breach of trust. He pleaded guilty in 2015 and faced multiple charges, including conspiring with Ong Eng Kee to pay a bribe of S$78,806 to Derek Gordon Pigg, another charge for conspiring with Ong to pay a bribe of S$99,008 to Derek (which was not paid), and an amalgamated charge of criminal breach of trust for keeping part of the "marketing expenses" for himself on five occasions in 2008. Yong also admitted and consented to have 31 other charges taken into consideration for sentencing, including nine counts of abetting the giving of corrupt gratification and two counts of criminal breach of trust. Derek Gordon Pigg was convicted on eight charges under the Prevention of Corruption Act for accepting gratification from Yong. He was sentenced to 15 months\' imprisonment and ordered to pay a penalty of S$270,000.'}

In [17]:
# Charge date per subject. This pass receives the whole `details` dict as input,
# so it has to run after the per-subject entries exist.
for s in subjects:
    chargedate = llm.generate_chargeDate(generateQueryEmbeddings, collections[0], info[0]["contexts"], s, details, stream=False)
    details[s]['Charge date'] = llm.get_chargeDate(chargedate)
# Display everything collected so far.
details

{'Derek Gordon Pigg': {'Nick': 'Derek',
  'Occupation': 'Manager',
  'Organisation': 'Transocean Eastern Pte Ltd',
  'Company nick': 'Transocean',
  'Number of charges': '8',
  'Role': 'Receiver',
  'Form of gratification': 'Money',
  'Charge': 'NA',
  'Admission to guilt': 'Yes',
  'Charge date': 'May 2017'},
 'Yong Hock Guan Dennis': {'Nick': 'Yong',
  'Occupation': 'Senior sales manager',
  'Organisation': 'Mid-Continent Tubular Pte Ltd',
  'Company nick': 'MCT',
  'Number of charges': '15',
  'Role': 'Giver',
  'Form of gratification': 'Money',
  'Charge': 's 6(a) and 6(b) of the Prevention of Corruption Act',
  'Admission to guilt': 'Yes',
  'Charge date': '2015'},
 'Ong Eng Kee': {'Nick': 'Ong',
  'Occupation': 'Managing Director',
  'Organisation': 'MCT',
  'Company nick': 'MCT',
  'Number of charges': '15',
  'Role': 'Giver',
  'Form of gratification': 'Money',
  'Charge': 'Section 6(b) of the Prevention of Corruption Act',
  'Admission to guilt': 'Yes',
  'Charge date': '2015'

#### Display on Gradio

In [12]:
import gradio as gr
from gradio.themes.base import Base

import aspose.pdf as ap
import fitz
import string
import pickle
import os

In [21]:
# Hard-coded inputs for the press-release rendering below; presumably pasted
# from the outputs of earlier cells so this section can run without the LLM.
subjects = [
    'Derek Gordon Pigg',
    'Yong Hock Guan Dennis',
    'Ong Eng Kee',
]

# Per-subject fields, including the 'Charge date' key.
details = {
    'Derek Gordon Pigg': {
        'Nick': 'Derek',
        'Occupation': 'Manager',
        'Organisation': 'Transocean Eastern Pte Ltd',
        'Company nick': 'Transocean',
        'Number of charges': '8',
        'Role': 'Receiver',
        'Form of gratification': 'Money',
        'Charge': 'NA',
        'Admission to guilt': 'Yes',
        'Charge date': 'May 2017',
    },
    'Yong Hock Guan Dennis': {
        'Nick': 'Yong',
        'Occupation': 'Senior sales manager',
        'Organisation': 'Mid-Continent Tubular Pte Ltd',
        'Company nick': 'MCT',
        'Number of charges': '15',
        'Role': 'Giver',
        'Form of gratification': 'Money',
        'Charge': 's 6(a) and 6(b) of the Prevention of Corruption Act',
        'Admission to guilt': 'Yes',
        'Charge date': '2015',
    },
    'Ong Eng Kee': {
        'Nick': 'Ong',
        'Occupation': 'Managing Director',
        'Organisation': 'MCT',
        'Company nick': 'MCT',
        'Number of charges': '15',
        'Role': 'Giver',
        'Form of gratification': 'Money',
        'Charge': 'Section 6(b) of the Prevention of Corruption Act',
        'Admission to guilt': 'Yes',
        'Charge date': '2015',
    },
}

# Case-level fields; only the summary is filled in.
other_details = {
    'Date of charge': None,
    'Period of crime': None,
    'Summary of crime': 'In this case, Yong Hock Guan Dennis was charged with corruption and criminal breach of trust. He pleaded guilty in 2015 and faced multiple charges, including conspiring with Ong Eng Kee to pay a bribe of S$78,806 to Derek Gordon Pigg, another charge for conspiring with Ong to pay a bribe of S$99,008 to Derek (which was not paid), and an amalgamated charge of criminal breach of trust for keeping part of the "marketing expenses" for himself on five occasions in 2008. Yong also admitted and consented to have 31 other charges taken into consideration for sentencing, including nine counts of abetting the giving of corrupt gratification and two counts of criminal breach of trust. Derek Gordon Pigg was convicted on eight charges under the Prevention of Corruption Act for accepting gratification from Yong. He was sentenced to 15 months\' imprisonment and ordered to pay a penalty of S$270,000.',
}

In [13]:
# Inputs read by step_1() below: the subjects to extract, plus the collection
# and contexts of the second document (id 1). The previous subject list for
# document 0 is kept disabled for reference.
# subjects = ['Derek Gordon Pigg, the appellant', 'Yong Hock Guan Dennis', 'Ong Eng Kee']
subjects = ['Heng Tze Yong', 'Ong Seng Wee']
database = collections[1]
contexts = info[1]["contexts"]


def step_1():
    """Run every extraction pass for the names in the module-level ``subjects``.

    Reads the module-level ``subjects``, ``database``, ``contexts``,
    ``generateQueryEmbeddings`` and the few-shot ``example_*`` values.

    Returns:
        tuple: ``(details, other_details)``. ``details`` maps each subject to
        its extracted fields (including 'Testimony' and 'Charge date');
        ``other_details`` holds the case-level fields, of which only
        'Summary of crime' is filled in here.
    """
    per_subject = {}
    case_level = dict.fromkeys(('Date of charge', 'Period of crime', 'Summary of crime'))

    # Pass 1: basic particulars, creating each subject's entry.
    for name in subjects:
        raw_details = llm.generate_details(
            generateQueryEmbeddings, database, contexts, name,
            example_case, example_david, example_deets_david,
            example_case, example_emma, example_deets_emma,
            stream=False,
        )
        per_subject[name] = llm.get_details(raw_details, name)

    # Pass 2: crime-related fields, merged into the existing entries.
    # NOTE(review): this calls llm.generate_details with the crime examples and
    # parses with llm.get_crime; an earlier notebook cell uses
    # llm.generate_crime + llm.get_details instead — confirm which is intended.
    for name in subjects:
        raw_crime = llm.generate_details(
            generateQueryEmbeddings, database, contexts, name,
            example_case, example_david, example_crime_david,
            example_case, example_emma, example_crime_emma,
            stream=False,
        )
        per_subject[name].update(llm.get_crime(raw_crime, name))

    # Pass 3: testimony text.
    for name in subjects:
        raw_testimony = llm.generate_testimony(
            generateQueryEmbeddings, database, contexts, name,
            example_case, example_david, example_testimony_david,
            example_case, example_emma, example_testimony_emma,
            stream=False,
        )
        per_subject[name]['Testimony'] = llm.get_testimony(raw_testimony, name)

    # Pass 4: charge dates. Kept as its own loop because the generator is given
    # the whole per-subject dict, which the passes above must have filled first.
    for name in subjects:
        raw_charge_date = llm.generate_chargeDate(
            generateQueryEmbeddings, database, contexts, name, per_subject, stream=False
        )
        per_subject[name]['Charge date'] = llm.get_chargeDate(raw_charge_date)

    # Case-level summary, generated from all the per-subject fields.
    raw_summary = llm.generate_summary(
        generateQueryEmbeddings, database, contexts, per_subject, stream=False
    )
    case_level['Summary of crime'] = llm.get_summary(raw_summary)

    return per_subject, case_level


def step_2(details=details, other_details=other_details):
    """Render the press-release text as an HTML fragment.

    Args:
        details: mapping of subject name -> dict that must contain the keys
            'Nick', 'Occupation', 'Organisation', 'Company nick',
            'Number of charges' and 'Charge'. Defaults to the module-level
            ``details`` object that existed when this function was defined.
        other_details: dict that must contain 'Summary of crime'. Defaults to
            the module-level ``other_details`` in the same way.

    Returns:
        str: HTML with one lettered entry per subject, the summary paragraph,
        the charges paragraph and the fixed CPIB boilerplate. Fields that are
        not extracted yet appear as visible ``<...>`` placeholders.

    Raises:
        KeyError: if a required key is missing from either mapping.
        IndexError: if there are more subjects than lowercase ASCII letters.
    """
    # Imported by name so it stays usable regardless of local variable names.
    from html import escape

    def esc(value):
        # The values are LLM output derived from uploaded documents and end up
        # in a gr.HTML component, so neutralise any markup they may contain.
        return escape(str(value))

    page = f"""
            On &lt;date of charge&gt;, {len(details)} individuals have been charged in Court for \
            allegedly committing corruption: <br><br>"""

    # Materialise the subject order once instead of on every iteration.
    names = list(details)
    last_index = len(names) - 1
    for i, subject in enumerate(names):
        person = details[subject]
        # Every entry but the last ends with "; and" plus a paragraph break.
        ending = "; and <br><br>" if i < last_index else "."
        page += f"""
                    {string.ascii_lowercase[i]}) {esc(subject)} ("{esc(person['Nick'])}") \
                    (&lt;chinese name&gt;, &lt;age&gt; &lt;gender&gt; &lt;nationality&gt;), \
                    {esc(person['Occupation'])} at {esc(person['Organisation'])} \
                    ("{esc(person['Company nick'])}"), at the material time{ending}"""

    page += "<br><br>"
    page += f"2. On/Between &lt;period of crime&gt;, {esc(other_details['Summary of crime'])}"

    page += "<br><br>"
    page += "3. For their alleged actions, "
    for subject in names:
        person = details[subject]
        page += f"{esc(subject)} faces {esc(person['Number of charges'])} charge(s) under the {esc(person['Charge'])}. "

    page += "<br><br>"
    # Fixed closing paragraphs; nothing here is interpolated.
    page += """
            4. Singapore adopts a strict zero-tolerance approach towards corruption. Any person who is \
            convicted of a corruption offence under Section 6 of the Prevention of Corruption Act \
            can be fined up to S$100,000 or sentenced to imprisonment of up to five years or both. 
            <br> <br>
            5. CPIB looks into all corruption-related complaints and reports, including anonymous \
            ones, and can be reached via the following channels: <br>
            a) Lodge an e-Complaint; <br>
            b) Email us at report@cpib.gov.sg; <br>
            c) Call the Duty Officer at 1800-376-0000; or <br>
            d) Write to us at the CPIB Headquarters @ 2 Lengkok Bahru, S159047.
            <br> <br>
            6. Where possible, the report should include the following information: <br>
            a) Where, when and how the alleged corrupt act happened? <br>
            b) Who was involved and what were their roles? <br>
            c) What was the bribe given and the favour shown?
            <br> <br>
            Corrupt Practices Investigation Bureau"""

    return page

def generate():
    """Produce the press-release HTML: extract with step_1, render with step_2."""
    subject_details, case_details = step_1()
    rendered = step_2(details=subject_details, other_details=case_details)
    return rendered


with gr.Blocks() as demo:

    # Page title; the closing </center> tag is now well-formed.
    gr.Markdown("<center><h1> Press Release Generation </h1></center>")

    def upload_file(file):
        """Handle an upload: pickle the received object and return its name.

        NOTE(review): writing the uploaded object to "test.pickle" looks like a
        debugging aid — confirm it is still wanted. The returned ``file.name``
        is what the ``gr.File`` output component receives.
        """
        with open("test.pickle", "wb") as f:
            pickle.dump(file, f)

        return file.name

    # Upload widgets: the button's upload event feeds the file display.
    file_output = gr.File()
    upload_button = gr.UploadButton("Click to Upload a File", file_types=None)
    upload_button.upload(upload_file, upload_button, file_output)

    # "Generate" runs the whole pipeline and shows the resulting HTML.
    btn = gr.Button(value="Generate", variant="primary", elem_id="warning", elem_classes="feedback")
    txt_1 = gr.HTML(label="test")

    btn.click(generate, inputs=[], outputs=[txt_1])

# Launch once the layout is fully defined, as in the Gradio Blocks examples.
# share=True publishes a temporary public URL — disable it when the documents
# being processed should not be reachable from outside this machine.
demo.launch(share=True)

Running on local URL:  http://127.0.0.1:7860
Running on public URL: https://5faf08ca0252678ec4.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




{'Heng Tze Yong': {'Nick': 'Heng', 'Occupation': 'Appellant', 'Organisation': 'NA', 'Company nick': 'NA', 'Role': 'Receiver', 'Number of charges': '2', 'Form of gratification': 'Money', 'Charge': 'NA', 'Admission to guilt': 'Yes', 'Testimony': 'The amount of bribe that was paid or received is S$3,000 for the first bribe, S$7,000 for the second bribe, and NA for the TIC charge.', 'Charge date': '8 March 2017'}, 'Ong Seng Wee': {'Nick': 'Ben Ong', 'Occupation': 'Facility Manager', 'Organisation': 'Micron Semiconductor Asia Pte Ltd', 'Company nick': 'Micron', 'Role': 'Receiver', 'Number of charges': '2', 'Form of gratification': 'Money', 'Charge': 'NA', 'Admission to guilt': 'Yes', 'Testimony': 'S$3,000 and S$7,000.', 'Charge date': 'NA'}}
