In [1]:
%pip install PyPDF2

Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
Installing collected packages: PyPDF2
Successfully installed PyPDF2-3.0.1
Note: you may need to restart the kernel to use updated packages.


In [3]:


import PyPDF2

pdfpath = "ukpga_20250022_en.pdf"  # input PDF; relative path, resolved against the kernel's working directory


def extracttext(pdf_file):
    """Return the concatenated text of every page in the PDF at ``pdf_file``.

    The file is opened in binary mode and read with ``PyPDF2.PdfReader``.
    Each page's extracted text is followed by a newline, pages are joined in
    document order, and leading/trailing whitespace is stripped from the
    combined string before it is returned.
    """
    with open(pdf_file, "rb") as handle:
        pdf = PyPDF2.PdfReader(handle)
        # Build the per-page chunks inside the with-block so the file is
        # still open while the pages are being read.
        chunks = [page.extract_text() + "\n" for page in pdf.pages]
    return "".join(chunks).strip()

# Extract the whole document once and keep it in raw_text.
raw_text = extracttext(pdfpath)

print(raw_text[:1000])  # print first 1000 chars


Universal  Credit  Act 
2025 
CHAPTER  22 
Explanatory  Notes  have  been  produced  to assist  in the 
understanding  of this Act and are available  separately 
£8.90

Universal  Credit  Act 2025 
CHAPTER  22 
CONTENTS 
Universal  credit 
1 Standard  allowance  for tax years  2026-27  to 2029-30 
2 LCWRA  element  for tax year  2026-27 
3 Freeze  of LCWRA  and LCW  elements  for tax years  2026-27  to 2029-30 
4 Protected  LCWRA  amount  for tax years  2026-27  to 2029-30 
5 Legacy  employment  and support  allowance  payments 
Corresponding  provision  for Northern  Ireland 
6 Corresponding  provision  for Northern  Ireland 
Short  title 
7 Short  title 
Amendments  to the Universal  Credit  Regulations  2013  in 
connection  with  new  amounts  of the LCWRA  element Schedule  1 —  
Northern  Ireland:  corresponding  provision Schedule  2 —  

c. 22 CHARLES  III 
Universal  Credit  Act 2025 
2025  CHAPTER  22 
An Act to make  provision  to alter  the rates  of the standard  allowance

In [None]:
import re

def clean_text(text: str) -> str:
    """Flatten ``text`` into a single line separated by single spaces.

    Every run of whitespace (spaces, tabs, carriage returns, newlines) is
    replaced by one space, and leading/trailing whitespace is removed.
    Example: ``"  the   text  is  "`` becomes ``"the text is"``.
    """
    # The \s+ pattern already matches "\r" and "\n", so a single substitution
    # both removes the line breaks and collapses repeated spaces.
    single_spaced = re.sub(r"\s+", " ", text)
    return single_spaced.strip()

# Normalise the whitespace of the extracted text and keep the result in act_text.
act_text = clean_text(raw_text)

# Quick sanity check: cleaned length plus a preview of the beginning.
print("Text length:", len(act_text))
print(act_text[:1000])  # preview first 1000 chars


Text length: 35422
Universal Credit Act 2025 CHAPTER 22 Explanatory Notes have been produced to assist in the understanding of this Act and are available separately £8.90 Universal Credit Act 2025 CHAPTER 22 CONTENTS Universal credit 1 Standard allowance for tax years 2026-27 to 2029-30 2 LCWRA element for tax year 2026-27 3 Freeze of LCWRA and LCW elements for tax years 2026-27 to 2029-30 4 Protected LCWRA amount for tax years 2026-27 to 2029-30 5 Legacy employment and support allowance payments Corresponding provision for Northern Ireland 6 Corresponding provision for Northern Ireland Short title 7 Short title Amendments to the Universal Credit Regulations 2013 in connection with new amounts of the LCWRA element Schedule 1 — Northern Ireland: corresponding provision Schedule 2 — c. 22 CHARLES III Universal Credit Act 2025 2025 CHAPTER 22 An Act to make provision to alter the rates of the standard allowance, limited capability for work element and limited capability for work and work-

In [6]:
%pip install groq

Collecting groq
  Downloading groq-0.36.0-py3-none-any.whl.metadata (16 kB)
Downloading groq-0.36.0-py3-none-any.whl (137 kB)
Installing collected packages: groq
Successfully installed groq-0.36.0
Note: you may need to restart the kernel to use updated packages.


In [35]:


import os
from getpass import getpass
from groq import Groq

# 1. Take the API key from the environment and prompt only when it is missing.
#    The key is never hardcoded in the notebook, and the cell runs unattended
#    (Restart & Run All) when GROQ_API_KEY is already exported.
if not os.environ.get("GROQ_API_KEY"):
    os.environ["GROQ_API_KEY"] = getpass("Enter your GROQ API key: ")

# 2. init client
client = Groq(api_key=os.environ["GROQ_API_KEY"])

def summarize_act(text: str) -> str:
    """Ask the Groq chat model for a bullet-point summary of the Act.

    Args:
        text: The Act text to summarise; it is appended verbatim to the user
            message.

    Returns:
        The ``content`` of the first choice in the chat completion response.

    Uses the module-level ``client`` created in the setup cell.
    """
    # One requested topic per line. The original prompt fused three bullets
    # into a single line and then repeated two of them, so the model saw
    # duplicated, run-together instructions.
    system_msg = (
        "You are a legal summarisation assistant. Summarise only based on the exact wording provided from the Universal Credit Act 2025. "
        "Produce 5–10 short bullet points that cover:\n"
        "- Purpose of the Act\n"
        "- Key definitions present in the Act\n"
        "- Eligibility for the rate changes (not overall eligibility for Universal Credit)\n"
        "- Obligations or responsibilities of authorities\n"
        "- Enforcement or penalties (state clearly if none exist)\n"
    )

    user_msg = (
        "Here is the full act text. "
        "Return ONLY bullet points, no intro, no conclusion.\n\n"
        f"{text}"
    )

    resp = client.chat.completions.create(
        model="meta-llama/llama-4-scout-17b-16e-instruct",  # or any Groq chat model you like
        temperature=0.2,
        messages=[
            {"role": "system", "content": system_msg},
            {"role": "user", "content": user_msg},
        ],
    )

    return resp.choices[0].message.content

# Summarise the cleaned Act text and show the model's bullet points.
summary_bullets = summarize_act(act_text)
print(summary_bullets)


* The purpose of the Universal Credit Act 2025 is to alter the rates of the standard allowance, limited capability for work element, and limited capability for work and work-related activity element of universal credit.
* Key definitions:
  * "The standard allowance" means the allowance to be included in an award of universal credit.
  * "The LCWRA element" and "the LCW element" refer to specific elements of universal credit.
  * "Pre-2026 claimant", "severe conditions criteria claimant", and "claimant who is terminally ill" are defined in relation to the LCWRA element.
* Eligibility for rate changes:
  * The Act specifies that certain amounts (standard allowance, LCWRA element, etc.) must be increased by specific percentages for tax years 2026-27 to 2029-30.
  * The Secretary of State must exercise powers to secure these increases.
* Obligations or responsibilities of authorities:
  * The Secretary of State must exercise powers to increase certain amounts of universal credit.
  * The 

In [37]:
import json
from groq import Groq
import os


def extract_sections(text: str) -> dict:
    """Ask the Groq chat model to extract structured sections from the Act.

    Args:
        text: The Act text; it is appended verbatim to the user message.

    Returns:
        The parsed JSON object (a ``dict``) returned by the model.

    Raises:
        json.JSONDecodeError: If the model output cannot be parsed as JSON.
            The raw output is printed first to help debugging.
        ValueError: If the output parses but is not a JSON object.

    Uses the module-level ``client`` created in the setup cell.
    """
    # Adjacent string literals are concatenated with no separator, so every
    # rule needs its own explicit "\n"; without them the prompt reads
    # "...Reporting Rules:- Use exact ... Act.- Eligibility ...".
    system_msg = (
        "You are a legal extraction assistant. Extract information ONLY from the given Universal Credit Act 2025 text. "
        "Return STRICT JSON with the keys: "
        "definitions, obligations, responsibilities, eligibility, payments, penalties, "
        "record_keeping, entitlements, enforcement, reporting.\n"
        "Rules:\n"
        "- Use exact concepts and descriptions found in the Act.\n"
        "- Eligibility refers to eligibility for benefit RATES or AMOUNTS mentioned in the Act, not eligibility for Universal Credit in general.\n"
        "- If a section does not exist in the Act, return an empty string for that key.\n"
        "- Do not estimate or invent numbers beyond what is explicitly written. If the Act describes a formula instead of exact values, mention the formula, not specific results.\n"
        "- Do not include commentary or interpretation.\n"
        "- Return ONLY JSON. No explanations or text outside JSON."
    )

    user_msg = (
        "Here is the full Act text. "
        "Return ONLY valid JSON, no explanation, no markdown, no extra text.\n\n"
        f"{text}"
    )

    resp = client.chat.completions.create(
        model="meta-llama/llama-4-scout-17b-16e-instruct",
        temperature=0.2,
        messages=[
            {"role": "system", "content": system_msg},
            {"role": "user", "content": user_msg},
        ],
    )

    # The content may be missing; treat that as empty text so the failure
    # surfaces as a JSON error below rather than an AttributeError.
    raw_content = (resp.choices[0].message.content or "").strip()

    # Keep only the outermost {...}. Markdown fences (```json ... ```) and any
    # stray prose sit outside the braces, so this one slice removes them
    # without needing a regex.
    start = raw_content.find("{")
    end = raw_content.rfind("}")
    candidate = raw_content[start:end + 1] if start != -1 and end > start else raw_content

    try:
        data = json.loads(candidate)
    except json.JSONDecodeError:
        print("=== RAW MODEL OUTPUT ===")
        print(raw_content)
        raise

    if not isinstance(data, dict):
        raise ValueError("Expected a JSON object (dict) of extracted sections.")

    return data

# Run the structured extraction on the cleaned Act text.
sections = extract_sections(act_text)

# ensure_ascii=False prints non-ASCII characters as-is instead of \u escapes.
print(json.dumps(sections, indent=2, ensure_ascii=False))

# save partial result (for your final submission later)
with open("sections_output.json", "w", encoding="utf-8") as f:
    json.dump(sections, f, indent=2, ensure_ascii=False)

print("Saved to sections_output.json")


{
  "definitions": {
    "consumer prices index": "all items consumer prices index published by the Statistics Board",
    "relevant power": "the power in section 9(2) of the Welfare Reform Act 2012 or the power in section 150(2)(b) of the Social Security Administration Act 1992",
    "standard allowance": "the allowance to be included in an award of universal credit under section 9(1) of the Welfare Reform Act 2012",
    "tax year": "the 12 months beginning with 6 April in any year",
    "LCWRA element": "limited capability for work and work-related activity element",
    "LCW element": "limited capability for work element"
  },
  "obligations": {
    "Secretary of State": "must exercise a relevant power to secure that the amounts of the standard allowance for tax years 2026-27 to 2029-30 are at least the amounts calculated",
    "Department for Communities in Northern Ireland": "must exercise a relevant power to secure that the amounts of the standard allowance for tax years 2026-27 

In [38]:
import json
import re
from groq import Groq
import os


def clean_to_json_array(text: str) -> str:
    """Reduce a model reply to the text of its JSON array.

    Steps, in order: trim surrounding whitespace; if the reply starts with a
    Markdown fence, drop the opening fence (with any language tag) and a
    closing fence at the very end; then, if both "[" and "]" occur, keep only
    the span from the first "[" to the last "]". The result is returned
    trimmed and is NOT validated as JSON.
    """
    candidate = text.strip()

    if candidate[:3] == "```":
        # Opening fence: ``` or ```json, plus the whitespace that follows it.
        without_opening = re.sub(r"^```[a-zA-Z0-9]*\s*", "", candidate)
        # Closing fence, only when it terminates the reply.
        candidate = re.sub(r"```$", "", without_opening).strip()

    first_bracket = candidate.find("[")
    last_bracket = candidate.rfind("]")
    if first_bracket != -1 and last_bracket != -1:
        candidate = candidate[first_bracket:last_bracket + 1]

    return candidate.strip()

def run_rule_checks(full_text: str, sections: dict, max_chars: "int | None" = 15000) -> list:
    """Ask the Groq chat model to evaluate six fixed compliance rules.

    Args:
        full_text: The Act text to check.
        sections: Previously extracted sections; serialised to JSON and
            included in the prompt.
        max_chars: Maximum number of characters of ``full_text`` sent to the
            model. The default (15000) keeps the previous behaviour, which
            CUTS OFF longer texts; pass ``None`` to send the whole text.
            When truncation happens, a marker line is appended so the model
            is told the text is incomplete.

    Returns:
        A list of dicts, one per rule, each guaranteed to have the keys
        ``rule``, ``status``, ``evidence`` and ``confidence`` (missing keys
        are filled with ``""`` / ``0``).

    Raises:
        json.JSONDecodeError: If the model output cannot be parsed as JSON
            (raw and cleaned outputs are printed first).
        ValueError: If ``max_chars`` is negative, or the parsed output is not
            a list of JSON objects.

    Uses the module-level ``client`` and the ``clean_to_json_array`` helper.
    """
    if max_chars is not None and max_chars < 0:
        raise ValueError("max_chars must be None or a non-negative integer.")

    rules_desc = """
1. Act must define key terms.
2. Act must specify eligibility criteria.
3. Act must specify responsibilities of the administering authority.
4. Act must include enforcement or penalties.
5. Act must include payment calculation or entitlement structure.
6. Act must include record-keeping or reporting requirements.
"""

    # Adjacent string literals are concatenated with no separator, so each
    # instruction carries its own "\n"; otherwise the prompt reads
    # "...JSON ONLY.- Do not infer ... appear.- Eligibility ...".
    system_msg = (
        "You are a legal compliance checker. "
        "Given the full text of the Universal Credit Act 2025 and extracted sections, "
        "evaluate each rule and return STRICT JSON ONLY.\n"
        "- Do not infer penalties or record keeping if they do not appear.\n"
        "- Eligibility refers ONLY to who receives the adjusted amounts (not Universal Credit generally).\n"
        "- Do not invent text or rewrite unclear clauses.\n"
        "- Return ONLY a JSON array. No text outside JSON."
    )

    # Make truncation explicit instead of silently presenting a prefix as the
    # full Act.
    if max_chars is not None and len(full_text) > max_chars:
        act_excerpt = full_text[:max_chars] + "\n[... Act text truncated here ...]"
    else:
        act_excerpt = full_text

    user_msg = f"""
FULL ACT TEXT:
{act_excerpt}

EXTRACTED SECTIONS (JSON):
{json.dumps(sections, indent=2)}

RULES TO CHECK:
{rules_desc}

For each rule, return an object with:
- rule: exact wording of the rule
- status: "pass" or "fail"
- evidence: short quote or section reference
- confidence: integer 0–100

Return ONLY a JSON array like:
[
  {{
    "rule": "...",
    "status": "...",
    "evidence": "...",
    "confidence": 90
  }},
  ...
]
No markdown, no explanation, no comments.
"""

    resp = client.chat.completions.create(
        model="meta-llama/llama-4-scout-17b-16e-instruct",
        temperature=0.2,
        messages=[
            {"role": "system", "content": system_msg},
            {"role": "user", "content": user_msg},
        ],
    )

    raw = resp.choices[0].message.content
    cleaned = clean_to_json_array(raw)

    try:
        data = json.loads(cleaned)
    except json.JSONDecodeError:
        print("=== RAW MODEL OUTPUT ===")
        print(raw)
        print("=== CLEANED TEXT WE TRIED TO PARSE ===")
        print(cleaned)
        raise  # bare raise re-raises the active exception unchanged

    if not isinstance(data, list):
        raise ValueError("Expected a JSON array (list) of rule objects.")

    for item in data:
        # A non-dict element would otherwise fail below with an unhelpful
        # AttributeError on .setdefault.
        if not isinstance(item, dict):
            raise ValueError("Expected every element of the JSON array to be a rule object.")
        item.setdefault("rule", "")
        item.setdefault("status", "")
        item.setdefault("evidence", "")
        item.setdefault("confidence", 0)

    return data

# Evaluate the rules against the cleaned Act text and the extracted sections.
rules_result = run_rule_checks(act_text, sections)

# ensure_ascii=False prints non-ASCII characters as-is instead of \u escapes.
print(json.dumps(rules_result, indent=2, ensure_ascii=False))

# Persist the rule-check results next to the notebook.
with open("rules_output.json", "w", encoding="utf-8") as f:
    json.dump(rules_result, f, indent=2, ensure_ascii=False)

print("Saved to rules_output.json")


[
  {
    "rule": "Act must define key terms.",
    "status": "pass",
    "evidence": "\"consumer prices index\" means the all items consumer prices index published by the Statistics Board;\"",
    "confidence": 100
  },
  {
    "rule": "Act must specify eligibility criteria.",
    "status": "fail",
    "evidence": "",
    "confidence": 0
  },
  {
    "rule": "Act must specify responsibilities of the administering authority.",
    "status": "pass",
    "evidence": "The Secretary of State must exercise a relevant power in order to secure that the amounts of the standard allowance for tax years 2026-27 to 2029-30 are at least the amounts calculated for each tax year in accordance with subsection (2).",
    "confidence": 100
  },
  {
    "rule": "Act must include enforcement or penalties.",
    "status": "fail",
    "evidence": "",
    "confidence": 0
  },
  {
    "rule": "Act must include payment calculation or entitlement structure.",
    "status": "pass",
    "evidence": "The minimum a