# Group 6

Group Members:
1.   Chris Devoe
2.   Ife Kayode
3.   Michael Moore
4.   Emmanuel Opoku
5.   Aakash Subedi

# Get the data from the Legal Stories GitHub repository:

In [None]:
# Install Dependencies
#!pip install torch==2.9.0+cu126 torchvision==0.24.0+cu126 torchaudio==2.9.0+cu126 --index-url https://download.pytorch.org/whl/cu126
import os
#!pip install -q --upgrade torch
#!pip install -q transformers triton==3.4 kernels
#!pip uninstall -q torchvision torchaudio -y
# Keep the Hugging Face cache under /content/drive/MyDrive (presumably the
# Colab Google Drive mount, so downloads survive a runtime reset -- confirm).
hf_cache_dir = "/content/drive/MyDrive/hf_cache"
os.environ["HF_HOME"] = hf_cache_dir
os.makedirs(hf_cache_dir, exist_ok=True)


# # Get files
!wget https://raw.githubusercontent.com/hjian42/LegalStories/main/data/101-doctrines/legal_doctrines_101.tsv
!wget https://raw.githubusercontent.com/hjian42/LegalStories/main/data/101-doctrines/gpt3.5_story_question_101.tsv

In [None]:
# Legal Doctrines 101 file
import csv
def load_doctrines(path='legal_doctrines_101.tsv'):
  """Load the legal doctrines TSV into a list of row dictionaries.

  Args:
    path: Location of the tab-separated file. Defaults to the file
      downloaded into the working directory by the setup cell.

  Returns:
    A list with one dict per data row, keyed by the file's header row.
    Expected columns: concept, intro_text, word_count.
  """
  # newline='' lets the csv module handle line endings itself; the explicit
  # encoding avoids depending on the platform default (the file is assumed to
  # be UTF-8 -- confirm against the dataset).
  with open(path, newline='', encoding='utf-8') as f:
    return list(csv.DictReader(f, delimiter='\t'))

# GPT 3.5 Story Questions 101 file
def load_story_questions(path='gpt3.5_story_question_101.tsv'):
  """Load the GPT-3.5 story/question TSV into a list of row dictionaries.

  Args:
    path: Location of the tab-separated file. Defaults to the file
      downloaded into the working directory by the setup cell.

  Returns:
    A list with one dict per data row, keyed by the file's header row.
    Expected columns: concept, intro_text, story, concept_question,
    ending_question, limitation_question.
  """
  # newline='' lets the csv module handle line endings itself; the explicit
  # encoding avoids depending on the platform default (the file is assumed to
  # be UTF-8 -- confirm against the dataset).
  with open(path, newline='', encoding='utf-8') as f:
    return list(csv.DictReader(f, delimiter='\t'))

doctrines_data = load_doctrines()
questions_data = load_story_questions()

# Dump every loaded row, one labelled section per file.
for heading, rows in (
    ("Doctrines Data: ", doctrines_data),
    ("\nStory Questions Data: ", questions_data),
):
  print(heading)
  for row in rows:
    print(row)


# Prepare Data for Finetuning

In [None]:
import sys
import os
import json

def get_question_and_concept(question):
  """Build the (prompt, completion) text pair for one story-question row.

  Args:
    question: Mapping with at least the keys "concept", "intro_text",
      "story" and "ending_question" (all strings).

  Returns:
    A tuple (prompt, completion). The prompt carries the concept and its
    summary text; the completion carries the story followed by the ending
    question, with every double newline collapsed to a single newline.
  """
  prompt = "".join((
      "Concept: ", question["concept"],
      ". Summary Text: ", question["intro_text"],
  ))

  # Only the story and the ending question go into the completion; the
  # concept_question and limitation_question columns are deliberately unused.
  raw_completion = "".join((
      "Story: ", question["story"],
      " Question: ", question["ending_question"],
  ))
  return prompt, raw_completion.replace("\n\n", "\n")


def prepare_questions_finetuning_data(raw_data, filename='finetuning_data_questions.jsonl'):
  """Write the story-question rows as a chat-format JSONL file.

  Args:
    raw_data: Iterable of story-question rows accepted by
      get_question_and_concept.
    filename: Output path; one JSON object is written per line.

  Each line has the shape
  {"messages": [{"role": "user", ...}, {"role": "assistant", ...}]}.
  """
  # Build every record before opening the file so a bad row cannot leave a
  # truncated output behind.
  records = []
  for row in raw_data:
    user_text, assistant_text = get_question_and_concept(row)
    records.append({
        "messages": [
            {"role": "user", "content": user_text},
            {"role": "assistant", "content": assistant_text},
        ]
    })

  with open(filename, 'w') as out:
    out.writelines(json.dumps(record) + '\n' for record in records)

# Write the chat-format training file (default name:
# finetuning_data_questions.jsonl) from the loaded story-question rows.
prepare_questions_finetuning_data(questions_data)


In [None]:
# Print head of file
!head finetuning_data_questions.jsonl

# Fine-tune GPT-3.5

In [None]:
# @title
import getpass
import os

import openai

# Read the key with getpass so it is not echoed into the notebook output
# (plain input() leaves the secret visible in the saved cell output).
# Surrounding whitespace from a copy/paste is stripped.
openai.api_key = getpass.getpass("Enter openai api key: ").strip()

# The OpenAI client constructors used later are called without arguments, so
# the key is also exported through the environment.
os.environ['OPENAI_API_KEY'] = openai.api_key

In [None]:
client = openai.OpenAI()

# Upload the fine-tuning data. The context manager closes the file handle once
# the upload call returns (the original inline open() was never closed).
with open("finetuning_data_questions.jsonl", 'rb') as training_data:
  uploaded_file = client.files.create(
      file=training_data,
      purpose="fine-tune"
  )

# Keep the upload result as the cell's last expression so the notebook still
# displays it; the next cell asks for the training file ID, which is
# presumably copied from this output.
uploaded_file

In [None]:
# Read the ID of the uploaded training file from the notebook's input prompt;
# it is passed as `training_file` when the fine-tuning job is created.
print('Training File ID:')
training_file = input()

In [None]:
# Start a fine-tuning job on the uploaded file. The call is the cell's last
# expression, so the returned job object is displayed by the notebook.
# NOTE(review): the generation cells below pass the base model name
# "gpt-3.5-turbo-0125", not the model produced by this job -- confirm intent.
fine_tune_request = {
    "training_file": training_file,
    "model": "gpt-3.5-turbo-0125",
}
client.fine_tuning.jobs.create(**fine_tune_request)

In [None]:
# Choose a random topic for the case
# NOTE: this runs once per execution of this cell. initial_prompt() reads the
# module-level `topic`, so every game reuses it until this cell is re-run.
import random
topic = random.choice(doctrines_data)

# Create examples for the prompt
# The first three story-question rows are used as fixed examples in the
# initial, middle and end prompts.
ex1in, ex1out = get_question_and_concept(questions_data[0])
ex2in, ex2out = get_question_and_concept(questions_data[1])
ex3in, ex3out = get_question_and_concept(questions_data[2])
story_so_far = None # Used to pass the previous question to prompt

# Create the prompt
# Used Chatgpt to improve the prompt to make the story continuous
def initial_prompt():
  """Build the chat messages that open a new legal case.

  Reads the module-level `topic` (the chosen doctrine row) and the example
  pairs `ex1in`/`ex1out` .. `ex3in`/`ex3out`.

  Returns:
    A two-item list of chat messages: a system message with the output
    structure and examples, and a user message with the concept and its
    summary text.
  """
  return [
    {
        "role": "system",
        "content": (
            "You are a legal case question generator that creates a multi-step case based on legal concepts. "
            "You will produce a multiple-choice question built around a short narrative introduction. "
            # Trailing space added: adjacent literals are concatenated as-is,
            # which previously produced "expanded later.You MUST".
            "The story must introduce characters, a location, and a situation that can be expanded later. "
            "You MUST write in this structure:\n\n"
            "1. STORY: A 3–6 sentence narrative introducing the situation.\n"
            "2. QUESTION: A single legal question about the concept.\n"
            # The "/" before ANSWER is the marker callers split on to keep
            # only the story and question text.
            "3. /ANSWER: The correct choice.\n"
            "4. RATIONALE: A short explanation.\n\n"
            "Do NOT end the story — leave open threads so the next question can continue it.\n\n"
            "Examples:\n"
            # exNin already starts with "Concept: " and exNout with "Story: ",
            # so no extra label is added (avoids "Concept: Concept: ...").
            f"{ex1in}\n{ex1out}\n"
            f"{ex2in}\n{ex2out}\n"
            f"{ex3in}\n{ex3out}\n"
        )
    },
    {
        "role": "user",
        "content": f"Concept: {topic['concept']}. Summary Text: {topic['intro_text']}"
    }
  ]

def middle_prompt(story_so_far):
  """Build the chat messages that continue an ongoing legal case.

  Reads the module-level example pairs `ex1in`/`ex1out` .. `ex3in`/`ex3out`.

  Args:
    story_so_far: Text of the story and questions generated so far; it is
      sent verbatim as the user message.

  Returns:
    A two-item list of chat messages (system instructions, then the story).
  """
  return [
    {
        "role": "system",
        "content": (
            "You are continuing a legal case story. Your job is to write the NEXT question in the same narrative. "
            "You MUST continue the same characters, same setting, same timeline, and the same story threads. "
            "Use the previous story as canon and extend it naturally.\n\n"
            "Your structure MUST be:\n"
            "1. STORY: 3–6 new sentences that continue the narrative.\n"
            "2. QUESTION: A new multiple-choice question about a NEW legal concept.\n"
            # The "/" before ANSWER is the marker callers split on to keep
            # only the story and question text.
            "3. /ANSWER.\n"
            "4. RATIONALE.\n\n"
            "Do not contradict earlier facts. Do not end the story — keep it open.\n\n"
            "Examples:\n"
            # exNin already starts with "Concept: " and exNout with "Story: ",
            # so no extra label is added (avoids "Concept: Concept: ...").
            f"{ex1in}\n{ex1out}\n"
            f"{ex2in}\n{ex2out}\n"
            f"{ex3in}\n{ex3out}\n"
        )
    },
    {
        "role": "user",
        "content": f"Story so far:\n{story_so_far}"
    }
  ]

def end_prompt(story_so_far):
  """Build the chat messages that conclude a legal case.

  Reads the module-level example pairs `ex1in`/`ex1out` .. `ex3in`/`ex3out`.

  Args:
    story_so_far: Text of the story and questions generated so far; it is
      sent verbatim as the user message.

  Returns:
    A two-item list of chat messages (system instructions, then the story).
  """
  return [
    {
        "role": "system",
        "content": (
            "Write the final question in this legal case. Continue the story from the previous section, "
            "but this time bring the situation to a conclusion.\n\n"
            "Structure:\n"
            "1. STORY: 3–6 sentences resolving the conflict.\n"
            "2. QUESTION: A final legal concept question.\n"
            # The "/" before ANSWER matches the marker used by the other
            # prompts, which callers split on.
            "3. /ANSWER.\n"
            "4. RATIONALE.\n\n"
            "The story should conclude all major plot threads.\n\n"
            "Examples:\n"
            # exNin already starts with "Concept: " and exNout with "Story: ",
            # so no extra label is added (avoids "Concept: Concept: ...").
            f"{ex1in}\n{ex1out}\n"
            f"{ex2in}\n{ex2out}\n"
            f"{ex3in}\n{ex3out}\n"
        )
    },
    {
        "role": "user",
        "content": f"Story so far:\n{story_so_far}"
    }
  ]

def generate_response(prompt, finetuned_model):
  """Send chat messages to a model and return the reply text, stripped.

  Args:
    prompt: List of chat message dicts (as built by the *_prompt helpers).
    finetuned_model: Model name passed to the chat completions endpoint.

  Returns:
    The content of the first returned choice with surrounding whitespace
    removed.
  """
  sampling_options = {
      "temperature": 0.7,
      "max_tokens": 500,
      "top_p": 1,
      "frequency_penalty": 0,
      "presence_penalty": 0,
      "stop": ["###"],
  }
  completion = client.chat.completions.create(
      model=finetuned_model,
      messages=prompt,
      **sampling_options,
  )
  first_choice = completion.choices[0]
  return first_choice.message.content.strip()

# Generate a three-part case (beginning, middle, end) as a smoke test of the
# prompts. Each step feeds the accumulated story into the next prompt.
question = generate_response(initial_prompt(), "gpt-3.5-turbo-0125")
print("BEGINNING: ")
print(question)
print("\n\n")
# Keep only the text before the first "/" (the prompts ask for "/ANSWER"), so
# the answer and rationale are not fed back into the next prompt.
# NOTE(review): any earlier "/" in the story would also cut the text here.
story_so_far = question.split("/")[0]
question2 = generate_response(middle_prompt(story_so_far), "gpt-3.5-turbo-0125")
print("MIDDLE: ")
print(question2)
print("\n\n")
story_so_far += question2.split("/")[0]
question3 = generate_response(end_prompt(story_so_far), "gpt-3.5-turbo-0125")
print("END: ")
print(question3)

# Set Up the Reasoning Model

In [None]:
# Read the player's answer to the generated question from the input prompt.
print("What is your response? ")
user_response = input()

What is your response? 
B


In [None]:
# Using o3-mini, GptOss was way too big for colab to handle
# A second client instance; evaluate_response uses it for the grading calls.
from openai import OpenAI
client2 = OpenAI()

# System prompt for the grading model. Adjacent string literals are joined
# with no separator, so each fragment carries its own trailing space or
# newline (the original ran sentences together, e.g. "responsereturn").
o3mini_prompt = (
    "You are a legal evaluator. Given a question, the correct response, and the user's response, "
    "return a score out of 4 points. "
    "1 point for the correct letter answer. "
    "1 point for mentioning the right doctrines/concept/keywords. "
    "1 point for an okay justification or 2 points for a good justification. "
    # get_score parses the score from the start of the reply, so ask for it
    # explicitly on the first line.
    "Always start your reply with a line of the form 'Score: N', where N is a whole number from 0 to 4. "
    "Provide your reasoning for the score. An example of how it should be formatted is below:\n"
    "Example:\n"
    "Score: 4\n"
    "Rationale: You provided the correct answer to the question, and mentioned the concept of legality. "
    "Your justification could use some improvements though, it seemed somewhat ambiguous."
)

def evaluate_response(question, user_response, prompt, model):
  """Ask a model to assess a response to a question.

  Args:
    question: Question text, sent under the "Question:" label.
    user_response: Text sent under the "User Response:" label.
    prompt: System prompt that tells the model what to produce.
    model: Model name passed to the chat completions endpoint.

  Returns:
    The content of the first returned choice, unmodified.
  """
  system_message = {"role": "system", "content": prompt}
  user_message = {
      "role": "user",
      "content": f"Question: {question}\nUser Response: {user_response}",
  }
  reply = client2.chat.completions.create(
      model=model,
      messages=[system_message, user_message],
  )
  return reply.choices[0].message.content

# Grade the answer entered above against the first generated question. As the
# cell's last expression, the grader's reply is displayed by the notebook.
evaluate_response(question, user_response, o3mini_prompt, "o3-mini")

'Score: 1\nRationale: You provided the correct letter answer ("B"), which secures 1 point. However, you did not mention any relevant doctrines, concepts, or keywords (such as "equal authenticity" or "bilingual laws") and you did not provide any justification for your answer.'

# Demo Game

In [None]:
# Unscored demo game: the judge model poses questions, the player answers,
# and the jury model comments on each answer.
questions_to_win = 6
story_so_far = None
user_response = ""
# Nothing in this unscored version sets win/lose; they are kept so the loop
# condition keeps its original shape.
win = False
lose = False
count = 0

# Run game (use -1 to quit for now)
while not win and not lose and user_response != "-1":
  is_opening = count == 0
  # The closing question is asked once `questions_to_win` questions have
  # already been posed (questions_to_win + 1 questions in total).
  is_closing = count == questions_to_win

  if is_opening:
    # Start case
    prompt = initial_prompt()
  elif is_closing:
    # End case
    prompt = end_prompt(story_so_far)
  else:
    # Middle case
    prompt = middle_prompt(story_so_far)

  print("\nJUDGE: ")
  question = generate_response(prompt, "gpt-3.5-turbo-0125")
  print(question)

  # Accumulate only the text before the "/" answer marker so later prompts see
  # the narrative, not the answers. The closing question adds nothing.
  if is_opening:
    story_so_far = question.split("/")[0]
  elif not is_closing:
    story_so_far += question.split("/")[0]

  print("\nATTORNEY (Enter response): ")
  user_response = input()
  if user_response == "-1":
    # Quit sentinel: do not send it to the jury as if it were an answer.
    break

  print("\nJURY: ")
  print(evaluate_response(question, user_response, o3mini_prompt, "o3-mini"))

  if is_closing:
    break
  # Advance to the next question. The original never incremented `count`, so
  # every iteration restarted the case with a new opening question.
  count += 1
print("CASE CLOSED.")



In [None]:
## test with scoring
def get_score(model_response):
  """Extract the integer score from a grader reply.

  Accepts the requested "Score: N" format as well as common variations such
  as "Score: 3/4", "Score:4", "**Score:** 2" or a leading blank line.

  Args:
    model_response: Full text returned by the grading model.

  Returns:
    The first integer that follows the word "score" (case-insensitive).

  Raises:
    ValueError: If no such number can be found.
  """
  import re

  # \D* skips punctuation, markdown and whitespace between the label and the
  # number; only the first run of digits is used, so "3/4" yields 3.
  match = re.search(r"score\D*(\d+)", model_response, flags=re.IGNORECASE)
  if match is None:
    raise ValueError(f"No score found in model response: {model_response!r}")
  return int(match.group(1))

def get_letter_grade(grade):
  """Map a percentage grade to a letter grade.

  Args:
    grade: Percentage in the range 0-100 (int or float).

  Returns:
    "A" (>= 90), "B" (>= 80), "C" (>= 70), "D" (>= 60), "F" (below 60), or
    "error" when the grade is outside 0-100.
  """
  if not 0 <= grade <= 100:
    return "error"

  # Lower-bound thresholds leave no gaps for fractional grades. The original
  # integer bands mapped values such as 79.17 (19 of 24 points) to "error".
  if grade >= 90:
    return "A"
  if grade >= 80:
    return "B"
  if grade >= 70:
    return "C"
  if grade >= 60:
    return "D"
  return "F"

# Scored game: each answer is graded out of 4 points by the jury model and the
# total is converted to a percentage and a letter grade.
questions_to_win = 6
story_so_far = None
user_response = ""
win = False
stop = False
total_points = 0
count = 0

# Run game (use -1 to quit for now)
while not stop and user_response != "-1":
  is_opening = count == 0
  is_closing = count == questions_to_win - 1

  if is_opening:
    # Start case
    prompt = initial_prompt()
  elif is_closing:
    # End case
    prompt = end_prompt(story_so_far)
  else:
    # Middle case
    prompt = middle_prompt(story_so_far)

  print("\nJUDGE: ")
  question = generate_response(prompt, "gpt-3.5-turbo-0125")
  # Show only the first two blank-line-separated paragraphs (story and
  # question) so the answer and rationale stay hidden. Slicing avoids the
  # IndexError that indexing [1] raised on single-paragraph replies.
  print("\n\n".join(question.split("\n\n")[:2]))

  # Accumulate only the text before the "/" answer marker; the closing
  # question adds nothing to the story.
  if is_opening:
    story_so_far = question.split("/")[0]
  elif not is_closing:
    story_so_far += question.split("/")[0]

  print("\nATTORNEY (Enter response): ")
  user_response = input()
  if user_response == "-1":
    # Quit sentinel: do not grade it as if it were an answer.
    break

  print("\nJURY: ")
  model_response = evaluate_response(question, user_response, o3mini_prompt, "o3-mini")
  print(model_response)
  total_points += get_score(model_response)

  # The closing question ends the case.
  stop = is_closing
  count += 1
print("CASE CLOSED.\n")

print("Results:")
# Each question is worth up to 4 points; unanswered questions count as 0.
grade = (total_points / (questions_to_win * 4)) * 100
letter_grade = get_letter_grade(grade)

# A grade of 70% or more wins the case.
if grade >= 70:
  win = True

if win:
  print(letter_grade, "\nGrade: " + str(grade) + "%\nYou've Won!")
else:
  print(letter_grade, "\nGrade: " + str(grade) + "%\nYou've Lost.")


JUDGE: 
Story: Sarah, a thrill-seeker, decides to go bungee jumping off a bridge during her vacation in New Zealand. Before taking the leap, she signs a waiver acknowledging the risks involved in the activity, including the potential for injury or death. Sarah is well aware of the dangers associated with bungee jumping but is excited about the adrenaline rush. As she plunges towards the water, the bungee cord snaps, causing her to sustain injuries upon impact. Despite her injuries, Sarah cannot hold the bungee jumping company liable under the principle of volenti non fit iniuria.

Question: In the scenario described, why is Sarah unable to bring a claim against the bungee jumping company for her injuries?
(A) Sarah did not sign a waiver before bungee jumping.
(B) Sarah was forced to go bungee jumping against her will.
(C) Sarah voluntarily assumed the risks associated with bungee jumping.
(D) Sarah's injuries were caused by a third party unrelated to the bungee jumping company.

ATTOR

# **Demo Game With Scoring and Hints**

In [None]:
# System prompt for hint generation. Adjacent string literals are joined with
# no separator, so each fragment ends with a space (the original ran words
# together, e.g. "givenreturn").
o3mini_hprompt = (
    "You are a legal evaluator. Given a question, the correct response, and a previous hint already given "
    # Hints are requested through evaluate_response, which sends the previous
    # hint under the "User Response" label; say so to avoid misreading it.
    "(supplied in the 'User Response' field, which is empty when no hint has been given yet), "
    "return a hint (no need to preface with anything, just the text) to help get the correct answer. "
    "The hint should be concise and relevant, and should be more helpful than the previous hint."
)

In [None]:
## Scoring with hints
def get_score(model_response):
  """Extract the integer score from a grader reply.

  Accepts the requested "Score: N" format as well as common variations such
  as "Score: 3/4", "Score:4", "**Score:** 2" or a leading blank line.

  Args:
    model_response: Full text returned by the grading model.

  Returns:
    The first integer that follows the word "score" (case-insensitive).

  Raises:
    ValueError: If no such number can be found.
  """
  import re

  # \D* skips punctuation, markdown and whitespace between the label and the
  # number; only the first run of digits is used, so "3/4" yields 3.
  match = re.search(r"score\D*(\d+)", model_response, flags=re.IGNORECASE)
  if match is None:
    raise ValueError(f"No score found in model response: {model_response!r}")
  return int(match.group(1))

def get_letter_grade(grade):
  """Map a percentage grade to a letter grade.

  Args:
    grade: Percentage in the range 0-100 (int or float).

  Returns:
    "A" (>= 90), "B" (>= 80), "C" (>= 70), "D" (>= 60), "F" (below 60), or
    "error" when the grade is outside 0-100.
  """
  if not 0 <= grade <= 100:
    return "error"

  # Lower-bound thresholds leave no gaps for fractional grades. The original
  # integer bands mapped values such as 79.17 (19 of 24 points) to "error".
  if grade >= 90:
    return "A"
  if grade >= 80:
    return "B"
  if grade >= 70:
    return "C"
  if grade >= 60:
    return "D"
  return "F"


def _collect_attorney_response(question, max_hints=3):
  """Prompt the player until they enter something other than a hint request.

  Typing "hint" (any letter case) asks the hint model for a hint, up to
  `max_hints` times for this question. Each request passes the previously
  shown hint along so the model can improve on it.

  Returns:
    The player's raw input (an answer, or the "-1" quit sentinel).
  """
  hints_used = 0
  # Starts empty for every question so a hint from an earlier question is
  # never passed along as the "previous hint" for this one.
  previous_hint = ""

  while True:
    print("\nATTORNEY (Enter response): ")
    user_response = input()
    if user_response.strip().lower() != "hint":
      return user_response

    if hints_used >= max_hints:
      print(f"\nYOU HAVE USED UP THE MAXIMUM AMOUNT OF HINTS ({max_hints}):")
      continue

    previous_hint = evaluate_response(question, previous_hint, o3mini_hprompt, "o3-mini")
    print("\nHINT:")
    print(previous_hint)
    hints_used += 1


def demo(questions_to_win=6, max_hints=3):
  """Play one interactive, scored legal case with optional hints.

  The judge model poses `questions_to_win` questions that share one story;
  the jury model grades each answer out of 4 points. Entering "-1" quits
  early, and the remaining questions then count as 0 points.

  Args:
    questions_to_win: Number of questions in the case (at least 1).
    max_hints: Maximum number of hints available per question.

  Raises:
    ValueError: If questions_to_win is less than 1.
  """
  if questions_to_win < 1:
    raise ValueError("questions_to_win must be at least 1")

  story_so_far = None
  total_points = 0

  # Run game (use -1 to quit for now)
  for count in range(questions_to_win):
    is_opening = count == 0
    is_closing = count == questions_to_win - 1

    if is_opening:
      # Start case
      prompt = initial_prompt()
    elif is_closing:
      # End case
      prompt = end_prompt(story_so_far)
    else:
      # Middle case
      prompt = middle_prompt(story_so_far)

    print("\nJUDGE: ")
    question = generate_response(prompt, "gpt-3.5-turbo-0125")
    # Show only the first two blank-line-separated paragraphs (story and
    # question) so the answer and rationale stay hidden. Slicing avoids the
    # IndexError that indexing [1] raised on single-paragraph replies.
    print("\n\n".join(question.split("\n\n")[:2]))

    # Accumulate only the text before the "/" answer marker; the closing
    # question adds nothing to the story.
    if is_opening:
      story_so_far = question.split("/")[0]
    elif not is_closing:
      story_so_far += question.split("/")[0]

    user_response = _collect_attorney_response(question, max_hints)
    if user_response == "-1":
      # Quit sentinel: do not grade it as if it were an answer.
      break

    print("\nJURY: ")
    model_response = evaluate_response(question, user_response, o3mini_prompt, "o3-mini")
    print(model_response)
    total_points += get_score(model_response)

  print("CASE CLOSED.\n")

  print("Results:")
  # Each question is worth up to 4 points.
  grade = (total_points / (questions_to_win * 4)) * 100
  letter_grade = get_letter_grade(grade)

  # A grade of 70% or more wins the case.
  if grade >= 70:
    print(letter_grade, "\nGrade: " + str(grade) + "%\nYou've Won!")
  else:
    print(letter_grade, "\nGrade: " + str(grade) + "%\nYou've Lost.")

In [None]:
# play the game
# Runs one interactive case; answers, "hint" requests and the "-1" quit
# sentinel are read from the notebook's input prompt.
demo()


JUDGE: 
Story: In the bustling city of London, a major financial scandal has come to light, implicating several high-profile individuals in fraudulent activities. The regulatory authority, known as the Financial Conduct Authority (FCA), has been conducting a thorough investigation into the matter and is preparing an official report detailing its findings. Among those under scrutiny is a well-known investment banker named Alex Thompson.

Question: As the FCA finalizes its report on the financial scandal, what legal practice might Alex Thompson invoke to respond to any criticisms made against him before the report is published?
(A) Miranda Rights
(B) Solicitor-Client Privilege
(C) Maxwellisation
(D) Habeas Corpus

ATTORNEY (Enter response): 
hint

HINT:
Think about which term is uniquely related to the right of someone under scrutiny to respond publicly to forthcoming negative information before it’s made official—it's not about criminal rights, lawyer confidentiality, or detention prot