In [1]:
!pip install together -q

In [2]:
import pandas as pd
import json

# Load the practice dataset from the current working directory.
data = pd.read_json(r"practice.json")  # the raw-string prefix only matters if the path contains backslashes
# If the file lives in a sub-folder, write the path with forward slashes,
# e.g. "final_final/practice.json".

data.head()

Unnamed: 0,title,selftext
0,"Hospital gave a funeral home my information, h...","As I'm typing this, I'm still very much alive...."
1,My partner was charged with failing to appear ...,"So, today my husband received a phone call bas..."
2,I told a customer firearms were prohibited in ...,Family owns a body shop in Texas. This custome...
3,Coworkers created a Grindr account in my name ...,Throwaway cause I don’t want people involved t...
4,13 yo hit w/ metal cup on school bus - now med...,The school and district are investigating inte...


Augmentation

In [5]:
import os
import pandas as pd
from tqdm import tqdm
from together import Together
import json
import re

# Define the prompt creation function for generating new cases
def generate_case_prompt(example):
    """Build the augmentation prompt for one dataset example.

    Args:
        example: A JSON-serialisable dict; it is embedded in the prompt
            verbatim via ``json.dumps(example, indent=4)``.

    Returns:
        The prompt text asking the model for five new instances modelled on
        ``example``.

    The closing instruction asks for a JSON array only. The earlier wording
    asked for JSON and then for "comma separated value" output, which
    contradicted itself and the JSON parsing done on the response.
    """
    example_json = json.dumps(example, indent=4)
    return f"""

Behave as an instance generator. I will provide an example in JSON format. Answer only in JSON format; no unnecessary comments are needed in the output.
For each example, generate five new instances that are similar in structure but distinct in the following aspects:
- Context: Use a different setting or scenario that aligns with real-world situations.
- Agents: Identify distinct active and passive agents with clearly defined roles and relationships.
- Ethical issues: Introduce new and realistic ethical dilemmas relevant to the scenario.
- Features: Maintain the same set of features as the original example, providing detailed, well-structured, and contextually accurate values.
But make sure that these features are not more than 2-3 words

1. Ensure each generated instance explores diverse domains (e.g., healthcare, technology, education, business, law, etc.).
2. Clearly differentiate between the active agent (initiator of the action) and the passive agent (affected party).
3. Ensure the ethical issue is thought-provoking and aligns with the action and consequence.
4. Provide detailed descriptions for the consequence, its severity, utility, and duration.
5. Avoid repetitive or overly similar cases. Each instance should introduce fresh perspectives.

Here is an example in JSON format:
{example_json}
Return the five new instances as a JSON array of objects and nothing else.

"""

# Function to extract all JSON blocks from the response
def extract_json_from_response(response_text):
    """Return every JSON object embedded in ``response_text``.

    The text is scanned for ``{`` and each candidate is decoded with
    ``json.JSONDecoder.raw_decode``, so nested objects and string values that
    contain ``}`` are parsed as a whole (a non-greedy ``{.*?}`` regex cuts
    them off at the first closing brace).

    Args:
        response_text: Raw model reply, possibly with prose around the JSON.

    Returns:
        A list of decoded objects in order of appearance, or ``None`` when
        nothing could be decoded or an unexpected error occurred.
    """
    try:
        decoder = json.JSONDecoder()
        parsed_data = []
        cursor = response_text.find("{")
        while cursor != -1:
            try:
                obj, end = decoder.raw_decode(response_text, cursor)
            except json.JSONDecodeError:
                # Malformed or truncated object: skip past its first closing
                # brace so fragments nested inside it are not picked up.
                closing = response_text.find("}", cursor)
                if closing == -1:
                    break  # unterminated object runs to the end of the text
                end = closing + 1
            else:
                parsed_data.append(obj)
            cursor = response_text.find("{", end)
        return parsed_data if parsed_data else None
    except Exception as e:
        print(f"Error extracting JSON: {e}")
        return None

# Function to run the agent and get a response
def run_agent(client, prompt, model, content):
    """Send one prompt to the chat model and return the JSON found in its reply.

    Args:
        client: Object exposing ``chat.completions.create(model=..., messages=...)``.
        prompt: User message text.
        model: Model identifier passed through to the API.
        content: Persona / instruction text for the model.

    Returns:
        Whatever ``extract_json_from_response`` returns for the reply text, or
        ``None`` if the call or the extraction raised.
    """
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                # The persona text is an instruction to the model, so it goes
                # in the system role rather than posing as a prior assistant turn.
                {"role": "system", "content": content},
                {"role": "user", "content": prompt}
            ]
        )
        # Extract response content
        response_text = response.choices[0].message.content.strip()
        print(f"Raw Response: {response_text}")
        # Extract and validate JSON
        return extract_json_from_response(response_text)
    except Exception as e:
        print(f"Error in API call: {e}")
        return None

# Read the Together API key from the environment instead of embedding it in
# the notebook. NOTE: the key that used to be hard-coded here was exposed in
# the notebook source and should be revoked and replaced.
api_key = os.environ.get("TOGETHER_API_KEY")
if not api_key:
    raise RuntimeError(
        "TOGETHER_API_KEY is not set; export it (or add it through your "
        "platform's secrets manager) before running this cell."
    )
client = Together(api_key=api_key)

# Define the LLM model
llm_model = "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo"

# Load dataset
data = pd.read_json("practice.json")

# Accumulators: parsed cases from usable replies, and the inputs whose
# replies produced nothing usable.
all_generated_cases, invalid_responses = [], []

# Walk the dataset one row at a time
for row_idx in tqdm(range(len(data))):
    example = data.iloc[row_idx].to_dict()  # current row as a plain dict

    # Ask the model for new cases modelled on this row
    parsed_cases = run_agent(
        client,
        generate_case_prompt(example),
        llm_model,
        "You are a JSON instance generator and legal domain expert."
    )

    # Nothing usable came back: remember the input for debugging and move on
    if not parsed_cases:
        invalid_responses.append({"example_index": row_idx, "example": example})
        continue

    all_generated_cases.extend(parsed_cases)  # keep every parsed JSON object

# Convert all generated cases into a DataFrame and save
if all_generated_cases:
    generated_output_file = "augmentation.csv"
    try:
        results_df = pd.DataFrame(all_generated_cases)
        results_df.to_csv(generated_output_file, index=False)
        # Report the file that was actually written (the message used to name
        # a different file, 'generated_cases.csv').
        print(f"Generated cases saved as '{generated_output_file}'")
    except Exception as e:
        print(f"Error saving generated cases to CSV: {e}")
else:
    print("No valid cases generated.")

# Save invalid responses for debugging
if invalid_responses:
    # Written next to the CSV output, relative to the working directory, so
    # the cell does not depend on the Kaggle-specific /kaggle/working path.
    invalid_output_file = "invalid_responses.json"
    try:
        with open(invalid_output_file, "w") as f:
            json.dump(invalid_responses, f, indent=4)
        print(f"Invalid responses saved as '{invalid_output_file}'")
    except Exception as e:
        print(f"Error saving invalid responses: {e}")
else:
    print("No invalid responses.")

 20%|██        | 1/5 [00:07<00:31,  7.76s/it]

Raw Response: Here are five new instances in JSON format, each with a different setting, agents, ethical issues, and features:

Instance 1:
{
    "title": "Bank shared my financial info with a scammer, I'm being harassed",
    "selftext": "I'm a victim of identity theft. A scammer got my financial info from my bank and is now harassing me for money. I've tried to contact the bank, but they're not taking responsibility. I'm in California and the scammer is in Texas. What can I do?"
}

Instance 2:
{
    "title": "University shared my mental health records with my roommate, I'm suing",
    "selftext": "I'm a college student in New York. My university shared my mental health records with my roommate without my consent. I'm suing the university for violating my privacy. I'm seeking damages for emotional distress and loss of trust."
}

Instance 3:
{
    "title": "Insurance company shared my medical info with a telemarketer, I'm outraged",
    "selftext": "I'm a patient in Florida. My insuran

 40%|████      | 2/5 [00:24<00:39, 13.28s/it]

Raw Response: Here are five new instances in JSON format, each with a different setting, agents, ethical issues, and features:

Instance 1:
{
    "title": "My neighbor's dog was charged with trespassing on my property.",
    "selftext": "So, today my neighbor's dog was found on my property, and I received a notice from the local animal control that the dog was charged with trespassing. The worst part? The dog's owner claims that the dog was just exploring and didn't mean to cause any harm. You see, the dog's owner recently moved into the neighborhood and didn't know about the property boundaries. Now, I'm worried about the safety of my own pets and the potential for future incidents.\n\nNow the dog's owner has to figure out how to pay a fine or else the dog will be taken away. The animal control said that they would have to do some kind of investigation to determine the dog's owner's responsibility, but now I'm just frustrated by everything happening.\n\nAny thoughts on what might happ

 60%|██████    | 3/5 [01:05<00:51, 25.90s/it]

Raw Response: perform


 80%|████████  | 4/5 [01:31<00:25, 25.92s/it]

Raw Response: Here are five new instances in JSON format, each with a different setting, agents, ethical issues, and features:

Instance 1:
{
    "title": "Classmates created a fake online profile in my name and sent someone to my school.",
    "selftext": "Throwaway cause I don’t want people involved to find it.\n\nSo I attend a certain high school with a small group of friends and there’s a few guys who are generally cool but can be a little much when it comes to joking around. I was sitting in the library when they bust in laughing their asses off looking at me and ran to my counselor’s office and closed the door where I heard them laughing even harder. I dismissed it at the time and went on with my day.\n\nLater at night when I was the only one at the school I was hanging out outside on the bench right in front of the door when a man in his sixties walked up and asked for me by my name. I assumed he was a customer so I said yes and he said we had been talking on a dating app and my

100%|██████████| 5/5 [01:47<00:00, 21.44s/it]

Raw Response: Here are five new instances in JSON format, each with a different setting, agents, ethical issues, and features:

Instance 1:
{
    "title": "14 yo hit w/ laptop on school bus - now medical issues ",
    "selftext": "The school and district are investigating intentionality. \n\nChild was hit in the head with a full laptop on the bus, bled out with headache and dizzy spell. No altercations, child sitting on the exit seat. \n\nLong story short child now stammers and has a tic (hours post impact). Several ER visits and no answers. \n\nHospital advised to investigate it (we believed it to be an accident based on child’s account; hospital stated the force doesn’t align with accident).\n\nI’m praying it was an accident but, I’m not sure next steps if it’s not. \n\nThese are minors, can parents be held liable if we press charges? Other child is likely 15. This injured child is younger than grade peers. ",
    "context": "School Bus",
    "agents": {
        "active": "14-year-ol




Convert an Excel file to JSON


In [None]:
import pandas as pd
import json

def excel_to_json(excel_file, json_file):
    """Convert an Excel sheet into a JSON file holding one object per row.

    Args:
        excel_file: Path of the workbook, read with ``pd.read_excel``.
        json_file: Path of the JSON file to write (overwritten if present).

    Empty cells are written as ``null`` and values that ``json`` cannot
    serialise natively (e.g. date cells) are written via ``str()``, so the
    output is always standard JSON.
    """
    # Read the Excel file
    df = pd.read_excel(excel_file)

    # One dict per row, keyed by column name. Missing cells arrive as NaN/NaT,
    # which json.dump would emit as the non-standard token NaN, so map them
    # to None (JSON null) first.
    records = [
        {
            column: None if (pd.api.types.is_scalar(value) and pd.isna(value)) else value
            for column, value in row.items()
        }
        for row in df.to_dict(orient='records')
    ]

    # default=str keeps non-serialisable cell values (timestamps etc.) from raising
    with open(json_file, 'w', encoding='utf-8') as jsonf:
        json.dump(records, jsonf, indent=4, default=str)

# Example usage -- both names are placeholders; set them to real paths first.
excel_file, json_file = '.xlsx', '.json'  # (input Excel file, output JSON file)
excel_to_json(excel_file, json_file)
print(f"Excel file '{excel_file}' has been converted to JSON file '{json_file}'.")


Summarisation

In [None]:
import os
import pandas as pd
from tqdm import tqdm
from together import Together
import json
import re

# Define the prompt creation function for summarizing cases
def generate_case_summary_prompt(case_text):
    """Build the prompt that asks the model for a templated case summary.

    Args:
        case_text: The case description; it is inserted into the prompt as is.

    Returns:
        The prompt text. Its closing instruction asks for the filled-in
        template only, which is what the summary extractor in this cell looks
        for (text beginning with "The"). The previous closing line asked for
        "comma separated" output, which contradicted the sentence template.
    """
    return f"""

    Summarize the case text using this template as accurately as possible while
    maintaining correct English grammar. Do not add extra information:
    "The <active agent> did <action> to <passive agent> which led to
    <consequence>. The <active agent> had <good/bad/neutral> moral intention,
    however, the <action> violated <ethical principle> ethical principle which
    caused <ethical issue>."
    Case text is as follows: "{case_text}"

    Return only the completed template text, starting with "The", with no preamble or extra commentary.

    """

# Function to extract summary text from the raw API response
def extract_summary_from_response(response_text):
    """Pull the templated summary out of the raw model reply.

    The summary is taken to start at the first line beginning with "The " and
    to run to the end of the reply. ``re.MULTILINE`` lets that line appear
    after a preamble (e.g. "Here is the summary:"); without it only replies
    whose very first characters are "The " were accepted.

    Args:
        response_text: Raw reply text.

    Returns:
        The stripped summary text, or ``None`` when no such line exists or an
        error occurred.
    """
    try:
        # DOTALL: keep multi-line summaries whole; MULTILINE: ^ matches at any line start
        summary_match = re.search(r'^The .*', response_text, re.DOTALL | re.MULTILINE)
        if summary_match:
            return summary_match.group().strip()  # Extract the summary text
        else:
            print("No valid summary found in response.")
            return None
    except Exception as e:
        print(f"Error extracting summary: {e}")
        return None

# Function to run the agent and fetch a response
def run_agent(client, prompt, model, content):
    """Send one prompt to the chat model and return the summary found in its reply.

    Args:
        client: Object exposing ``chat.completions.create(model=..., messages=...)``.
        prompt: User message text.
        model: Model identifier passed through to the API.
        content: Persona / instruction text for the model.

    Returns:
        Whatever ``extract_summary_from_response`` returns for the reply text,
        or ``None`` if the call or the extraction raised.
    """
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                # The persona text is an instruction to the model, so it goes
                # in the system role rather than posing as a prior assistant turn.
                {"role": "system", "content": content},
                {"role": "user", "content": prompt}
            ]
        )
        # Extract the content of the response
        response_text = response.choices[0].message.content.strip()
        print(f"Raw Response: {response_text}")  # Log raw response
        return extract_summary_from_response(response_text)  # Extract summary
    except Exception as e:
        print(f"Error in API call: {e}")
        return None

# Read the Together API key from the environment instead of embedding it in
# the notebook. NOTE: the key that used to be hard-coded here was exposed in
# the notebook source and should be revoked and replaced.
api_key = os.environ.get("TOGETHER_API_KEY")
if not api_key:
    raise RuntimeError(
        "TOGETHER_API_KEY is not set; export it (or add it through your "
        "platform's secrets manager) before running this cell."
    )
client = Together(api_key=api_key)

# Define the LLM model
llm_model = "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo"

# Load dataset
data = pd.read_json("practice.json")

# Accumulators: summaries that were extracted, and the cases that produced none
all_summaries, invalid_responses = [], []

# Walk the dataset one case at a time
for case_idx in tqdm(range(len(data))):
    case_text = data.iloc[case_idx]["selftext"]  # text of the current case

    # Ask the model for a templated summary of this case
    summary = run_agent(
        client,
        generate_case_summary_prompt(case_text),
        llm_model,
        "You are a legal domain expert generating case summaries."
    )

    # No summary came back: remember the input for debugging and move on
    if not summary:
        invalid_responses.append({"case_index": case_idx, "case_text": case_text})
        continue

    all_summaries.append({"case_index": case_idx, "summary": summary})

# Save all generated summaries to a CSV file
if all_summaries:
    summary_df = pd.DataFrame(all_summaries)
    summary_output_file = "summary.csv"
    summary_df.to_csv(summary_output_file, index=False)
    print(f"Generated summaries saved as '{summary_output_file}'")
else:
    print("No valid summaries generated.")

# Save invalid responses for debugging
if invalid_responses:
    # Written next to summary.csv, relative to the working directory. The
    # previous absolute /kaggle/working path made this cell raise when that
    # directory does not exist (the write here is not wrapped in try/except).
    invalid_output_file = "invalid_responses.json"
    with open(invalid_output_file, "w") as f:
        json.dump(invalid_responses, f, indent=4)
    print(f"Invalid responses saved as '{invalid_output_file}'")
else:
    print("No invalid responses.")

 20%|██        | 1/5 [00:01<00:04,  1.00s/it]

Raw Response: The hospital did provide incorrect information to the funeral home which led to the funeral home contacting the husband with condolences. The hospital had bad moral intention, however, the action violated the principle of accuracy which caused emotional distress.


 40%|████      | 2/5 [00:02<00:03,  1.12s/it]

Raw Response: The scammers did scam to the husband which led to financial loss. The scammers had bad moral intention, however, the scam violated trust ethical principle which caused financial issue.


 60%|██████    | 3/5 [00:04<00:03,  1.68s/it]

Raw Response: The customer did taunt the body shop owner through text, which led to the owner feeling threatened. The customer had bad moral intention, however, the taunting violated the right to safety and security ethical principle which caused a safety concern.


 80%|████████  | 4/5 [00:05<00:01,  1.55s/it]

Raw Response: The coworkers did share the user's Grindr profile picture to a man in his sixties, which led to the man approaching the user and attempting to identify them. The coworkers had bad moral intention, however, the action violated the principle of respecting others' privacy and personal boundaries which caused a potential invasion of privacy and harassment issue.


100%|██████████| 5/5 [00:06<00:00,  1.37s/it]

Raw Response: The school and district did investigate intentionality regarding an incident to a child which led to the child being hit in the head with a full Stanley cup, bleeding, and experiencing a headache and dizzy spell. The school and district had a neutral moral intention, however, the investigation violated the principle of protecting students from harm which caused an ethical issue.
Generated summaries saved as 'summary.csv'
No invalid responses.





Feature extraction

In [8]:
import os
import pandas as pd
from tqdm import tqdm
from together import Together
import json
import re

# Define the prompt creation function
def inc_prompt(selftext):
    """Return the feature-extraction prompt for one case text.

    ``selftext`` is inserted into the opening sentence; the rest of the prompt
    lists the fourteen features the model should report and asks for a
    JSON-only reply.
    """
    return f"""
    Analyze the following sentence: {selftext} to extract the following features in stick to  1-2 words from the cases (only JSON format output needed, no unnecessary output needed):

    - Active agent:  The individual or entity that performs an action or initiates a process within a scenario. 
    - Passive agent: The individual or entity that is affected or impacted by the action performed by the active agent.
    - Action done by active agent: The specific act or behavior undertaken by the active agent that influences the passive agent.
    - Domain : The context or area (e.g., healthcare, business, technology) in which the action takes place, influencing the ethical implications.
    - Ethical issue(s): The moral conflicts that arise from the action, questioning what is right or wrong in the scenario.
    - Consequence: The outcome or effect that results from the action of the active agent on the passive agent or the environment.
    - Severity of consequence: The degree of harm or benefit caused by the consequence, ranging from mild to severe. 
    - Utility of consequence: Determine whether it benefits or harms stakeholders.
    - Duration of consequence: The length of time for which the consequence persists, either immediately or over the long term.
    - Moral intention of active agent: The ethical purpose or goal that the active agent aims to achieve through their actions.
    - Ethical principles upheld: The moral values or standards that are supported and respected by the active agent's actions.
    - Ethical principles violated: The moral values or standards that are disregarded or harmed by the active agent's actions.
    - Relationship between active agent and passive agent: The nature of the interaction or connection between the individuals or entities involved, which may affect the ethical dynamics.
    - Moral decision: Was the action of the active agent ethical or not based on the situation? Choose from ["Morally right","Morally wrong","Morally grey"].

    Provide your response in JSON format only.
    """

# Function to extract JSON blocks from raw response
def extract_json_from_response(response_text):
    """Return every JSON object embedded in ``response_text``.

    Each ``{`` found in the text is decoded with
    ``json.JSONDecoder.raw_decode``, so nested objects and string values that
    contain ``}`` are parsed as a whole (a non-greedy ``{.*?}`` regex cuts
    them off at the first closing brace).

    Args:
        response_text: Raw model reply, possibly with prose around the JSON.

    Returns:
        A list of decoded objects in order of appearance (empty when none
        decode), or ``None`` if an unexpected error occurred. Candidates that
        fail to decode are reported on stdout and skipped.
    """
    try:
        decoder = json.JSONDecoder()
        parsed_data = []
        start = response_text.find("{")
        while start != -1:
            try:
                item, stop = decoder.raw_decode(response_text, start)
                parsed_data.append(item)
            except json.JSONDecodeError as e:
                print(f"JSONDecodeError: {e} for block: {response_text[start:start + 100]}...")
                # Skip past the first closing brace of the bad candidate so
                # fragments nested inside it are not picked up on their own.
                closing = response_text.find("}", start)
                if closing == -1:
                    break  # unterminated object runs to the end of the text
                stop = closing + 1
            start = response_text.find("{", stop)
        return parsed_data
    except Exception as e:
        print(f"Error extracting JSON: {e}")
        return None

# Function to run the agent and get a response
def run_agent(client, prompt, model, content):
    """Send one prompt to the chat model and return the JSON found in its reply.

    Args:
        client: Object exposing ``chat.completions.create(model=..., messages=...)``.
        prompt: User message text.
        model: Model identifier passed through to the API.
        content: Persona / instruction text for the model.

    Returns:
        Whatever ``extract_json_from_response`` returns for the reply text, or
        ``None`` if the call or the extraction raised.
    """
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                # The persona text is an instruction to the model, so it goes
                # in the system role rather than posing as a prior assistant turn.
                {"role": "system", "content": content},
                {"role": "user", "content": prompt}
            ]
        )
        response_text = response.choices[0].message.content.strip()
        print(f"Raw Response: {response_text}")
        return extract_json_from_response(response_text)
    except Exception as e:
        print(f"Error in API call: {e}")
        return None

# Read the Together API key from the environment instead of embedding it in
# the notebook. NOTE: the key that used to be hard-coded here was exposed in
# the notebook source and should be revoked and replaced.
api_key = os.environ.get("TOGETHER_API_KEY")
if not api_key:
    raise RuntimeError(
        "TOGETHER_API_KEY is not set; export it (or add it through your "
        "platform's secrets manager) before running this cell."
    )
client = Together(api_key=api_key)

# Define the LLM model
llm_model = "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo"

# Load dataset
data = pd.read_json("practice.json")

# Output schema: the input text, the fourteen extracted features, and an
# "Error" column for cases where no usable reply came back.
columns = [
    "selftext",
    "Active agent", "Passive agent", "Action done by active agent",
    "Domain", "Ethical issue(s)",
    "Consequence", "Severity of consequence",
    "Utility of consequence", "Duration of consequence",
    "Moral intention of active agent",
    "Ethical principles upheld", "Ethical principles violated",
    "Relationship between active agent and passive agent",
    "Moral decision",
    "Error",
]

# One dict per output row
rows = []

# Walk the dataset one case at a time
for case_idx in tqdm(range(len(data))):
    selftext = data["selftext"].iloc[case_idx]

    # Ask the model for the ethical-feature breakdown of this case
    extracted = run_agent(
        client,
        inc_prompt(selftext),
        llm_model,
        "You are a text analyst and legal domain expert."
    )

    # Nothing usable came back: emit an error row and move on
    if not extracted:
        rows.append({"selftext": selftext, "Error": "Invalid or missing response"})
        continue

    # One output row per JSON object in the reply; keys the model left out
    # become None, and the original case text always fills "selftext".
    for item in extracted:
        row = {key: item.get(key) for key in columns}
        row["selftext"] = selftext
        rows.append(row)

# Assemble the rows and write them out as CSV
output_file = "feature_extraction.csv"
results_df = pd.DataFrame(rows)
results_df.to_csv(output_file, index=False)
print(f"Results dataset saved as '{output_file}'")

 20%|██        | 1/5 [00:02<00:08,  2.15s/it]

Raw Response: {
  "Active agent": "Hospital/Funeral Home",
  "Passive agent": "Husband/Wife",
  "Action done by active agent": "Providing incorrect information",
  "Domain": "Healthcare",
  "Ethical issue(s)": "Misinformation, Wrongful death notification",
  "Consequence": "Emotional distress, Potential lawsuit",
  "Severity of consequence": "High",
  "Utility of consequence": "Harmful",
  "Duration of consequence": "Long term",
  "Moral intention of active agent": "Negligence",
  "Ethical principles upheld": "None",
  "Ethical principles violated": "Respect for autonomy, Truthfulness",
  "Relationship between active agent and passive agent": "Stranger/Client",
  "Moral decision": "Morally wrong"
}


 40%|████      | 2/5 [00:03<00:05,  1.73s/it]

Raw Response: {
  "Active agent": "Scammers",
  "Passive agent": "Husband and wife",
  "Action done by active agent": "Impersonating husband and signing jury summons",
  "Domain": "Justice system",
  "Ethical issue(s)": "Identity theft, impersonation, and potential miscarriage of justice",
  "Consequence": "Husband's financial loss and potential jail time",
  "Severity of consequence": "Severe",
  "Utility of consequence": "Harmful",
  "Duration of consequence": "Immediate and long-term",
  "Moral intention of active agent": "Malicious",
  "Ethical principles upheld": "None",
  "Ethical principles violated": "Respect for identity, honesty, and fairness",
  "Relationship between active agent and passive agent": "Stranger-to-stranger",
  "Moral decision": "Morally wrong"
}


 60%|██████    | 3/5 [00:05<00:04,  2.02s/it]

Raw Response: {
  "Active agent": "Shop owner",
  "Passive agent": "Customer",
  "Action done by active agent": "Refused refund",
  "Domain": "Business",
  "Ethical issue(s)": "Threat of violence",
  "Consequence": "Customer's anger and threat",
  "Severity of consequence": "High",
  "Utility of consequence": "Harmful",
  "Duration of consequence": "Immediate",
  "Moral intention of active agent": "Followed policy",
  "Ethical principles upheld": "Respect for policy",
  "Ethical principles violated": "Customer's right to refund",
  "Relationship between active agent and passive agent": "Customer-employee",
  "Moral decision": "Morally right"
}


 80%|████████  | 4/5 [00:07<00:01,  1.80s/it]

Raw Response: {
  "Active agent": "Employee",
  "Passive agent": "Employee",
  "Action done by active agent": "Sharing explicit photos",
  "Domain": "Workplace",
  "Ethical issue(s)": "Invasion of privacy, harassment",
  "Consequence": "Emotional distress, potential job loss",
  "Severity of consequence": "Moderate",
  "Utility of consequence": "Harmful",
  "Duration of consequence": "Short-term",
  "Moral intention of active agent": "Mischievous, attention-seeking",
  "Ethical principles upheld": "None",
  "Ethical principles violated": "Respect for privacy, consent",
  "Relationship between active agent and passive agent": "Colleagues",
  "Moral decision": "Morally wrong"
}


100%|██████████| 5/5 [00:09<00:00,  1.81s/it]

Raw Response: {
  "Active agent": "Other child",
  "Passive agent": "Injured child",
  "Action done by active agent": "Hit with Stanley cup",
  "Domain": "Education",
  "Ethical issue(s)": "Intentional harm to minor",
  "Consequence": "Physical harm, stammer, tic",
  "Severity of consequence": "Severe",
  "Utility of consequence": "Harmful",
  "Duration of consequence": "Long term",
  "Moral intention of active agent": "Unknown",
  "Ethical principles upheld": "None",
  "Ethical principles violated": "Non-maleficence (do no harm)",
  "Relationship between active agent and passive agent": "Peer relationship",
  "Moral decision": "Morally wrong"
}
Results dataset saved as 'feature_extraction.csv'



