In [None]:
from sentence_transformers import SentenceTransformer
from sklearn.cluster import KMeans
from collections import defaultdict
from sklearn.metrics import silhouette_score

# Generalizations already obtained with Qwen-3B Instruct

In [1]:
# Input records for the clustering step: 60 dicts, each with three string fields
#   'id'             - record identifier such as 'K_1_6'
#   'scenario'       - a concrete situation described in natural language
#   'generalization' - an abstracted "To <verb> ..." phrasing of that scenario
# NOTE(review): ids look like <letter>_<group>_<index>, with the letters K / L / D
# covering scenarios about ending a life / stealing or protesting / lying.
# This scheme is inferred from the entries only - confirm against the dataset spec.
# NOTE(review): some generalization strings repeat verbatim across records
# (e.g. "To give false positive feedback to spare someone's feelings").
list_of_gen = [{'id': 'K_1_6',
  'scenario': 'A hospital adheres to guidelines for assisted dying in terminal cases.',
  'generalization': 'To comply with guidelines for facilitating end-of-life care in terminal cases'},
 {'id': 'K_1_4',
  'scenario': 'A doctor follows a legally valid living will authorizing euthanasia.',
  'generalization': 'To authorize euthanasia as per a legally valid directive'},
 {'id': 'L_1_9',
  'scenario': 'Group steals electronics from department store.',
  'generalization': 'To steal electronics from a retail establishment'},
 {'id': 'K_2_1',
  'scenario': 'A bodyguard kills an assassin to protect a political leader.',
  'generalization': 'To kill someone to protect another person'},
 {'id': 'D_2_5',
  'scenario': 'A refugee lies about their identity to escape persecution and seek asylum in a safer country.',
  'generalization': 'To lie to escape persecution and seek safety'},
 {'id': 'K_1_9',
  'scenario': 'A doctor assists in ending a life without consulting family or legal authorities.',
  'generalization': 'To assist in ending a life without family or legal consultation'},
 {'id': 'D_2_9',
  'scenario': 'An artist claims their work is entirely original, even though it heavily plagiarizes another’s style and concept.',
  'generalization': "To claim originality for work that heavily plagiarizes another's style and concept"},
 {'id': 'D_2_3',
  'scenario': 'A scientist lies to a funding agency about their research results to secure a grant.',
  'generalization': 'To lie to secure funding for research'},
 # NOTE(review): this generalization does not describe the scenario (a lie told to
 # gain extra exam time); the same text is used for several D_1_* records - verify.
 {'id': 'D_2_1',
  'scenario': 'A student lies to their professor about being sick to get extra time for an exam.',
  'generalization': "To give false positive feedback to spare someone's feelings"},
 {'id': 'L_2_2',
  'scenario': 'A student participates in a manifestation for the climate defying local interdiction.',
  'generalization': 'To participate in a demonstration for a cause despite local opposition'},
 # NOTE(review): generalization still contains unfilled template placeholders.
 {'id': 'L_1_1',
  'scenario': 'A father steals food to feed his starving children, despite knowing theft is illegal.',
  'generalization': 'To steal [generic object] to feed [optional context]'},
 {'id': 'K_1_5',
  'scenario': 'A doctor honors a court-approved decision for the euthanasia of a patient in a vegetative state.',
  'generalization': 'To honor a court-approved euthanasia request for a patient in a vegetative state'},
 {'id': 'D_1_3',
  'scenario': 'A parent lies to their child, saying their pet went to live on a farm instead of telling them it died.',
  'generalization': "To give false information to spare someone's feelings"},
 {'id': 'D_2_10',
  'scenario': 'A person lies about having a disability to get special accommodations on a standardized test.',
  'generalization': 'To lie about having a disability to gain special accommodations'},
 # NOTE(review): generalization still contains an unfilled template placeholder.
 {'id': 'L_2_6',
  'scenario': 'A citizen joins a peaceful protest against government corruption despite a ban.',
  'generalization': 'To join an illegal protest against [generic object]'},
 {'id': 'L_1_3',
  'scenario': 'A protester steals supplies from a corporate warehouse during a riot, even though looting is prohibited.',
  'generalization': 'To steal something during a moment of chaos'},
 {'id': 'K_2_2',
  'scenario': 'A bodyguard kills an unarmed intruder approaching their client.',
  'generalization': 'To kill an unarmed intruder to protect someone'},
 {'id': 'D_2_6',
  'scenario': 'A woman lies to her abusive ex, saying she moved abroad.',
  'generalization': 'To give false information to protect someone from harm'},
 {'id': 'K_1_1',
  'scenario': 'A doctor honors a terminally ill patient’s decision to stop treatment.',
  'generalization': "To respect someone's wish to refuse life-saving treatment"},
 {'id': 'K_1_10',
  'scenario': 'A doctor performs euthanasia after the person asked for when being valid.',
  'generalization': "To request and perform euthanasia upon someone's request"},
 {'id': 'D_1_9',
  'scenario': 'A coach tells a young athlete they made the team based on skill, even though it was a pity inclusion.',
  'generalization': "To give false positive feedback to spare someone's feelings"},
 {'id': 'D_1_7',
  'scenario': 'A spy lies to an enemy agent to protect national security.',
  'generalization': 'To lie to protect national security'},
 {'id': 'D_1_2',
  'scenario': 'A husband lies to his wife about losing his job to avoid worrying her.',
  'generalization': "To lie to protect someone's feelings"},
 {'id': 'K_2_6',
  'scenario': 'A mother kills an armed attacker to protect her child.',
  'generalization': 'To use lethal force to protect someone'},
 {'id': 'D_2_8',
  'scenario': 'A startup founder lies to investors about user growth metrics to secure funding and keep the company afloat.',
  'generalization': 'To lie to investors about user growth metrics to secure funding'},
 {'id': 'L_1_8',
  'scenario': 'A man steals his neighbor newspaper because his monthly subscription is over.',
  'generalization': 'To steal something from a neighbor because of financial reasons'},
 {'id': 'K_2_4',
  'scenario': 'A police officer kills a hostage-taker to save innocent lives.',
  'generalization': 'To use lethal force to protect others'},
 {'id': 'L_1_7',
  'scenario': 'A kid steals candies at the bakery.',
  'generalization': 'To steal sweets from a store'},
 {'id': 'D_2_2',
  'scenario': 'A politician lies to the public about their intentions to gain votes.',
  'generalization': 'To lie to the public to gain votes'},
 {'id': 'D_1_10',
  'scenario': 'A nurse tells an elderly patient that their family is “just running late” every day, even though no one is visiting',
  'generalization': "To conceal the truth about a loved one's absence from an elderly patient"},
 {'id': 'D_2_7',
  'scenario': 'A teen lies about their sexual orientation in a hostile environment.',
  'generalization': "To lie about one's identity in a challenging situation"},
 {'id': 'D_1_4',
  'scenario': 'A mother lies about Santa Claus to her child.',
  'generalization': "To give false positive feedback to spare someone's feelings"},
 {'id': 'D_1_6',
  'scenario': 'A friend lies to someone about their appearance to spare their feelings.',
  'generalization': "To give false positive feedback to spare someone's feelings"},
 {'id': 'D_1_1',
  'scenario': 'A doctor lies to a patient about their terminal diagnosis to keep their spirits high.',
  'generalization': 'To lie to a patient about their condition to maintain their spirits'},
 {'id': 'L_1_4',
  'scenario': 'A hacker steals personal data to expose government corruption, even though hacking is illegal.',
  'generalization': 'To steal personal data during a breach to expose government corruption'},
 {'id': 'D_1_5',
  'scenario': 'A teacher lies to a struggling student, saying they are improving, to boost their confidence.',
  'generalization': "To give false positive feedback to spare someone's feelings"},
 {'id': 'L_1_2',
  'scenario': 'A wealthy executive steals money from their company’s pension fund, even though it’s a serious crime.',
  'generalization': 'To steal from a company’s pension fund, even though it is a serious crime'},
 {'id': 'L_2_9',
  'scenario': 'A labor leader organizes a strike against unfair wages despite legal risks.',
  'generalization': 'To organize a strike despite legal risks to demand fair wages'},
 {'id': 'L_1_6',
  'scenario': 'A soldier steals medical supplies from enemy territory during wartime, even though wartime looting is banned.',
  'generalization': 'To steal medical supplies during wartime'},
 {'id': 'L_1_10',
  'scenario': "Student steals food from neighbor's house.",
  'generalization': "To steal food from a neighbor's property"},
 {'id': 'L_2_10',
  'scenario': 'A business leader stages a protest against environmental regulations for personal gain.',
  'generalization': 'To organize and participate in an illegal protest for personal benefit'},
 {'id': 'L_2_1',
  'scenario': 'A radical group organizes a rally encouraging violence against minorities.',
  'generalization': 'To organize a rally promoting violence against a marginalized group'},
 {'id': 'K_2_7',
  'scenario': 'A father kills a man on the beach because he walked on his son’s sand castle.',
  'generalization': 'To kill someone on the pretext of protecting property'},
 {'id': 'K_1_8',
  'scenario': 'A nurse administers lethal drugs without proper legal oversight.',
  'generalization': 'To administer lethal drugs without proper legal oversight'},
 {'id': 'K_1_7',
  'scenario': 'A doctor performs euthanasia on an elderly person without clear directives.',
  'generalization': 'To perform euthanasia on someone without clear directives'},
 {'id': 'K_2_9',
  'scenario': 'A farmer shoots a stranger walking toward his barn late at night.',
  'generalization': 'To attack someone at night without provocation'},
 {'id': 'L_2_8',
  'scenario': "A doctor leads a march for better healthcare policies despite the government's opposition.",
  'generalization': 'To lead a march for better healthcare policies despite opposition'},
 {'id': 'L_1_5',
  'scenario': 'A struggling artist steals software to create art without paying for expensive tools, even though piracy is against the law.',
  'generalization': 'To steal software to create art despite it being illegal'},
 {'id': 'K_1_2',
  'scenario': 'A doctor injects lethal drugs to someone who has lost consciousness.',
  'generalization': 'To administer lethal drugs to someone who is unconscious'},
 {'id': 'D_2_4',
  'scenario': 'A lawyer lies to the court about evidence to ensure their client is acquitted.',
  'generalization': "To lie to protect someone's legal rights"},
 {'id': 'K_2_10',
  'scenario': 'A nightclub bouncer kills a man who loudly threatens to "burn this place down" after being denied entry.',
  'generalization': 'To kill someone who loudly threatens violence after being denied entry'},
 {'id': 'L_2_4',
  'scenario': 'An extremist protest violently, ignoring the law.',
  'generalization': 'To violently protest despite the law'},
 {'id': 'K_2_8',
  'scenario': 'A teacher tackles and fatally stabs an armed student during a school shooting',
  'generalization': 'To violently intervene and harm someone during a crisis situation'},
 {'id': 'K_1_3',
  'scenario': 'A parent refuses life-saving treatment for their child due to personal beliefs.',
  'generalization': 'To refuse life-saving treatment for someone based on personal beliefs'},
 {'id': 'L_2_7',
  'scenario': 'A group of students participate in a climate march, defying local restrictions.',
  'generalization': 'To participate in a protest despite local restrictions'},
 {'id': 'K_2_3',
  'scenario': 'A police officer kills a protester throwing rocks to stop further unrest.',
  'generalization': 'To kill someone resisting during a moment of chaos'},
 {'id': 'L_2_3',
  'scenario': 'An activist joins a demonstration for human rights, even though it is illegal.',
  'generalization': 'To join an illegal demonstration for human rights'},
 {'id': 'D_1_8',
  'scenario': 'A psychiatrist tells a patient that their test results showed no signs of a disorder, even though they did.',
  'generalization': "To give false positive feedback to spare someone's feelings"},
 {'id': 'L_2_5',
  'scenario': 'A person joins an illegal demonstration that promotes hate speech.',
  'generalization': 'To join an illegal demonstration that promotes harmful rhetoric'},
 # NOTE(review): scenario pairs 'father' with 'her child'; K_2_6 has the same
 # sentence with 'mother' - possible copy/paste slip, confirm the intended wording.
 {'id': 'K_2_5',
  'scenario': 'A father kills an armed attacker to protect her child.',
  'generalization': 'To use lethal force to protect someone'}]

# Clustering over generalizations' embeddings

In [None]:
"""
Clustering moral generalizations using SBERT embeddings.

This script performs the following steps:
1. Encodes a list of moral generalizations into dense vectors using a pre-trained SBERT model.
2. Applies KMeans clustering to group similar generalizations.
3. Automatically selects the optimal number of clusters based on silhouette score.
4. Groups and displays original scenarios by cluster.

"""

# -----------------------------
# 1. Encode moral generalizations
# -----------------------------

# Extract generalization texts from the input list of dictionaries
generalizations = [item['generalization'] for item in list_of_gen]

# Load a lightweight pre-trained SBERT model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Encode the generalizations into dense vector representations
embeddings = model.encode(generalizations)

# -----------------------------
# 2. Determine the optimal number of clusters using silhouette score
# -----------------------------

best_k = 0
best_score = float("-inf")  # any real silhouette score (range [-1, 1]) beats this
best_kmeans = None
best_labels = None

# Candidate cluster counts are 5..14, capped at n_samples - 1: the silhouette
# score is only defined when there are fewer clusters than samples.
max_k = min(14, len(embeddings) - 1)

# Try different values of k (number of clusters) to find the best one
for k in range(5, max_k + 1):
    # Use one fixed KMeans configuration (seed and n_init) for every candidate
    # so the score that selects k describes the same clustering that is kept.
    # Leaving n_init at its default here would make the search depend on the
    # installed scikit-learn version and differ from the final model.
    candidate = KMeans(n_clusters=k, random_state=42, n_init=10)
    candidate_labels = candidate.fit_predict(embeddings)
    score = silhouette_score(embeddings, candidate_labels)

    if score > best_score:
        best_k = k
        best_score = score
        best_kmeans = candidate
        best_labels = candidate_labels

# Fail with a clear message instead of passing n_clusters=0 to KMeans
if best_kmeans is None:
    raise ValueError(
        "No candidate number of clusters could be evaluated: at least 6 "
        f"embeddings are required, got {len(embeddings)}."
    )

# Keep the model and labels that achieved the best silhouette score; with a
# fixed random_state a refit would reproduce them, so no extra fit is needed.
kmeans = best_kmeans
labels = best_labels

# -----------------------------
# 3. Group original scenarios by cluster
# -----------------------------

# Maps cluster id -> list of scenario texts assigned to that cluster
clusters = defaultdict(list)

for idx, label in enumerate(labels):
    scenario = list_of_gen[idx]['scenario']
    # Store plain Python ints as keys: they compare and hash equal to the
    # NumPy integer labels, but sort and display cleanly.
    clusters[int(label)].append(scenario)

# -----------------------------
# 4. Display clustering results
# -----------------------------

# Print clusters in ascending id order so the report is easy to scan and
# does not depend on which cluster happens to be encountered first.
for cluster_id in sorted(clusters):
    scenarios = clusters[cluster_id]
    print(f"\n🧩 Cluster {cluster_id} ({len(scenarios)} scenarios):")
    for scenario in scenarios:
        print(f" - {scenario}")
