In [14]:
import pandas as pd
import numpy as np

def build_distractor_dataset(
    num_samples, 
    num_pos, num_neg, num_control,
    experiment_name='NEW_moralchoice',
    distractor_filepath='distractors.csv',
):
    """Expand the moral-choice scenarios with distractor text and save the result.

    The first ``num_samples`` rows of the low- and high-ambiguity scenario
    files are each repeated once per variant: one unmodified ``base`` row,
    followed by one row per pleasant (``pos``), unpleasant (``neg``) and
    stopword (``stop``) distractor. The distractor text is appended to the
    ``context`` column, separated by a single space; ``base`` rows keep their
    context unchanged.

    Args:
        num_samples: Number of leading scenarios taken from each file.
        num_pos: Maximum number of distractors with ``pleasant == 'Yes'``.
        num_neg: Maximum number of distractors with ``pleasant == 'No'``.
        num_control: Maximum number of rows taken from
            ``stopword_distractors.csv``.
        experiment_name: Prefix of the two output files
            (``{experiment_name}_low_ambiguity.csv`` / ``..._high_ambiguity.csv``).
        distractor_filepath: File name, relative to ``../data/scenarios/``,
            of the CSV with ``distractor`` and ``pleasant`` columns.

    Returns:
        ``(lo_new_data, hi_new_data)``: the expanded low- and high-ambiguity
        frames. Each has the original columns plus ``type``
        (``base``/``pos``/``neg``/``stop``) and ``distractor_id``
        (``base``, ``pos_1`` ... ``stop_k``).

    Side effects:
        Writes both frames to ``../data/scenarios/`` and prints the number of
        rows of each expanded frame (low first, then high).
    """
    scenario_dir = "../data/scenarios/"
    hi_curr_scenarios = pd.read_csv(scenario_dir + "moralchoice_high_ambiguity.csv")
    lo_curr_scenarios = pd.read_csv(scenario_dir + "moralchoice_low_ambiguity.csv")

    distractors = pd.read_csv(scenario_dir + distractor_filepath)
    ctrl_distractors = pd.read_csv(scenario_dir + "stopword_distractors.csv")

    hi_original_data = hi_curr_scenarios[:int(num_samples)]
    lo_original_data = lo_curr_scenarios[:int(num_samples)]

    pos_distractors = list(distractors[distractors.pleasant == 'Yes'][:num_pos].distractor.values)
    neg_distractors = list(distractors[distractors.pleasant == 'No'][:num_neg].distractor.values)
    stop_distractors = list(ctrl_distractors[:num_control].distractor.values)

    # Every per-variant list is derived from the distractors actually selected,
    # so the ids always line up with the rows (the source files may hold fewer
    # distractors than requested).
    labelled_groups = [
        ('pos', pos_distractors),
        ('neg', neg_distractors),
        ('stop', stop_distractors),
    ]
    distractors_full = np.array(
        [''] + [text for _, group in labelled_groups for text in group]
    )
    types_full = ['base'] + [label for label, group in labelled_groups for _ in group]
    ids_full = ['base'] + [
        f'{label}_{i}'
        for label, group in labelled_groups
        for i in range(1, len(group) + 1)
    ]
    n_per_row = len(distractors_full)

    def expand(original_df):
        """Repeat each scenario once per variant and attach the variant columns."""
        # Tile per frame: the two files need not yield the same number of rows.
        n_rows = len(original_df)
        repeated = original_df.loc[original_df.index.repeat(n_per_row)].reset_index(drop=True)

        distractors_column = pd.Series(np.tile(distractors_full, n_rows), index=repeated.index)
        # The base variant is marked by the empty string (never NaN), so test
        # for '' explicitly to avoid appending a stray trailing space.
        is_base = (distractors_column == '').to_numpy(dtype=bool)
        repeated['context'] = np.where(
            is_base,
            repeated['context'],
            repeated['context'] + ' ' + distractors_column,
        )

        repeated['type'] = np.tile(types_full, n_rows)
        # scenario_id is already carried over by the row repetition above.
        repeated['distractor_id'] = np.tile(ids_full, n_rows)
        print(len(repeated))
        return repeated

    lo_new_data = expand(lo_original_data)
    hi_new_data = expand(hi_original_data)

    lo_new_data.to_csv(f'../data/scenarios/{experiment_name}_low_ambiguity.csv', index=False)
    hi_new_data.to_csv(f'../data/scenarios/{experiment_name}_high_ambiguity.csv', index=False)

    return lo_new_data, hi_new_data


In [16]:
# Build the dataset from the first 100 scenarios of each file with up to
# 5 pleasant, 5 unpleasant and 5 stopword distractors per scenario.
# NOTE: experiment_name already ends in '_' and the function appends
# '_low_ambiguity.csv' / '_high_ambiguity.csv', so the files are written as
# '5each__low_ambiguity.csv' and '5each__high_ambiguity.csv'.
lo_new_data, hi_new_data = build_distractor_dataset(
    num_samples = 100, 
    num_pos = 5, num_neg = 5, num_control=5,
    experiment_name = '5each_',
    distractor_filepath = 'distractors_no_loc.csv',
    )

1600
1600


# Add distractors

In [None]:
import pandas as pd
import numpy as np

# Load the high- and low-ambiguity scenario tables (paths are relative to the
# notebook's working directory).
hi_curr_scenarios = pd.read_csv("../data/scenarios/moralchoice_high_ambiguity.csv")
lo_curr_scenarios = pd.read_csv("../data/scenarios/moralchoice_low_ambiguity.csv")

In [None]:
# Row counts of the high- and low-ambiguity scenario tables, in that order.
print(len(hi_curr_scenarios))
print(len(lo_curr_scenarios))

In [None]:
# Column names of the high-ambiguity scenario table.
hi_curr_scenarios.columns

In [None]:
# Load the distractor table; later cells read its `distractor` and `pleasant` columns.
distractors = pd.read_csv('../data/scenarios/distractors.csv')
# Displays the whole frame as the cell output.
distractors

In [None]:
NUM_SAMPLES = 10
NUM_POS = 1
NUM_NEG = 1

# NUM_SAMPLES is split evenly between the two scenario files.
per_file = int(NUM_SAMPLES / 2)
hi_original_data = hi_curr_scenarios[:per_file]
lo_original_data = lo_curr_scenarios[:per_file]

# Take the first NUM_POS pleasant and NUM_NEG unpleasant distractor texts.
is_pleasant = distractors.pleasant == 'Yes'
is_unpleasant = distractors.pleasant == 'No'
pos_distractors = distractors[is_pleasant][:NUM_POS].distractor.values
neg_distractors = distractors[is_unpleasant][:NUM_NEG].distractor.values

print(len(pos_distractors), len(neg_distractors))

# Variant 0 is the empty distractor ('neutral'); the positive ones follow,
# then the negative ones. types_full labels the variants in the same order.
distractors_full = np.array(['', *pos_distractors, *neg_distractors])
types_full = [
    'neutral',
    *(['pos'] * len(pos_distractors)),
    *(['neg'] * len(neg_distractors)),
]

In [None]:
# Sanity check: there must be exactly one type label per distractor variant.
len(distractors_full) == len(types_full)

In [None]:
n_per_row = len(distractors_full)


def _prepend_distractors(original_df):
    """Repeat each scenario once per variant and prepend the distractor text.

    Rows of the neutral variant (empty distractor) keep their context
    unchanged; all other rows become "<distractor> <context>". A `type` column
    with the variant label is added.
    """
    # Tile per frame so the low and high frames may differ in length.
    n_rows = len(original_df)
    repeated = original_df.loc[original_df.index.repeat(n_per_row)].reset_index(drop=True)

    distractors_column = pd.Series(np.tile(distractors_full, n_rows), index=repeated.index)
    # The neutral variant is the empty string, which pd.isna never matches;
    # compare against '' so neutral rows do not gain a leading space.
    is_neutral = (distractors_column == '').to_numpy(dtype=bool)
    repeated['context'] = np.where(
        is_neutral,
        repeated['context'],
        distractors_column + ' ' + repeated['context'],
    )

    # add the 'type' column
    repeated['type'] = np.tile(types_full, n_rows)
    return repeated


lo_repeated = _prepend_distractors(lo_original_data)
hi_repeated = _prepend_distractors(hi_original_data)

lo_new_data = lo_repeated
hi_new_data = hi_repeated

In [None]:
# Sizes before and after expansion, low-ambiguity first, then high-ambiguity.
print(len(lo_original_data), len(hi_original_data))
print(len(lo_new_data), len(hi_new_data))

In [None]:
# Persist the expanded frames without the pandas index; existing files at
# these paths are overwritten.
lo_new_data.to_csv('../data/scenarios/NEW_moralchoice_low_ambiguity.csv', index=False)
hi_new_data.to_csv('../data/scenarios/NEW_moralchoice_high_ambiguity.csv', index=False)

# Generate stopwords-based distractors

In [None]:
import pandas as pd

# Use the same repository-relative location as every other cell instead of a
# user-specific absolute path, so the notebook runs from any checkout.
distractors_context = pd.read_csv('../data/scenarios/distractors_no_loc.csv')
distractors_context.head()

In [None]:
import matplotlib.pyplot as plt

# Character length of every distractor (non-string values are stringified first).
char_lengths = distractors_context['distractor'].astype(str).apply(len)

# Horizontal box plot: length runs along the x-axis and the y-axis carries no
# quantity, so the figure is labelled as a distribution, not a histogram of counts.
plt.boxplot(char_lengths, vert=False)
plt.title('Distribution of Distractor Character Lengths')
plt.xlabel('char length')
plt.yticks([])
plt.show()


In [None]:
import random
import nltk
from nltk.corpus import stopwords

# Fetch the NLTK stopword corpus and keep the English list as the vocabulary
# for the control distractors.
nltk.download('stopwords')
stop_words = stopwords.words('english')

# Target length for each control string: the character length of the
# distractor at the same position in distractors_context.
char_lengths = distractors_context['distractor'].astype(str).apply(len)

def generate_stopword_string(max_length, words=None, rng=None):
    """Build a space-separated string of random words no longer than ``max_length``.

    Words are drawn one at a time; generation stops at the first draw that
    would push the string past ``max_length`` characters (separating spaces
    included), so the result may be shorter than the limit and is ``''`` when
    even the first drawn word does not fit.

    Args:
        max_length: Maximum number of characters of the returned string.
        words: Sequence of candidate words. Defaults to the notebook-level
            ``stop_words`` list.
        rng: Object with a ``choice(seq)`` method, e.g. ``random.Random(seed)``
            for reproducible output. Defaults to the global ``random`` module.

    Returns:
        The generated string.
    """
    vocabulary = stop_words if words is None else words
    chooser = random if rng is None else rng

    result = []
    total_len = 0

    while True:
        word = chooser.choice(vocabulary)
        # Every word after the first costs one extra character for the space.
        word_len = len(word) + (1 if result else 0)

        if total_len + word_len > max_length:
            break

        result.append(word)
        total_len += word_len

    return ' '.join(result)

# Seed the global RNG so that re-running the notebook regenerates the same
# control distractors (and therefore the same stopword_distractors.csv).
random.seed(0)

# One stopword string per original distractor, capped at that distractor's length.
new_distractors = [generate_stopword_string(length) for length in char_lengths]
stopword_df = pd.DataFrame({'distractor': new_distractors})

In [None]:
# Preview the first generated stopword distractors.
stopword_df.head()

In [None]:
# Write the control distractors to the file that build_distractor_dataset
# reads as 'stopword_distractors.csv'; an existing file is overwritten.
stopword_df.to_csv(f'../data/scenarios/stopword_distractors.csv', index=False)

# Fun EDA

In [None]:
from wordcloud import WordCloud, STOPWORDS
import matplotlib.pyplot as plt

# Concatenate every pleasant distractor into one text for the word cloud.
pleasant_rows = distractors[distractors.pleasant == 'Yes']
pos_text = " ".join(pleasant_rows['distractor'].values)

cloud_options = dict(
    width=800,
    height=400,
    background_color='white',
    stopwords=STOPWORDS,
    colormap='Reds',
)
wordcloud = WordCloud(**cloud_options).generate(pos_text)

# Render the cloud as an image without axes.
plt.figure(figsize=(10, 5))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
plt.title('pos distractors', fontsize=20)
plt.show()


In [None]:
# Concatenate every unpleasant distractor into one text. It is stored under
# its own name so the positive text from the previous cell is not overwritten.
neg_text = " ".join(distractors[distractors.pleasant == 'No']['distractor'].values)

wordcloud = WordCloud(
    width=800,
    height=400,
    background_color='white',
    stopwords=STOPWORDS,
    colormap='Blues'
).generate(neg_text)

# Render the cloud as an image without axes.
plt.figure(figsize=(10, 5))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
plt.title('neg distractors', fontsize=20)
plt.show()


# Results

In [None]:
import pandas as pd

# Response file to analyse. Exactly one assignment is live; the other runs are
# kept as comments so switching is a one-line change. Paths are relative to
# the notebook, like the scenario files.
RESPONSES_DIR = "../data/responses"

# file = f"{RESPONSES_DIR}/yay_moraltest/NEW_moralchoice_high_ambiguity/google_flan-t5-small.csv"
# file = f"{RESPONSES_DIR}/moraltest_2/NEW_moralchoice_high_ambiguity/google_flan-t5-small.csv"
# file = f"{RESPONSES_DIR}/moraltest_low/NEW_moralchoice_low_ambiguity/google_flan-t5-small.csv"
# file = f"{RESPONSES_DIR}/moraltest_stop/NEW_moralchoice1_high_ambiguity/google_flan-t5-small.csv"
file = f"{RESPONSES_DIR}/moraltest_stop_yay/NEW_moralchoice1_low_ambiguity/google_flan-t5-small.csv"

df = pd.read_csv(file)
df.head()

In [None]:
# Frequency of each distinct raw answer string.
df.answer_raw.value_counts()

In [None]:
# Frequency of each value of the `decision` column.
df.decision.value_counts()

In [None]:
import matplotlib.pyplot as plt

# Share of each decision value over all responses.
counts = df['decision'].value_counts()

plt.figure(figsize=(6, 6))
pie_options = dict(labels=counts.index, autopct='%1.1f%%', startangle=90)
plt.pie(counts, **pie_options)
# plt.title('Decision Distribution')
plt.axis('equal')  # keep the pie circular

plt.show()


In [None]:
import matplotlib.pyplot as plt

# NOTE(review): this cell is an exact copy of the preceding pie-chart cell and
# draws the same figure; one of the two can probably be deleted.
# Count decisions
counts = df['decision'].value_counts()

# Plot pie chart
plt.figure(figsize=(6, 6))
plt.pie(counts, labels=counts.index, autopct='%1.1f%%', startangle=90)
# plt.title('Decision Distribution')
plt.axis('equal')  # Equal aspect ratio ensures the pie is circular.

plt.show()


In [None]:
# scenario_id of every row whose raw answer is exactly 'No'.
df[df.answer_raw=='No']['scenario_id']

In [None]:
# Question text of every row whose raw answer is exactly 'No'.
df[df.answer_raw=='No']['question_text'].values

In [None]:
# First rows whose decision is 'invalid'.
df[df.decision=='invalid'].head()

In [None]:
# Swap the 'A'/'B' labels on rows with question_ordering == 1
# (presumably the runs where the two options were shown in reverse order — confirm).
# Both masks are computed BEFORE either assignment: assigning A->B first and
# then selecting B would flip the freshly written rows straight back, leaving
# every such row labelled 'A'.
# NOTE: this mutates df in place, so running the cell a second time swaps the
# labels back again.
reversed_order = df['question_ordering'] == 1
was_a = reversed_order & (df['answer'] == 'A')
was_b = reversed_order & (df['answer'] == 'B')
df.loc[was_a, 'answer'] = 'B'
df.loc[was_b, 'answer'] = 'A'

In [None]:
# Row count per scenario_id among the ids that start with "H_020".
df[df['scenario_id'].str.startswith("H_020")].scenario_id.value_counts()

In [None]:
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np

# One bar chart per scenario_id starting with "C_020", counting each value of
# `decision` (rows with an 'invalid' decision are kept here).
subset = df[df['scenario_id'].str.startswith("C_020")]
grouped = subset.groupby('scenario_id')
n = len(grouped)

fig, axes = plt.subplots(nrows=n, figsize=(6, 4 * n))
# plt.subplots hands back a bare Axes when there is a single row.
axes = [axes] if n == 1 else axes

for ax, (scenario_id, group) in zip(axes, grouped):
    counts = group['decision'].value_counts().sort_index()
    colors = cm.viridis(np.linspace(0, 1, len(counts)))

    counts.plot(kind='bar', ax=ax, color=colors)
    ax.set(
        title=f'Answer distribution for {scenario_id}',
        xlabel='Answer',
        ylabel='Count',
        ylim=(0, 40),  # fixed scale so the panels are comparable
    )
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)

plt.tight_layout()
plt.show()


In [None]:
# Raw-answer frequencies within the current `subset`.
subset.answer_raw.value_counts()

In [None]:
# Question text of the first two rows of `subset`.
subset.head(2).question_text.values

In [None]:
# Raw answer of the first row of `subset`.
subset.head(1).answer_raw

In [None]:
# Decision recorded for the first row of `subset`.
subset.head(1).decision

In [None]:
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np

# Responses for every scenario_id that starts with "C_010".
subset = df[df['scenario_id'].str.startswith("C_010")]

# Rows: scenario_id, columns: decision, cells: number of responses.
pivot = subset.pivot_table(index='scenario_id', columns='decision', aggfunc='size', fill_value=0)

# Fix the stacking order, keeping only the decisions that actually occur.
desired_order = ['action1', 'invalid', 'action2']
existing_decisions = [d for d in desired_order if d in pivot.columns]
pivot = pivot[existing_decisions]

# One viridis colour per decision, in stacking order.
colors = cm.viridis(np.linspace(0, 1, len(pivot.columns)))

fig, ax = plt.subplots(figsize=(8, 6))

# Each decision's bar segment starts where the previous one ended.
bottom = np.zeros(len(pivot))
for decision, color in zip(pivot.columns, colors):
    heights = pivot[decision].values
    ax.bar(pivot.index, heights, bottom=bottom, label=decision, color=color)
    bottom = bottom + heights

ax.set_title('Decision Distribution for Low Ambiguity Scenario: C_010')
ax.set_xlabel('Scenario ID')
ax.set_ylabel('Count')
ax.legend(title='Decision')
for side in ('top', 'right'):
    ax.spines[side].set_visible(False)
plt.xticks(rotation=45)

plt.tight_layout()
plt.show()


In [None]:
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np

# One bar chart per scenario_id starting with "C_051", counting each value of
# `answer`; rows with an 'invalid' decision are excluded.
is_c051 = df['scenario_id'].str.startswith("C_051")
is_valid = df['decision'] != 'invalid'
subset = df[is_c051 & is_valid]
grouped = subset.groupby('scenario_id')
n = len(grouped)

fig, axes = plt.subplots(nrows=n, figsize=(6, 4 * n))
# plt.subplots hands back a bare Axes when there is a single row.
axes = [axes] if n == 1 else axes

for ax, (scenario_id, group) in zip(axes, grouped):
    counts = group['answer'].value_counts().sort_index()
    colors = cm.viridis(np.linspace(0, 1, len(counts)))

    counts.plot(kind='bar', ax=ax, color=colors)
    ax.set(
        title=f'Answer distribution for {scenario_id}',
        xlabel='Answer',
        ylabel='Count',
        ylim=(0, 40),  # fixed scale so the panels are comparable
    )
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)

plt.tight_layout()
plt.show()


# High

In [None]:
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np

# One bar chart per scenario_id starting with "H_019", counting each value of
# `answer`; rows with an 'invalid' decision are excluded.
is_h019 = df['scenario_id'].str.startswith("H_019")
is_valid = df['decision'] != 'invalid'
subset = df[is_h019 & is_valid]
grouped = subset.groupby('scenario_id')
n = len(grouped)

fig, axes = plt.subplots(nrows=n, figsize=(6, 4 * n))
# plt.subplots hands back a bare Axes when there is a single row.
axes = [axes] if n == 1 else axes

for ax, (scenario_id, group) in zip(axes, grouped):
    counts = group['answer'].value_counts().sort_index()
    colors = cm.viridis(np.linspace(0, 1, len(counts)))

    counts.plot(kind='bar', ax=ax, color=colors)
    ax.set(
        title=f'Answer distribution for {scenario_id}',
        xlabel='Answer',
        ylabel='Count',
        ylim=(0, 20),  # fixed scale so the panels are comparable
    )
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)

plt.tight_layout()
plt.show()


In [None]:
# Question text of the first row whose scenario_id is exactly "H_002";
# the [0] raises IndexError when no row matches.
df[df['scenario_id']=="H_002"].question_text.values[0]

In [None]:
# One bar chart per scenario_id starting with "H_002", counting each value of
# `answer`; rows with an 'invalid' decision are excluded.
is_h002 = df['scenario_id'].str.startswith("H_002")
is_valid = df['decision'] != 'invalid'
subset = df[is_h002 & is_valid]
grouped = subset.groupby('scenario_id')
n = len(grouped)

fig, axes = plt.subplots(nrows=n, figsize=(6, 4 * n))
# plt.subplots hands back a bare Axes when there is a single row.
axes = [axes] if n == 1 else axes

for ax, (scenario_id, group) in zip(axes, grouped):
    counts = group['answer'].value_counts().sort_index()
    colors = cm.viridis(np.linspace(0, 1, len(counts)))

    counts.plot(kind='bar', ax=ax, color=colors)
    ax.set(
        title=f'Answer distribution for {scenario_id}',
        xlabel='Answer',
        ylabel='Count',
        ylim=(0, 20),  # fixed scale so the panels are comparable
    )
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)

plt.tight_layout()
plt.show()

In [None]:
# Question text of the first row whose scenario_id is exactly "H_067";
# the [0] raises IndexError when no row matches.
df[df['scenario_id']=="H_067"].question_text.values[0]

In [None]:
# Column names of the loaded response table.
df.columns

In [None]:
# Question text of the first row with scenario_id "H_067_stop"
# (an empty array when no row matches).
df[df['scenario_id']=="H_067_stop"].head(1).question_text.values

In [None]:
# One bar chart per scenario_id starting with "H_067", counting each value of
# `answer`; rows with an 'invalid' decision are excluded.
is_h067 = df['scenario_id'].str.startswith("H_067")
is_valid = df['decision'] != 'invalid'
subset = df[is_h067 & is_valid]
grouped = subset.groupby('scenario_id')
n = len(grouped)

fig, axes = plt.subplots(nrows=n, figsize=(6, 4 * n))
# plt.subplots hands back a bare Axes when there is a single row.
axes = [axes] if n == 1 else axes

for ax, (scenario_id, group) in zip(axes, grouped):
    counts = group['answer'].value_counts().sort_index()
    colors = cm.viridis(np.linspace(0, 1, len(counts)))

    counts.plot(kind='bar', ax=ax, color=colors)
    ax.set(
        title=f'Answer distribution for {scenario_id}',
        xlabel='Answer',
        ylabel='Count',
        ylim=(0, 20),  # fixed scale so the panels are comparable
    )
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)

plt.tight_layout()
plt.show()

In [None]:
# Question text of the first row whose scenario_id is exactly "H_063";
# the [0] raises IndexError when no row matches.
# NOTE(review): the plotting cell that follows filters on "H_030", not "H_063" —
# confirm which scenario was meant.
df[df['scenario_id']=="H_063"].question_text.values[0]

In [None]:
# One bar chart per scenario_id starting with "H_030", counting each value of
# `answer`; rows with an 'invalid' decision are excluded.
is_h030 = df['scenario_id'].str.startswith("H_030")
is_valid = df['decision'] != 'invalid'
subset = df[is_h030 & is_valid]
grouped = subset.groupby('scenario_id')
n = len(grouped)

fig, axes = plt.subplots(nrows=n, figsize=(6, 4 * n))
# plt.subplots hands back a bare Axes when there is a single row.
axes = [axes] if n == 1 else axes

for ax, (scenario_id, group) in zip(axes, grouped):
    counts = group['answer'].value_counts().sort_index()
    colors = cm.viridis(np.linspace(0, 1, len(counts)))

    counts.plot(kind='bar', ax=ax, color=colors)
    ax.set(
        title=f'Answer distribution for {scenario_id}',
        xlabel='Answer',
        ylabel='Count',
        ylim=(0, 20),  # fixed scale so the panels are comparable
    )
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)

plt.tight_layout()
plt.show()

In [None]:
import pandas as pd

# Strip a trailing _pos/_neg/_stop marker to recover the underlying scenario id.
df['base_id'] = df['scenario_id'].str.replace(r'_pos$|_neg$|_stop$', '', regex=True)


def _variant_of(scenario_id):
    """Return 'pos', 'neg' or 'stop' from the id's suffix, else 'neutral'."""
    for tag in ('pos', 'neg', 'stop'):
        if scenario_id.endswith('_' + tag):
            return tag
    return 'neutral'


# Ids without one of the three suffixes are the unmodified ('neutral') scenarios.
df['type'] = df['scenario_id'].map(_variant_of)

In [None]:
# Distinct values of the derived `type` column.
df.type.unique()

In [None]:
import pandas as pd

# Minimum absolute change in answer proportion (relative to the neutral
# variant) for a scenario to be kept. It must be defined here: the mask below
# reads it, and a fresh kernel has no other definition of X.
X = 0.2

# Share of each answer within every (base_id, type) group, one column per answer.
proportions = (
    df.groupby(['base_id', 'type'])['answer']
    .value_counts(normalize=True)
    .unstack(fill_value=0)
    .reset_index()
)

# Columns become (answer, type) pairs, so merged['A']['pos'] is the share of
# 'A' answers under positive distractors for each base_id.
merged = proportions.pivot(index='base_id', columns='type', values=['A', 'B'])

# A (base_id, type) pair with no rows counts as proportion 0.
merged = merged.fillna(0)
delta_pos_A = (merged['A']['pos'] - merged['A']['neutral']).abs()
delta_neg_A = (merged['A']['neg'] - merged['A']['neutral']).abs()

delta_pos_B = (merged['B']['pos'] - merged['B']['neutral']).abs()
delta_neg_B = (merged['B']['neg'] - merged['B']['neutral']).abs()

# Keep a scenario when any pos/neg shift, for either answer, exceeds X.
mask = (delta_pos_A > X) | (delta_neg_A > X) | (delta_pos_B > X) | (delta_neg_B > X)
base_ids_to_keep = merged.index[mask]

df_subset = df[df['base_id'].isin(base_ids_to_keep)]
len(df_subset)


In [None]:
# Per-scenario change in the share of 'A' answers: neutral minus positive.
share_a = merged['A']
diffs_pos = share_a['neutral'] - share_a['pos']

# Histogram of the magnitude of that change.
plt.figure(figsize=(6, 4))
plt.hist(diffs_pos.abs(), bins=20, edgecolor='black')
plt.title('Histogram of Differences for Neutral & Positive')
plt.xlabel('Neutral & Positive (A)')
plt.ylabel('Count')
plt.tight_layout()
plt.show()


In [None]:
# Per-scenario change in the share of 'A' answers: neutral minus negative.
share_a = merged['A']
diffs_neg = share_a['neutral'] - share_a['neg']

# Histogram of the magnitude of that change.
plt.figure(figsize=(6, 4))
plt.hist(diffs_neg.abs(), bins=20, edgecolor='black')
plt.title('Histogram of Differences for Neutral & Negative')
plt.xlabel('Neutral & Negative (A)')
plt.ylabel('Count')
plt.tight_layout()
plt.show()


In [None]:
# Per-scenario change in the share of 'A' answers: neutral minus stopword control.
share_a = merged['A']
diffs_stop = share_a['neutral'] - share_a['stop']

# Histogram of the magnitude of that change.
plt.figure(figsize=(6, 4))
plt.hist(diffs_stop.abs(), bins=20, edgecolor='black')
plt.title('Histogram of Differences for Neutral & Stop')
plt.xlabel('Neutral & Stop (A)')
plt.ylabel('Count')
plt.tight_layout()
plt.show()


In [None]:
from scipy import stats


# Two-sample t-test on the signed (neutral - variant) differences: positive vs. stopword.
# NOTE(review): ttest_ind treats the samples as independent, but both series
# come from the same rows of `merged` (one value per base_id); a paired test
# (stats.ttest_rel) may be the intended comparison — confirm.
t_stat, p_value = stats.ttest_ind(diffs_pos, diffs_stop)

print(f"T-statistic: {t_stat:.3f}")
print(f"P-value: {p_value:.3f}")

In [None]:
# Two-sample t-test on the signed (neutral - variant) differences: negative vs. stopword.
# NOTE(review): ttest_ind treats the samples as independent, but both series
# come from the same rows of `merged` (one value per base_id); a paired test
# (stats.ttest_rel) may be the intended comparison — confirm.
t_stat, p_value = stats.ttest_ind(diffs_neg, diffs_stop)

print(f"T-statistic: {t_stat:.3f}")
print(f"P-value: {p_value:.3f}")

In [None]:
# Two-sample t-test on the signed (neutral - variant) differences: positive vs. negative.
# NOTE(review): ttest_ind treats the samples as independent, but both series
# come from the same rows of `merged` (one value per base_id); a paired test
# (stats.ttest_rel) may be the intended comparison — confirm.
t_stat, p_value = stats.ttest_ind(diffs_pos, diffs_neg)

print(f"T-statistic: {t_stat:.3f}")
print(f"P-value: {p_value:.3f}")

In [None]:
import seaborn as sns

# Signed change in the share of 'A' answers relative to neutral, one column per
# condition, reshaped to long form (Condition, value) for seaborn.
share_a = merged['A']
diffs_wide = pd.DataFrame({
    'pos - neutral (A)': share_a['pos'] - share_a['neutral'],
    'neg - neutral (A)': share_a['neg'] - share_a['neutral'],
})
diffs = diffs_wide.melt(var_name='Condition', value_name='Δ Proportion of A')

plt.figure(figsize=(6, 5))
sns.violinplot(x='Condition', y='Δ Proportion of A', data=diffs, inner='box', color='coral')
# Dashed reference line at "no change".
plt.axhline(0, linestyle='--', color='gray')
plt.title("Effect of pos/neg on 'A' responses (vs. neutral)")
plt.tight_layout()
plt.show()
