# Moral Machine Trolley Problem AI
This notebook trains a PyTorch model to decide between two trolley routes based on the Moral Machine dataset.

## 1. Setup and Imports

In [13]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import LabelEncoder
import numpy as np

# Check device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")


Using device: cuda


In [16]:
# Configuration
DATA_PATH = './dataset/SharedResponses.csv'
N_ROWS = None  # Load all rows from the paired file
BATCH_SIZE = 256
EPOCHS = 5
LEARNING_RATE = 0.001

### Filtering and generation of subset to train on

We create a subset of the dataset to train on, as the full dataset is quite large (11GB+). We will load only the first 200,000 rows for training. 

We also pair the by scenario responses together, so that the model can learn to compare the two routes directly. 


### 2. Data Loading and Preprocessing (Two-Pass)

Since the dataset rows for a single scenario ("ResponseID") might be far apart in the file, and we have limited RAM, we cannot rely on simple chunking because a chunk might contain only one half of the pair.

We will use a **Two-Pass Strategy**:
1.  **Pass 1 (Lightweight)**: Scan only the `ResponseID` column to identify which IDs represent complete pairs. We stop once we have found enough target pairs (e.g., 300,000).
2.  **Pass 2 (Extraction)**: identifying the IDs we want, we scan the file again to load the full data *only* for those IDs.

This ensures we get complete pairs without loading the 11GB file into memory.

In [6]:
import gc

# Configuration
TARGET_PAIRS = 300000
CHUNK_SIZE = 500000 # Larger chunks for faster iteration
filename = DATA_PATH

print(f"--- PASS 1: Identifying {TARGET_PAIRS} valid pairs from {filename} ---")

seen_ids = set()
wanted_ids = set()

# Iterate ONLY over ResponseID column to save memory
# We only need to find IDs that appear twice
for i, chunk in enumerate(pd.read_csv(filename, usecols=['ResponseID'], chunksize=CHUNK_SIZE)):
    chunk_ids = chunk['ResponseID'].tolist()
    
    for rid in chunk_ids:
        if rid in seen_ids:
            # We found the second part of the pair!
            wanted_ids.add(rid)
            # Remove from seen to keep memory usage stable (assuming max 2 rows per ID)
            seen_ids.remove(rid)
            
            if len(wanted_ids) >= TARGET_PAIRS:
                break
        else:
            seen_ids.add(rid)
            
    if len(wanted_ids) >= TARGET_PAIRS:
        print(f"Target of {TARGET_PAIRS} pairs reached at chunk {i+1}.")
        break
    
    if (i + 1) % 5 == 0:
        print(f"Scanned chunk {i+1}. Found {len(wanted_ids)} pairs so far...")

print(f"Pass 1 Complete. Found total {len(wanted_ids)} pairs to extract.")

# Clear temporary set to free memory
del seen_ids
gc.collect()

print(f"\n--- PASS 2: Extracting data for {len(wanted_ids)} pairs ---")
# Now we read the full file, but only keep rows belonging to wanted_ids

extracted_rows = []
pairs_collected = 0

for i, chunk in enumerate(pd.read_csv(filename, chunksize=CHUNK_SIZE)):
    # Filter this chunk for rows that match our wanted IDs
    mask = chunk['ResponseID'].isin(wanted_ids)
    
    if mask.any():
        relevant_rows = chunk[mask].copy()
        extracted_rows.append(relevant_rows)
        # Optimization: verify if we have collected all rows for the wanted pairs
        # But since rows are scattered, we likely need to scan further.

    # Optional: Progress logging
    if (i + 1) % 10 == 0:
        print(f"Processing chunk {i+1}...")

print("Concatenating extracted rows...")
if not extracted_rows:
    raise ValueError("No rows were extracted! Check if the dataset path is correct.")

df_raw_subset = pd.concat(extracted_rows)

print(f"Subset shape: {df_raw_subset.shape}")
print("Creating paired dataset...")

# Now proceed with pairing logic on the subset
# 1. Sort by ResponseID to ensure A and B are adjacent/grouped
df_raw_subset = df_raw_subset.sort_values('ResponseID')

# 2. Assign sub_id
df_raw_subset['sub_id'] = df_raw_subset.groupby('ResponseID').cumcount()

# 3. Split
option_a = df_raw_subset[df_raw_subset['sub_id'] == 0].set_index('ResponseID')
option_b = df_raw_subset[df_raw_subset['sub_id'] == 1].set_index('ResponseID')

# 4. Join
paired_df = option_a.join(option_b, lsuffix='_A', rsuffix='_B')

# 5. Create Label & Filter
paired_df['Label'] = (paired_df['Saved_B'] == 1).astype(int)
valid_rows = (paired_df['Saved_A'] + paired_df['Saved_B']) == 1
paired_df = paired_df[valid_rows]

print(f"Final training set scenarios: {len(paired_df)}")
paired_df.head()

--- PASS 1: Identifying 300000 valid pairs from ./dataset/SharedResponses.csv ---
Scanned chunk 5. Found 0 pairs so far...
Scanned chunk 10. Found 0 pairs so far...
Scanned chunk 15. Found 0 pairs so far...
Target of 300000 pairs reached at chunk 19.
Pass 1 Complete. Found total 300000 pairs to extract.

--- PASS 2: Extracting data for 300000 pairs ---


  for i, chunk in enumerate(pd.read_csv(filename, chunksize=CHUNK_SIZE)):


Processing chunk 10...
Processing chunk 20...
Processing chunk 30...
Processing chunk 40...
Processing chunk 50...
Processing chunk 60...
Processing chunk 70...
Processing chunk 80...
Processing chunk 90...
Processing chunk 100...
Processing chunk 110...
Processing chunk 120...
Processing chunk 130...
Processing chunk 140...
Concatenating extracted rows...
Subset shape: (600000, 41)
Creating paired dataset...
Final training set scenarios: 300000


Unnamed: 0_level_0,ExtendedSessionID_A,UserID_A,ScenarioOrder_A,Intervention_A,PedPed_A,Barrier_A,CrossingSignal_A,AttributeLevel_A,ScenarioTypeStrict_A,ScenarioType_A,...,MaleExecutive_B,FemaleExecutive_B,FemaleAthlete_B,MaleAthlete_B,FemaleDoctor_B,MaleDoctor_B,Dog_B,Cat_B,sub_id_B,Label
ResponseID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2222bRQqBTZ6dLnPH,32757157_6999801415950060.0,6999801000000000.0,7,0,0,0,1,Fit,Fitness,Fitness,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0
2222sJk4DcoqXXi98,1043988516_3525281295.0,3525281000.0,2,1,0,1,0,Rand,Random,Random,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,1
2223CNmvTr2Coj4wp,-1613944085_422160228641876.0,422160200000000.0,10,0,1,0,1,Female,Gender,Gender,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0
2223Xu54ufgjcyMR3,1425316635_327833569077076.0,327833600000000.0,11,0,0,1,0,Old,Age,Age,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,1
2223jMWDEGNeszivb,-1683127088_785070916172117.0,785070900000000.0,8,0,1,0,2,More,Utilitarian,Utilitarian,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1,1


In [7]:
# save the paired dataset for future use
paired_df.to_csv('./dataset/PairedResponses.csv', index=False)

### 3. Model Training
We will define a simple feedforward neural network to learn the decision-making process based on the features of the two routes. The model will be trained to predict which route is chosen by the majority of respondents for each scenario.

The input features will include:
- The attributes of the characters on each route (e.g., number of people, presence of children, etc.)
- The country of the respondent is also included as a feature (one-hot encoded), as it may influence moral decisions.

The target variable will be a binary label indicating which route was chosen by the majority of respondents for that scenario.

In [8]:
from sklearn.model_selection import train_test_split

# 1. Define Features
CHAR_COLS = ['Man', 'Woman', 'Pregnant', 'Stroller', 'OldMan', 'OldWoman', 'Boy', 'Girl', 
             'Homeless', 'LargeWoman', 'LargeMan', 'Criminal', 'MaleExecutive', 'FemaleExecutive', 
             'FemaleAthlete', 'MaleAthlete', 'FemaleDoctor', 'MaleDoctor', 'Dog', 'Cat']

CTX_COLS = ['Intervention', 'CrossingSignal']

# Construct full feature list for A and B
# Note: 'UserCountry3' is user-level, doesn't change between A and B
feat_cols_A = [c + '_A' for c in CHAR_COLS + CTX_COLS]
feat_cols_B = [c + '_B' for c in CHAR_COLS + CTX_COLS]

print(f"Features per option: {len(feat_cols_A)}")

# 2. Encode Country
# Fill NaN countries with 'Unknown'
paired_df['UserCountry3'] = paired_df['UserCountry3_A'].fillna('Unknown') # _A and _B are same for country

country_encoder = LabelEncoder()
paired_df['country_idx'] = country_encoder.fit_transform(paired_df['UserCountry3'].astype(str))
n_countries = len(country_encoder.classes_)
print(f"Number of unique countries: {n_countries}")

# 3. Prepare Tensors
X_A = paired_df[feat_cols_A].values.astype(np.float32)
X_B = paired_df[feat_cols_B].values.astype(np.float32)
X_country = paired_df['country_idx'].values.astype(np.int64)
y = paired_df['Label'].values.astype(np.float32)

# 4. Split Train/Test
# We split indices to keep arrays aligned
indices = np.arange(len(paired_df))
train_idx, val_idx = train_test_split(indices, test_size=0.2, random_state=42)

print(f"Train samples: {len(train_idx)}, Validation samples: {len(val_idx)}")

Features per option: 22
Number of unique countries: 197
Train samples: 240000, Validation samples: 60000


In [10]:
class MoralDataset(Dataset):
    def __init__(self, x_a, x_b, x_country, y):
        self.x_a = torch.tensor(x_a, dtype=torch.float32)
        self.x_b = torch.tensor(x_b, dtype=torch.float32)
        self.x_country = torch.tensor(x_country, dtype=torch.long)
        self.y = torch.tensor(y, dtype=torch.float32).unsqueeze(1)
        
    def __len__(self):
        return len(self.y)
    
    def __getitem__(self, idx):
        return self.x_a[idx], self.x_b[idx], self.x_country[idx], self.y[idx]

# Create DataLoaders
train_dataset = MoralDataset(X_A[train_idx], X_B[train_idx], X_country[train_idx], y[train_idx])
val_dataset = MoralDataset(X_A[val_idx], X_B[val_idx], X_country[val_idx], y[val_idx])

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE)

print("DataLoaders prepared.")

DataLoaders prepared.


In [11]:
class TrolleyModel(nn.Module):
    def __init__(self, num_features, num_countries, emb_dim=16):
        super(TrolleyModel, self).__init__()
        
        # Embedding for user country
        self.country_emb = nn.Embedding(num_countries, emb_dim)
        
        # Shared feature extractor (Siamese-like structure)
        # We process Option A and Option B through the same weights to learn "Value of an option"
        # Input dim: num_features + emb_dim (we append country context to both)
        input_dim = num_features + emb_dim
        
        self.feature_net = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1)  # Outputs a scalar "score" or "utility" for the option
        )
        
    def forward(self, x_a, x_b, country_idx):
        # Get country embedding
        c_emb = self.country_emb(country_idx) # [batch, emb_dim]
        
        # Concatenate country info to both options
        # x_a: [batch, features], c_emb: [batch, emb_dim]
        a_input = torch.cat([x_a, c_emb], dim=1)
        b_input = torch.cat([x_b, c_emb], dim=1)
        
        # Compute scores for both options
        score_a = self.feature_net(a_input)
        score_b = self.feature_net(b_input)
        
        # Logits for binary classification (Choice B vs A)
        # If score_b > score_a, logits > 0, probability > 0.5 -> Choose B
        logits = score_b - score_a
        return logits

model = TrolleyModel(num_features=X_A.shape[1], num_countries=n_countries).to(device)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

print(model)

TrolleyModel(
  (country_emb): Embedding(197, 16)
  (feature_net): Sequential(
    (0): Linear(in_features=38, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=32, bias=True)
    (3): ReLU()
    (4): Linear(in_features=32, out_features=1, bias=True)
  )
)


In [17]:
def train(model, loader, optimizer, criterion):
    model.train()
    total_loss = 0
    correct = 0
    total = 0
    
    for x_a, x_b, country, labels in loader:
        x_a, x_b, country, labels = x_a.to(device), x_b.to(device), country.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(x_a, x_b, country)
        loss = criterion(outputs, labels)
        
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
        
        # Accuracy
        predicted = (torch.sigmoid(outputs) > 0.5).float()
        correct += (predicted == labels).sum().item()
        total += labels.size(0)
        
    return total_loss / len(loader), correct / total

def evaluate(model, loader, criterion):
    model.eval()
    total_loss = 0
    correct = 0
    total = 0
    
    with torch.no_grad():
        for x_a, x_b, country, labels in loader:
            x_a, x_b, country, labels = x_a.to(device), x_b.to(device), country.to(device), labels.to(device)
            
            outputs = model(x_a, x_b, country)
            loss = criterion(outputs, labels)
            total_loss += loss.item()
            
            predicted = (torch.sigmoid(outputs) > 0.5).float()
            correct += (predicted == labels).sum().item()
            total += labels.size(0)
            
    return total_loss / len(loader), correct / total

# --- Training Loop ---
print("Starting training...")
for epoch in range(EPOCHS):
    train_loss, train_acc = train(model, train_loader, optimizer, criterion)
    val_loss, val_acc = evaluate(model, val_loader, criterion)
    
    print(f"Epoch {epoch+1}/{EPOCHS} | "
          f"Train Loss: {train_loss:.4f} Acc: {train_acc:.4f} | "
          f"Val Loss: {val_loss:.4f} Acc: {val_acc:.4f}")

print("Training Complete.")

Starting training...
Epoch 1/5 | Train Loss: 0.5551 Acc: 0.7254 | Val Loss: nan Acc: 0.7173
Epoch 2/5 | Train Loss: 0.5548 Acc: 0.7252 | Val Loss: nan Acc: 0.7170
Epoch 3/5 | Train Loss: 0.5545 Acc: 0.7261 | Val Loss: nan Acc: 0.7178
Epoch 4/5 | Train Loss: 0.5540 Acc: 0.7258 | Val Loss: nan Acc: 0.7183
Epoch 5/5 | Train Loss: 0.5537 Acc: 0.7268 | Val Loss: nan Acc: 0.7164
Training Complete.


In [18]:
# save the model to a file for future use
torch.save(model.state_dict(), 'trolley_model_acc0_7.pth')

In [None]:
# export the model to ONNX format for use in other environments (e.g., JavaScript)
dummy_x_a = torch.randn(1, X_A.shape[1]).to(device)
dummy_x_b = torch.randn(1, X_B.shape[1]).to(device)
dummy_country = torch.tensor([0], dtype=torch.long).to(device)  # Example country

torch.onnx.export(model, (dummy_x_a, dummy_x_b, dummy_country), 'trolley_model.onnx',
                  input_names=['x_a', 'x_b', 'country_idx'],
                    output_names=['logits'],
                    dynamic_axes={'x_a': {0: 'batch_size'}, 'x_b': {0: 'batch_size'}, 'country_idx': {0: 'batch_size'}, 'logits': {0: 'batch_size'}})


TrolleyModel(
  (country_emb): Embedding(197, 16)
  (feature_net): Sequential(
    (0): Linear(in_features=38, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=32, bias=True)
    (3): ReLU()
    (4): Linear(in_features=32, out_features=1, bias=True)
  )
)