COGS150 Final Project<br>
Ben Chen<br>
PID: A19062681<br>

**Research Question: Are large language models more sensitive to cultural norm violations when prompted in the culture's native language than when prompted in English?**


In [None]:
%pip install transformers

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import json
from tqdm import tqdm
import gc

import os
import json
from google.colab import drive
from datetime import datetime


In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Pick the best available backend: CUDA first, then Apple MPS, otherwise CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f" Running on device: {device}")

In [None]:
def _get_prob(model, tokenizer, context, target):
    """
    Calculate the probability the model assigns to `target` as the
    continuation of `context`, using the autoregressive chain rule
    P(target | context) = p1 * p2 * p3 ...

    The context and the target are tokenized separately, concatenated, and
    scored with a single teacher-forced forward pass: the logits at position
    i are the model's prediction for the token at position i + 1, so one pass
    yields every per-token probability (no need to re-run the model once per
    target token).

    Input:
    model: causal LM; called as model(input_ids), must expose `.device` and
           return an object whose `.logits` is indexed [batch, position, vocab]
    tokenizer: matching tokenizer (`encode`, `convert_ids_to_tokens`)
    context: prompt text (encoded with the tokenizer's default special tokens)
    target: continuation text (encoded without special tokens)
    Output:
    prob: joint probability of all target tokens (1.0 for an empty target)
    target_tokens: list of the target's token strings
    Raises:
    ValueError if the context encodes to zero tokens, because there is then
    no position to predict the first target token from.
    """
    model_device = model.device
    context_ids = tokenizer.encode(context, return_tensors="pt").to(model_device)
    target_ids = tokenizer.encode(target, add_special_tokens=False)
    target_tokens = tokenizer.convert_ids_to_tokens(target_ids)

    # Nothing to score: the empty product is 1
    if not target_ids:
        return np.exp(0.0), target_tokens

    n_context = context_ids.shape[1]
    if n_context == 0:
        raise ValueError("context produced no tokens; cannot score the target")

    target_tensor = torch.tensor(
        [target_ids], dtype=context_ids.dtype, device=model_device
    )
    full_ids = torch.cat([context_ids, target_tensor], dim=1)

    with torch.no_grad():
        logits = model(full_ids).logits[0]  # [position, vocab] for batch item 0

    # Position (n_context - 1 + k) predicts the k-th target token
    first = n_context - 1
    pred_logits = logits[first:first + len(target_ids)]

    # Upcast before the softmax so half-precision models do not round the
    # log-probabilities to ~3 significant digits
    log_probs = torch.nn.functional.log_softmax(pred_logits.float(), dim=-1)
    token_log_probs = log_probs.gather(1, target_tensor[0].unsqueeze(1)).squeeze(1)

    # Sum the per-token log probs in Python floats (float64)
    total_log_prob = sum(token_log_probs.tolist())

    return np.exp(total_log_prob), target_tokens

def _get_suprisal(total_prob):
    """
    Convert a raw probability to surprisal in bits: -log2(p).

    Input:
    total_prob: probability, expected in [0, 1]
    Output:
    surprisal in bits. A probability of exactly 0 (e.g. a product of token
    probabilities that underflowed) gives +inf rather than 0, so that an
    impossible continuation is never recorded as a perfectly expected one.
    A negative input is not a probability and gives NaN.
    """
    if total_prob < 0:
        return np.nan
    if total_prob == 0:
        return np.inf  # limit of -log2(p) as p -> 0; avoids the log2(0) warning
    return -np.log2(total_prob)


def _save_experiment(df, filename_base="COGS150_experiment_results_signal"):
    """
    Save the experiment results as a timestamped CSV in the project folder
    on Google Drive.

    Input:
    df: results table; written with df.to_csv(..., index=False)
    filename_base: file name prefix; "_<YYYYmmdd_HHMMSS>.csv" is appended
    Output:
    None. Success or failure is reported with print; a failed save is
    best-effort and does not raise.
    """
    # Mount Drive only if it is not already visible in the Colab filesystem
    if not os.path.exists('/content/drive'):
        print("Mounting Google Drive...")
        drive.mount('/content/drive')

    project_folder = "/content/drive/MyDrive/UCSD_Academics/Fall25/COGS150"

    # Generate file name
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    full_filename = f"{filename_base}_{timestamp}.csv"

    save_path = os.path.join(project_folder, full_filename)

    try:
        # Create the folder if needed so a missing directory does not cost
        # the results of a long run
        os.makedirs(project_folder, exist_ok=True)
        df.to_csv(save_path, index=False)
        print(f"Experiment results saved to: {save_path}")
    except Exception as e:
        print(f"Error saving experiment results: {e}")


In [None]:
# Defining Models
# Each entry is a (label, Hugging Face path) pair: the experiment loop stores
# the label in the results' "model" column and passes the path to
# from_pretrained() to load the tokenizer and the model.
MODEL_NAMES = [
    ("GPT2-XL", "gpt2-xl"),
    ("Qwen-1.5B", "Qwen/Qwen2.5-1.5B"),
    ("BLOOM-1.7B", "bigscience/bloom-1b7"),
    ("pythia-1.4b", "EleutherAI/pythia-1.4b")
]

In [None]:
# Load data

# Force Mount Drive
drive.mount('/content/drive', force_remount=True)

try:
    # Decode explicitly as UTF-8: the stimuli carry native-language prompts,
    # so the result must not depend on the platform's default encoding
    with open(
        "/content/drive/MyDrive/UCSD_Academics/Fall25/COGS150/cultural_stimuli_no_signal.json",
        "r",
        encoding="utf-8",
    ) as f:
        stimuli_data = json.load(f)  # stimuli_data a list of dictionaries
    print(f"loaded {len(stimuli_data)} cultural stimuli")

except FileNotFoundError:
    # Fall back to an empty list so the later cells still run (and do nothing)
    print("Error, file not found")
    stimuli_data = []

#print(stimuli_data)


In [None]:
# Sanity check: print the English context of every loaded stimulus
for stimulus in stimuli_data:
    eng_prompt = stimulus["prompts"]["eng"]
    print(eng_prompt["context"])

In [None]:
# Main Experiment Loop
#
# For every model in MODEL_NAMES: load it, score each stimulus in both
# languages ("native", "eng") and both conditions ("congruent", "violation"),
# collect one result row per stimulus, then release the model before loading
# the next one. A model that fails is reported and skipped.

all_results = []

for model_name, hf_path in MODEL_NAMES:

    print(f'Running: {model_name}')

    # Bind both names up front so the cleanup below is safe even when
    # loading itself fails
    model = None
    tokenizer = None

    try:
        # Specify tokenizer and model
        # NOTE(review): trust_remote_code=True allows the hub repository to
        # run its own Python code - confirm these checkpoints need it.
        tokenizer = AutoTokenizer.from_pretrained(hf_path)
        model = AutoModelForCausalLM.from_pretrained(
            hf_path,
            device_map="auto",
            torch_dtype=torch.float16,
            trust_remote_code=True,
        ).eval()

        # Run Experiment
        print("Experiment in Progress")
        for trial in stimuli_data:
            # Filled in the order native/congruent, native/violation,
            # eng/congruent, eng/violation so the column order of the
            # results table stays the same
            probs, surprisals, tokens = {}, {}, {}

            for lang in ("native", "eng"):
                prompt = trial["prompts"][lang]
                for condition in ("congruent", "violation"):
                    prob, target_tokens = _get_prob(
                        model,
                        tokenizer,
                        prompt["context"],
                        prompt[f"target_{condition}"],
                    )
                    key = f"{lang}_{condition}"
                    probs[f"prob_{key}"] = prob
                    surprisals[f"suprisal_{key}"] = _get_suprisal(prob)
                    tokens[f"tokens_{key}"] = target_tokens

            all_results.append({
                "model": model_name,
                "trial": trial['norm'],
                **probs,
                **surprisals,
                **tokens,
            })

        print(f"Finished running {model_name}")

    except Exception as e:
        # Report by name: the model object may not exist at this point
        print(f" Failed to run {model_name}: {e}")

    finally:
        # Clean up RAM, whether the run succeeded or failed
        print("Unloading RAM")
        del model
        del tokenizer
        gc.collect()
        if torch.cuda.is_available(): torch.cuda.empty_cache()
        if torch.backends.mps.is_available(): torch.mps.empty_cache()

df_results = pd.DataFrame(all_results)

# NOTE(review): "reuslts" is misspelled; the file name is kept unchanged in
# case existing result files or analysis code match on this prefix.
if not df_results.empty:
    _save_experiment(df_results, "experiment_reuslts_no_signal")
else:
    print("No data to save")


In [None]:
# Quick visual check: preview the first rows of the results table
df_results.head()
# df_qwen = df_results[df_results["model"] == "Qwen-1.5B"]
# df_qwen.head()
# print((df_qwen["suprisal_native_violation"].sum())-(df_qwen["suprisal_native_congruent"].sum()))
# print((df_qwen["suprisal_eng_violation"].sum())-(df_qwen["suprisal_eng_congruent"].sum()))