COGS150 Final Project<br>
Ben Chen<br>
PID: A19062681<br>

**Research Question: Are large language models more sensitive to violations of a culture's norms when prompted in that culture's native language than when prompted in English?**


In [None]:
%pip install transformers



In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import json
from tqdm import tqdm
import gc

import os
import json
from google.colab import drive
from datetime import datetime


%matplotlib inline
%config InlineBackend.figure_format = 'retina'

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Pick the best available accelerator: CUDA first, then Apple MPS, otherwise CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

print(f" Running on device: {device}")

 Running on device: cuda


In [None]:
def _get_prob(model, tokenizer, context, target):
    """
    Calculate the next token probability in an autoregressive way
    P(Total) = p1 * p2 * p3 ...
    Input:
    LLM model, tokenizer, context (prompt), target (next token)
    Output:
    prob, target_tokens
    """
    model_device = model.device
    input_ids = tokenizer.encode(context, return_tensors="pt").to(model_device)
    # input_tokens = tokenizer.convert_ids_to_tokens(input_ids[0].tolist())
    target_ids = tokenizer.encode(target, add_special_tokens=False)
    target_tokens = tokenizer.convert_ids_to_tokens(target_ids)

    log_probs = []

    with torch.no_grad():
        for t_id in target_ids:
            outputs = model(input_ids)

            # Get output probs
            next_token_logits = outputs.logits[0, -1, :] # [batch 0, Last token, All voacb]
            next_token_log_probs = torch.nn.functional.log_softmax(next_token_logits, dim=0)

            # Store target token log prob
            token_log_prob = next_token_log_probs[t_id].item()
            log_probs.append(token_log_prob)

            # Prepare next step
            next_token = torch.tensor([[t_id]]).to(model_device)
            input_ids = torch.cat([input_ids, next_token], dim=1)

    # Sum token logs for the unseen
    total_log_prob = sum(log_probs)

    return np.exp(total_log_prob), target_tokens

def _get_suprisal(total_prob):
    """
    Converts raw probability to bits of surprisal
    """
    if total_prob <= 0: return 0.0 # Avoid math errors
    return -np.log2(total_prob)


def _save_experiment(df, filename_base="COGS150_experiment_results_signal"):

  """
  Save the LLMology experiment result to cd in Google Drive, with timestamp
  """

  if not os.path.exists('/content/drive'):
        print("Mounting Google Drive...")
        drive.mount('/content/drive')

  project_folder = "/content/drive/MyDrive/UCSD_Academics/Fall25/COGS150"

  # Generate file name
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
  full_filename = f"{filename_base}_{timestamp}.csv"

  save_path = os.path.join(project_folder, full_filename)

  try:
    df.to_csv(save_path, index=False)
    print(f"Experiment results saved to: {save_path}")
  except Exception as e:
    print(f"Error saving experiment results: {e}")


In [None]:
# Defining Models
# Maps the display label used in the results table to the Hugging Face checkpoint id.
# Stored as a list of (label, checkpoint) tuples, in insertion order.
MODEL_NAMES = list({
    "GPT2-XL": "gpt2-xl",
    "Qwen-1.5B": "Qwen/Qwen2.5-1.5B",
    "BLOOM-1.7B": "bigscience/bloom-1b7",
    "pythia-1.4b": "EleutherAI/pythia-1.4b",
}.items())

In [None]:
# Load data

# Force Mount Drive
drive.mount('/content/drive', force_remount=True)

stimuli_path = "/content/drive/MyDrive/UCSD_Academics/Fall25/COGS150/cultural_stimuli_no_signal.json"

try:
  # The stimuli include native-language (non-ASCII) prompts, so decode
  # explicitly as UTF-8 instead of relying on the platform default encoding.
  with open(stimuli_path, "r", encoding="utf-8") as f:
    stimuli_data = json.load(f) # stimuli_data a list of dictionaries
  print(f"loaded {len(stimuli_data)} cultural stimuli")

except FileNotFoundError:
  # Fall back to an empty list so later cells can still run (they will score nothing)
  print("Error, file not found")
  stimuli_data = []

#print(stimuli_data)


Mounted at /content/drive
loaded 10 cultural stimuli


In [None]:
# Sanity check: print the English context of every loaded stimulus
for trial in stimuli_data:
  eng_prompt = trial["prompts"]["eng"]
  print(eng_prompt["context"])

At home, I eat rice with
During a family dinner, celebrating the Lunar new year, we had
When I told my grandmother my stomach hurt, she told me to drink some
In a Chinese wedding the bride dresses in
When someone sneezes next to you in China, you say
Apartments skip the floor number
A married man will never wear a hat in the color
Because it is his Chinese zodiac year, his mother asked him to wear underwear in the color
On a christmas eve, it is a popular custom for people to give their friends
When an elder voluntarily gives you a red packet, your first reaction should be to politely


In [None]:
# Main Experiment Loop
# For every model, score the congruent and the violating target of each
# stimulus in both prompt languages, then save all rows as one CSV.

# (prompt language, condition) pairs scored for every trial. The order also
# fixes the column order of the results table.
_SCORED_CONDITIONS = [
    ("native", "congruent"),
    ("native", "violation"),
    ("eng", "congruent"),
    ("eng", "violation"),
]

all_results = []

for model_name, hf_path in MODEL_NAMES:

    print(f'Running: {model_name}')

    # Bind both names up front so the cleanup in `finally` is safe even when
    # loading the tokenizer or the model fails.
    model = None
    tokenizer = None

    try:
        # Specify tokenizer and model
        tokenizer = AutoTokenizer.from_pretrained(hf_path)
        # NOTE(review): recent transformers releases warn that `torch_dtype` is
        # deprecated in favour of `dtype`; switch once the installed version is pinned.
        # NOTE(review): trust_remote_code=True allows code shipped with a
        # checkpoint to run locally; confirm it is actually needed for these models.
        model = AutoModelForCausalLM.from_pretrained(
            hf_path,
            device_map="auto",
            torch_dtype=torch.float16,
            trust_remote_code=True
        ).eval()

        # Run Experiment
        print("Experiment in Progress")
        for trial in stimuli_data:
            probs, surprisals, tokens = {}, {}, {}

            for lang, condition in _SCORED_CONDITIONS:
                prompt = trial["prompts"][lang]
                context = prompt["context"]
                target = prompt[f"target_{condition}"]

                # English words are separated by spaces and the tokenizers
                # attach that space to the following word. Scoring "chopsticks"
                # directly after "... with" would measure the non-word
                # "withchopsticks", so put the boundary space on the target.
                if lang == "eng" and target.strip():
                    context = context.rstrip()
                    target = " " + target.lstrip()

                prob, target_tokens = _get_prob(model, tokenizer, context, target)

                key = f"{lang}_{condition}"
                probs[f"prob_{key}"] = prob
                surprisals[f"suprisal_{key}"] = _get_suprisal(prob)
                tokens[f"tokens_{key}"] = target_tokens

            # Same column order as before: all probs, then surprisals, then tokens
            all_results.append({
                "model": model_name,
                "trial": trial['norm'],
                **probs,
                **surprisals,
                **tokens,
            })

        # Report the label, not the model object (its repr is the full module tree)
        print(f"Finished running {model_name}")

    except Exception as e:
        # Use model_name here: `model` may never have been loaded
        print(f" Failed to run {model_name}: {e}")

    finally:
        # Clean up RAM, also after a failure, so the next model has room to load
        print("Unloading RAM")
        del model
        del tokenizer
        gc.collect()
        if torch.cuda.is_available(): torch.cuda.empty_cache()
        if torch.backends.mps.is_available(): torch.mps.empty_cache()

df_results = pd.DataFrame(all_results)

# The misspelt file name prefix is kept as-is so existing result files and any
# code that looks them up by name keep matching.
if not df_results.empty:
    _save_experiment(df_results, "experiment_reuslts_no_signal")
else:
    print("No data to save")


Running: GPT2-XL


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/689 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors:   0%|          | 0.00/6.43G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Experiment in Progress
Finished running GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 1600)
    (wpe): Embedding(1024, 1600)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-47): 48 x GPT2Block(
        (ln_1): LayerNorm((1600,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D(nf=4800, nx=1600)
          (c_proj): Conv1D(nf=1600, nx=1600)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((1600,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=6400, nx=1600)
          (c_proj): Conv1D(nf=1600, nx=6400)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((1600,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=1600, out_features=50257, bias=False)
)
Un

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/684 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.09G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/138 [00:00<?, ?B/s]

Experiment in Progress
Finished running Qwen2ForCausalLM(
  (model): Qwen2Model(
    (embed_tokens): Embedding(151936, 1536)
    (layers): ModuleList(
      (0-27): 28 x Qwen2DecoderLayer(
        (self_attn): Qwen2Attention(
          (q_proj): Linear(in_features=1536, out_features=1536, bias=True)
          (k_proj): Linear(in_features=1536, out_features=256, bias=True)
          (v_proj): Linear(in_features=1536, out_features=256, bias=True)
          (o_proj): Linear(in_features=1536, out_features=1536, bias=False)
        )
        (mlp): Qwen2MLP(
          (gate_proj): Linear(in_features=1536, out_features=8960, bias=False)
          (up_proj): Linear(in_features=1536, out_features=8960, bias=False)
          (down_proj): Linear(in_features=8960, out_features=1536, bias=False)
          (act_fn): SiLUActivation()
        )
        (input_layernorm): Qwen2RMSNorm((1536,), eps=1e-06)
        (post_attention_layernorm): Qwen2RMSNorm((1536,), eps=1e-06)
      )
    )
    (norm): Qwe

tokenizer_config.json:   0%|          | 0.00/222 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/14.5M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/85.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/715 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.44G [00:00<?, ?B/s]

Experiment in Progress
Finished running BloomForCausalLM(
  (transformer): BloomModel(
    (word_embeddings): Embedding(250880, 2048)
    (word_embeddings_layernorm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
    (h): ModuleList(
      (0-23): 24 x BloomBlock(
        (input_layernorm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
        (self_attention): BloomAttention(
          (query_key_value): Linear(in_features=2048, out_features=6144, bias=True)
          (dense): Linear(in_features=2048, out_features=2048, bias=True)
          (attention_dropout): Dropout(p=0.0, inplace=False)
        )
        (post_attention_layernorm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
        (mlp): BloomMLP(
          (dense_h_to_4h): Linear(in_features=2048, out_features=8192, bias=True)
          (gelu_impl): BloomGelu()
          (dense_4h_to_h): Linear(in_features=8192, out_features=2048, bias=True)
        )
      )
    )
    (ln_f): LayerNorm((2048,), eps

tokenizer_config.json:   0%|          | 0.00/396 [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.93G [00:00<?, ?B/s]

Experiment in Progress
Finished running GPTNeoXForCausalLM(
  (gpt_neox): GPTNeoXModel(
    (embed_in): Embedding(50304, 2048)
    (emb_dropout): Dropout(p=0.0, inplace=False)
    (layers): ModuleList(
      (0-23): 24 x GPTNeoXLayer(
        (input_layernorm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
        (post_attention_layernorm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
        (post_attention_dropout): Dropout(p=0.0, inplace=False)
        (post_mlp_dropout): Dropout(p=0.0, inplace=False)
        (attention): GPTNeoXAttention(
          (query_key_value): Linear(in_features=2048, out_features=6144, bias=True)
          (dense): Linear(in_features=2048, out_features=2048, bias=True)
        )
        (mlp): GPTNeoXMLP(
          (dense_h_to_4h): Linear(in_features=2048, out_features=8192, bias=True)
          (dense_4h_to_h): Linear(in_features=8192, out_features=2048, bias=True)
          (act): GELUActivation()
        )
      )
    )
    (final_lay

In [None]:
# Quick visual check: show the first five rows of the results table
df_results.head(n=5)
# df_qwen = df_results[df_results["model"] == "Qwen-1.5B"]
# df_qwen.head()
# print((df_qwen["suprisal_native_violation"].sum())-(df_qwen["suprisal_native_congruent"].sum()))
# print((df_qwen["suprisal_eng_violation"].sum())-(df_qwen["suprisal_eng_congruent"].sum()))

Unnamed: 0,model,trial,prob_native_congruent,prob_native_violation,prob_eng_congruent,prob_eng_violation,suprisal_native_congruent,suprisal_native_violation,suprisal_eng_congruent,suprisal_eng_violation,tokens_native_congruent,tokens_native_violation,tokens_eng_congruent,tokens_eng_violation
0,GPT2-XL,Utensils (Implicit),4.485091e-08,1.359884e-08,5.261556e-08,6.837927e-09,24.410287,26.131941,24.179935,27.123794,"[ç, Ń·, åŃĲ]","[åı, ī, åŃĲ]","[ch, op, sticks]","[a, Ġfork]"
1,GPT2-XL,Lunar New Year Food (Implicit),1.123324e-06,1.136272e-11,1.043366e-10,3.831985e-09,19.763795,36.356901,33.158035,27.959261,"[é, ¥, º, åŃĲ]","[æĦ, ı, å¤§, åĪ, ©, é, Ŀ, ¢]","[d, um, plings]","[past, a]"
2,GPT2-XL,Hot Water Remedy (Implicit),6.158312e-07,1.565217e-08,9.03939e-07,9.698242e-10,20.630962,25.929062,20.077271,29.941558,"[ç, ĥ, Ń, æ°, ´]","[åĨ, °, æ°, ´]","[hot, Ġwater]","[iced, Ġwater]"
3,GPT2-XL,Wedding Color (Implicit),5.797886e-05,7.347378e-06,6.733925e-07,3.366751e-06,14.074114,17.054339,20.502049,18.180212,"[ç, º, ¢, è, ī, ²]","[ç, Ļ½, è, ī, ²]",[red],[white]
4,GPT2-XL,Sneeze Response (Implicit),8.868264e-09,2.427432e-21,6.036251e-07,8.551591e-09,26.748701,68.481059,20.659844,26.80116,"[ä½, ł, ä¸į, è¯, ´, ä», Ģ, ä¹, Ī]","[ä½, ł, è¯, ´, ä¸Ĭ, å¸, Ŀ, ä¿, Ŀ, ä½, ĳ]",[nothing],"[b, less, Ġyou]"
