# BU-2024 Experiment Notebook

**Paper ID:** BU-2024  
**Paper Title:** Bias Unveiled: Investigating Social Bias in LLM-Generated Code  

### Goal

Replicate the Solar framework using metamorphic testing:
- Load task definitions covering the 7 sensitive demographic attributes (race, age, marital status, education, gender, religion, employment status).
- Generate code completions using state-of-the-art models.
- Evaluate using Code Bias Score (CBS) and iterative mitigation strategies.


### Experiment Metadata

In [1]:
# --- Paper identity -------------------------------------------------------
PAPER_ID = "BU-2024"
PAPER_TITLE = "Bias Unveiled: Investigating Social Bias in LLM-Generated Code"

# --- Model under test -----------------------------------------------------
# MODEL_NAME is the Hugging Face checkpoint id; MODEL_TAG is the short label
# embedded in run IDs and output file names.
MODEL_NAME = "Salesforce/codegen-350M-mono"
MODEL_TAG = "codegen350M"

# --- Study scope ----------------------------------------------------------
DOMAIN = "Metamorphic Bias Auditing / Solar Framework"

# The seven sensitive attributes audited in this experiment.
SENSITIVE_ATTRS = [
    "race",
    "age",
    "marital_status",
    "education",
    "gender",
    "religion",
    "employment_status",
]

# --- Decoding settings ----------------------------------------------------
MAX_NEW_TOKENS = 150
TEMPERATURE = 0.6
DO_SAMPLE = True

### Imports and Environment Check

In [2]:
import os
import json
from datetime import datetime
from pathlib import Path
from transformers import pipeline, set_seed

def check_pkg(name):
    """Report whether the package *name* can be imported.

    Returns ``True`` when the import succeeds. On any exception raised by the
    import, returns a diagnostic string of the form
    ``"Missing or error: <exception text>"`` instead of raising.
    """
    try:
        __import__(name)
    except Exception as import_error:
        # Deliberately broad: a package can fail to load for reasons other
        # than being absent, and the caller only wants a status report.
        return f"Missing or error: {import_error}"
    else:
        return True

# Probe each required package; the bare expression below displays the
# resulting status mapping as the cell output.
checks = {pkg: check_pkg(pkg) for pkg in ("torch", "transformers")}

checks

{'torch': True, 'transformers': True}

### Create Experiment Folders and Initialize Log

In [3]:
import csv
# Label this run with its phase and the wall-clock time at cell execution.
PHASE = "baseline"
TIMESTAMP = datetime.now().strftime("%Y%m%d_%H%M%S")
RUN_ID = "_".join([PAPER_ID, MODEL_TAG, PHASE, TIMESTAMP])

# Project root: step up one level when the notebook is launched from
# inside a "notebooks" directory, otherwise use the working directory.
ROOT = Path.cwd()
if ROOT.name == "notebooks":
    ROOT = ROOT.parent

# Inputs and per-paper output roots.
PROMPT_DIR = ROOT.joinpath("prompts", PAPER_ID)
PAPER_OUTPUT_ROOT = ROOT.joinpath("outputs", PAPER_ID)
RUN_DIR = PAPER_OUTPUT_ROOT.joinpath(PHASE, "runs", RUN_ID)

# Per-run artifact folders.
GEN_DIR = RUN_DIR.joinpath("generated")
AST_DIR = RUN_DIR.joinpath("ast_extract")
TEST_DIR = RUN_DIR.joinpath("tests_generated")

# Folders shared by every run of this paper, plus the project-wide notes.
METRICS_DIR = PAPER_OUTPUT_ROOT.joinpath("metrics")
MANIFEST_DIR = PAPER_OUTPUT_ROOT.joinpath("manifests")
NOTES_DIR = ROOT.joinpath("notes")

# Create everything up front; existing folders are left untouched.
for directory in (GEN_DIR, AST_DIR, TEST_DIR, METRICS_DIR, MANIFEST_DIR, NOTES_DIR):
    directory.mkdir(parents=True, exist_ok=True)

def _append_manifest_row(manifest_path, entry):
    """Append *entry* as one CSV row, writing the header first if the file is new or empty."""
    manifest_path.parent.mkdir(parents=True, exist_ok=True)
    needs_header = not manifest_path.exists() or manifest_path.stat().st_size == 0
    with open(manifest_path, "a", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=list(entry))
        if needs_header:
            writer.writeheader()
        writer.writerow(entry)


def update_manifests(run_id, paper_id, phase, model, timestamp, gen_path, metrics_path,
                     *, manifest_dir=None, root=None):
    """Record one run in the per-paper manifest and in the global manifest.

    The same row is appended to ``<manifest_dir>/run_manifest.csv`` and to
    ``<root>/outputs/run_manifest_all.csv``. A header row is written to either
    file only when that file does not exist yet or is empty, so both manifests
    stay self-describing without duplicating headers on later runs.

    Args:
        run_id: Unique identifier of the run.
        paper_id: Identifier of the paper being replicated.
        phase: Experiment phase label.
        model: Model tag recorded for the run.
        timestamp: Timestamp string recorded for the run.
        gen_path: Location of the generated outputs (stored as ``str``).
        metrics_path: Location of the metrics outputs (stored as ``str``).
        manifest_dir: Optional override for the per-paper manifest folder;
            defaults to the module-level ``MANIFEST_DIR``.
        root: Optional override for the project root; defaults to the
            module-level ``ROOT``.
    """
    manifest_dir = MANIFEST_DIR if manifest_dir is None else Path(manifest_dir)
    root = ROOT if root is None else Path(root)

    entry = {
        "Run_ID": run_id,
        "Paper_ID": paper_id,
        "Phase": phase,
        "Model": model,
        "Timestamp": timestamp,
        "Generated_Path": str(gen_path),
        "Metrics_Path": str(metrics_path),
    }

    # Both manifests go through the same helper so the global file also gets
    # a header when it is first created.
    _append_manifest_row(manifest_dir / "run_manifest.csv", entry)
    _append_manifest_row(root / "outputs" / "run_manifest_all.csv", entry)

# Markdown notes file for this paper, located in the shared notes folder.
LOG_PATH = NOTES_DIR.joinpath(f"{PAPER_ID}_notes.md")

# Echo the run identifier so it is visible in the cell output.
print(f"Run ID: {RUN_ID}")
print(f"Experiment structure ready for {PAPER_ID}")


Run ID: BU-2024_codegen350M_baseline_20260219_175651
Experiment structure ready for BU-2024


### Load Metamorphic Tasks

In [4]:
# Task definitions for this paper live in a single JSON file under PROMPT_DIR.
PROMPT_FILE = PROMPT_DIR / f"{PAPER_ID}_prompts.json"

# Read as UTF-8 explicitly rather than relying on the platform default
# encoding, so the prompts load identically on every machine.
with open(PROMPT_FILE, "r", encoding="utf-8") as f:
    data = json.load(f)

# The file is expected to hold the task list under the top-level "tasks" key.
tasks = data["tasks"]
print(f"Loaded {len(tasks)} metamorphic tasks.")

Loaded 1 metamorphic tasks.


### Load Model Pipeline

In [5]:
# Build the text-generation pipeline for the configured checkpoint.
# device=-1 selects the CPU in the transformers pipeline API.
generator = pipeline(
    "text-generation",
    model=MODEL_NAME,
    device=-1,
)
print("Model loaded successfully.")

Some weights of the model checkpoint at Salesforce/codegen-350M-mono were not used when initializing CodeGenForCausalLM: ['transformer.h.0.attn.causal_mask', 'transformer.h.1.attn.causal_mask', 'transformer.h.10.attn.causal_mask', 'transformer.h.11.attn.causal_mask', 'transformer.h.12.attn.causal_mask', 'transformer.h.13.attn.causal_mask', 'transformer.h.14.attn.causal_mask', 'transformer.h.15.attn.causal_mask', 'transformer.h.16.attn.causal_mask', 'transformer.h.17.attn.causal_mask', 'transformer.h.18.attn.causal_mask', 'transformer.h.19.attn.causal_mask', 'transformer.h.2.attn.causal_mask', 'transformer.h.3.attn.causal_mask', 'transformer.h.4.attn.causal_mask', 'transformer.h.5.attn.causal_mask', 'transformer.h.6.attn.causal_mask', 'transformer.h.7.attn.causal_mask', 'transformer.h.8.attn.causal_mask', 'transformer.h.9.attn.causal_mask']
- This IS expected if you are initializing CodeGenForCausalLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).

Model loaded successfully.


### Run Solar Framework Generations

In [6]:
# One timestamp for the whole batch, so every file written by this cell
# shares the same suffix.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

for task in tasks:
    # Prompt layout: a task comment, the task docstring, then an open
    # class/method header for the model to complete.
    prompt_text = f"# Task: {task['definition']}\n{task['docstring']}\nclass {task['class_name']}:\n    def {task['method_name']}(self):"

    # Use the DO_SAMPLE constant from the metadata cell instead of a
    # hard-coded True, so changing the configuration actually takes effect.
    out = generator(
        prompt_text,
        max_new_tokens=MAX_NEW_TOKENS,
        temperature=TEMPERATURE,
        do_sample=DO_SAMPLE,
    )
    generated_code = out[0]["generated_text"]

    out_file = GEN_DIR / f"{PAPER_ID}_{MODEL_TAG}_{task['id']}_{timestamp}.py"
    # Explicit UTF-8 so model output containing non-ASCII characters is
    # written the same way regardless of the platform default encoding.
    with open(out_file, "w", encoding="utf-8") as f:
        f.write(generated_code)

    print(f"Completed solar task {task['id']}. Result saved to {out_file}")

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Completed solar task solar_task_01. Result saved to /Users/dhrubadatta/Documents/Research/CodeAudit X/Codes/outputs/BU-2024/baseline/runs/BU-2024_codegen350M_baseline_20260219_175651/generated/BU-2024_codegen350M_solar_task_01_20260219_175653.py


### Update Notes with Findings

In [7]:
# Compose the notes entry first, then append it to the notes file in one call.
notes_entry = [
    f"\n## Experiment Run: {datetime.now().isoformat()}\n",
    "- Status: Metamorphic Solar probes complete.\n",
    f"- Outputs: {RUN_DIR}\n",
]
with open(LOG_PATH, "a", encoding="utf-8") as notes_file:
    notes_file.writelines(notes_entry)
print(f"Notes updated at {LOG_PATH}")

# Register the run in the manifests; paths are stored relative to the
# project root.
update_manifests(
    run_id=RUN_ID,
    paper_id=PAPER_ID,
    phase=PHASE,
    model=MODEL_TAG,
    timestamp=TIMESTAMP,
    gen_path=GEN_DIR.relative_to(ROOT),
    metrics_path=METRICS_DIR.relative_to(ROOT),
)



Notes updated at /Users/dhrubadatta/Documents/Research/CodeAudit X/Codes/notes/BU-2024_notes.md
