In [1]:
# Cell 1: Setup, Installation, File Creation, and Hugging Face Login
import os
import sys

# --- Clone Repository and Initial Setup ---
if not os.path.exists("ESCoT"):
    print("Cloning ESCoT repository...")
    !git clone https://github.com/TeigenZhang/ESCoT.git
os.chdir("ESCoT")
print(f"Current working directory: {os.getcwd()}")


# --- Define Paths and Create Directories Defensively ---
BASE_MODEL = "meta-llama/Llama-2-7b-chat-hf"
OUTPUT_DIR = "./ESCOT_7B_Checkpoint"
EVAL_OUTPUT_DIR = "./ESCOT_EVAL_RESULTS"

print("Creating required directories...")
os.makedirs("utils", exist_ok=True)
os.makedirs(OUTPUT_DIR, exist_ok=True)
os.makedirs(EVAL_OUTPUT_DIR, exist_ok=True)
print("Directories created/verified.")


# --- Install Dependencies ---
print("Installing dependencies...")
# Install all required libraries including trl, bitsandbytes, and evaluation tools
!pip install -r requirements.txt
!pip install accelerate bitsandbytes peft trl transformers torch==2.3.0 rouge-score nltk
# Download NLTK data for evaluation
import nltk
print("Downloading NLTK punkt resource...")
try:
    nltk.data.find('tokenizers/punkt')
    print("NLTK punkt found.")
except LookupError:
    nltk.download('punkt')
    print("NLTK punkt downloaded.")


# --- HUGGING FACE LOGIN ---
from huggingface_hub import login
print("\nLogging into Hugging Face...")
# ⚠️ ACTION REQUIRED: Replace your token here
login(token="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")

Current working directory: /content/ESCoT
Creating required directories...
Directories created/verified.
Installing dependencies...
Downloading NLTK punkt resource...
NLTK punkt found.

Logging into Hugging Face...


In [2]:
# --- A. Create utils/merge.py (Merge Utility) ---
MERGE_PY_CONTENT = """
import os
import torch
from peft import PeftModel
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    LlamaTokenizer
)

DEFAULT_PAD_TOKEN = "[PAD]"
DEFAULT_EOS_TOKEN = "</s>"
DEFAULT_BOS_TOKEN = "</s>"
DEFAULT_UNK_TOKEN = "</s>"


def merge_llm_with_lora(base_model_name, adapter_model_name, output_name, push_to_hub=False):
    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        return_dict=True,
        torch_dtype=torch.bfloat16
    )

    model = PeftModel.from_pretrained(base_model, adapter_model_name)
    model = model.merge_and_unload()

    if "decapoda" in base_model_name.lower():
        tokenizer = LlamaTokenizer.from_pretrained(base_model_name)
        tokenizer.add_special_tokens(
            {
                "eos_token": DEFAULT_EOS_TOKEN,
                "bos_token": DEFAULT_BOS_TOKEN,
                "unk_token": DEFAULT_UNK_TOKEN,
                "pad_token": DEFAULT_PAD_TOKEN,
            }
        )
    else:
        tokenizer = AutoTokenizer.from_pretrained(base_model_name, use_fast=False)

    if push_to_hub:
        print(f"Saving to hub ...")
        model.push_to_hub(f"{base_model_name}-merged", use_temp_dir=False, private=True)
        tokenizer.push_to_hub(f"{base_model_name}-merged", use_temp_dir=False, private=True)
    else:
        output_name = os.path.join(output_name, "final_checkpoint-merged")
        model.save_pretrained(output_name)
        tokenizer.save_pretrained(output_name)
        print(f"Model saved to {output_name}")
"""
with open("utils/merge.py", "w") as f:
    f.write(MERGE_PY_CONTENT)
print("Created utils/merge.py successfully.")


Created utils/merge.py successfully.


In [3]:
# --- B. Create supervised_finetune_llama2_cot.py (Training Script) ---
# NOTE: The content here remains the same as it was already correct.
SFT_PY_CONTENT = """
import os
import argparse
from tqdm import tqdm
from datasets import load_dataset
from peft import LoraConfig
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    LlamaTokenizer,
    TrainingArguments,
    logging,
    set_seed
)
from trl import SFTTrainer
from trl.trainer import ConstantLengthDataset
from utils.merge import merge_llm_with_lora

def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--base_model", type=str, default="")
    parser.add_argument("--dataset_name", type=str, default="./data/ablation_data/em_es_ia_sr_re") # Defaulting to your full dataset path
    parser.add_argument("--split", type=str, default="train")
    parser.add_argument("--size_valid_set", type=int, default=4000)
    parser.add_argument("--streaming", action="store_true", default=False)
    parser.add_argument("--shuffle_buffer", type=int, default=5000)

    parser.add_argument("--seq_length", type=int, default=2048) # Your provided value
    parser.add_argument("--max_steps", type=int, default=10000) # Your provided value
    parser.add_argument("--batch_size", type=int, default=8) # Your provided value
    parser.add_argument("--gradient_accumulation_steps", type=int, default=1)
    parser.add_argument("--eos_token_id", type=int, default=2)

    parser.add_argument("--lora_r", type=int, default=16)
    parser.add_argument("--lora_alpha", type=int, default=32)
    parser.add_argument("--lora_dropout", type=float, default=0.05)
    parser.add_argument("--lora_target_modules", type=str, default="q_proj,v_proj,o_proj,k_proj,gate_proj,up_proj,down_proj") # Full LLaMA targets

    parser.add_argument("--learning_rate", type=float, default=1e-5) # Your provided value
    parser.add_argument("--lr_scheduler_type", type=str, default="cosine") # Your provided value
    parser.add_argument("--num_warmup_steps", type=int, default=100)
    parser.add_argument("--weight_decay", type=float, default=0.05)
    parser.add_argument("--warmup_ratio", type=float, default=0.)

    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument("--fp16", action="store_true", default=True)
    parser.add_argument("--no_bf16", action="store_false", default=True)
    parser.add_argument("--no_gradient_checkpointing", action="store_false", default=True)
    parser.add_argument("--seed", type=int, default=1104) # Your provided value
    parser.add_argument("--num_workers", type=int, default=None)
    parser.add_argument("--output_dir", type=str, default="./checkpoints/cot/supervised_llama2_cot") # Your provided path
    parser.add_argument("--log_freq", type=int, default=1)
    parser.add_argument("--eval_freq", type=int, default=500) # Your provided value
    parser.add_argument("--save_freq", type=int, default=500) # Your provided value
    parser.add_argument("--save_total_limit", type=int, default=100) # Your provided value
    parser.add_argument("--resume_from_checkpoint", type=str, default=None)
    parser.add_argument("--run_name", type=str, default="supervised_llama2_cot") # Your provided value
    parser.add_argument("--merge_lora", action="store_true", default=True)

    return parser.parse_args()


def chars_token_ratio(dataset, tokenizer, nb_examples=400):
    total_characters, total_tokens = 0, 0
    max_token_length = 0
    for _, example in tqdm(zip(range(nb_examples), iter(dataset)), total=nb_examples):
        text = prepare_sample_text(example)
        total_characters += len(text)
        if tokenizer.is_fast:
            total_tokens += len(tokenizer(text).tokens())
            if len(tokenizer(text).tokens()) > max_token_length:
                max_token_length = len(tokenizer(text).tokens())
        else:
            total_tokens += len(tokenizer.tokenize(text))
            if len(tokenizer.tokenize(text)) > max_token_length:
                max_token_length = len(tokenizer.tokenize(text))

    print(f"max token length: {max_token_length}")
    return total_characters / total_tokens


def print_trainable_parameters(model):
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )

def prepare_sample_text(data_point):
    if data_point["input"]:
        return f\"\"\"<s>Human:
{data_point["input"]}
{data_point["instruction"]}
</s><s>Assistant:
{data_point["output"]}
</s>\"\"\"
    else:
        return f\"\"\"<s>Human:
{data_point["instruction"]}
</s><s>Assistant:
{data_point["output"]}
</s>\"\"\"


def create_datasets(tokenizer, args):
    # This assumes the dataset_name points to a directory structure like:
    # ./data/ablation_data/em_es_ia_sr_re/empathetic_dialogue_train.json
    train_json_path = os.path.join(args.dataset_name, "empathetic_dialogue_train.json")
    train_data = load_dataset("json", data_files=train_json_path, split="train")
    train_data = train_data.shuffle(seed=args.seed)

    # Note: Using the train.json path provided in the ablation script as the base path.
    val_json_path = os.path.join(os.path.dirname(args.dataset_name), "empathetic_dialogue_valid.json")
    valid_data = load_dataset("json", data_files=val_json_path, split="train")
    valid_data = valid_data.shuffle(seed=args.seed)

    chars_per_token = 3.6
    print(f"The character to token ratio of the dataset is: {chars_per_token:.2f}")

    train_dataset = ConstantLengthDataset(
        tokenizer,
        train_data,
        formatting_func=prepare_sample_text,
        infinite=True,
        seq_length=args.seq_length,
        chars_per_token=chars_per_token,
    )
    valid_dataset = ConstantLengthDataset(
        tokenizer,
        valid_data,
        formatting_func=prepare_sample_text,
        infinite=False,
        seq_length=args.seq_length,
        chars_per_token=chars_per_token,
    )

    print(f"Size of the train dataset: {len(train_dataset)}")
    print(f"Size of the validation dataset: {len(valid_dataset)}")

    return train_dataset, valid_dataset


def run_training(args, train_data, val_data, tokenizer=None):
    print("Loading the model")

    lora_config = LoraConfig(
        r=args.lora_r,
        lora_alpha=args.lora_alpha,
        lora_dropout=args.lora_dropout,
        target_modules=args.lora_target_modules.split(','),
        bias="none",
        task_type="CAUSAL_LM",
    )

    train_data.start_iteration = 0

    print("Starting main loop")

    training_args = TrainingArguments(
        output_dir=args.output_dir,
        dataloader_drop_last=True,
        evaluation_strategy="steps",
        max_steps=args.max_steps,
        eval_steps=args.eval_freq,
        save_steps=args.save_freq,
        logging_steps=args.log_freq,
        save_total_limit=args.save_total_limit,
        per_device_train_batch_size=args.batch_size,
        per_device_eval_batch_size=args.batch_size,
        learning_rate=args.learning_rate,
        lr_scheduler_type=args.lr_scheduler_type,
        warmup_steps=args.num_warmup_steps,
        gradient_accumulation_steps=args.gradient_accumulation_steps,
        gradient_checkpointing=args.no_gradient_checkpointing,
        fp16=args.fp16,
        bf16=args.no_bf16,
        weight_decay=args.weight_decay,
        warmup_ratio=args.warmup_ratio,
        run_name=args.run_name,
        report_to="wandb",
        ddp_find_unused_parameters=False if int(os.environ.get("WORLD_SIZE", 1)) != 1 else None,
    )

    model = AutoModelForCausalLM.from_pretrained(
        args.base_model,
        load_in_8bit=True,
    )

    if args.resume_from_checkpoint:
        import torch
        from peft import (
            get_peft_model,
            prepare_model_for_int8_training,
            set_peft_model_state_dict
        )
        checkpoint_name = os.path.join(
            args.resume_from_checkpoint, "pytorch_model.bin"
        )
        if not os.path.exists(checkpoint_name):
            checkpoint_name = os.path.join(
                args.resume_from_checkpoint, "adapter_model.bin"
            )
            args.resume_from_checkpoint = None

        if os.path.exists(checkpoint_name):
            print(f"Restarting from {checkpoint_name}")
            model = prepare_model_for_int8_training(model)
            model = get_peft_model(model, lora_config)

            adapters_weights = torch.load(checkpoint_name)
            set_peft_model_state_dict(model, adapters_weights)
        else:
            print(f"Checkpoint {checkpoint_name} not found")

    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        args=training_args,
        train_dataset=train_data,
        eval_dataset=val_data,
        peft_config=lora_config,
        max_seq_length=args.seq_length,
        packing=True,
    )

    print_trainable_parameters(model)

    print("Training...")
    trainer.train(resume_from_checkpoint=args.resume_from_checkpoint)

    print("Saving last checkpoint of the model")
    final_model_path = os.path.join(args.output_dir, "final_checkpoint/")
    trainer.model.save_pretrained(final_model_path)

    if args.merge_lora:
        merge_llm_with_lora(args.base_model, final_model_path, args.output_dir)


def main(args):
    if "llama" in args.base_model.lower():
        tokenizer = LlamaTokenizer.from_pretrained(args.base_model)
        tokenizer.add_special_tokens(
            {
                "eos_token": "</s>",
                "bos_token": "</s>",
                "unk_token": "</s>",
                "pad_token": "</s>",
            }
        )
    else:
        tokenizer = AutoTokenizer.from_pretrained(args.base_model, use_fast=False)
        if getattr(tokenizer, "pad_token", None) is None:
            tokenizer.pad_token = tokenizer.eos_token

    train_dataset, eval_dataset = create_datasets(tokenizer, args)
    run_training(args, train_dataset, eval_dataset, tokenizer)


if __name__ == "__main__":
    args = get_args()
    assert args.base_model != "", "Please provide the llama model path"

    set_seed(args.seed)
    os.makedirs(args.output_dir, exist_ok=True)

    logging.set_verbosity_error()

    main(args)
"""
with open("supervised_finetune_llama2_cot.py", "w") as f:
    f.write(SFT_PY_CONTENT)
print("Created supervised_finetune_llama2_cot.py successfully.")


Created supervised_finetune_llama2_cot.py successfully.


In [4]:
# --- C. Create eval_script.py (Evaluation Script) ---
EVAL_SCRIPT_CONTENT = """
import json
from transformers import AutoTokenizer, LlamaForCausalLM
import torch
from tqdm import tqdm
import os
import argparse
from rouge_score import rouge_scorer
import re

# --- Utility Function ---
def prepare_sample_text(data_point):
    if data_point["input"]:
        return f\"\"\"<s>Human:
{data_point["input"]}
{data_point["instruction"]}
</s><s>Assistant:\"\"\"
    else:
        return f\"\"\"<s>Human:
{data_point["instruction"]}
</s><s>Assistant:\"\"\"

# --- Metric Function ---
def calculate_metrics(results):
    scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)
    rouge_l_scores = []
    strategy_matches = 0
    total_samples = 0

    strategy_pattern = re.compile(r'(?:strategy|recognized_strategy|Strategy):\s*(\w[\w\s]*?)(?:\.|\n|$)', re.IGNORECASE)

    for item in results:
        score = scorer.score(item['label'], item['prediction'])
        rouge_l_scores.append(score['rougeL'].fmeasure)

        total_samples += 1

        pred_match = strategy_pattern.search(item['prediction'])
        label_match = strategy_pattern.search(item['label'])

        if pred_match and label_match:
            pred_strategy = pred_match.group(1).strip()
            label_strategy = label_match.group(1).strip()

            if pred_strategy.lower() == label_strategy.lower():
                strategy_matches += 1

    avg_rouge_l = sum(rouge_l_scores) / len(rouge_l_scores) if rouge_l_scores else 0
    strategy_accuracy = strategy_matches / total_samples if total_samples > 0 else 0

    metrics = {
        "ROUGE-L_F1": avg_rouge_l,
        "Strategy_Accuracy": strategy_accuracy,
        "Total_Samples": total_samples
    }

    return metrics


# --- Main Logic ---
def main(args):
    device = torch.device(f"cuda:{args.gpu_id}") if torch.cuda.is_available() else torch.device("cpu")

    print(f"Loading model from {args.model_path}...")
    model = LlamaForCausalLM.from_pretrained(args.model_path, device_map=device, low_cpu_mem_usage=True)
    tokenizer = AutoTokenizer.from_pretrained(args.model_path)

    with open(args.json_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    results = []

    for data_point in tqdm(data):
        prepared_text = prepare_sample_text(data_point)
        input_ids = tokenizer(prepared_text, return_tensors="pt").input_ids

        prepared_text_decoded = tokenizer.decode(input_ids[0], skip_special_tokens=False)

        input_ids = input_ids.to(device)

        outputs = model.generate(input_ids, max_new_tokens=500, do_sample=True, top_k=30, top_p=0.85, temperature=0.5, repetition_penalty=1., eos_token_id=2, bos_token_id=1, pad_token_id=0)

        rets = tokenizer.batch_decode(outputs, skip_special_tokens=False, clean_up_tokenization_spaces=False)

        full_output = rets[0].strip()
        response = full_output.replace(prepared_text_decoded, "").strip()

        response = response.replace("</s>", "").strip()

        results.append({'input': data_point['input'], 'label': data_point['output'], 'prediction': response, 'dialog_id': data_point.get('dialog_id', 'N/A')})

    # --- 1. Save results to file ---
    output_filename = 'test_inference_results.json'
    output_filepath = os.path.join(args.output_dir, output_filename)

    with open(output_filepath, 'w', encoding='utf-8') as outfile:
        json.dump(results, outfile, indent=4, ensure_ascii=False)
    print(f"\nInference results saved to: {output_filepath}")

    # --- 2. Calculate and Save Metrics ---
    print("\n--- Calculating Metrics ---")
    final_metrics = calculate_metrics(results)

    metrics_filepath = os.path.join(args.output_dir, 'evaluation_metrics.json')
    with open(metrics_filepath, 'w') as metric_file:
        json.dump(final_metrics, metric_file, indent=4)

    print(f"Metrics saved to: {metrics_filepath}")

    print("\n--- ESCoT BASELINE RESULTS ---")
    print(json.dumps(final_metrics, indent=4))

# --- Argument Parsing ---
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_path", type=str, required=True, help="Path to the ESCoT fine-tuned checkpoint.")
    parser.add_argument("--gpu_id", type=int, default=0, help="The id of the GPU to be used.")
    # Assuming test.json is the target for final evaluation
    parser.add_argument("--json_path", type=str, default="data/test.json", help="Path to the JSON file containing the data.")
    parser.add_argument("--output_dir", type=str, default="evaluation_output", help="Directory to save results.")

    args = parser.parse_args()
    os.makedirs(args.output_dir, exist_ok=True)
    main(args)
"""
with open("eval_script.py", "w") as f:
    f.write(EVAL_SCRIPT_CONTENT)
print("Created eval_script.py successfully.")

print("\nAll files created and setup complete. ")

Created eval_script.py successfully.

All files created and setup complete. 


  strategy_pattern = re.compile(r'(?:strategy|recognized_strategy|Strategy):\s*(\w[\w\s]*?)(?:\.|\n|$)', re.IGNORECASE)


In [4]:
# Cell 2: Execute Training (ESCOT Fine-Tuning)
# NOTE: Set --max_steps to 10000 for your final baseline run!

BASE_MODEL = "meta-llama/Llama-2-7b-chat-hf"
OUTPUT_DIR = "./checkpoints/cot/supervised_llama2_cot"

print(f"Starting training on {BASE_MODEL}. Checkpoint will be saved to {OUTPUT_DIR}/final_checkpoint-merged")

!accelerate launch supervised_finetune_llama2_cot.py \
    --base_model {BASE_MODEL} \
    --output_dir {OUTPUT_DIR} \
    --max_steps 500 \
    --save_freq 100 \
    --eval_freq 500 \
    --dataset_name "./data/ablation_data/em_es_ia_sr_re" \
    --lora_target_modules "q_proj,v_proj,o_proj,k_proj,gate_proj,up_proj,down_proj" \
    --learning_rate 1e-5 \
    --lr_scheduler_type "cosine" \
    --seq_length 2048 \
    --batch_size 8 \
    --run_name "supervised_llama2_cot" \
    --merge_lora

print(f"Training finished. Checkpoint should be saved in {OUTPUT_DIR}/final_checkpoint-merged")

Starting training on meta-llama/Llama-2-7b-chat-hf. Checkpoint will be saved to ./checkpoints/cot/supervised_llama2_cot/final_checkpoint-merged
Traceback (most recent call last):
  File "/usr/local/bin/accelerate", line 4, in <module>
    from accelerate.commands.accelerate_cli import main
  File "/usr/local/lib/python3.12/dist-packages/accelerate/commands/accelerate_cli.py", line 19, in <module>
    from accelerate.commands.estimate import estimate_command_parser
  File "/usr/local/lib/python3.12/dist-packages/accelerate/commands/estimate.py", line 35, in <module>
    import timm
  File "/usr/local/lib/python3.12/dist-packages/timm/__init__.py", line 2, in <module>
    from .layers import (
  File "/usr/local/lib/python3.12/dist-packages/timm/layers/__init__.py", line 1, in <module>
    from ._fx import (
  File "/usr/local/lib/python3.12/dist-packages/timm/layers/_fx.py", line 8, in <module>
    from torchvision.models.feature_extraction import create_feature_extractor as _create_fea

In [6]:
# Cell 4: Direct ESConv Inference (FIXED Syntax Error and Execution)
import os

# --- 1. Define the Script Content (Corrected String Formatting) ---
INFERENCE_PY_CONTENT = """
import json
import torch
import argparse
from transformers import AutoTokenizer, LlamaForCausalLM
import re

def main(args):
    device = torch.device(f"cuda:{args.gpu_id}") if torch.cuda.is_available() else torch.device("cpu")

    print(f"Loading model from {args.model_path}...")
    model = LlamaForCausalLM.from_pretrained(args.model_path, device_map=device, low_cpu_mem_usage=True)
    tokenizer = AutoTokenizer.from_pretrained(args.model_path)

    if args.json_path:
        with open(args.json_path, 'r', encoding='utf-8') as file:
            data = json.load(file)

        if not data or 'dialog' not in data[0]:
            print("Error: Test JSON file is not in expected ESConv format (missing 'dialog').")
            return

        # The test.json is in the ESCoT format, where each entry is a single turn prediction.
        # For direct inference on raw data, we need to extract the full dialog.
        # Since the ESCoT test.json is already turn-split, we'll use the 'dialog' from the first entry's original_data,
        # which represents the history up to the predicted turn.

        # We will use the 'dialog' array from the first entry of the ESCoT test file
        # as the context for the next turn generation.
        if 'original_data' in data[0] and 'dialog' in data[0]['original_data']:
             test_dialogue = data[0]['original_data']['dialog']
        else:
             # Fallback: Use the 'dialog' from the raw ESConv format if the file isn't pre-processed ESCoT
             test_dialogue = data[0]['dialog']

    else:
        print("Error: No input dialogue provided via --json_path.")
        return

    # --- Find the conversation history up to the turn the model needs to respond to (last seeker turn) ---
    # The dialogue from the ESCoT test file is already truncated to the point of generation.
    context_for_model = test_dialogue

    if not context_for_model:
        print("Error: Context is empty.")
        return

    # --- Format the prompt ---
    full_dialog_history = "\\n".join([f"{t['speaker'].capitalize()}: {t['content'].strip()}" for t in context_for_model])

    instruction = "Please provide the next Supporter response, including the Chain-of-Thought steps: emotion, emotion_stimuli, individual_appraisal, recognized_strategy, and strategy_reason, followed by the response."

    # Corrected string concatenation to avoid SyntaxError
    prompt = (
        "<s>Human:\n"
        "The following is an emotional support conversation. Respond as the Supporter to the final Seeker turn.\n"
        "---\n"
        f"{full_dialog_history}\n"
        "---\n"
        f"{instruction}\n"
        "</s><s>Assistant:"
    )

    print("\\n--- GENERATION PROMPT ---")
    print(prompt)
    print("-------------------------")

    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)

    print("\\nGenerating response...")
    outputs = model.generate(
        input_ids,
        max_new_tokens=500,
        do_sample=True,
        top_k=30,
        top_p=0.85,
        temperature=0.5,
        repetition_penalty=1.,
        eos_token_id=2,
        bos_token_id=1,
        pad_token_id=0
    )

    rets = tokenizer.batch_decode(outputs, skip_special_tokens=False, clean_up_tokenization_spaces=False)

    full_output = rets[0].strip()

    # Remove the prompt from the full output to get just the model's generation
    response = full_output.replace(prompt, "").strip()
    response = response.replace("</s>", "").strip()

    print("\\n--- ESCoT GENERATION ---")
    print(response)
    print("------------------------")


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_path", type=str, required=True, help="Path to the ESCoT fine-tuned checkpoint.")
    parser.add_argument("--gpu_id", type=int, default=0, help="The id of the GPU to be used.")
    parser.add_argument("--json_path", type=str, default="./data/test.json", help="Path to the test JSON file containing ESConv data.")
    parser.add_argument("--output_dir", type=str, default="./ESCOT_DIRECT_INFERENCE", help="Directory for output.")

    args = parser.parse_args()
    os.makedirs(args.output_dir, exist_ok=True)
    main(args)
"""
with open("direct_esconv_inference.py", "w") as f:
    f.write(INFERENCE_PY_CONTENT)
print("Created/Updated direct_esconv_inference.py successfully.")

# --- 2. Run the Inference on the first conversation in the test set ---

OUTPUT_DIR = "./checkpoints/cot/supervised_llama2_cot"
MODEL_PATH = f"{OUTPUT_DIR}/final_checkpoint-merged"
JSON_PATH = "./data/test.json"

print(f"\nRunning direct ESConv inference using model from {MODEL_PATH} on the first dialogue in {JSON_PATH}...")

!python direct_esconv_inference.py \
    --model_path {MODEL_PATH} \
    --gpu_id 0 \
    --json_path {JSON_PATH}

print("\nDirect inference complete. Check the output above for the generated response and CoT steps.")

Created/Updated direct_esconv_inference.py successfully.

Running direct ESConv inference using model from ./checkpoints/cot/supervised_llama2_cot/final_checkpoint-merged on the first dialogue in ./data/test.json...
  File "/content/ESCoT/direct_esconv_inference.py", line 55
    "<s>Human:
    ^
SyntaxError: unterminated string literal (detected at line 55)

Direct inference complete. Check the output above for the generated response and CoT steps.


In [8]:
print("Upgrading transformers and accelerate libraries to resolve LlamaForCausalLM import error...")
!pip install --upgrade transformers accelerate bitsandbytes peft trl
print("Installation complete. Restarting kernel (if in Colab) is recommended, then rerun this cell.")

# If you are not in a Colab environment or if the previous command didn't fix it,
# you may need to run the ESCoT requirements installation again:
# !pip install -r ESCoT/requirements.txt

Upgrading transformers and accelerate libraries to resolve LlamaForCausalLM import error...
Installation complete. Restarting kernel (if in Colab) is recommended, then rerun this cell.


In [11]:
import os
import json
import torch
from transformers import AutoTokenizer, LlamaForCausalLM
from tqdm.auto import tqdm


# --- 1. Define Model and Data ---
OUTPUT_DIR = "./checkpoints/cot/supervised_llama2_cot"
MODEL_PATH = f"{OUTPUT_DIR}/final_checkpoint-merged"

# The RAW ESConv dialogue data you provided, formatted as a dictionary
RAW_ESCONV_DATA = {
    "dialog": [
        {"speaker": "seeker", "content": "Hello\n"},
        {"speaker": "supporter", "content": "Hello, what would you like to talk about?"},
        {"speaker": "seeker", "content": "I am having a lot of anxiety about quitting my current job. It is too stressful but pays well\n"},
        {"speaker": "supporter", "content": "What makes your job stressful for you?"},
        {"speaker": "seeker", "content": "I have to deal with many people in hard financial situations and it is upsetting \n"},
        {"speaker": "supporter", "content": "Do you help your clients to make it to a better financial situation?"},
        {"speaker": "seeker", "content": "I do, but often they are not going to get back to what they want. Many people are going to lose their home when safeguards are lifted \n"},
        {"speaker": "supporter", "content": "But you offer them a better future than what they have currently. It may not be what they wanted, but it helps them in the long run."},
        {"speaker": "seeker", "content": "That is true but sometimes I feel like I should put my feelings and health first \n"},
        {"speaker": "supporter", "content": "I can understand that. "},
        {"speaker": "supporter", "content": "Is there another job that would pay you close to what you currently make?"},
        {"speaker": "seeker", "content": "Probably not. I was with the same company for a long time and I consistently get a bonus every year "},
        {"speaker": "supporter", "content": "Is it possible to reframe how you look at your clients' dire financial situations?"},
        {"speaker": "seeker", "content": "I could try. It mostly gets to me at the end of the day \n"},
        {"speaker": "supporter", "content": "Some people can't do what you do because they don't have the heart to give someone else bad news. The reality is though, someone needs to fill that role and you do help people"},
        {"speaker": "seeker", "content": "That is also true. Sometimes I wonder if it really is for me though \n"},
        {"speaker": "supporter", "content": "I've had to deal with collections before when I was in bad financial condition. The person on the other line was really helpful though. She was understanding,"},
        {"speaker": "supporter", "content": "It may not be for you. I think you should think about the pros and cons of keeping your position. It might make things clearer for you. "},
        {"speaker": "seeker", "content": "That is true. Maybe I just need to sit down and really think about it \n"},
        {"speaker": "supporter", "content": "I wouldn't stay if it really impacts your mental health in a negative way. Still, you may need to zoom out and see the bigger picture: that you provide a needed service and you do it compassionately"},
        {"speaker": "seeker", "content": "It really is a big decision \n"},
        {"speaker": "seeker", "content": "Thank you for the different perspective \n"},
        {"speaker": "supporter", "content": "No doubt, but you know in your heart what is right for you. "},
        {"speaker": "seeker", "                content": "That is true. Thanks again \n"},
        # The next turn (turn 25) is what we want the model to generate.
    ]
}

# --- 2. Prompt Formatting Logic ---

def create_cot_prompt(dialogue):
    # Use all turns up to the last turn in the provided list as context.
    # This ensures the model responds to the final Seeker message.

    context_for_model = dialogue

    full_dialog_history = "\n".join([f"{t['speaker'].capitalize()}: {t['content'].strip()}" for t in context_for_model])

    instruction = "Please provide the next Supporter response, including the Chain-of-Thought steps: emotion, emotion_stimuli, individual_appraisal, recognized_strategy, and strategy_reason, followed by the response."

    # Using the exact prompt template the model was trained on
    prompt = (
        "<s>Human:\n"
        "The following is an emotional support conversation. Respond as the Supporter to the final Seeker turn.\n"
        "---\n"
        f"{full_dialog_history}\n"
        "---\n"
        f"{instruction}\n"
        "</s><s>Assistant:"
    )
    return prompt

# --- 3. Execution ---

device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

try:
    print(f"Loading model from {MODEL_PATH}...")
    model = LlamaForCausalLM.from_pretrained(MODEL_PATH, device_map=device, low_cpu_mem_usage=True)
    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
except Exception as e:
    print(f"\n❌ ERROR loading model: {e}")
    print("Please ensure your training (Cell 2) completed successfully and the merged model is saved at the path above.")
    raise

# Create the prompt based on the provided ESConv dialogue
prompt = create_cot_prompt(RAW_ESCONV_DATA['dialog'])

print("\n--- GENERATION PROMPT (Context for the Model) ---")
print(prompt)
print("-------------------------------------------------")

input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)

print("\nGenerating CoT and Response...")
outputs = model.generate(
    input_ids,
    max_new_tokens=500,
    do_sample=True,
    top_k=30,
    top_p=0.85,
    temperature=0.5,
    repetition_penalty=1.,
    eos_token_id=2,
    bos_token_id=1,
    pad_token_id=0
)

rets = tokenizer.batch_decode(outputs, skip_special_tokens=False, clean_up_tokenization_spaces=False)

full_output = rets[0].strip()

# Clean up the output to show only the model's generation
response = full_output.replace(prompt, "").strip()
response = response.replace("</s>", "").strip()

print("\n🎉 --- ESCoT GENERATION (CoT & Response) --- 🎉")
print(response)
print("--------------------------------------------------")

ImportError: cannot import name 'LlamaForCausalLM' from 'transformers' (/usr/local/lib/python3.12/dist-packages/transformers/__init__.py)

In [None]:
import os
import json
import torch
from transformers import AutoTokenizer, LlamaForCausalLM
from tqdm.auto import tqdm

# --- 1. Define the Script Content for BULK Inference ---
BULK_INFERENCE_PY_CONTENT = """
import json
import torch
import argparse
from transformers import AutoTokenizer, LlamaForCausalLM
from tqdm import tqdm

# Function to create the CoT prompt from a single raw dialogue
def create_cot_prompt(dialogue):
    # We respond to the last turn in the provided dialogue, which should be the last Seeker turn.
    context_for_model = dialogue

    # Format the dialogue history
    full_dialog_history = "\\n".join([f"{t['speaker'].capitalize()}: {t['content'].strip()}" for t in context_for_model])

    instruction = "Please provide the next Supporter response, including the Chain-of-Thought steps: emotion, emotion_stimuli, individual_appraisal, recognized_strategy, and strategy_reason, followed by the response."

    # The prompt template must match the training format exactly.
    prompt = (
        "<s>Human:\\n"
        "The following is an emotional support conversation. Respond as the Supporter to the final Seeker turn.\\n"
        "---\\n"
        f"{full_dialog_history}\\n"
        "---\\n"
        f"{instruction}\\n"
        "</s><s>Assistant:"
    )
    return prompt

def main(args):
    device = torch.device(f"cuda:{args.gpu_id}") if torch.cuda.is_available() else torch.device("cpu")

    print(f"Loading model from {args.model_path}...")
    model = LlamaForCausalLM.from_pretrained(args.model_path, device_map=device, low_cpu_mem_usage=True)
    tokenizer = AutoTokenizer.from_pretrained(args.model_path)

    # --- Load the full JSON file ---
    try:
        with open(args.json_path, 'r', encoding='utf-8') as file:
            full_data = json.load(file)
    except FileNotFoundError:
        print(f"\\n❌ ERROR: Input file not found at {args.json_path}")
        return
    except json.JSONDecodeError:
        print(f"\\n❌ ERROR: Could not parse JSON file at {args.json_path}")
        return

    # Assuming the full JSON file is a list of dictionaries, where each dictionary
    # contains the 'dialog' key (the raw ESConv format you provided).
    if not isinstance(full_data, list):
        print("\\n❌ ERROR: JSON file content is not a list of dialogues.")
        return

    all_results = []

    for i, conversation in enumerate(tqdm(full_data, desc="Generating CoT for Dialogues")):
        if 'dialog' not in conversation or not conversation['dialog']:
            print(f"Skipping entry {i}: missing or empty 'dialog' key.")
            continue

        dialogue = conversation['dialog']

        # We need the conversation history up to the last Seeker turn.
        # We find the last Seeker turn to set the context for the model's response.
        last_seeker_index = -1
        for j in range(len(dialogue) - 1, -1, -1):
            if dialogue[j]['speaker'] == 'seeker':
                last_seeker_index = j
                break

        if last_seeker_index == -1:
            # If the conversation is empty or starts with supporter, we skip it
            # or use the full dialogue if the last turn is supporter (if you want to predict the next turn after supporter)
            # For robustness, we skip if no seeker turn is available to respond to.
            continue

        context_for_generation = dialogue[:last_seeker_index + 1]

        prompt = create_cot_prompt(context_for_generation)

        # Generate response
        input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
        outputs = model.generate(
            input_ids,
            max_new_tokens=500,
            do_sample=True,
            top_k=30,
            top_p=0.85,
            temperature=0.5,
            repetition_penalty=1.,
            eos_token_id=2,
            bos_token_id=1,
            pad_token_id=0
        )

        rets = tokenizer.batch_decode(outputs, skip_special_tokens=False, clean_up_tokenization_spaces=False)
        full_output = rets[0].strip()

        response = full_output.replace(prompt, "").strip()
        response = response.replace("</s>", "").strip()

        # Save results, linking back to the original dialogue's info
        result_entry = {
            "original_dialogue_id": conversation.get("id", i), # Use ID if available, otherwise index
            "context_for_model": context_for_generation,
            "generated_cot_and_response": response,
            "original_metadata": {
                k: v for k, v in conversation.items() if k != 'dialog'
            }
        }
        all_results.append(result_entry)

    # --- Save all results ---
    output_filepath = os.path.join(args.output_dir, 'raw_esconv_cot_generations.json')
    os.makedirs(args.output_dir, exist_ok=True)
    with open(output_filepath, 'w', encoding='utf-8') as outfile:
        json.dump(all_results, outfile, indent=4, ensure_ascii=False)

    print(f"\\n\\n🎉 All {len(all_results)} CoT responses saved to: {output_filepath}")


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_path", type=str, required=True, help="Path to the ESCoT fine-tuned checkpoint.")
    parser.add_argument("--gpu_id", type=int, default=0, help="The id of the GPU to be used.")
    parser.add_argument("--json_path", type=str, required=True, help="Path to the full raw ESConv JSON file.")
    parser.add_argument("--output_dir", type=str, default="./ESCOT_RAW_DATA_RESULTS", help="Directory for output.")

    args = parser.parse_args()
    main(args)
"""
with open("bulk_raw_esconv_inference.py", "w") as f:
    f.write(BULK_INFERENCE_PY_CONTENT)
print("Created bulk_raw_esconv_inference.py successfully.")

# --- 2. Run the BULK Inference ---

# --- Configuration ---
OUTPUT_DIR = "./checkpoints/cot/supervised_llama2_cot"
MODEL_PATH = f"{OUTPUT_DIR}/final_checkpoint-merged"

# ⚠️ YOU MUST REPLACE THIS PLACEHOLDER with the actual name of your file ⚠️
RAW_ESCONV_FILE = "RAW_ESCONV_DATASET.json"

print(f"\nStarting bulk CoT generation using model from {MODEL_PATH} on file: {RAW_ESCONV_FILE}...")

!python bulk_raw_esconv_inference.py \
    --model_path {MODEL_PATH} \
    --gpu_id 0 \
    --json_path {RAW_ESCONV_FILE}

print("\nBulk inference process complete. Check the directory ./ESCOT_RAW_DATA_RESULTS for the final JSON file.")