In [None]:
from train_commons import *

# Phase of divergence training performed by this cell; it is interpolated into
# the output directory name in TrainingArgs.
phase = 2

# IDs of the earlier runs whose adapters are used as starting points. The
# training loop visits them in exactly this order.
chosen_checkpoints_ids = [14, 6, 10, 20, 1, 16]

# One checkpoint file per chosen run ID, in the same order as the IDs.
# NOTE(review): the directory is hard-coded to "phase1" (not derived from
# `phase`) and, unlike TrainingArgs.output_dir, has no "../" prefix -- confirm
# this matches the working directory the notebook is run from.
chosen_checkpoint_paths = [
    f"divergence_adapters_phase1/divergence_adapter_b12_run_{ckpt_id}.pth"
    for ckpt_id in chosen_checkpoints_ids
]

class TrainingArgs:
    """Settings for one phase of divergence-adapter training.

    Every setting is a plain class attribute. The only value that changes
    between runs is ``adapter_checkpoint_path``, which the driver loop sets on
    the instance through ``set_adapter_checkpoint_path`` before each run.
    How the individual values are consumed is defined by the helpers imported
    from ``train_commons`` and is not visible here.
    """

    # --- Model, data and output locations ---
    model_id = "Qwen/Qwen2.5-14B-Instruct"
    dataset_path = "../datasets/train.csv"
    output_dir = f"../divergence_adapters_phase{phase}"  # named after the module-level `phase`

    # --- LoRA adapter ---
    lora_rank = 2
    lora_scaling = 2

    # --- Optimisation ---
    learning_rate = 1e-3
    epochs = 2
    batch_size = 12
    df_sample_size = 192  # presumably the number of dataset rows sampled per run -- TODO confirm

    # --- Loss weights ---
    alpha = 1.0  # KL coefficient
    beta = 0.3  # Consistency coefficient

    # --- Evaluation / generation ---
    max_new_tokens = 128
    num_eval_samples = 2

    # --- Run schedule ---
    latent_searches = len(chosen_checkpoints_ids)  # one training run per chosen checkpoint ID
    adapter_checkpoint_path = None  # no checkpoint selected until the setter is called

    def set_adapter_checkpoint_path(self, checkpoint_path=None):
        """Select the adapter checkpoint for the next run.

        Stores ``checkpoint_path`` on this instance (shadowing the class-level
        ``None``). Calling it without an argument resets the path to ``None``.
        """
        self.adapter_checkpoint_path = checkpoint_path

if __name__ == '__main__':
    # One configuration object is shared by all runs; only its adapter
    # checkpoint path is updated between runs.
    args = TrainingArgs()

    # The base model and tokenizer are loaded a single time and handed to
    # every training run.
    model_base, tokenizer = load_base_model_and_tokenizer(args)

    banner = "#" * 80

    # One training run per chosen checkpoint, numbered from 1.
    for run_number, checkpoint_path in enumerate(chosen_checkpoint_paths, start=1):
        args.set_adapter_checkpoint_path(checkpoint_path)

        print("\n" + banner)
        print(f"### STARTING TRAINING RUN {run_number} of {args.latent_searches} ###")
        print(banner)

        run_single_training_cycle(args, model_base, tokenizer, run_idx=run_number)

        print(f"--- Finished Training Run {run_number} ---")
        print("Clearing memory before next run...")
        # Nothing created inside the training function is referenced here any
        # more, so collect it and then release cached CUDA memory before the
        # next run starts.
        gc.collect()
        torch.cuda.empty_cache()

    print("\nAll training runs completed.")


  from .autonotebook import tqdm as notebook_tqdm


--- Loading Base Model and Tokenizer (once) ---


`torch_dtype` is deprecated! Use `dtype` instead!
Loading checkpoint shards: 100%|██████████| 8/8 [00:55<00:00,  7.00s/it]


--- Base Model and Tokenizer Loaded ---

################################################################################
### STARTING TRAINING RUN 1 of 3 ###
################################################################################
Loading a new, randomly initialized 'model_tuned' for run 1...


Loading checkpoint shards: 100%|██████████| 8/8 [00:13<00:00,  1.64s/it]


Loading adapter weights from checkpoint: divergence_adapters_phase1/divergence_adapter_b12_run_20.pth
Successfully loaded adapter checkpoint.
Run 1: Shared LoRA adapter created with 20,480 parameters.
Run 1: Using 192 samples for training (sampled from 384 total)
Run 1: Starting divergence training...


Run 1 Epoch 1: 100%|██████████| 16/16 [14:49<00:00, 55.61s/it, avg_loss=-0.5626]
Run 1 Epoch 2: 100%|██████████| 16/16 [14:34<00:00, 54.65s/it, avg_loss=-0.4684]
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.



Run 1: Training finished.
Shared LoRA adapter weights saved to './divergence_adapters_phase2/divergence_adapter_b12_run_20.pth'

--- Starting Evaluation for Run 1 ---

PROMPT:
Adult siblings at a family funeral, grief and finality making them more honest
than usual about family dynamics. <|im_start|>sibling1 What's the most
embarrassing thing that happened to you growing up that they never let you live
down? <|im_end|>  <|im_start|>sibling2

--- BASE MODEL OUTPUT ---
Adult siblings at a family funeral, grief and finality making them more honest
than usual about family dynamics. sibling1 What's the most embarrassing thing
that happened to you growing up that they never let you live down?   sibling2
Oh, man. For me, it was when I was in elementary school and I tripped during a
race in gym class. Not only did I fall flat on my face, but I also knocked over
the finish line tape. My siblings have never let me forget it. Every time we
talk about it, they laugh and say I should've practiced 

Loading checkpoint shards: 100%|██████████| 8/8 [00:12<00:00,  1.57s/it]


Loading adapter weights from checkpoint: divergence_adapters_phase1/divergence_adapter_b12_run_1.pth
Successfully loaded adapter checkpoint.
Run 2: Shared LoRA adapter created with 20,480 parameters.
Run 2: Using 192 samples for training (sampled from 384 total)
Run 2: Starting divergence training...


Run 2 Epoch 1: 100%|██████████| 16/16 [15:50<00:00, 59.39s/it, avg_loss=-0.0646]
Run 2 Epoch 2: 100%|██████████| 16/16 [15:41<00:00, 58.85s/it, avg_loss=-0.1161]



Run 2: Training finished.
Shared LoRA adapter weights saved to './divergence_adapters_phase2/divergence_adapter_b12_run_1.pth'

--- Starting Evaluation for Run 2 ---

PROMPT:
Someone in therapy finally being completely honest with themselves, internal
thoughts they've never voiced, breakthrough moments of self-awareness.
<|im_start|>therapist What's a goal you've abandoned, and what's the real reason
you gave up on it? <|im_end|>  <|im_start|>patient_thoughts

--- BASE MODEL OUTPUT ---
Someone in therapy finally being completely honest with themselves, internal
thoughts they've never voiced, breakthrough moments of self-awareness. therapist
What's a goal you've abandoned, and what's the real reason you gave up on it?
patient_thoughts I think about my goal to become a professional musician. I
started giving up on it a few years ago when I was playing gigs regularly and
even had some small successes. But deep down, I always felt like I wasn't really
good enough, that I was just scraping

Loading checkpoint shards: 100%|██████████| 8/8 [00:12<00:00,  1.62s/it]


Loading adapter weights from checkpoint: divergence_adapters_phase1/divergence_adapter_b12_run_16.pth
Successfully loaded adapter checkpoint.
Run 3: Shared LoRA adapter created with 20,480 parameters.
Run 3: Using 192 samples for training (sampled from 384 total)
Run 3: Starting divergence training...


Run 3 Epoch 1: 100%|██████████| 16/16 [15:51<00:00, 59.48s/it, avg_loss=0.0437] 
Run 3 Epoch 2: 100%|██████████| 16/16 [15:52<00:00, 59.54s/it, avg_loss=-0.0630]



Run 3: Training finished.
Shared LoRA adapter weights saved to './divergence_adapters_phase2/divergence_adapter_b12_run_16.pth'

--- Starting Evaluation for Run 3 ---

PROMPT:
Freelance workers who've been at the same coffee shop for hours, gig economy
struggles and isolation. <|im_start|>freelancer1 I spent all day trying to
explain to a client why my rate is what it is. They clearly think I'm
overcharging. How do you handle those kinds of conversations without feeling
like you're begging for scraps? <|im_end|>  <|im_start|>freelancer2

--- BASE MODEL OUTPUT ---
Freelance workers who've been at the same coffee shop for hours, gig economy
struggles and isolation. freelancer1 I spent all day trying to explain to a
client why my rate is what it is. They clearly think I'm overcharging. How do
you handle those kinds of conversations without feeling like you're begging for
scraps?   freelancer2 It's important to remember that your time and skills are
valuable. Here are some steps to handle