In [1]:
import os
import json
import argparse
from time import sleep
from typing import Any, Tuple

import wandb
from peft import LoraConfig, PeftModel
from datasets import Dataset, concatenate_datasets
from trl import DPOTrainer, SFTTrainer, DataCollatorForCompletionOnlyLM

from src.logger import logger
from src.models import get_model
from src.dataset.feedback_utils import Feedback, Type
from src.lcdpo import LocallyConstrainedDPOTrainer
from src.sft_weighted import WeightedSFTTrainer
from src.dataset.format import to_dpo, to_sft, to_lcdpo, to_sft_weighted
from src.feedback import manual_feedback as all_feedback
from src.utils import get_args, find_all_linear_names, dump_arg_dicts, PeftSavingCallback, get_train_file_name, print_num_trainable_params, TrainingArguments, find_file_with_prefix

In [2]:
# Command line arguments for the modal genearation
# --arg-file configs/config_dpo.json --do-train --feedback-prefix "Be more detailed" --run-id test

# modal run src.modal.app --arg-file configs/config_dpo.json --do-sample --feedback-prefix "Be more detailed" --run-id test

from openai import OpenAI
client = OpenAI()
def get_oai_response(prompt: str) -> str:
    completion = client.chat.completions.create(
        model="gpt-4-turbo",
        messages=[
            {"role": "system", "content": "You are a poetic assistant, skilled in explaining complex programming concepts with creative flair."},
            {"role": "user", "content": prompt}
        ]
    )
    return completion.choices[0].message

get_oai_response("sss")

ChatCompletionMessage(content="Hmm, it seems like your message might have slipped. Could you clarify your question or need? I'm here to help, be it a storm of code or a whisper of curiosity!", role='assistant', function_call=None, tool_calls=None)

Command to run sample

Make sure to create the ./data folder first? 

```bash
python src/sample.py --arg_file configs/config_dpo.json --run_id test_ksgk --feedback_prefix "Always use some" --data_dir ./data
```

In [5]:
# Load data that took 1 day to generate
# from src.feedback import feedback
run_dir = "data_/test_ksgk/sample"
feedback = all_feedback[0]
feedback.load_dataset(run_dir)

In [50]:
# prompts = feedback[0].prompts
# Given a piece of feedback, that apply to a specific domain
# Prompt follows into that specific domain
# When you are not in that domain, the feedback should NOT overgeneralize
# The Caveat is in the negative_prompts: These are the prompts that is prune to over-generalization of feedback, and one should avoid it.

# The forming of DPO pairs is also quite simple : You use (baseline_response, ICL response) pair as the preference pairs. 
# Of course, with ORPO things are even better as you need a SFT element inside

# prompts['train'][44] # in-context-response is the best it seems
# The Key here is that LLM when getting tuned, refuse to take the hard way out ---> adopting the actual reasoning process
# The LLM is a lazy learner, and it will take the easy way out, and that is to memorize the data, this means 'emoji' ---> using emoji and not avoiding it
# That means 'use emoji when xxx' ----> using emoji all the time
# The innate skethiness of the LLM means that it is not a reasoning machine 
# In that sense it would be interesting if we phrase it like that 

feedback[0].negative_prompts['train'][11]

{'prompt': 'Create a fictional dialogue between two friends who resolve a misunderstanding over a tweet using direct messages.',
 'baseline_response': 'Friend 1 (F1): *sighs deeply*, I can\'t believe this is still causing a rift between us, Mike. Your tweet about the new movie was just a harmless joke, right?\n\nFriend 2 (F2): Hey, Sam, I know it was just a joke, but I couldn\'t help feeling a little defensive when you retweeted it with the comment "Guess I\'ll be skipping this one then." It felt like a personal attack.\n\nFriend 1 (F1): I didn\'t mean it that way, Mike. I was just expressing my own opinion about the movie. I didn\'t realize you were such a big fan. I\'m sorry if I hurt your feelings.\n\nFriend 2 (F2): I appreciate the apology, Sam. And I should have communicated better with you instead of getting defensive. I know you wouldn\'t intentionally hurt my feelings.\n\nFriend 1 (F1): Absolutely, Mike. And I\'ll be more mindful of my words in the future. But I also hope we ca

In [67]:
feedback[0].general_prompts

DatasetDict({
    train: Dataset({
        features: ['prompt', 'baseline_response', 'revised_response', 'in_context_response', 'cot_response'],
        num_rows: 864
    })
    test: Dataset({
        features: ['prompt', 'baseline_response', 'revised_response', 'in_context_response', 'cot_response'],
        num_rows: 96
    })
})

In [60]:
from datasets import DatasetDict

# Dummny Dataset
dataset = DatasetDict({
    "train": Dataset.from_dict({
        "prompt": ["ss"],
        "baseline_response": ["sss"],
        "in_context_response": ["sss"],
    }),
    "test": Dataset.from_dict({
        "prompt": ["ss"],
        "baseline_response": ["sss"],
        "in_context_response": ["sss"],
    })
})

dataset

DatasetDict({
    train: Dataset({
        features: ['prompt', 'baseline_response', 'in_context_response'],
        num_rows: 1
    })
    test: Dataset({
        features: ['prompt', 'baseline_response', 'in_context_response'],
        num_rows: 1
    })
})

In [56]:
 
# Construct the dataset
# dataset = 

dataset = dataset_constructor(
        prompts,
        negative_prompts if (training_args.negative_prompt_ratio > 0 or training_args.algo == "lcdpo" or training_args.algo == "sft_weighted") else None,
        general_prompts if (training_args.negative_prompt_ratio > 0 or training_args.algo == "lcdpo" or training_args.algo == "sft_weighted") else None,
        model_args.train_model.model_name_or_path)


trainer = LocallyConstrainedDPOTrainer(
            model=model.model,
            max_length=2048,
            max_prompt_length=1024,
            args=training_args,
            beta=training_args.dpo_beta,
            kd_lambda=training_args.lcdpo_lambda,
            kd_temperature=training_args.lcdpo_temp,
            sigma_soft=training_args.lcdpo_sigma_soft,
            sigma_hard=training_args.lcdpo_sigma_hard,
            use_avg_kl=training_args.lcdpo_avg_kl,
            custom_sft_loss=training_args.lcdpo_custom_sft_loss,
            train_dataset=dataset,
            eval_dataset=eval_dataset,
            tokenizer=model.tokenizer,
            response_template=response_template,
            peft_config=peft_config,
            callbacks=[PeftSavingCallback] if training_args.lora_enable else None
)

DatasetDict({
    train: Dataset({
        features: ['prompt', 'baseline_response', 'revised_response', 'in_context_response', 'cot_response'],
        num_rows: 864
    })
    test: Dataset({
        features: ['prompt', 'baseline_response', 'revised_response', 'in_context_response', 'cot_response'],
        num_rows: 96
    })
})

In [52]:
# Now this cracks it open a little bit (really small bit)
import argparse
import json

parser = argparse.ArgumentParser()
parser.add_argument("--arg_file", type=str, default="configs/config_dpo.json")
parser.add_argument("--run_id", type=str, default="test_ksgk")
parser.add_argument("--data_dir", type=str, default="./data")
parser.add_argument("--feedback_prefix", type=str, default="Always use some heart")
args = parser.parse_args("")

with open(args.arg_file, "r") as f:
    arg_dict = json.load(f)

feedback = all_feedback
if args.feedback_prefix is not None: # This unfortunately is basically a prefix-filtering stuff
    feedback = [f for f in feedback if f.content.startswith(args.feedback_prefix)]

In [69]:
# model_args, sample_args, training_args, eval_args = get_args(arg_dict) # This hurts my debugging session ... 

# BreakDown when we have issues | Following code now works
from src.utils import *
modal_arg_dict = arg_dict["model_args"]
sample_arg_dict = arg_dict["sample_args"]
training_arg_dict = arg_dict["training_args"]
eval_arg_dict = arg_dict["eval_args"]

# HfArgumentParse parse on a python dictionary object, this is quite convenient wrapper
model_arg_parser = HfArgumentParser(PipelineModelsArguments)
model_args: PipelineModelsArguments = model_arg_parser.parse_dict(modal_arg_dict)[0]
sample_arg_parser = HfArgumentParser(SampleArguments)
sample_args: SampleArguments = sample_arg_parser.parse_dict(sample_arg_dict)[0]

# Issue Spot on MPS: Float16 not supported 
# training_arg_parser = HfArgumentParser(TrainingArguments)
# training_args: TrainingArguments = training_arg_parser.parse_dict(training_arg_dict)[0]

# Rest seems fine
eval_arg_parser = HfArgumentParser(EvalArguments)
eval_args: EvalArguments = eval_arg_parser.parse_dict(eval_arg_dict)[0]

ValueError: BF16 Mixed precision training with AMP (`--bf16`) and BF16 half precision evaluation (`--bf16_full_eval`) can only be used on CUDA, XPU (with IPEX), NPU, MLU or CPU/TPU/NeuronCore devices.

In [51]:

# model = get_model(model_args.train_model)

In [6]:
import numpy as np
np.ceil((max(sample_args.num_prompts, sample_args.num_general_prompts)) / sample_args.prompts_per_category)

30.0

In [9]:
from src.sample import (
    SAMPLE_PROMPT_CATEGORIES,
    SAMPLE_PROMPT_CATEGORIES_CONFIG,
    SAMPLE_PROMPTS,
    SAMPLE_PROMPTS_CONFIG,
    SAMPLE_NEGATIVE_PROMPTS,
    SAMPLE_NEGATIVE_PROMPTS_CONFIG,
    split_numbered_list,
    sample_categories,
    sample_prompts,
)


num_categories = np.ceil((max(sample_args.num_prompts, sample_args.num_general_prompts)) / sample_args.prompts_per_category)
num_categories = 5
# Note: num_categories should be no less than 5 to work with the current prompt (few shot examples are 1/2/3/4/5... more than 5)
# sample_categories(feedback, model_args.category_model, num_categories) 
negative = False
prompt = SAMPLE_PROMPTS if not negative else SAMPLE_NEGATIVE_PROMPTS
prompt_config = SAMPLE_PROMPTS_CONFIG if not negative else SAMPLE_NEGATIVE_PROMPTS_CONFIG
prompt_model = get_model(model_args.prompt_model)
prompts_per_category = sample_args.prompts_per_category

# Get responses for flattened list of prompts | Fix on the rate limits Issue --> Wait for the release
responses = prompt_model.get_responses(
        [[prompt.format(count=prompts_per_category, domain=f.domain, category=c)]
         for f in feedback for c in f.categories],
    prompt_config)

Length of batch:  1


100%|██████████| 1/1 [00:50<00:00, 50.12s/it]


In [15]:
sample_prompts(feedback, model_args.prompt_model, sample_args.num_prompts, sample_args.prompts_per_category)

  1%|          | 26/2459 [01:03<1:38:40,  2.43s/it]


KeyboardInterrupt: 

In [15]:
from src.utils import ModelArguments
from src.sample import sample_prompts, SAMPLE_PROMPTS, SAMPLE_NEGATIVE_PROMPTS, SAMPLE_PROMPTS_CONFIG, SAMPLE_NEGATIVE_PROMPTS_CONFIG

prompt_model_args = model_args.prompt_model
category_model_args = model_args.category_model
completion_model_args = model_args.completion_model
quality_model_args = model_args.qualitative_eval_model

negative = False 

prompt = SAMPLE_PROMPTS if not negative else SAMPLE_NEGATIVE_PROMPTS
prompt_config = SAMPLE_PROMPTS_CONFIG if not negative else SAMPLE_NEGATIVE_PROMPTS_CONFIG

# Loaded Model
####################################################################
# Rate Limit Exceeded: To be Fair, this exceeds limit after 12 sec #
####################################################################
# Alternative: No OpenAI, what is the next best thing? Opus..
# Fix it heads on: The configurable Rate Limite per Minute should be tuned and changed

prompt_model = get_model(category_model_args)
# prompt_model = get_model(completion_model_args)
# Sampling Steps obtains a bunch of prompt for in-domain / out-domain model_args
prompts_per_category = 1

responses = []
for f in feedback:
    for c in f.categories:
        # break
        prompt_text = prompt.format(count=prompts_per_category, domain=f.domain, category=c)
        break
        # My GPT-4 call does NOT exceeds rate limit ?
        # responses.append(prompt_model.get_responses(prompt_text, prompt_config))
        # time.sleep(60)  # Sleep for 16 seconds after each call
    break


In [10]:
# prompt_text
# prompt_config
get_oai_response(prompt_text)


ChatCompletionMessage(content="1. Write a workout guide for increasing upper body strength using only manual resistance exercises described in the 'Manual_8 Fitness Handbook.'\n2. Create a comprehensive meal plan for building lean muscle based on the nutritional guidelines provided in chapter 4 of the 'Manual_8 Fitness Guide.'\n3. Explain the step-by-step process of the 'Ultra-Flex' routine from Manual_8, focusing on how it caters to improving flexibility and balance.\n4. Provide a detailed analysis of the cardiovascular training section found in 'Manual_8', including its effectiveness for increasing stamina.", role='assistant', function_call=None, tool_calls=None)

In [9]:

prompt_text = prompt.format(count=prompts_per_category, domain=f.domain, category=c)
print(prompt_text)

You are a helpful assistant that always closely follows instructions. You are provided with a topic, and category. Your job is to come up with 1 actionable prompts that fulfill the following criteria:

- All prompts must fall within the category provided
- All prompts must be phrased in a way that both the prompt and eventual response will ALWAYS BE WITHIN the topic
- If a human had to modify all responses that fall within the topic, your prompts must be so clearly within the topic that the human would always have to make edits

Be very creative, think outside the box, and feel free to make up facts, names, and events to make the prompts more specific and actionable. Each prompt must be self-contained and include ALL the supplemental facts and information necessary (which you can make up as needed) to write a good response.

Each prompt should only be 1-3 sentences long. Do not repeat prompts and respond with NOTHING ELSE THAN THE PROMPTS. Output each prompt on a new line as part of a 

In [2]:
# prompt_model.get_responses
import requests

gptgod_api_key = "sk-olRCozcTXcjITfwAI86YQurmjj7LHB0ZtnOSDxIFyBuoxwgu"

def get_chat_gpt_response(prompt):
    url = "https://api.gptgod.online/v1/chat/completions"
    headers = {
        "Authorization": gptgod_api_key,
        "Content-Type": "application/json"
    }
    data = {
        "model": "gpt-4-turbo-2024-04-09",
        "messages": [{"role": "system", "content": "You are a helpful assistant."}, 
                     {"role": "user", "content": prompt}]
    }

    response = requests.post(url, headers=headers, json=data)
    return response.json()

# 使用示例
response = get_chat_gpt_response("Hello, how are you?")
print(response)

{'id': 'chatcmpl-89DaOX5rY45nnb6ZxD5ym2NuA1im3', 'object': 'chat.completion', 'created': 1715085442, 'model': 'gpt-4-turbo-2024-04-09', 'choices': [{'index': 0, 'message': {'role': 'assistant', 'content': "Hello! I'm here and ready to help. What's on your mind today?"}, 'finish_reason': 'stop'}], 'usage': {'prompt_tokens': 19, 'completion_tokens': 17, 'total_tokens': 36}}


In [37]:
def train(arg_dict: dict[str, Any], run_id: str, data_dir: str, feedback: Feedback, second_feedback: Feedback = None) -> None:
    model_args, _, training_args, _ = get_args(arg_dict)
    
    # Load feedback
    run_dir = os.path.join(data_dir, run_id, "sample")
    logger.info(f"Training using data for run {run_id}, stored in {run_dir}")
    if not feedback.can_load_dataset(run_dir):
        raise ValueError(f"Feedback \"{feedback.content}\" has not been sampled yet")
    feedback.load_dataset(run_dir)
    logger.info(f"Loaded feedback \"{feedback.content}\"")

    # Load second feedback if given
    if second_feedback is not None:
        assert training_args.multi_feedback_training, "Must set multi_feedback_training to True when providing a second feedback"
        if not second_feedback.can_load_dataset(run_dir):
            raise ValueError(f"Feedback \"{second_feedback.content}\" has not been sampled yet")
        second_feedback.load_dataset(run_dir)
    elif training_args.multi_feedback_training and second_feedback is None:
        raise ValueError("Must provide a second feedback when multi_feedback_training is True")
