In [1]:
import os
import json
import argparse
from time import sleep
from typing import Any, Tuple

import wandb
from peft import LoraConfig, PeftModel
from datasets import Dataset, concatenate_datasets
from trl import DPOTrainer, SFTTrainer, DataCollatorForCompletionOnlyLM

from src.logger import logger
from src.models import get_model
from src.dataset.feedback_utils import Feedback, Type
from src.lcdpo import LocallyConstrainedDPOTrainer
from src.sft_weighted import WeightedSFTTrainer
from src.dataset.format import to_dpo, to_sft, to_lcdpo, to_sft_weighted
from src.feedback import manual_feedback as all_feedback
from src.utils import get_args, find_all_linear_names, dump_arg_dicts, PeftSavingCallback, get_train_file_name, print_num_trainable_params, TrainingArguments, find_file_with_prefix


In [2]:
# Command line arguments for the modal generation
# --arg-file configs/config.json --do-train --feedback-prefix "Be more detailed" --run-id test

In [3]:
from openai import OpenAI
import os

# Smoke test for the OpenAI client: send a single system + user exchange to the
# chat completions endpoint and print the first message that comes back.
# NOTE(review): OpenAI() is constructed without arguments, so credentials are
# presumably picked up from the environment — confirm before sharing.
client = OpenAI()

system_message = {
    "role": "system",
    "content": "You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.",
}
user_message = {
    "role": "user",
    "content": "Compose a poem that explains the concept of recursion in programming.",
}

completion = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[system_message, user_message],
)

first_choice = completion.choices[0]
print(first_choice.message)

ChatCompletionMessage(content="In the realm of code, there lies a tale\nOf recursion, a loop that does not fail\nLike a mirror reflecting within itself\nA function calling, seeking unseen wealth\n\nA dance of self-reference, elegant and fine\nEach iteration a step, in a pattern divine\nThrough layers of calls, it travels deep\nUntil a base case breaks the endless leap\n\nLike a fractal unfolding, endlessly profound\nRecursion explores, its beauty unbound\nIt divides and conquers with mathematical grace\nSolving problems with a recursive embrace\n\nSo fear not the recursive function's might\nFor in its elegance, it shines so bright\nA loop within a loop, a cycle so grand\nIn the world of code, recursion shall stand.", role='assistant', function_call=None, tool_calls=None)


In [4]:
# Notebook-level stand-ins for the command-line options shown in the comment cell above.
# NOTE(review): the argparse cell that follows defines its own defaults
# ("configs/config_dpo.json", empty feedback prefix) and does not read these
# variables — confirm which set of values is actually intended.
arg_file = "configs/config.json"  # path to a JSON config file
feedback_prefix = "Be more detailed"  # same prefix as in the example command line
run_id = "test-ksgk"
data_dir = "./data"

In [5]:
# Rebuild the script's command-line arguments inside the notebook so the config
# file can be loaded and the feedback list filtered interactively.
import argparse
import json

parser = argparse.ArgumentParser()
parser.add_argument("--arg_file", type=str, default="configs/config_dpo.json")
parser.add_argument("--run_id", type=str, default="test-ksgk")
parser.add_argument("--data_dir", type=str, default="./data")
parser.add_argument("--feedback_prefix", type=str, default="")
# Pass an explicit empty argument list so nothing is read from sys.argv and the
# defaults declared above are used.  (An empty string only worked by accident:
# argparse turns it into a list of its characters, which happens to be empty.)
args = parser.parse_args([])

with open(args.arg_file, "r") as f:
    arg_dict = json.load(f)

# Keep only the feedback whose text starts with the requested prefix.  The
# default prefix is "" (never None) and every string starts with "", so leaving
# the prefix unset keeps all feedback; `or ""` makes an explicit None behave the
# same way instead of raising inside str.startswith.
feedback_prefix_filter = args.feedback_prefix or ""
feedback = [fb for fb in all_feedback if fb.content.startswith(feedback_prefix_filter)]

In [6]:
# Original one-call version, kept for reference because it is harder to debug:
# model_args, _, training_args, _ = get_args(arg_dict)

# Parse the config one section at a time so a failure can be traced to a single
# argument group.
# NOTE(review): the wildcard import is presumably what brings HfArgumentParser,
# PipelineModelsArguments, SampleArguments and EvalArguments into scope — confirm,
# and consider importing those names explicitly.
from src.utils import *
modal_arg_dict = arg_dict["model_args"]
sample_arg_dict = arg_dict["sample_args"]
training_arg_dict = arg_dict["training_args"]  # currently unused: its parsing is disabled below
eval_arg_dict = arg_dict["eval_args"]

# HfArgumentParser.parse_dict is fed a plain Python dict; the result is indexed
# with [0] to take the first parsed argument object.
model_arg_parser = HfArgumentParser(PipelineModelsArguments)
model_args: PipelineModelsArguments = model_arg_parser.parse_dict(modal_arg_dict)[0]
sample_arg_parser = HfArgumentParser(SampleArguments)
sample_args: SampleArguments = sample_arg_parser.parse_dict(sample_arg_dict)[0]

# Disabled: parsing the training arguments was observed to fail on MPS with
# "float16 not supported", so training_args is NOT defined by this cell.
# training_arg_parser = HfArgumentParser(TrainingArguments)
# training_args: TrainingArguments = training_arg_parser.parse_dict(training_arg_dict)[0]

# The evaluation arguments parse without issue.
eval_arg_parser = HfArgumentParser(EvalArguments)
eval_args: EvalArguments = eval_arg_parser.parse_dict(eval_arg_dict)[0]

In [None]:
from src.utils import ModelArguments
from src.sample import sample_prompts, SAMPLE_PROMPTS, SAMPLE_NEGATIVE_PROMPTS, SAMPLE_PROMPTS_CONFIG, SAMPLE_NEGATIVE_PROMPTS_CONFIG

# Per-role model configurations read from the parsed pipeline arguments.
prompt_model_args = model_args.prompt_model
category_model_args = model_args.category_model
completion_model_args = model_args.completion_model
quality_model_args = model_args.qualitative_eval_model

# Switch between the regular and the "negative" prompt template / config pair.
negative = False

prompt = SAMPLE_PROMPTS if not negative else SAMPLE_NEGATIVE_PROMPTS
prompt_config = SAMPLE_PROMPTS_CONFIG if not negative else SAMPLE_NEGATIVE_PROMPTS_CONFIG

# Loaded model
#######################################################################
# Rate limit exceeded: in practice the limit is hit after ~12 seconds  #
#######################################################################
# Alternative without OpenAI: the next best option considered was Opus.
# NOTE(review): the prompt model is built from category_model_args, not
# prompt_model_args — confirm this is intentional.
prompt_model = get_model(category_model_args)
# prompt_model = get_model(completion_model_args)

# The sampling step requests this many prompts for every (feedback, category) pair.
prompts_per_category = 1
# Pause after every request to stay under the rate limit mentioned above.
seconds_between_requests = 40

responses = []
for f in feedback:
    for c in f.categories:
        prompt_text = prompt.format(count=prompts_per_category, domain=f.domain, category=c)
        responses.append(prompt_model.get_responses(prompt_text, prompt_config))
        # Use `sleep`, which the first cell imports via `from time import sleep`;
        # the `time` module itself is never imported in this notebook, so
        # `time.sleep(...)` is not guaranteed to resolve on a fresh kernel.
        sleep(seconds_between_requests)


  0%|          | 1/2426 [00:16<11:19:16, 16.81s/it]


KeyboardInterrupt: 

--- Logging error ---
Traceback (most recent call last):
  File "/opt/homebrew/anaconda3/lib/python3.11/site-packages/tenacity/__init__.py", line 382, in __call__
    result = fn(*args, **kwargs)
             ^^^^^^^^^^^^^^^^^^^
  File "/Users/fangyuanyu/Implementation/c3po/src/models/openai.py", line 56, in get_response
    return self.model.chat.completions.create(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/homebrew/anaconda3/lib/python3.11/site-packages/openai/_utils/_utils.py", line 277, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/opt/homebrew/anaconda3/lib/python3.11/site-packages/openai/resources/chat/completions.py", line 579, in create
    return self._post(
           ^^^^^^^^^^^
  File "/opt/homebrew/anaconda3/lib/python3.11/site-packages/openai/_base_client.py", line 1240, in post
    return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))
                           ^^^^^^^^^^^^^^^^^

In [14]:

# Show the prompt text most recently built by the sampling loop (prompt_text is
# reassigned on every iteration, so this is the last one that was formatted).
print(prompt_text)

You are a helpful assistant that always closely follows instructions. You are provided with a topic, and category. Your job is to come up with 1 actionable prompts that fulfill the following criteria:

- All prompts must fall within the category provided
- All prompts must be phrased in a way that both the prompt and eventual response will ALWAYS BE WITHIN the topic
- If a human had to modify all responses that fall within the topic, your prompts must be so clearly within the topic that the human would always have to make edits

Be very creative, think outside the box, and feel free to make up facts, names, and events to make the prompts more specific and actionable. Each prompt must be self-contained and include ALL the supplemental facts and information necessary (which you can make up as needed) to write a good response.

Each prompt should only be 1-3 sentences long. Do not repeat prompts and respond with NOTHING ELSE THAN THE PROMPTS. Output each prompt on a new line as part of a 

In [11]:
# prompt_model.get_responses

In [37]:
def train(arg_dict: dict[str, Any], run_id: str, data_dir: str, feedback: Feedback, second_feedback: Feedback = None) -> None:
    model_args, _, training_args, _ = get_args(arg_dict)
    
    # Load feedback
    run_dir = os.path.join(data_dir, run_id, "sample")
    logger.info(f"Training using data for run {run_id}, stored in {run_dir}")
    if not feedback.can_load_dataset(run_dir):
        raise ValueError(f"Feedback \"{feedback.content}\" has not been sampled yet")
    feedback.load_dataset(run_dir)
    logger.info(f"Loaded feedback \"{feedback.content}\"")

    # Load second feedback if given
    if second_feedback is not None:
        assert training_args.multi_feedback_training, "Must set multi_feedback_training to True when providing a second feedback"
        if not second_feedback.can_load_dataset(run_dir):
            raise ValueError(f"Feedback \"{second_feedback.content}\" has not been sampled yet")
        second_feedback.load_dataset(run_dir)
    elif training_args.multi_feedback_training and second_feedback is None:
        raise ValueError("Must provide a second feedback when multi_feedback_training is True")
