In [1]:
import json, random
from datasets import load_dataset
from demo_class import FlexFlowDemo

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def create_datasets(dataset_size=10, inference_file_path='inference_dataset.json', finetuning_file_path='finetuning_dataset.json'):
    """Build the inference and finetuning prompt files from databricks-dolly-15k.

    The "train" split of
    https://huggingface.co/datasets/databricks/databricks-dolly-15k is scanned
    in order. A row is kept when its category contains 'open_qa' or
    'closed_qa' and its context is empty; every kept row is stored as the
    string "<instruction> <response>".

    Keyword arguments:
    dataset_size -- the number of prompts to collect before the scan stops
    inference_file_path -- JSON file that receives only the first collected prompt
    finetuning_file_path -- JSON file that receives all collected prompts
    """
    wanted_categories = ("open_qa", "closed_qa")
    prompts = []
    for sample in load_dataset("databricks/databricks-dolly-15k", split="train"):
        # Stop as soon as the requested number of prompts has been gathered.
        if len(prompts) == dataset_size:
            break
        # Skip rows that are not question-answering samples.
        if not any(tag in sample['category'] for tag in wanted_categories):
            continue
        # Skip rows that come with a context passage.
        if len(sample['context']) != 0:
            continue
        prompts.append(" ".join((sample['instruction'], sample['response'])))

    # The inference file holds a single prompt: the first one collected (if any).
    first_prompt_only = prompts[:1]
    with open(inference_file_path, 'w') as inference_out:
        json.dump(first_prompt_only, inference_out)

    # The finetuning file holds every collected prompt, pretty-printed.
    with open(finetuning_file_path, 'w') as finetuning_out:
        json.dump(prompts, finetuning_out, indent=2, separators=(',', ': '))

In [3]:
# Base settings; the model, generation and finetuning sections below are
# merged into this same dictionary at the end of the cell.
configs_dict = dict(
    num_gpus=1,
    memory_per_gpu=8192,
    zero_copy_memory_per_node=12000,
    num_cpus=4,
    legion_utility_processors=4,
    data_parallelism_degree=1,
    tensor_parallelism_degree=1,
    pipeline_parallelism_degree=1,
    offload=False,
    offload_reserve_space_size=8 * 1024,  # 8GB
    use_4bit_quantization=False,
    use_8bit_quantization=False,
    enable_peft=True,
    peft_activation_reserve_space_size=1024,  # 1GB
    peft_weight_reserve_space_size=1024,  # 1GB
    profiling=False,
    inference_debugging=False,
    fusion=False,
    max_requests_per_batch=1,
    max_sequence_length=256,
    max_tokens_per_batch=128,
    max_training_steps=10,
    seed=42,
)

# Model identifiers, cache options and input/output file locations.
model_configs = dict(
    base_model="JackFram/llama-160m",
    inference_peft_model_id="goliaro/llama-160m-lora",
    finetuning_peft_model_id="goliaro/llama-160m-lora",
    cache_path="",
    refresh_cache=False,
    full_precision=True,
    # relative paths
    inference_dataset="inference_dataset.json",
    finetuning_dataset="finetuning_dataset.json",
    output_file="peft_demo.txt",
)

# Text-generation (sampling) options.
generation_configs = dict(
    do_sample=False,
    temperature=0.9,
    topp=0.8,
    topk=1,
)

# Optimizer options used for finetuning.
finetuning_configs = dict(
    learning_rate=1.0,
    momentum=0.0,
    weight_decay=0.0,
    nesterov=False,
)

# Fold the three sections into configs_dict, in the same order as listed above.
configs_dict.update({**model_configs, **generation_configs, **finetuning_configs})

In [4]:
# Seed Python's random module from the shared configuration.
random.seed(configs_dict["seed"])

# Write the inference and finetuning JSON files to the paths named in the config.
create_datasets(inference_file_path=configs_dict["inference_dataset"],
                finetuning_file_path=configs_dict["finetuning_dataset"])

demo = FlexFlowDemo(configs_dict, mode_only="finetuning")

demo.initialize_flexflow()
demo.start_server()
try:
    # The inference calls are left disabled, as in the original cell.
    #demo.generate_inference()
    demo.generate_finetuning()
    #demo.generate_inference()
finally:
    # Stop the server even when finetuning raises, so a failed run does not
    # leave a started server behind in the notebook kernel.
    demo.stop_server()

[0 - 7f3c2ae3f280]    0.350369 {3}{Mapper}: Enabled Control Replication Optimizations.
[0 - 7f3c2ae3f280]    0.350608 {3}{Mapper}: Enabled Control Replication Optimizations.
[0 - 7f3c2ae3f280]    0.350676 {3}{Mapper}: Enabled Control Replication Optimizations.
[0 - 7f3c2ae3f280]    0.350741 {3}{Mapper}: Enabled Control Replication Optimizations.
[0 - 7f3c2ae3f280]    0.350804 {3}{Mapper}: Enabled Control Replication Optimizations.
[0 - 7f3c2ae3f280]    0.477395 {3}{flexflow_c}: [FFConfig] new 0x75fc8d0
[0 - 7f3c2ae3f280]    0.477563 {3}{flexflow_c}: [RequestManager] get 0xa292510
[0 - 7f3c2ae3f280]    0.477610 {3}{flexflow_c}: [RequestManager] set max_requests_per_batch 1
[0 - 7f3c2ae3f280]    0.477663 {3}{flexflow_c}: [RequestManager] set max_tokens_per_batch 128
[0 - 7f3c2ae3f280]    0.477684 {3}{flexflow_c}: [RequestManager] set max_sequence_length 256
[0 - 7f3c2ae3f280]    0.477725 {3}{flexflow_c}: [RequestManager] set_enable_peft_finetuning 1
workSpaceSize (128 MB)
[0 - 7f3c2ae3f2