In [1]:
!nvidia-smi

Sat Sep 30 22:05:26 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA A100-SXM...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   34C    P0    45W / 400W |      0MiB / 40960MiB |      0%      Default |
|                               |                      |             Disabled |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

# Dependencies


1.   pytorch
2.   transformers
3.   datasets
4.   peft - QLORA
5.   bitsandbytes
6.   trl - trainer



In [2]:
!pip install -Uqqq pip --progress-bar off
!pip install -qqq torch==2.0.1 --progress-bar off
!pip install -qqq transformers==4.32.1 --progress-bar off
!pip install -qqq datasets==2.14.4 --progress-bar off
!pip install -qqq peft==0.4.0 --progress-bar off
!pip install -qqq bitsandbytes==0.41.1 --progress-bar off
!pip install -qqq trl==0.7.1 --progress-bar off

[0m

In [3]:
import json
import re
from pprint import pprint

import pandas as pd
import torch

#? Dataset loader, for fetching dataset from HF
from datasets import Dataset, load_dataset

#? notebook connection to HF
from huggingface_hub import notebook_login

#? Load LORA
from peft import LoraConfig, PeftModel

#? Load functions for manipulation of the model
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)

# load trainer
from trl import SFTTrainer

# Device that tokenized inputs are moved to at inference time: the first CUDA
# GPU when one is available, otherwise the CPU.
DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"
# Hugging Face Hub id of the checkpoint loaded by create_model_and_tokenizer().
MODEL_NAME = "NousResearch/Llama-2-13b-chat-hf"


## Data

In [4]:
# Load the "SebRincon/finance-bot" dataset; the bare `dataset` expression
# displays its splits and columns as the cell output.
dataset = load_dataset("SebRincon/finance-bot")
dataset

DatasetDict({
    train: Dataset({
        features: ['question', 'response'],
        num_rows: 141
    })
    test: Dataset({
        features: ['question', 'response'],
        num_rows: 60
    })
})

In [5]:
# Default instruction placed at the top of inference prompts (see
# generate_prompt). The surrounding whitespace of the triple-quoted literal is
# removed by .strip(), so the value is a single line with no stray quote.
DEFAULT_SYSTEM_PROMPT = """
  Given user profile or bank statement, answer the following question providing a tailored answer to their situation and profile
""".strip()


# def generate_training_prompt(input: str, response: str, system_prompt: str = DEFAULT_SYSTEM_PROMPT) -> str:
#     return f"""### Instruction: {system_prompt}

# ### Input:
# {input.strip()}

# ### Response:
# {response}
# """.strip()

In [6]:
def generate_text(data_point):
    """Build the training record for one dataset example.

    Args:
        data_point: Mapping that provides the "question" and "response" entries.

    Returns:
        A dict with the original "question" and "response" plus a "text"
        entry holding the question and the response separated by a blank line.
    """
    question = data_point["question"]
    response = data_point["response"]

    return {
        "question": question,
        "response": response,
        # Use real newline escapes; "/n/n " would be written into the
        # training text literally instead of separating the two parts.
        "text": f"{question}\n\n{response}",
    }



In [7]:

def process_dataset(data: Dataset):
    """Shuffle `data` with a fixed seed, then apply generate_text to every row.

    Args:
        data: The dataset split to prepare.

    Returns:
        The result of mapping generate_text over the shuffled split.
    """
    shuffled = data.shuffle(seed=42)
    return shuffled.map(generate_text)

In [8]:
# Replace each split with its shuffled version that also carries the `text`
# column built by generate_text.
dataset["train"] = process_dataset(dataset["train"])
dataset["test"] = process_dataset(dataset["test"])

# Display the updated splits as the cell output.
dataset

DatasetDict({
    train: Dataset({
        features: ['question', 'response', 'text'],
        num_rows: 141
    })
    test: Dataset({
        features: ['question', 'response', 'text'],
        num_rows: 60
    })
})

## Model

In [None]:
notebook_login()

In [9]:
def create_model_and_tokenizer():
    """Load MODEL_NAME as a 4-bit quantized causal LM together with its tokenizer.

    Returns:
        A `(model, tokenizer)` tuple. The tokenizer reuses its EOS token as
        the padding token and pads on the right.
    """
    # Quantization settings: 4-bit loading, "nf4" quant type, float16 compute dtype.
    quant_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
    )

    # Keyword arguments for fetching/loading the model weights.
    load_kwargs = dict(
        use_safetensors=True,
        quantization_config=quant_config,
        trust_remote_code=True,
        device_map="auto",
    )
    base_model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, **load_kwargs)

    # Tokenizer that belongs to the same checkpoint.
    tok = AutoTokenizer.from_pretrained(MODEL_NAME)
    tok.pad_token = tok.eos_token
    tok.padding_side = "right"

    return base_model, tok

In [10]:
# Instantiate the quantized model and its tokenizer.
# NOTE(review): the recorded output of this cell ends in "ValueError: ignored"
# with the traceback cut off, so the cause is not visible here — re-run and
# confirm the cell completes before relying on the cells below.
model, tokenizer = create_model_and_tokenizer()
# Turn off `use_cache` on the model config before training
# (presumably the generation cache is not wanted while training — confirm).
model.config.use_cache = False

Downloading (…)lve/main/config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

Downloading (…)fetensors.index.json:   0%|          | 0.00/33.4k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading (…)of-00003.safetensors:   0%|          | 0.00/9.95G [00:00<?, ?B/s]

Downloading (…)of-00003.safetensors:   0%|          | 0.00/9.90G [00:00<?, ?B/s]

Downloading (…)of-00003.safetensors:   0%|          | 0.00/6.18G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading (…)neration_config.json:   0%|          | 0.00/175 [00:00<?, ?B/s]



Downloading (…)okenizer_config.json:   0%|          | 0.00/746 [00:00<?, ?B/s]

Downloading tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

Downloading (…)in/added_tokens.json:   0%|          | 0.00/21.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/435 [00:00<?, ?B/s]

ValueError: ignored

In [None]:
model.config.quantization_config.to_dict()

In [None]:
# LoRA hyperparameters, kept as separate names so they can be tuned in one place.
lora_r = 16
lora_alpha = 64
lora_dropout = 0.1
# Names of the modules that receive LoRA adapters.
lora_target_modules = [
    "q_proj",
    "up_proj",
    "o_proj",
    "k_proj",
    "down_proj",
    "gate_proj",
    "v_proj",
]


# Adapter configuration handed to SFTTrainer via `peft_config`.
peft_config = LoraConfig(
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    target_modules=lora_target_modules,
    bias="none",
    task_type="CAUSAL_LM",
)

## Training

In [None]:
OUTPUT_DIR = "experiments"
!kill 30836
%load_ext tensorboard
%tensorboard --logdir experiments/runs
%reload_ext tensorboard

In [None]:
# Directory passed as `output_dir` below; also read later when the adapter is
# reloaded from disk.
OUTPUT_DIR = "experiments"
# Training configuration consumed by SFTTrainer.
training_arguments = TrainingArguments(
    # 4 samples per device with 4 accumulation steps — presumably an effective
    # batch of 16 per device per optimizer step (standard semantics; confirm).
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    optim="paged_adamw_32bit",
    logging_steps=1,
    learning_rate=1e-4,
    fp16=True,
    max_grad_norm=0.3,
    num_train_epochs=2,
    evaluation_strategy="steps",
    # NOTE(review): a float here is presumably read as a fraction of the total
    # training steps — confirm against the pinned transformers version.
    eval_steps=0.2,
    warmup_ratio=0.05,
    save_strategy="epoch",
    group_by_length=True,
    output_dir=OUTPUT_DIR,
    report_to="tensorboard",
    save_safetensors=True,
    lr_scheduler_type="cosine",
    seed=42,
)

In [None]:
# Supervised fine-tuning trainer: trains `model` with the LoRA settings in
# `peft_config`, reading the `text` column of each split.
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    peft_config=peft_config,
    dataset_text_field="text",  # column added by generate_text
    max_seq_length=4096,
    tokenizer=tokenizer,
    args=training_arguments,
)

In [None]:
trainer.train()

In [None]:
trainer.save_model()

In [None]:
trainer.model

In [None]:
# NOTE(review): confirm that the pinned peft version exports
# AutoPeftModelForCausalLM; upgrade the pin if this import fails.
from peft import AutoPeftModelForCausalLM

# Reload the fine-tuned adapter (with its base model) from the training
# output directory.
trained_model = AutoPeftModelForCausalLM.from_pretrained(
    OUTPUT_DIR,
    low_cpu_mem_usage=True,
)

# Merge from `trained_model`, the model reloaded just above. Calling
# merge_and_unload() on `model` instead would ignore the reloaded weights and
# leave `trained_model` unused.
merged_model = trained_model.merge_and_unload()
# Save the merged weights and the tokenizer side by side in "merged_model".
merged_model.save_pretrained("merged_model", safe_serialization=True)
tokenizer.save_pretrained("merged_model")

## Inference

In [None]:
def generate_prompt(
    conversation: str, system_prompt: str = DEFAULT_SYSTEM_PROMPT
) -> str:
    """Assemble an instruction-style prompt that ends with an open response section.

    Args:
        conversation: Input text; surrounding whitespace is stripped.
        system_prompt: Instruction placed on the first line of the prompt.

    Returns:
        The prompt text, ending with the "### Response:" header and no
        trailing newline.
    """
    sections = (
        f"### Instruction: {system_prompt}",
        "",
        "### Input:",
        conversation.strip(),
        "",
        "### Response:",
    )
    return "\n".join(sections)

In [None]:
# Build the small evaluation table that the example cells below read through
# `test_df.iloc[0]`. The column names match what those cells access:
#   summary      -> the reference response from the dataset
#   conversation -> the question text
#   prompt       -> the question wrapped by generate_prompt
test_df = pd.DataFrame(
    [
        {
            "summary": data_point["response"],
            "conversation": data_point["question"],
            "prompt": generate_prompt(data_point["question"]),
        }
        for data_point in dataset["test"].select(range(5))
    ]
)
test_df

#### Inference with Base Model

In [None]:
model, tokenizer = create_model_and_tokenizer()

In [None]:
def summarize(model, text: str):
    """Generate up to 256 new tokens for `text` and return only the generated part.

    Relies on the notebook-level `tokenizer` and `DEVICE`.

    Args:
        model: Object exposing `generate(...)`.
        text: Prompt to continue.

    Returns:
        The decoded text of the tokens produced after the prompt, with
        special tokens skipped.
    """
    encoded = tokenizer(text, return_tensors="pt").to(DEVICE)
    # Number of prompt tokens, used to slice the prompt off the output.
    prompt_length = len(encoded["input_ids"][0])

    with torch.inference_mode():
        # NOTE(review): `temperature` presumably only matters when sampling is
        # enabled — confirm whether the generation config turns sampling on.
        generated = model.generate(**encoded, max_new_tokens=256, temperature=0.0001)

    new_tokens = generated[0][prompt_length:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)

#### Example 1

In [None]:
example = test_df.iloc[0]
print(example.conversation)

In [None]:
print(example.summary)

In [None]:
%%time
# Query the base model with a hand-written question.
# NOTE(review): the header is spelled "QUESTON" — confirm whether that matches
# the question format used in the dataset or is a typo for "QUESTION".
summary = summarize(model, "### QUESTON:\n How can I prepare for early retirement? Given the following information: debt: 0, income: 6000/month, expenses: 3000/month, stock_market_knowledge: advanced, investment_risk: medium, interest_sectors: ['finance', 'technology']" )

In [None]:
pprint(summary)

### Inference with the Fine-tuned Model

In [None]:
# Wrap the current `model` with the adapter stored in OUTPUT_DIR; `model` is
# rebound, so later cells use the adapter-wrapped model.
model = PeftModel.from_pretrained(model, OUTPUT_DIR)


#### Example 1

In [None]:
example = test_df.iloc[0]
pprint(example.summary)

In [None]:
print(example.conversation)

In [None]:
%%time
# Query the adapter-wrapped model with a hand-written question.
# NOTE(review): "QUESTON" and "begginer" are spelled this way in the prompt —
# confirm whether they mirror the dataset's wording or are typos.
summary = summarize(model, "### QUESTON:\n Should I prioritize paying off debt or investing? Given the following information: debt: 5000, income: 6000/month, expenses: 3000/month, stock_market_knowledge: begginer, investment_risk: low, interest_sectors: ['tech', 'health']" )

In [None]:
pprint(summary)

In [None]:
pprint(summary.strip().split("\n")[0])

### Save Model

In [None]:
notebook_login()

In [None]:

# Upload the model and the tokenizer to the "hacktx-finance-hack" Hub repo
# using the stored login token.
# NOTE(review): `model` was last rebound to the PeftModel wrapper in the
# fine-tuned inference section, so presumably only the adapter is uploaded,
# not the merged weights — confirm this is intended.
model.push_to_hub(
    "hacktx-finance-hack", use_auth_token=True
)
tokenizer.push_to_hub(
    "hacktx-finance-hack", use_auth_token=True
)
# model.push_adapter_to_hub("finance-bot-13b")

## References

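- https://huggingface.co/datasets/Salesforce/dialogstudio
- https://huggingface.co/meta-llama/Llama-2-7b-hf
- https://huggingface.co/datasets/SebRincon/finance-bot (dataset used in this notebook)
- https://huggingface.co/NousResearch/Llama-2-13b-chat-hf (base model used in this notebook)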