# Set-up

In [1]:
!pip install -Uqqq pip
!pip install -qqq bitsandbytes
!pip install -qqq torch
!pip install -qqq -U git+https://github.com/huggingface/transformers.git
!pip install -qqq -U git+https://github.com/huggingface/peft.git
!pip install -qqq -U git+https://github.com/huggingface/accelerate.git
!pip install -qqq datasets
!pip install -qqq loralib
!pip install -qqq einops
!pip install -qqq mlflow

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.6/92.6 MB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0m
[0m  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Building wheel for transformers (pyproject.toml) ... [?25l[?25hdone
[0m  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m265.7/265.7 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for peft (pyproject.toml) ... [?25l[?25hdone
[0m  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml

In [2]:
import json
import os
from pprint import pprint
import bitsandbytes as bnb
import torch
import torch.nn as nn
import transformers
from datasets import load_dataset, Dataset
from huggingface_hub import notebook_login
from peft import (
    LoraConfig,
    PeftConfig,
    PeftModel,
    get_peft_model,
    prepare_model_for_kbit_training
)
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig
)
import mlflow

import locale
locale.getpreferredencoding = lambda: "UTF-8"

os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Prep dataset

In [3]:
import pandas as pd
df = pd.read_json("/content/merged_data_clean.json", orient="records", dtype={"tech_level": str})
df

Unnamed: 0,text,tech_level
0,Maîtrise avancée de Tableau et Python requise.,"{'Tableau': 'Expert', 'Python': 'Expert'}"
1,Connaissance intermédiaire en SQL et Power BI.,"{'SQL': 'Intermédiaire', 'Power BI': 'Interméd..."
2,Expérience avancée avec SAS et R pour l'analys...,"{'SAS': 'Expert', 'R': 'Expert'}"
3,Bonne maîtrise de Git et GitHub.,"{'Git': 'Intermédiaire', 'Github': 'Intermédia..."
4,Niveau débutant en HTML et CSS.,"{'HTML': 'Débutant', 'CSS': 'Débutant'}"
...,...,...
213,"Développeur créatif, explorant les facettes de...","{'REXX': 'Débutant', 'SSH': 'Débutant', 'SQL':..."
214,"Architecte système intermédiaire, jonglant ave...","{'SNMP': 'Intermédiaire', 'ITAC': 'Intermédiai..."
215,"Ingénieur réseau débutant, découvrant les conf...","{'VLAN': 'Débutant', 'ACL': 'Débutant'}"
216,"Analyste IT intermédiaire, maîtrisant les méan...","{'Jira': 'Intermédiaire', 'Bitbucket': 'Interm..."


In [4]:
data = Dataset.from_pandas(df)

In [8]:
data

Dataset({
    features: ['text', 'tech_level'],
    num_rows: 218
})

In [52]:
# Split the dataset train-test
dataset = data.train_test_split(test_size=0.2, seed=87)
# Split train again to get valid
training_dataset = dataset['train'].train_test_split(test_size=0.2, seed=87)

# Now you have a dictionary with 'train' and 'test' keys
train_dataset = training_dataset['train']
valid_dataset = training_dataset['test']
test_dataset = dataset['test']

In [53]:
valid_dataset[10]

{'text': 'Administrateur système compétent en Linux, avec une expertise approfondie en Docker et des compétences débutantes en Kubernetes.',
 'tech_level': "{'Linux': 'Expert', 'Docker': 'Expert', 'Kubernetes': 'Débutant'}"}

# Load model and tokenizer - Mistral

In [9]:
# Not necessary for Mistral
# notebook_login()

In [54]:
# MODEL_NAME = "vilsonrodrigues/falcon-7b-instruct-sharded"
# MODEL_NAME = "baffo32/decapoda-research-llama-7B-hf"
# MODEL_NAME = "euclaise/falcon_1b_stage1"
# MODEL_NAME = "tiiuae/falcon-rw-1b"
# MODEL_NAME = "mistralai/Mistral-7B-v0.1"
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.2"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    trust_remote_code=True,
    quantization_config=bnb_config
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Setting pad_token in tokenizer and model, as Mistral model has no defined pad token
# tokenizer.pad_token = tokenizer.eos_token
tokenizer.pad_token = tokenizer.unk_token  # Better to use <UNK> token to avoid confusion with EOS and over generation
model.config.pad_token_id = tokenizer.pad_token_id

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/596 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.46k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

In [64]:
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

In [65]:
lora_config = LoraConfig(
    base_model_name_or_path = MODEL_NAME,
    r=16,
    lora_alpha=32,      # usually 2*r
    # target_modules=["query_key_value"],
      target_modules=[      # For Mistral
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
        "lm_head",
    ],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

In [66]:
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

trainable params: 42,520,576 || all params: 7,284,252,672 || trainable%: 0.583732853796316


# Test original model

In [67]:
model.generation_config

GenerationConfig {
  "bos_token_id": 1,
  "eos_token_id": 2
}

In [75]:
generation_config = model.generation_config
generation_config.max_new_tokens = 100
# generation_config.temperature = 0.1
# generation_config.top_p = 0.9
generation_config.num_return_sequences = 1
# generation_config.pad_token_id = tokenizer.eos_token_id
generation_config.pad_token_id = tokenizer.pad_token_id  # Setting pad token for model as the tokenizer pad token
generation_config.eos_token_id = tokenizer.eos_token_id
# generation_config.do_sample=True
generation_config.do_sample=False   # going greedy for reproducibility

In [76]:
generation_config

GenerationConfig {
  "bos_token_id": 1,
  "eos_token_id": 2,
  "max_new_tokens": 100,
  "pad_token_id": 0,
  "temperature": 0.1,
  "top_p": 0.9
}

In [77]:
def make_predictions(prompt, model, generation_config):
  encoding = tokenizer(prompt, return_tensors="pt").to(device)

  with torch.inference_mode():
    outputs = model.generate(
        # input_ids = encoding.input_ids,
        # attention_mask = encoding.attention_mask,
        **encoding,
        generation_config = generation_config
    )

  print(tokenizer.decode(outputs[0], skip_special_tokens=False))

In [78]:
instruct_prompt = f"""
[INST] {valid_dataset[10]['text']} [/INST]
""".strip()
print(instruct_prompt)

[INST] Administrateur système compétent en Linux, avec une expertise approfondie en Docker et des compétences débutantes en Kubernetes. [/INST]


In [79]:
%%time
device = "cuda:0"

make_predictions(instruct_prompt, model, generation_config)

<s> [INST] Administrateur système compétent en Linux, avec une expertise approfondie en Docker et des compétences débutantes en Kubernetes. [/INST] As a competent Linux system administrator with advanced expertise in Docker and beginner-level skills in Kubernetes, I can perform the following tasks:

1. Manage and maintain Linux servers: I can install, configure, and update Linux servers, as well as perform routine maintenance tasks such as patching, backups, and monitoring.

2. Manage Docker containers: I can create, deploy, and manage Docker containers on Linux servers. I can also manage
CPU times: user 13.1 s, sys: 177 ms, total: 13.3 s
Wall time: 13.3 s


In [80]:
chat_prompt = f"""
[INST] Maîtrise avancée de Tableau et Python requise, avec R de base. [/INST]
{{'Tableau': 'Expert', 'Python': 'Expert', 'R', 'débutant'}}</s>
[INST] {valid_dataset[10]['text']} [/INST]
""".strip()
print(chat_prompt)

[INST] Maîtrise avancée de Tableau et Python requise, avec R de base. [/INST]
{'Tableau': 'Expert', 'Python': 'Expert', 'R', 'débutant'}</s>
[INST] Administrateur système compétent en Linux, avec une expertise approfondie en Docker et des compétences débutantes en Kubernetes. [/INST]


In [81]:
%%time
device = "cuda:0"

make_predictions(chat_prompt, model, generation_config)

<s> [INST] Maîtrise avancée de Tableau et Python requise, avec R de base. [/INST]
{'Tableau': 'Expert', 'Python': 'Expert', 'R', 'débutant'}</s> 
[INST] Administrateur système compétent en Linux, avec une expertise approfondie en Docker et des compétences débutantes en Kubernetes. [/INST] {'Linux': 'Expert', 'Docker': 'Expert', 'Kubernetes': 'Beginner'}

An experienced Linux system administrator with an expert-level understanding of Docker and beginner-level skills in Kubernetes.</s>
CPU times: user 6.84 s, sys: 282 ms, total: 7.12 s
Wall time: 7.14 s


# Finetune the model

In [82]:
def generate_prompt(data_point):
  return f"""
[INST] {data_point["text"]} [/INST]
{data_point["tech_level"]}</s>
""".strip()

def generate_and_tokenize_prompt(data_point):
  full_prompt = generate_prompt(data_point)
  tokenized_full_prompt = tokenizer(full_prompt, padding=True, truncation=True)
  return tokenized_full_prompt

In [83]:
# No shuffle, since done in split.     train_data = train_dataset.shuffle().map(generate_and_tokenize_prompt)
train_data = train_dataset.map(generate_and_tokenize_prompt)
valid_data = valid_dataset.map(generate_and_tokenize_prompt)
test_data = test_dataset.map(generate_and_tokenize_prompt)

Map:   0%|          | 0/139 [00:00<?, ? examples/s]

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Map:   0%|          | 0/35 [00:00<?, ? examples/s]

Map:   0%|          | 0/44 [00:00<?, ? examples/s]

In [84]:
training_args = transformers.TrainingArguments(
      per_device_train_batch_size=1,
      gradient_accumulation_steps=4,
      num_train_epochs=3,
      learning_rate=2e-4,
      fp16=True,
      save_total_limit=2,
      logging_steps=1,
      output_dir="experiments",
      optim="paged_adamw_8bit",
      lr_scheduler_type="cosine",
      warmup_ratio=0.05,
      load_best_model_at_end=True,    # For early stopping callback
      evaluation_strategy="steps",    # possible values include steps and epoch
      save_strategy="steps",          # Needs to be same as evaluation strategy
      save_steps = 1,
      eval_steps = 1,
      metric_for_best_model = "eval_loss",
)

trainer = transformers.Trainer(
    model=model,
    train_dataset=train_data,
    eval_dataset=valid_data,
    args=training_args,
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
    callbacks=[transformers.EarlyStoppingCallback(early_stopping_patience=3)]
)
model.config.use_cache = False

In [85]:
trainer.train()



Step,Training Loss,Validation Loss
1,6.2099,6.061365
2,7.3449,6.061365
3,5.7341,6.061365
4,5.0706,5.448608
5,5.0937,4.336911
6,3.7908,3.370956
7,3.0457,2.586284
8,2.1933,2.042029
9,1.8791,1.748343
10,1.5751,1.532567




TrainOutput(global_step=33, training_loss=1.8013469784548788, metrics={'train_runtime': 690.5005, 'train_samples_per_second': 0.604, 'train_steps_per_second': 0.148, 'total_flos': 303781276778496.0, 'train_loss': 1.8013469784548788, 'epoch': 0.95})

# Run the finetuned model

In [86]:
%%time
device = "cuda:0"

make_predictions(instruct_prompt, model, generation_config)



<s> [INST] Administrateur système compétent en Linux, avec une expertise approfondie en Docker et des compétences débutantes en Kubernetes. [/INST]
{'Linux': 'Expert', 'Docker': 'Expert', 'Kubernetes': 'Débutant'}</s>
CPU times: user 3.72 s, sys: 190 ms, total: 3.91 s
Wall time: 3.97 s


In [31]:
valid_dataset[10]

{'text': 'Administrateur système compétent en Linux, avec une expertise approfondie en Docker et des compétences débutantes en Kubernetes.',
 'tech_level': "{'Linux': 'Expert', 'Docker': 'Expert', 'Kubernetes': 'Débutant'}"}

In [88]:
%%time
device = "cuda:0"

make_predictions(chat_prompt, model, generation_config)

<s> [INST] Maîtrise avancée de Tableau et Python requise, avec R de base. [/INST]
{'Tableau': 'Expert', 'Python': 'Expert', 'R', 'débutant'}</s> 
[INST] Administrateur système compétent en Linux, avec une expertise approfondie en Docker et des compétences débutantes en Kubernetes. [/INST]
{'Linux': 'Expert', 'Docker': 'Expert', 'Kubernetes': 'Débutant'}</s>
CPU times: user 3.56 s, sys: 298 ms, total: 3.86 s
Wall time: 3.86 s


In [89]:
# Clarified beginner level for Bash and Python
prompt = """
[INST] Expert en architecture système, façonnant des architectures robustes avec Cloud Computing et ISO. Spécialisé dans la sécurité avec CERT et Forcepoint, et jonglant avec les défis complexes de SDH et LACP. Débutant en scripting avec Bash et Python. [/INST]
""".strip()

In [90]:
%%time
device = "cuda:0"

make_predictions(prompt, model, generation_config)

<s> [INST] Expert en architecture système, façonnant des architectures robustes avec Cloud Computing et ISO. Spécialisé dans la sécurité avec CERT et Forcepoint, et jonglant avec les défis complexes de SDH et LACP. Débutant en scripting avec Bash et Python. [/INST]
{'Cloud Computing': 'Expert', 'ISO': 'Expert', 'CERT': 'Expert', 'Forcepoint': 'Expert', 'SDH': 'Expert', 'LACP': 'Expert', 'Bash': 'Débutant', 'Python': 'Débutant'}</s>
CPU times: user 9.29 s, sys: 294 ms, total: 9.58 s
Wall time: 9.65 s


In [91]:
# Shorter sentences
prompt = """
[INST] Expert en Tableau, mais débutant en Python. Utilse Bash depuis plus de 20 ans. [/INST]
""".strip()

In [92]:
%%time
device = "cuda:0"

make_predictions(prompt, model, generation_config)

<s> [INST] Expert en Tableau, mais débutant en Python. Utilse Bash depuis plus de 20 ans. [/INST]
{'Tableau': 'Expert', 'Python': 'Débutant', 'Bash': 'Expert'}</s>
CPU times: user 3.46 s, sys: 192 ms, total: 3.65 s
Wall time: 3.64 s


In [None]:
type(model)

peft.peft_model.PeftModelForCausalLM

# Export MLFLow


## Logging PEFT models (trials)

In [None]:
from accelerate import Accelerator

# Initialize the accelerator (to unwrap model)
accelerator = Accelerator()

with mlflow.start_run(run_id="83aebfb3d3324f2392c752248e880392"):

    # Unwrap the model
    unwrap_model = accelerator.unwrap_model(model)

    # Log the model
    mlflow.pytorch.log_model(unwrap_model, "model")

PicklingError: ignored

In [None]:
from accelerate import Accelerator

# Initialize the accelerator
accelerator = Accelerator()

# Start a run
with mlflow.start_run(run_id="your_run_id"):
    # Train your model
    trainer.train()

    # Unwrap the model
    model = accelerator.unwrap_model(trainer.model)

    # Save the model after training
    torch.save(model.state_dict(), "path/to/save/model")

    # Log the model
    mlflow.pytorch.log_model(model, "model")

## Downloading files

In [98]:
!zip -r /content/trained-adapter.zip /content/experiments/checkpoint-33/

  adding: content/experiments/checkpoint-33/ (stored 0%)
  adding: content/experiments/checkpoint-33/optimizer.pt (deflated 17%)
  adding: content/experiments/checkpoint-33/README.md (deflated 66%)
  adding: content/experiments/checkpoint-33/trainer_state.json (deflated 83%)
  adding: content/experiments/checkpoint-33/adapter_config.json (deflated 52%)
  adding: content/experiments/checkpoint-33/adapter_model.safetensors (deflated 42%)
  adding: content/experiments/checkpoint-33/rng_state.pth (deflated 25%)
  adding: content/experiments/checkpoint-33/scheduler.pt (deflated 56%)
  adding: content/experiments/checkpoint-33/training_args.bin (deflated 51%)


# Try saving and loading

In [None]:
model.save_pretrained("trained-model")



In [None]:
del(model)

In [None]:
%%time
device = "cuda:0"

make_predictions(prompt, model, generation_config)

NameError: ignored

## Loading method 1

In [None]:
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    trust_remote_code=True,
    quantization_config=bnb_config
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
tokenizer.pad_token = tokenizer.eos_token

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [None]:
model = PeftModel.from_pretrained(model, "trained-model")

## Loading method 2

In [None]:
model2 = AutoModelForCausalLM.from_pretrained("trained-model")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [None]:
%%time
device = "cuda:0"

make_predictions(prompt, model, generation_config)

[INST] Expert en Tableau, mais débutant en Python. Bonnes connaissances Bash. [/INST]
{'Tableau': 'Expert', 'Python': 'Débutant', 'Bash': 'Débutant'}  {'Tableau': 'Expert', 'Python': 'Débutant', 'B
CPU times: user 6.71 s, sys: 311 ms, total: 7.02 s
Wall time: 7.05 s


# Serving with vLLM

In [94]:
#Merge the adapter with the base model
model = model.merge_and_unload()

#Save the merged model in a directory in the safetensors format
model_dir = "merged_model"
model.save_pretrained(model_dir, safe_serialization=True)

#Save the custom tokenizer in the same directory
tokenizer.save_pretrained(model_dir)


Thrown during validation:
`do_sample` is set to `False`. However, `temperature` is set to `0.1` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `temperature`.


('merged_model/tokenizer_config.json',
 'merged_model/special_tokens_map.json',
 'merged_model/tokenizer.json')

In [97]:
!pip install -qqq vllm

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.7/23.7 MB[0m [31m34.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m823.6/823.6 kB[0m [31m35.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.1/14.1 MB[0m [31m64.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m410.6/410.6 MB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m121.6/121.6 MB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.5/56.5 MB[0m [31m19.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m124.2/124.2 MB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━

In [99]:
from vllm import LLM, SamplingParams

llm = LLM(model=model_dir)

ValueError: ignored

# Save trained model to Hugging Face

In [None]:
# PEFT_MODEL = "vachonni/midjourney-falcon-7b"

# model.push_to_hub(
#     PEFT_MODEL, use_auth_token=True
# )

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

adapter_model.bin:   0%|          | 0.00/18.9M [00:00<?, ?B/s]

adapter_model.bin:   0%|          | 0.00/18.9M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/jzdesign/midjourney-falcon-7b/commit/9067710f555feca1d2733005e8a108ff85b62a4c', commit_message='Upload model', commit_description='', oid='9067710f555feca1d2733005e8a108ff85b62a4c', pr_url=None, pr_revision=None, pr_num=None)

In [None]:
# config = PeftConfig.from_pretrained(PEFT_MODEL)
# model = AutoModelForCausalLM.from_pretrained(
#     config.base_model_name_or_path,
#     return_dict=True,
#     quantization_config=bnb_config,
#     device_map="auto",
#     trust_remote_code=True
# )

# tokenizer=AutoTokenizer.from_pretrained(config.base_model_name_or_path)
# tokenizer.pad_token = tokenizer.eos_token

# model = PeftModel.from_pretrained(model, PEFT_MODEL)

Downloading (…)/adapter_config.json:   0%|          | 0.00/436 [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/15 [00:00<?, ?it/s]

Downloading adapter_model.bin:   0%|          | 0.00/18.9M [00:00<?, ?B/s]