In [2]:
from IPython.display import clear_output

In [None]:
!pip install transformers peft datasets evaluate huggingface_hub trl omegaconf rouge_score --upgrade
clear_output()

In [3]:
from kaggle_secrets import UserSecretsClient
import os
api_keys = UserSecretsClient()

ModuleNotFoundError: No module named 'kaggle_secrets'

In [None]:
os.system(f'wandb login {api_keys.get_secret("wandb")}')

In [3]:
import os
os.system(f'huggingface-cli login --token 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')

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
Your token has been saved to /home/shkodnik/.cache/huggingface/token
Login successful


0

In [4]:
import numpy as np
import pandas as pd
import scipy
import matplotlib.pyplot as plt

import torch
from torch import nn
import torch.nn.functional as F

import transformers
from transformers import AutoModel, AutoTokenizer, AutoModelForSequenceClassification, AutoModelForCausalLM, AutoModelForSeq2SeqLM, BertTokenizer, BertModel 
from transformers import TrainingArguments, Trainer, Seq2SeqTrainingArguments, Seq2SeqTrainer, GenerationConfig, DataCollatorWithPadding
from transformers import pipeline
from peft import BOFTConfig, get_peft_model, LoraConfig, TaskType
from datasets import load_dataset
import evaluate
from trl import SFTConfig, SFTTrainer, DataCollatorForCompletionOnlyLM

import wandb
from omegaconf import OmegaConf

# from deepeval.benchmarks import MMLU
# from deepeval.benchmarks.tasks import MMLUTask
# from deepeval.models.base_model import DeepEvalBaseLLM

import pickle
import tqdm.notebook as tqdm

clear_output()

# Massive multitask language understanding (MMLU benchmark)

In [5]:
# Loading MMLU categories
if not os.path.exists('./categories.py'):
    !wget https://raw.githubusercontent.com/hendrycks/test/master/categories.py

from categories import subcategories, categories as categories_inv

In [6]:
for subcat_name, cat_names in subcategories.items():
    subcategories[subcat_name] = cat_names[0] if isinstance(cat_names, list) else cat_names
    
categories = {}

for cat_name, subcats in categories_inv.items():
    for subcat in subcats:
        categories[subcat] = cat_name

In [7]:
def subcat_to_cat(subcat):
    cat_name = subcategories[subcat]
    cat_name = categories[cat_name]
    
    return cat_name

In [7]:
config = OmegaConf.create({
    'model_name':   'mistralai/Mistral-7B-Instruct-v0.3',
    'padding_side': 'left',
    'task_name':    'all',
    'max_length':   256,
    'n_shots': 2,
    'fp16': True,
    'bf16': False,
    'ft_strategy': 'LoRA',
    'LoRA_config': {
        'r': 16, 
        'lora_alpha': 32, 
        'lora_dropout': 0.05,
        'target_modules': ['q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj'],
    },
    'BOFT_config': {  # m=2, b=8
        'boft_block_size': 8,
#         'boft_block_num': 8,
        'boft_n_butterfly_factor': 1,
        'bias': 'none',
        'target_modules': ['q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj'],
        'boft_dropout': 0.05,
    },
    'evaluation_config':{
        'num_splits': 20,
        'max_new_tokens': 4,
        'batch_size': 1,
        'empty_cache': True,
    },
    'trainer_config': {
        'output_dir': "bogachevv/Llama-3-8b-MMLU",
        'max_seq_length': 512,
        'dataset_text_field': 'text',
        'fp16': True,
        'full_determinism': False,
        'per_device_train_batch_size': 1,
        'per_device_eval_batch_size':  1,
        'gradient_accumulation_steps': 8,
        'lr_scheduler_type': 'cosine_with_restarts',
        'lr_scheduler_kwargs':{
            'num_cycles': 6,
        },
        'warmup_steps': 100,
#         'num_train_epochs': 2,
        'learning_rate': 1e-4,
        'max_steps': 2048,
        'weight_decay': 0.01,
#         'warmup_ratio': 1e-2,
        'dataloader_num_workers': 2,
        'eval_strategy': "steps",
#         'torch_empty_cache_steps': 16,
        'eval_steps': 16,
        'logging_steps': 16,
        'load_best_model_at_end': True,
        'seed': 42,
        'data_seed': 42,
        'report_to': 'wandb',
#         'predict_with_generate': True,
#         'push_to_hub': True,
#         'hub_model_id': 'LLama-LoRA-test',
#         'hub_strategy': 'checkpoint',
#         'save_strategy': "steps",
        'save_steps': 128,
    },
})

In [8]:
type(config)

omegaconf.dictconfig.DictConfig

In [9]:
from pipelines import utils
device = utils.set_device(2)

There are 8 GPU(s) available.
We will use the GPU: NVIDIA A100-SXM4-80GB


In [10]:
tokenizer = AutoTokenizer.from_pretrained(
    config.model_name, 
    padding_side=config.padding_side,
#     model_max_length=512,
    device_map=device
)
tokenizer.pad_token = tokenizer.eos_token
EOS_TOKEN = tokenizer.eos_token

mmlu_dataset =  load_dataset("cais/mmlu", config.task_name)

In [11]:
# Try block for clear_output() call iff succes
try:
    few_shot_datasets = {
        subject: mmlu_dataset['dev'].filter(lambda row: row['subject'] == subject)
        for subject in set(mmlu_dataset['dev']['subject'])
    }
    
    clear_output()
    print('Succes')
except:
    raise

Succes


In [12]:
def prepare_question(examples):
    prompt = f"{examples['question']}\n"
    for letter, choice in zip(('A', 'B', 'C', 'D'), examples['choices']):
        prompt += f"{letter}. {choice}\n"

    answer = chr(65 + examples['answer'])
    
    return prompt, answer

def prepare_prompt(examples, dev_dataset = None):
    if dev_dataset:
        yield from map(prepare_question, dev_dataset)
    
    yield prepare_question(examples)

In [13]:
def prepare_instruction_text(example):
    instructions = [
        {"role": "system", "content": f"The following are multiple choice questions (with answers) about {example['subject']}. Output 'A', 'B', 'C', or 'D'. Full answer not needed."},
    ]

    if config['n_shots'] and example['subject']:
        few_shot_dataset = few_shot_datasets[example['subject']]
        few_shot_dataset = few_shot_dataset.select(range(config['n_shots']))
    else:
        few_shot_dataset = None
    
    for prompt, ans in prepare_prompt(example, dev_dataset=few_shot_dataset):
        instructions.append({"role": "user", "content": prompt})
        instructions.append({"role": "assistant", "content": ans})
    
    text = tokenizer.apply_chat_template(
        instructions,
        tokenize=False
    )
    
    return {'text': text}

In [14]:
def r_replace(line, old, new):
    return line[::-1].replace(old[::-1], new[::-1], 1)[::-1]

def remove_answer(example):
    text_wa_answer = example['text']
    text_wa_answer = text_wa_answer.rsplit('<|eot_id|>', 1)[0][:-1]
    
    # for letter in ('A', 'B', 'C', 'D'):
        # text_wa_answer = text_wa_answer.replace(f'<|start_header_id|>assistant<|end_header_id|>\n\n{letter}<|eot_id|>', '<|start_header_id|>assistant<|end_header_id|>\n\n')
        # text_wa_answer = r_replace(text_wa_answer, f'<|start_header_id|>assistant<|end_header_id|>\n\n{letter}<|eot_id|>', '<|start_header_id|>assistant<|end_header_id|>\n\n')
    
    return {'text_wa_answer': text_wa_answer}

In [15]:
instructions_datasets = mmlu_dataset.map(prepare_instruction_text, batched=False, num_proc=2)
instructions_datasets['validation'] = instructions_datasets['validation'].map(remove_answer, batched=False)
instructions_datasets['test'] = instructions_datasets['test'].map(remove_answer, batched=False)

instructions_datasets.set_format("torch")

instructions_datasets

Map (num_proc=2): 100%|██████████| 14042/14042 [00:09<00:00, 1415.16 examples/s]
Map (num_proc=2): 100%|██████████| 1531/1531 [00:01<00:00, 1225.04 examples/s]
Map (num_proc=2): 100%|██████████| 285/285 [00:00<00:00, 735.33 examples/s]
Map (num_proc=2): 100%|██████████| 99842/99842 [00:11<00:00, 9037.77 examples/s] 
Map: 100%|██████████| 1531/1531 [00:00<00:00, 11938.79 examples/s]
Map: 100%|██████████| 14042/14042 [00:01<00:00, 13648.69 examples/s]


DatasetDict({
    test: Dataset({
        features: ['question', 'subject', 'choices', 'answer', 'text', 'text_wa_answer'],
        num_rows: 14042
    })
    validation: Dataset({
        features: ['question', 'subject', 'choices', 'answer', 'text', 'text_wa_answer'],
        num_rows: 1531
    })
    dev: Dataset({
        features: ['question', 'subject', 'choices', 'answer', 'text'],
        num_rows: 285
    })
    auxiliary_train: Dataset({
        features: ['question', 'subject', 'choices', 'answer', 'text'],
        num_rows: 99842
    })
})

In [16]:
print(instructions_datasets['validation'][1]['text'])

<s>[INST] Find all c in Z_3 such that Z_3[x]/(x^2 + c) is a field.
A. 0
B. 1
C. 2
D. 3
[/INST] B</s>[INST] Statement 1 | If aH is an element of a factor group, then |aH| divides |a|. Statement 2 | If H and K are subgroups of G then HK is a subgroup of G.
A. True, True
B. False, False
C. True, False
D. False, True
[/INST] B</s>[INST] Find the order of the factor group Z_6/<3>.
A. 2
B. 3
C. 6
D. 12
[/INST] B</s>


In [17]:
print(instructions_datasets['validation'][1]['text_wa_answer'])

<s>[INST] Find all c in Z_3 such that Z_3[x]/(x^2 + c) is a field.
A. 0
B. 1
C. 2
D. 3
[/INST] B</s>[INST] Statement 1 | If aH is an element of a factor group, then |aH| divides |a|. Statement 2 | If H and K are subgroups of G then HK is a subgroup of G.
A. True, True
B. False, False
C. True, False
D. False, True
[/INST] B</s>[INST] Find the order of the factor group Z_6/<3>.
A. 2
B. 3
C. 6
D. 12
[/INST] B</s


In [18]:
# Accessing the train, validation, and test splits
validation_dataset = instructions_datasets["validation"]
test_dataset = instructions_datasets["test"]
dev_dataset = instructions_datasets["dev"]  # dataset for few shot
auxiliary_train_dataset  = instructions_datasets['auxiliary_train']

# Check the size of each split
print(f"Validation dataset size: {len(validation_dataset)}")
print(f"Test dataset size: {len(test_dataset)}")
print(f"Dev dataset size: {len(dev_dataset)}")
print(f"Auxiliary train dataset size: {len(auxiliary_train_dataset)}")

Validation dataset size: 1531
Test dataset size: 14042
Dev dataset size: 285
Auxiliary train dataset size: 99842


In [19]:
test_dataset

Dataset({
    features: ['question', 'subject', 'choices', 'answer', 'text', 'text_wa_answer'],
    num_rows: 14042
})

In [20]:
model = AutoModelForCausalLM.from_pretrained(
    config.model_name,
    device_map=device,
    torch_dtype=torch.float16,
)

Loading checkpoint shards: 100%|██████████| 3/3 [00:36<00:00, 12.14s/it]


In [21]:
accuracy_metric = evaluate.load("accuracy")

def process_prediction(pred):
    pred = pred['generated_text']
    
    pred = pred.strip().upper()
    
    pred = pred[0] if pred else 'I'
    pred = pred if pred in {'A', 'B', 'C', 'D'} else 'I'
    
    return pred

def compute_accuracy(model_preds, labels):   
    model_preds = list(map(process_prediction, model_preds))
    
    model_preds  = torch.LongTensor(list(map(ord, model_preds)))
    actual_labels = ord('A') + labels
    incorrect_labels = actual_labels.new_full(actual_labels.shape, ord('I'))
    
#     print(f"{model_preds=}\n{actual_labels=}\n{incorrect_labels=}")
    
    acc_res = accuracy_metric.compute(predictions=model_preds, references=actual_labels)['accuracy']
    corr_res = 1.0 - accuracy_metric.compute(predictions=model_preds, references=incorrect_labels)['accuracy']
    
    return {'accuracy': acc_res, 'correctness': corr_res}

In [22]:
model.model.layers[0].self_attn.q_proj.weight.shape[0]

4096

In [None]:
# adapter_config = BOFTConfig(
#     task_type=TaskType.CAUSAL_LM,
#     inference_mode=False,
#     boft_block_size=8,
# #     boft_block_num=16,
#     boft_n_butterfly_factor=2,
#     bias='none',
# )

# adapter_config

# model_adapter = get_peft_model(model, adapter_config)    

In [None]:
# model_adapter.print_trainable_parameters()

In [None]:
# model_adapter

In [None]:
adapter_config = BOFTConfig(
        task_type=TaskType.CAUSAL_LM,
        inference_mode=False,
        **OmegaConf.to_object(config.BOFT_config)
    )

model_adapter = get_peft_model(model, adapter_config)   

In [31]:
if config.ft_strategy == 'LoRA':
    adapter_config = LoraConfig(
        task_type=TaskType.CAUSAL_LM,
        inference_mode=False, 
        **OmegaConf.to_object(config.LoRA_config),
    )
elif config.ft_strategy == 'BOFT':
    adapter_config = BOFTConfig(
        task_type=TaskType.CAUSAL_LM,
        inference_mode=False,
        **OmegaConf.to_object(config.BOFT_config)
    )
else:
    raise ValueError('Incorrect FT type')

model_adapter = get_peft_model(model, adapter_config)    
model_adapter.print_trainable_parameters()

Using /home/shkodnik/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...
Creating extension directory /home/shkodnik/.cache/torch_extensions/py311_cu121/fbd_cuda...
Detected CUDA files, patching ldflags
Emitting ninja build file /home/shkodnik/.cache/torch_extensions/py311_cu121/fbd_cuda/build.ninja...
Building extension module fbd_cuda...
Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)


[1/3] gcc -MMD -MF fbd_cuda.o.d -DTORCH_EXTENSION_NAME=fbd_cuda -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /home/shkodnik/Sber_Lora/ShkodnikVenv/lib/python3.11/site-packages/torch/include -isystem /home/shkodnik/Sber_Lora/ShkodnikVenv/lib/python3.11/site-packages/torch/include/torch/csrc/api/include -isystem /home/shkodnik/Sber_Lora/ShkodnikVenv/lib/python3.11/site-packages/torch/include/TH -isystem /home/shkodnik/Sber_Lora/ShkodnikVenv/lib/python3.11/site-packages/torch/include/THC -isystem /usr/local/cuda/include -isystem /home/shkodnik/anaconda3/envs/python3.11.7/include/python3.11 -D_GLIBCXX_USE_CXX11_ABI=0 -fPIC -std=c++17 -c /home/shkodnik/Sber_Lora/ShkodnikVenv/lib/python3.11/site-packages/peft/tuners/boft/fbd/fbd_cuda.cpp -o fbd_cuda.o 
[2/3] /usr/local/cuda/bin/nvcc --generate-dependencies-with-compile --dependency-output fbd_cuda_kernel.cuda.o.d -ccbin gcc -DTORCH_EXTENSION_N

Loading extension module fbd_cuda...


trainable params: 11,337,728 || all params: 7,259,361,280 || trainable%: 0.1562


In [23]:
model.eval()

pl = pipeline(
    "text-generation",
    model=model,       # WARNING: model used insted of lora_model
    tokenizer=tokenizer,
    torch_dtype=torch.float16
)

In [24]:
%%time

model_preds = []

with torch.no_grad():
    for i, split in enumerate(np.array_split(np.arange(len(test_dataset)), 20)):
        print(f"Start iteration {i}")
        print(f"\tstart pos: {np.min(split)}\tend pos: {np.max(split)}")
        
        model_pred = pl(
            test_dataset.select(split)['text_wa_answer'],
    #         validation_dataset.shuffle(42).select(range(512))['text_wa_answer'],
    #         validation_dataset['text_wa_answer'],
            return_full_text=False,
            max_new_tokens=4,
            do_sample=False,
            temperature=None,
            top_p=None,
            batch_size=1
        )
        model_preds += model_pred
        torch.cuda.empty_cache()
        
        print(f"Finish iteration {i}")
        print(f"\t{len(model_preds)=}")

model_preds_merged = []
for ls in model_preds:
    model_preds_merged += ls

model_preds = model_preds_merged

# model_preds

Start iteration 0
	start pos: 0	end pos: 702
Finish iteration 0
	len(model_preds)=703
Start iteration 1
	start pos: 703	end pos: 1405
Finish iteration 1
	len(model_preds)=1406
Start iteration 2
	start pos: 1406	end pos: 2107
Finish iteration 2
	len(model_preds)=2108
Start iteration 3
	start pos: 2108	end pos: 2809
Finish iteration 3
	len(model_preds)=2810
Start iteration 4
	start pos: 2810	end pos: 3511
Finish iteration 4
	len(model_preds)=3512
Start iteration 5
	start pos: 3512	end pos: 4213
Finish iteration 5
	len(model_preds)=4214
Start iteration 6
	start pos: 4214	end pos: 4915
Finish iteration 6
	len(model_preds)=4916
Start iteration 7
	start pos: 4916	end pos: 5617
Finish iteration 7
	len(model_preds)=5618
Start iteration 8
	start pos: 5618	end pos: 6319
Finish iteration 8
	len(model_preds)=6320
Start iteration 9
	start pos: 6320	end pos: 7021


You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


Finish iteration 9
	len(model_preds)=7022
Start iteration 10
	start pos: 7022	end pos: 7723
Finish iteration 10
	len(model_preds)=7724
Start iteration 11
	start pos: 7724	end pos: 8425
Finish iteration 11
	len(model_preds)=8426
Start iteration 12
	start pos: 8426	end pos: 9127
Finish iteration 12
	len(model_preds)=9128
Start iteration 13
	start pos: 9128	end pos: 9829
Finish iteration 13
	len(model_preds)=9830
Start iteration 14
	start pos: 9830	end pos: 10531
Finish iteration 14
	len(model_preds)=10532
Start iteration 15
	start pos: 10532	end pos: 11233
Finish iteration 15
	len(model_preds)=11234
Start iteration 16
	start pos: 11234	end pos: 11935
Finish iteration 16
	len(model_preds)=11936
Start iteration 17
	start pos: 11936	end pos: 12637
Finish iteration 17
	len(model_preds)=12638
Start iteration 18
	start pos: 12638	end pos: 13339
Finish iteration 18
	len(model_preds)=13340
Start iteration 19
	start pos: 13340	end pos: 14041
Finish iteration 19
	len(model_preds)=14042
CPU times: 

In [25]:
for i in range(len(model_preds)):
    model_preds[i]['subject'] = test_dataset[i]['subject']
    
model_preds 

[{'generated_text': '>\n\nThe', 'subject': 'abstract_algebra'},
 {'generated_text': '2>\n\n', 'subject': 'abstract_algebra'},
 {'generated_text': '>\n\nTo', 'subject': 'abstract_algebra'},
 {'generated_text': '>', 'subject': 'abstract_algebra'},
 {'generated_text': '>\n\nTo', 'subject': 'abstract_algebra'},
 {'generated_text': '>\n\nStatement', 'subject': 'abstract_algebra'},
 {'generated_text': '>\n\nStatement', 'subject': 'abstract_algebra'},
 {'generated_text': '>\n\nStatement', 'subject': 'abstract_algebra'},
 {'generated_text': '>\n\nThe', 'subject': 'abstract_algebra'},
 {'generated_text': '>\n\nTo', 'subject': 'abstract_algebra'},
 {'generated_text': '>\n\nStatement', 'subject': 'abstract_algebra'},
 {'generated_text': '>', 'subject': 'abstract_algebra'},
 {'generated_text': '>\n\nThe', 'subject': 'abstract_algebra'},
 {'generated_text': '>\n\nTo', 'subject': 'abstract_algebra'},
 {'generated_text': '>\n\nThe', 'subject': 'abstract_algebra'},
 {'generated_text': '>', 'subject': 

In [None]:
with open('./fs_preds.bin', 'wb') as f:
    pickle.dump(
        obj=(model_preds, test_dataset['answer']),
        file=f
    )

In [26]:
preds_df = pd.DataFrame(model_preds)

preds_df['pred'] = preds_df.apply(process_prediction, axis=1)
preds_df['true'] = list(map(lambda v: chr(v + ord('A')), test_dataset['answer']))
preds_df['corr'] = (preds_df['pred'] == preds_df['true']).astype(np.int32)
preds_df['category'] = preds_df['subject'].apply(subcat_to_cat)

preds_df.head(20)

Unnamed: 0,generated_text,subject,pred,true,corr,category
0,>\n\nThe,abstract_algebra,I,B,0,STEM
1,2>\n\n,abstract_algebra,I,C,0,STEM
2,>\n\nTo,abstract_algebra,I,D,0,STEM
3,>,abstract_algebra,I,B,0,STEM
4,>\n\nTo,abstract_algebra,I,B,0,STEM
5,>\n\nStatement,abstract_algebra,I,A,0,STEM
6,>\n\nStatement,abstract_algebra,I,A,0,STEM
7,>\n\nStatement,abstract_algebra,I,D,0,STEM
8,>\n\nThe,abstract_algebra,I,B,0,STEM
9,>\n\nTo,abstract_algebra,I,C,0,STEM


In [1]:
preds_df[['subject', 'corr']].groupby(['subject']).mean()

NameError: name 'preds_df' is not defined

In [28]:
preds_df[['category', 'corr']].groupby(['category']).mean()

Unnamed: 0_level_0,corr
category,Unnamed: 1_level_1
STEM,0.0
humanities,0.0
"other (business, health, misc.)",0.000308
social sciences,0.0


In [29]:
compute_accuracy(model_preds, test_dataset['answer'])

{'accuracy': 7.121492664862555e-05, 'correctness': 7.121492664863283e-05}

In [None]:
assert False

In [33]:
training_args = SFTConfig(
    **OmegaConf.to_object(config.trainer_config),
)

In [34]:
trainer = SFTTrainer(
    model=model_adapter,
    args=training_args,
#     args=SFTConfig(
#         output_dir="/tmp",
#         per_device_train_batch_size=1,
#         per_device_eval_batch_size=2,
#         fp16=True,
#     ),
    train_dataset=auxiliary_train_dataset,
    eval_dataset=validation_dataset.shuffle(42).select(range(64)),
#     formatting_func=formatting_prompts_func,
#     data_collator=collator,
     compute_metrics=compute_accuracy,
)

Map:  19%|█▉        | 19000/99842 [00:18<01:20, 1007.99 examples/s]


KeyboardInterrupt: 

In [None]:
# tokenizer.decode(trainer.train_dataset[0]['input_ids'])

In [None]:
# for batch in trainer.get_train_dataloader():
#     print(tokenizer.batch_decode(batch['input_ids']))
#     break

In [None]:
torch.cuda.empty_cache()

trainer.train()

In [None]:
# torch.cuda.empty_cache()

In [None]:
pl = pipeline(
    "text-generation",
    model=lora_model,
    tokenizer=tokenizer,
)

In [None]:
%%time

model_preds = pl(
    validation_dataset.shuffle(42).select(range(512))['text_wa_answer'],
#     validation_dataset['text_wa_answer'],
    return_full_text=False,
    max_new_tokens=16,
    do_sample=False,
    temperature=None,
    top_p=None,
    batch_size=4,
)
torch.cuda.empty_cache()

model_preds_merged = []
for ls in model_preds:
    model_preds_merged += ls

model_preds = model_preds_merged

# model_preds

In [None]:
for i in range(len(model_preds)):
    model_preds[i]['subject'] = validation_dataset[i]['subject']
    
model_preds

In [None]:
compute_accuracy(model_preds, validation_dataset.select(range(len(model_preds)))['answer'])

In [None]:
lora_model.save_pretrained("./fine_tuned_model")
tokenizer.save_pretrained("./fine_tuned_model")

In [None]:
!zip fine_tuned_model.zip ./fine_tuned_model/*