# Finetune 70B Llama Model

In [1]:
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7 datasets


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.3[0m[39;49m -> [0m[32;49m23.2.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3 -m pip install --upgrade pip[0m


In [2]:
!pip install huggingface_hub -q


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.3[0m[39;49m -> [0m[32;49m23.2.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3 -m pip install --upgrade pip[0m


In [3]:
from huggingface_hub import notebook_login, logout, login, get_token_permission
# logout()

In [4]:
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [93]:
import pandas as pd
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer

In [6]:
# The model that you want to train from the Hugging Face hub
model_name = "NousResearch/Llama-2-7b-chat-hf"
model_name = 'meta-llama/Llama-2-7b-chat-hf'
model_name = 'meta-llama/Llama-2-70b-chat-hf'

# # The instruction dataset to use
# dataset_name = "mlabonne/guanaco-llama2-1k"

# # Fine-tuned model name
# new_model = "llama-2-7b-miniguanaco"


In [7]:

################################################################################
# QLoRA parameters
################################################################################

# LoRA attention dimension
lora_r = 64

# Alpha parameter for LoRA scaling
lora_alpha = 16

# Dropout probability for LoRA layers
lora_dropout = 0.1

################################################################################
# bitsandbytes parameters
################################################################################

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

################################################################################
# TrainingArguments parameters
################################################################################

# Output directory where the model predictions and checkpoints will be stored
output_dir = "./results"

# Number of training epochs
num_train_epochs = 2

# Enable fp16/bf16 training (set bf16 to True with an A100)
fp16 = False
bf16 = False

# Batch size per GPU for training
per_device_train_batch_size = 4

# Batch size per GPU for evaluation
per_device_eval_batch_size = 4

# Number of update steps to accumulate the gradients for
gradient_accumulation_steps = 1

# Enable gradient checkpointing
gradient_checkpointing = True

# Maximum gradient normal (gradient clipping)
max_grad_norm = 0.3

# Initial learning rate (AdamW optimizer)
learning_rate = 2e-4

# Weight decay to apply to all layers except bias/LayerNorm weights
weight_decay = 0.001

# Optimizer to use
optim = "paged_adamw_32bit"

# Learning rate schedule
lr_scheduler_type = "cosine"

# Number of training steps (overrides num_train_epochs)
max_steps = -1

# Ratio of steps for a linear warmup (from 0 to learning rate)
warmup_ratio = 0.03

# Group sequences into batches with same length
# Saves memory and speeds up training considerably
group_by_length = True

# Save checkpoint every X updates steps
save_steps = 0

# Log every X updates steps
logging_steps = 25

################################################################################
# SFT parameters
################################################################################

# Maximum sequence length to use
max_seq_length = None

# Pack multiple short examples in the same input sequence to increase efficiency
packing = False

# Load the entire model on the GPU 0
device_map = {"": 0}

In [8]:
# Load tokenizer and model with QLoRA configuration
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)
print(compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

torch.float16


In [9]:
# Check GPU compatibility with bfloat16
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

Your GPU supports bfloat16: accelerate training with bf16=True


In [10]:
# Load base model
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map=device_map
)
model.config.use_cache = False
model.config.pretraining_tp = 1

Downloading (…)lve/main/config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

Downloading (…)fetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

Downloading (…)of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)neration_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

In [11]:
# Load LLaMA tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right" # Fix weird overflow issue with fp16 training

Downloading (…)okenizer_config.json:   0%|          | 0.00/776 [00:00<?, ?B/s]

Downloading tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

In [12]:
# Load LoRA configuration
peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_r,
    bias="none",
    task_type="CAUSAL_LM",
)

In [13]:
# Set training parameters
training_arguments = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    fp16=fp16,
    bf16=bf16,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=group_by_length,
    lr_scheduler_type=lr_scheduler_type,
    report_to="tensorboard"
)

# Load Dataset

In [16]:
!pip install datasets -q


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.3[0m[39;49m -> [0m[32;49m23.2.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3 -m pip install --upgrade pip[0m


In [18]:
import datasets
dataset = datasets.load_dataset('nguha/legalbench', 'corporate_lobbying')
train_raw = dataset['train'].to_pandas()
test_raw = dataset['test'].to_pandas()

# get a sample of 100 from the test set
test = test_raw.sample(100, random_state=42)

# get remaining test set to use as the train set
train = test_raw.drop(test.index)

Downloading builder script:   0%|          | 0.00/70.6k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/169k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/9.52k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/19.5M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/10 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/490 [00:00<?, ? examples/s]

In [29]:
FINETUNE_PROMPT_CORPORTATE_LOBBYING = """<s>[INST] <<SYS>>
You are a lobbyist analyzing Congressional bills for their impacts on companies. 
Given the title and summary of the bill, plus information on the company from its 10K SEC filing, it is your job to determine if a bill is at least somewhat relevant to a company in terms of whether it could impact the company's bottom-line if it was enacted (by saying YES or NO; note the all-caps). 
<</SYS>>
Official title of bill: {bill_title}
Official summary of bill: {bill_summary}
Company name: {company_name}
Company business description: {company_description}
Is this bill potentially relevant to the company? [/INST]{answer}</s>"""

query_list = []
for _, row in train.iterrows():
    query = FINETUNE_PROMPT_CORPORTATE_LOBBYING.format(
        bill_title=row['bill_title'],
        bill_summary=row['bill_summary'],
        company_name=row['company_name'],
        company_description=row['company_description'],
        answer=row['answer'].upper()
    )
    query_list.append(query)

In [30]:
train['text'] = query_list

In [31]:
train.head()

Unnamed: 0,answer,bill_summary,bill_title,company_description,company_name,index,text
1,No,Space Frontier Act of 2019\n\nThe bill revises...,A bill to reduce regulatory burdens and stream...,"we"") participate in the Electronic Manufacturi...",Plexus Corp.,1,<s>[INST] <<SYS>>\nYou are a lobbyist analyzin...
2,No,Damon Paul Nelson and Matthew Young Pollard In...,A bill to authorize appropriations for fiscal ...,"(""TriQuint"") entered into an Agreement and Pla...","Qorvo, Inc.",2,<s>[INST] <<SYS>>\nYou are a lobbyist analyzin...
3,No,Pandemic and All-Hazards Preparedness and Adva...,To reauthorize certain programs under the Publ...,"Inc. (""HYG""), is a leading, globally integrate...","Hyster-Yale Materials Handling, Inc.",3,<s>[INST] <<SYS>>\nYou are a lobbyist analyzin...
4,No,Combat Online Predators Act\n\nThis bill incre...,"To amend title 18, United States Code, with re...","Haemonetics Corporation, a healthcare company,...",Haemonetics Corp.,4,<s>[INST] <<SYS>>\nYou are a lobbyist analyzin...
5,No,For the People Act of 2019\n\nThis bill addres...,"To expand Americans' access to the ballot box,...",Our company consists of two business segments:...,Altra Industrial Motion Corp.,5,<s>[INST] <<SYS>>\nYou are a lobbyist analyzin...


In [47]:
train['text'].iloc[0]

'<s>[INST] <<SYS>>\nYou are a lobbyist analyzing Congressional bills for their impacts on companies. \nGiven the title and summary of the bill, plus information on the company from its 10K SEC filing, it is your job to determine if a bill is at least somewhat relevant to a company in terms of whether it could impact the company\'s bottom-line if it was enacted (by saying YES or NO; note the all-caps). \n<</SYS>>\nOfficial title of bill: A bill to reduce regulatory burdens and streamline processes related to commercial space activities, and for other purposes.\nOfficial summary of bill: Space Frontier Act of 2019\n\nThe bill revises provisions relating to commercial space launch license applications and experimental permits. The Department of Transportation shall consolidate across federal agencies requirements to protect the public health and safety, safety of property, national security interests, and foreign policy interests of the United States relevant to any commercial space vehic

## Same for test set

In [40]:
FINETUNE_PROMPT_CORPORTATE_LOBBYING_TEST = """<s>[INST] <<SYS>>
You are a lobbyist analyzing Congressional bills for their impacts on companies. 
Given the title and summary of the bill, plus information on the company from its 10K SEC filing, it is your job to determine if a bill is at least somewhat relevant to a company in terms of whether it could impact the company's bottom-line if it was enacted (by saying YES or NO; note the all-caps). 
<</SYS>>
Official title of bill: {bill_title}
Official summary of bill: {bill_summary}
Company name: {company_name}
Company business description: {company_description}
Is this bill potentially relevant to the company? [/INST]"""

query_list_test = []
for _, row in test.iterrows():
    query = FINETUNE_PROMPT_CORPORTATE_LOBBYING_TEST.format(
        bill_title=row['bill_title'],
        bill_summary=row['bill_summary'],
        company_name=row['company_name'],
        company_description=row['company_description']
    )
    query_list_test.append(query)

In [42]:
test['query'] = query_list_test

In [43]:
test.head()

Unnamed: 0,answer,bill_summary,bill_title,company_description,company_name,index,query
452,Yes,Bringing an End to Harassment by Enhancing Acc...,A bill to prevent discrimination and harassmen...,We are guided by four principles: customer obs...,"Amazon.com, Inc.",452,<s>[INST] <<SYS>>\nYou are a lobbyist analyzin...
84,No,Safety Enhancements for Communities Using Reas...,"To amend title 18, United States Code, to requ...","Legg Mason, Inc. is a publicly owned asset man...","Legg Mason, Inc.",84,<s>[INST] <<SYS>>\nYou are a lobbyist analyzin...
434,No,Authorizing Critical Conservation and Enabling...,To provide for the preservation of America's o...,"(""TriQuint"") entered into an Agreement and Pla...","Qorvo, Inc.",434,<s>[INST] <<SYS>>\nYou are a lobbyist analyzin...
473,No,Affordable Prescriptions for Patients Act of 2...,A bill to amend the Federal Trade Commission A...,customer acceptance and demand for our cloud c...,"8x8, Inc.",473,<s>[INST] <<SYS>>\nYou are a lobbyist analyzin...
428,No,Public Buildings Renewal Act of 2019\n\nThis b...,A bill to amend the Internal Revenue Code of 1...,The Company is engaged principally in the reta...,"Weis Markets, Inc.",428,<s>[INST] <<SYS>>\nYou are a lobbyist analyzin...


In [33]:
from datasets import Dataset
train_hf = Dataset.from_pandas(train[['text']])

In [34]:
# Set supervised fine-tuning parameters
trainer = SFTTrainer(
    model=model,
    train_dataset=train_hf,
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    args=training_arguments,
    packing=packing,
)



Map:   0%|          | 0/390 [00:00<?, ? examples/s]

In [35]:
trainer.train()

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
25,1.8746
50,1.4254
75,1.3466
100,1.2897
125,1.2499
150,1.2311
175,1.2208


TrainOutput(global_step=196, training_loss=1.361326198188626, metrics={'train_runtime': 751.0501, 'train_samples_per_second': 1.039, 'train_steps_per_second': 0.261, 'total_flos': 1.63077634916352e+16, 'train_loss': 1.361326198188626, 'epoch': 2.0})

In [36]:
# Save trained model
trainer.model.save_pretrained('relevancy-fine-tuned-llama-70B')

In [38]:
# %load_ext tensorboard
# %tensorboard --logdir results/runs

# Evaluate on Test Set

In [44]:
test.head()

Unnamed: 0,answer,bill_summary,bill_title,company_description,company_name,index,query
452,Yes,Bringing an End to Harassment by Enhancing Acc...,A bill to prevent discrimination and harassmen...,We are guided by four principles: customer obs...,"Amazon.com, Inc.",452,<s>[INST] <<SYS>>\nYou are a lobbyist analyzin...
84,No,Safety Enhancements for Communities Using Reas...,"To amend title 18, United States Code, to requ...","Legg Mason, Inc. is a publicly owned asset man...","Legg Mason, Inc.",84,<s>[INST] <<SYS>>\nYou are a lobbyist analyzin...
434,No,Authorizing Critical Conservation and Enabling...,To provide for the preservation of America's o...,"(""TriQuint"") entered into an Agreement and Pla...","Qorvo, Inc.",434,<s>[INST] <<SYS>>\nYou are a lobbyist analyzin...
473,No,Affordable Prescriptions for Patients Act of 2...,A bill to amend the Federal Trade Commission A...,customer acceptance and demand for our cloud c...,"8x8, Inc.",473,<s>[INST] <<SYS>>\nYou are a lobbyist analyzin...
428,No,Public Buildings Renewal Act of 2019\n\nThis b...,A bill to amend the Internal Revenue Code of 1...,The Company is engaged principally in the reta...,"Weis Markets, Inc.",428,<s>[INST] <<SYS>>\nYou are a lobbyist analyzin...


In [61]:
# # Ignore warnings
# logging.set_verbosity(logging.INFO)

# # Run text generation pipeline with our next model
# pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=3000)
# result = pipe(test['query'].iloc[0])
# print(result[0]['generated_text'])

In [54]:
# Empty VRAM
del model
del pipe
del trainer
import gc
gc.collect()
gc.collect()

0

In [56]:
# Reload model in FP16 and merge it with LoRA weights
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map=device_map,
)
model = PeftModel.from_pretrained(base_model, 'relevancy-fine-tuned-llama-70B')
model = model.merge_and_unload()

# Reload tokenizer to save it
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

loading configuration file config.json from cache at /home/ubuntu/.cache/huggingface/hub/models--meta-llama--Llama-2-7b-chat-hf/snapshots/08751db2aca9bf2f7f80d2e516117a53d7450235/config.json
Model config LlamaConfig {
  "_name_or_path": "meta-llama/Llama-2-7b-chat-hf",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 11008,
  "max_position_embeddings": 4096,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 32,
  "pad_token_id": 0,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-06,
  "rope_scaling": null,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.31.0",
  "use_cache": true,
  "vocab_size": 32000
}

loading weights file model.safetensors from cache at /home/ubuntu/.cache/huggingface/hub/models--meta-llama--Llama-2-7b-chat-hf/snapshots/08751db2aca9bf2

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

All model checkpoint weights were used when initializing LlamaForCausalLM.

All the weights of LlamaForCausalLM were initialized from the model checkpoint at meta-llama/Llama-2-7b-chat-hf.
If your task is similar to the task the model of the checkpoint was trained on, you can already use LlamaForCausalLM for predictions without further training.
loading configuration file generation_config.json from cache at /home/ubuntu/.cache/huggingface/hub/models--meta-llama--Llama-2-7b-chat-hf/snapshots/08751db2aca9bf2f7f80d2e516117a53d7450235/generation_config.json
Generate config GenerationConfig {
  "bos_token_id": 1,
  "do_sample": true,
  "eos_token_id": 2,
  "max_length": 4096,
  "pad_token_id": 0,
  "temperature": 0.6,
  "top_p": 0.9,
  "transformers_version": "4.31.0"
}

loading file tokenizer.model from cache at /home/ubuntu/.cache/huggingface/hub/models--meta-llama--Llama-2-7b-chat-hf/snapshots/08751db2aca9bf2f7f80d2e516117a53d7450235/tokenizer.model
loading file tokenizer.json from cach

In [57]:
# Run text generation pipeline with our next model
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=4000)

In [58]:
result = pipe(test['query'].iloc[0])
print(result[0]['generated_text'])

<s>[INST] <<SYS>>
You are a lobbyist analyzing Congressional bills for their impacts on companies. 
Given the title and summary of the bill, plus information on the company from its 10K SEC filing, it is your job to determine if a bill is at least somewhat relevant to a company in terms of whether it could impact the company's bottom-line if it was enacted (by saying YES or NO; note the all-caps). 
<</SYS>>
Official title of bill: A bill to prevent discrimination and harassment in employment.
Official summary of bill: Bringing an End to Harassment by Enhancing Accountability and Rejecting Discrimination in the Workplace Act or the BE HEARD in the Workplace Act

This bill sets forth provisions to prevent discrimination and harassment in the workplace and raises the minimum wage for tipped employees.

Specifically, the bill (1) makes it an unlawful employment practice to discriminate against an individual in the workplace based on sexual orientation, gender identity, pregnancy, childbirt

In [69]:
from transformers import AutoTokenizer, TextGenerationPipeline
import torch

def generate_texts(
    queries, 
    pipeline,
    tokenizer,
    max_length=2000, 
    num_return_sequences=1) -> list:
    
    generated_texts = []

    for query in queries:
        sequences = pipeline(
            query,
            do_sample=True,
            temperature=None,
            max_length=max_length,
            num_return_sequences=num_return_sequences
        )
        
        # Calculate the number of tokens in the original input string
        input_ids = tokenizer.encode(query)
        input_length = len(input_ids)

        query_results = []

        for seq in sequences:
            # Get the generated text
            full_text = seq['generated_text']
            
            # Remove the input string from the output string
            output_ids = tokenizer.encode(full_text)[input_length:]
            output_text = tokenizer.decode(output_ids, skip_special_tokens=True)
            
            query_results.append(output_text)
        
        generated_texts.append(query_results)

    return generated_texts

In [70]:
query_list = test['query'].tolist()

In [73]:
results = generate_texts(query_list, pipe, tokenizer)

Input length of input_ids is 3197, but `max_length` is set to 2000. This can lead to unexpected behavior. You should consider increasing `max_new_tokens`.
Input length of input_ids is 2096, but `max_length` is set to 2000. This can lead to unexpected behavior. You should consider increasing `max_new_tokens`.
Input length of input_ids is 2048, but `max_length` is set to 2000. This can lead to unexpected behavior. You should consider increasing `max_new_tokens`.
Input length of input_ids is 4135, but `max_length` is set to 2000. This can lead to unexpected behavior. You should consider increasing `max_new_tokens`.
Input length of input_ids is 2412, but `max_length` is set to 2000. This can lead to unexpected behavior. You should consider increasing `max_new_tokens`.
Input length of input_ids is 4008, but `max_length` is set to 2000. This can lead to unexpected behavior. You should consider increasing `max_new_tokens`.


In [75]:
len(results)

100

In [86]:
# the results are a little messier
# we need to parse them into yes/no, but sometimes Llama adds a space at the beginning
# so let's get the first 5-10 characters, 
# convert it to lower()
# and then check if "yes" or "no" is in this string
# and evaluate

filtered_results = []
for idx, result in enumerate(results):
    # print(idx, result[0][0:20])
    result_lower = result[0][0:20].lower()
    if 'yes' in result_lower:
        filtered_results.append('yes')
    else:
        filtered_results.append('no')

In [90]:
test['llama_response'] = filtered_results
test['answer'] = test['answer'].str.lower()

In [94]:
pd.crosstab(test['answer'], test['llama_response'], rownames=['Actual'], colnames=['Predicted'])

Predicted,no,yes
Actual,Unnamed: 1_level_1,Unnamed: 2_level_1
no,41,25
yes,17,17


In [95]:
print("Llama-2-7B-chat fine-tuned accuracy: {}%".format((test['llama_response'] == test['answer']).sum() / len(test) * 100))

Llama-2-7B-chat fine-tuned accuracy: 57.99999999999999%


In [97]:
test[['answer', 'llama_response']].to_csv('llama-7b-finetune.csv', index=False)

# Save to Hub

In [98]:
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [100]:
model.push_to_hub('relevancy-fine-tuned-llama-70B', use_temp_dir=False)
tokenizer.push_to_hub('relevancy-fine-tuned-llama-70B', use_temp_dir=False)

Configuration saved in fine-tuned-llama/config.json
Configuration saved in fine-tuned-llama/generation_config.json
The model is bigger than the maximum size per checkpoint (10GB) and is going to be split in 2 checkpoint shards. You can find where each parameters has been saved in the index located at fine-tuned-llama/pytorch_model.bin.index.json.
Uploading the following files to dlb012/fine-tuned-llama: generation_config.json,pytorch_model-00002-of-00002.bin,config.json,pytorch_model.bin.index.json,pytorch_model-00001-of-00002.bin


pytorch_model-00001-of-00002.bin:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

pytorch_model-00002-of-00002.bin:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

tokenizer config file saved in fine-tuned-llama/tokenizer_config.json
Special tokens file saved in fine-tuned-llama/special_tokens_map.json
Uploading the following files to dlb012/fine-tuned-llama: tokenizer.json,tokenizer_config.json,special_tokens_map.json


CommitInfo(commit_url='https://huggingface.co/dlb012/fine-tuned-llama/commit/60578e8acd884653f0bec68882f89b0b8b7cdc87', commit_message='Upload tokenizer', commit_description='', oid='60578e8acd884653f0bec68882f89b0b8b7cdc87', pr_url=None, pr_revision=None, pr_num=None)