In [1]:
import torch
# Fail fast if this machine cannot run the notebook.
# NOTE(review): the (7, 5) minimum is presumably what the 8-bit loading used below needs - confirm.
if not torch.cuda.is_available():
  # Without this guard torch raises its own, less specific error from get_device_capability().
  raise RuntimeError("No CUDA device is available; this notebook needs a GPU with capability of at least (7, 5)")

capability = torch.cuda.get_device_capability()  # queried once and reused in the message below
if capability < (7, 5):
  raise ValueError(f"You got a GPU with capability {capability}, need at least (7, 5)")
print("OK")

OK


In [2]:
# Install the third-party libraries used by the cells below into the kernel's environment.
# NOTE(review): the active line installs unpinned versions, so a fresh run may pull newer
# releases than the ones that produced the saved outputs. The commented-out lines are
# alternative install commands left in place (one pins bitsandbytes==0.37.0 and
# accelerate==0.18.0, another installs transformers and peft from their GitHub main branches).
# %pip install bitsandbytes datasets accelerate loralib
# %pip install bitsandbytes==0.37.0 transformers datasets accelerate==0.18.0 loralib peft
%pip install bitsandbytes transformers datasets accelerate loralib peft
# %pip install transformers peft
# %pip install -q git+https://github.com/huggingface/transformers.git@main git+https://github.com/huggingface/peft.git


Note: you may need to restart the kernel to use updated packages.


#### Load model

In [3]:
# import os
# os.environ["CUDA_VISIBLE_DEVICES"]="0"
import torch
import torch.nn as nn
import torch.nn.functional as F
import bitsandbytes as bnb
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM

# Checkpoint to load. The smaller OPT variants are left here as commented-out alternatives.
# MODEL_NAME = "facebook/opt-6.7b"
# MODEL_NAME = "facebook/opt-13b"
MODEL_NAME = "facebook/opt-30b"

# Directory in which the downloaded model and tokenizer files are cached.
CACHE_DIR = '/media/tfsservices/DATA/NLP/cache/'

# Load the causal LM as a mixed-8bit quantized model (needs the bitsandbytes library).
# Two memory-saving options are intentionally left disabled:
#   low_cpu_mem_usage=True   - loads the model using ~1x model size CPU memory
#   offload_state_dict=True  - temporarily offloads the CPU state dict to disk to avoid running out of CPU RAM
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    cache_dir=CACHE_DIR,
    torch_dtype=torch.float16,
    device_map='auto',
    load_in_8bit=True,
)

# The tokenizer comes from the same checkpoint and shares the cache directory.
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_NAME,
    cache_dir=CACHE_DIR,
)



Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues


  from .autonotebook import tqdm as notebook_tqdm
Downloading (…)lve/main/config.json: 100%|██████████| 651/651 [00:00<00:00, 631kB/s]
Downloading (…)model.bin.index.json: 100%|██████████| 62.8k/62.8k [00:00<00:00, 3.60MB/s]
Downloading (…)l-00001-of-00007.bin: 100%|██████████| 9.79G/9.79G [02:29<00:00, 65.6MB/s]
Downloading (…)l-00002-of-00007.bin: 100%|██████████| 9.87G/9.87G [02:25<00:00, 67.7MB/s]
Downloading (…)l-00003-of-00007.bin: 100%|██████████| 9.87G/9.87G [02:17<00:00, 71.8MB/s]
Downloading (…)l-00004-of-00007.bin: 100%|██████████| 9.87G/9.87G [02:21<00:00, 69.9MB/s]
Downloading (…)l-00005-of-00007.bin: 100%|██████████| 9.87G/9.87G [02:24<00:00, 68.1MB/s]
Downloading (…)l-00006-of-00007.bin: 100%|██████████| 9.87G/9.87G [02:27<00:00, 66.9MB/s]
Downloading (…)l-00007-of-00007.bin: 100%|██████████| 822M/822M [00:11<00:00, 73.2MB/s]
Downloading shards: 100%|██████████| 7/7 [14:39<00:00, 125.65s/it]
Loading checkpoint shards: 100%|██████████| 7/7 [00:33<00:00,  4.80s/it]
Downloa

#### Post-processing on the model

In [4]:
# Freeze the base model; only the adapters attached later are trained.
for param in model.parameters():
  param.requires_grad = False
  if param.ndim == 1:
    # cast the small 1-D parameters (e.g. layernorm) to fp32 for stability
    param.data = param.data.to(torch.float32)

model.gradient_checkpointing_enable()  # reduce number of stored activations

# With every base weight frozen, the embedding output has to require grad so that
# gradients can flow back through the checkpointed layers to the adapters.
# Use the model-agnostic transformers hook for that instead of assigning a lambda to
# `model.model.decoder.project_in`: that attribute is OPT-specific, and overwriting it
# would discard a real input projection on checkpoints that have one.
model.enable_input_require_grads()

# Cast the wrapped module's output to float32 so that the top-k sampler works correctly.
class CastOutputToFloat(nn.Sequential):
  """Sequential container that returns its final output converted to torch.float32."""

  def forward(self, x):
    # Run the wrapped modules in order, then convert whatever they produced.
    result = super().forward(x)
    return result.to(torch.float32)
# Replace the LM head with a wrapper that returns its output as float32.
# NOTE(review): this is not idempotent - re-running the cell wraps the already-wrapped head again.
model.lm_head = CastOutputToFloat(model.lm_head)

#### Apply LoRA

In [5]:
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.

    Args:
        model: object whose ``named_parameters()`` yields ``(name, parameter)`` pairs;
            each parameter must provide ``numel()`` and a ``requires_grad`` flag.

    Prints a single line with the trainable count, the total count and the trainable
    percentage. Returns None. A model without any parameters reports 0.0 percent
    instead of raising ZeroDivisionError.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        count = param.numel()
        all_param += count
        if param.requires_grad:
            trainable_params += count

    # Guard the division: an empty model has all_param == 0.
    trainable_pct = 100 * trainable_params / all_param if all_param else 0.0
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {trainable_pct}"
    )

In [6]:
from peft import LoraConfig, get_peft_model 

# LoRA settings: adapters are attached to the attention key/query/value projections.
# The rank is 16 here; the original note says it can be reduced to 8.
config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["k_proj", "q_proj", "v_proj"],
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)

# Wrap the frozen base model with the adapters, then report how much of it is trainable.
model = get_peft_model(model, config)
print_trainable_parameters(model)

trainable params: 33030144 || all params: 30007570432 || trainable%: 0.11007270340279443


In [7]:
# Put the model in evaluation mode before the generation test below; because this is the
# cell's last expression, the notebook also displays the wrapped model's module tree.
model.eval()

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): OPTForCausalLM(
      (model): OPTModel(
        (decoder): OPTDecoder(
          (embed_tokens): Embedding(50272, 7168, padding_idx=1)
          (embed_positions): OPTLearnedPositionalEmbedding(2050, 7168)
          (final_layer_norm): LayerNorm((7168,), eps=1e-05, elementwise_affine=True)
          (layers): ModuleList(
            (0-47): 48 x OPTDecoderLayer(
              (self_attn): OPTAttention(
                (k_proj): Linear8bitLt(
                  in_features=7168, out_features=7168, bias=True
                  (lora_dropout): Dropout(p=0.05, inplace=False)
                  (lora_A): Linear(in_features=7168, out_features=16, bias=False)
                  (lora_B): Linear(in_features=16, out_features=7168, bias=False)
                )
                (v_proj): Linear8bitLt(
                  in_features=7168, out_features=7168, bias=True
                  (lora_dropout): Dropout(p=0.05, inplace=False)
         

#### Test generation (before fine-tuning)

In [8]:
def generate_marketing(max_length = 150):
    """
    Sample and print one continuation of the fixed prompt "Hi {FirstName} ".

    Args:
        max_length: forwarded unchanged to `generate` as its `max_length`.

    This is a thin wrapper around `generate`; the tokenize / sample / decode / print
    logic lives there so that it is not maintained in two places. Returns None.
    """
    generate("Hi {FirstName} ", max_length=max_length)

In [9]:
def generate(prompt: str, max_length = 150):
    """
    Sample one continuation of `prompt` from the global `model` and print it.

    Args:
        prompt: text that is tokenized with the global `tokenizer` and continued.
        max_length: forwarded to `model.generate` as `max_length`.

    Prints the decoded token sequence (special tokens included) preceded by two
    newlines. Returns None.
    """
    # This helper is also called after training. Training leaves the model in train mode
    # (dropout active), and the training cell sets `model.config.use_cache = False`.
    # Switch to eval mode and request the cache explicitly so that sampling behaves the
    # same before and after fine-tuning.
    model.eval()

    batch = tokenizer(prompt, return_tensors='pt').to('cuda')

    with torch.cuda.amp.autocast():
        output_tokens = model.generate(
            **batch,
            min_length=30,
            max_length=max_length,
            do_sample=True,
            use_cache=True,
        )

    print('\n\n', tokenizer.decode(output_tokens[0].cpu().numpy()))

In [10]:
# Baseline sample from the LoRA-wrapped model before any fine-tuning (default max_length=150).
generate_marketing()





 </s>Hi {FirstName}  Thank you! You are now subscribed to {CurrentMonth + Year} facts!  Each month we'll send you a fun fact about a country in the world.  Did you know - Japan has the world's highest number of vending machines per capita at 2,000!
You're not subscribed!</s>


In [11]:
# Second baseline sample; generation uses do_sample=True, so repeated calls give different text.
generate_marketing()



 </s>Hi {FirstName}  {MiddleName},

 This is a reminder that you have not completed a required paperwork
 for  your e:mail address on file. If your signature below does not indicate
 your  agreement to comply with this notification you are in violation of
 ENRON's
 anti-spam policies.

 Also, you are not in compliance with the requirements of the Anti-
 Corrupt
 Practices Act and the Sarbanes-Oxley Act.

 Further, to ensure that you stay notifed of company related announcements,
Enron
 needs your signature and confirmation of your password at the following
 link:

http://www.enron.com/secure/passwords/notice


#### Fine-tune OPT model

In [13]:
from datasets import load_dataset

# Read the marketing CSV and tokenize its 'text' column in batches; the tokenizer's
# outputs are added to the dataset as new columns.
dataset = load_dataset(
    "csv",
    data_files="./datasets/marketing.csv",
).map(
    lambda batch: tokenizer(batch['text']),
    batched=True,
)

Using custom data configuration default-9329f19ebd7265ee
Found cached dataset csv (/home/tfsservices/.cache/huggingface/datasets/csv/default-9329f19ebd7265ee/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317)
100%|██████████| 1/1 [00:00<00:00, 878.57it/s]
100%|██████████| 1/1 [00:00<00:00, 30.95ba/s]


In [14]:
import transformers

# Fine-tune the adapters on the tokenized marketing dataset with the Hugging Face Trainer.
trainer = transformers.Trainer(
    model=model,
    train_dataset=dataset['train'],
    args=transformers.TrainingArguments(
        per_device_train_batch_size=4,
        gradient_accumulation_steps=4,  # 4 x 4 = 16 sequences per optimizer step on each device
        warmup_steps=10,
        max_steps=100,
        learning_rate=2e-4,
        fp16=True,
        logging_steps=1,
        output_dir='outputs_marketing'),
        # save_steps=100),
    # mlm=False: the collator is not asked to build masked-language-modeling batches
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)
)

model.config.use_cache = False  # silence the warnings while training
train_output = trainer.train()

# Restore inference-time state. The cache was only disabled for training and has to be
# re-enabled, and the model must leave train mode, otherwise the generation cells that
# follow would sample with dropout still active.
model.config.use_cache = True
model.eval()

# Keep the training summary as the cell's displayed result.
train_output

You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
1,3.1787


TrainOutput(global_step=100, training_loss=1.6580638313293456, metrics={'train_runtime': 1437.1786, 'train_samples_per_second': 1.113, 'train_steps_per_second': 0.07, 'total_flos': 4.459422594325709e+16, 'train_loss': 1.6580638313293456, 'epoch': 15.38})

#### Save LoRA adapter

In [15]:
# Derive the adapter directory from the checkpoint that was actually loaded, so the saved
# adapter can never be labelled with the wrong model size:
# "facebook/opt-30b" -> "lora_adapters-30b", "facebook/opt-6.7b" -> "lora_adapters-6.7b".
ADAPTER_NAME = f"lora_adapters-{MODEL_NAME.rsplit('-', 1)[-1]}"
model.save_pretrained(ADAPTER_NAME)

#### Test generation (after fine-tuning)

In [16]:
# Sample again now that the adapters have been trained, for comparison with the earlier outputs.
generate_marketing()





 </s>Hi {FirstName} 

Hope all is going well. I noticed that you have the below {Make} and was wondering if you are still looking for one. Based on the seller's latest price, it seems that you may have a good alternative for your project. The seller has told us that they can offer it with the complete and reliable auto-reloading capability and can have it available by next week/month. The manufacturer will provide 12 months auto-reloading warranty and only will ask for a {Price} for this unit. 

According to the seller's information, this equipment has never been modified and only has a few small scratches. You can view photos of the unit's exterior and interior in the link


In [17]:
# Same prompt with max_length raised from the default 150 to 450 to see a longer completion.
generate_marketing(max_length=450)





 </s>Hi {FirstName} 

We are looking to purchase a {MakeModel} for our site. Do you have one for sale? 

We are targeting a machine with at least {WaferSize}Wafers of wafer size {WaferType}, however, we are generally interested in any {MakeModel}s that you have on hand.

Our buyer is interested in purchasing it within the next 2 weeks. Please contact me if you have one for sale. 

We look forward to hearing from you. 

Best Regards,

{SendersSignature}

{SendersSignature}

{SendersSignature}

{SendersSignature}

{SendersSignature}

{SendersSignature}

{SendersSignature}

{SendersSignature}

{SendersSignature}

{SendersSignature}

{SendersSignature}

{SendersSignature}

{SendersSignature}

{SendersSignature}

{SendersSignature}

{SendersSignature}

{SendersSignature}

{SendersSignature}

{SendersSignature}

{SendersSignature}

{SendersSignature}

{SendersSignature}

{SendersSignature}

{SendersSignature}

{SendersSignature}

{SendersSignature}

{SendersSignature}

{SendersSignature}



In [18]:
# Same prompt as generate_marketing(), issued through the generic helper with max_length=250.
generate(prompt="Hi {FirstName} ", max_length=250)



 </s>Hi {FirstName} 
Our client, a large global semiconductor company, is releasing {Qty} units of {MakeModel}/T series for sale. {Qty} units were bought new and installed in production lines. All of them had undergone mandatory safety overhauls and part updates. 
The units were installed as follows:

 {Qty} units of {Model}

{Listing} 
If you are interested in making an offer, please reply to this e-mail by the end of this week. We will reply you with additional pics soon. 

{Listing}


{Senders}



{Senders}



Budget-priced 3D vision wafer systems for sale online:
{Listing}


{Senders}



{Senders}



If you have any questions, please let me know.

{Senders}



{Senders}



Best regards,
{SendersSignature}

{SendersSignature}

{Senders}


{Senders}


{Senders}


{Senders}




In [19]:
# Try a different salutation as the prompt.
generate(prompt="Dear {FirstName} ", max_length=250)



 </s>Dear {FirstName} 

I'm reaching out to request an offer on the following {MakeModel} CCD System from my client who is looking to release their unit to the market after failed attempt to fix it. They are targeting to get {WaferSize} wafers back in return, and they will consider different offers as well. 

I was told the tool has been sitting idle for a long time, so we assume there could be some problems with the system. We recommend you review the detailed information of the equipment below and send us an offer with your thinking regarding the wafer size as we dont know its condition exactly.

{Listing}
If you have any other systems for sale or purchase, please do not hesitate to contact me, I will be glad to help. 
{SendersSignature}
{SendersSignature}
Best regards,
{SendersSignature}

{SendersSignature}
{SendersSignature}
{SendersSignature}
{SendersSignature}
{SendersSignature}
{SendersSignature}
{SendersSignature}

 
{Listing}


In [20]:
# Prefix the prompt with "Listing: " to see how it changes the completion.
# NOTE(review): presumably meant to steer towards a listing-style e-mail - confirm the training data uses such a prefix.
generate(prompt="Listing: Hi {FirstName} ", max_length=200)



 </s>Listing: Hi {FirstName}    

Looking for an {MakeModel} {WaferSize} to complete your front end inspection system as my client is urgently looking to complete this system ASAP to support their new project starting early next month. 

If you have any similar equipment to be sold, please don t hesitate to contact me. 

We can make quick and aggressive offer on yours if it s similar to ours. 

{Listing}

{SendersSignature}

{SendersSignature}

{Listing}

{SendersSignature}

{SendersSignature}

{SendersSignature}

 

{Listing}

 

{Listing}

 

{Listing}

 

 

{Listing}

 



{Listing}

 


{Listing}




In [21]:
# Prefix the prompt with "Requirement: " to see how it changes the completion.
# NOTE(review): presumably meant to steer towards a requirement-style e-mail - confirm the training data uses such a prefix.
generate(prompt="Requirement: Hi {FirstName} ", max_length=200)



 </s>Requirement: Hi {FirstName}  We are offering a {MakeModel} and approved on approved budget for {Price} so we hope you find this fit for your needs.  There are two machines missing from original configuration, with missing power modules and missing vacuum pumps.  Below configuration is what we received from the owner.  Please review the configuration and photos below and let me know if you have any interest.  Thank you.  System Information:  Configuration: {Make}  {WaferSize}  Power Supplies: 2  Vacuum Pump: 1       



{Listing}


{SendersSignature}

{SendersSignature}

{SendersSignature}

{SendersSignature}


Óëò

{Listing}

- - - - - - -

- - - - - - -

- - -
