# Load LoRA adapters

In [1]:
import torch
if torch.cuda.get_device_capability() < (7, 5):
    raise ValueError(f"You got a GPU with capability {torch.cuda.get_device_capability()}, need at least (7, 5)")
else: print("OK")

OK


In [2]:
%pip install bitsandbytes==0.37.0 transformers==4.27.4 datasets==2.7.0 accelerate==0.18.0 loralib==0.1.1 peft==0.3.0.dev0

Note: you may need to restart the kernel to use updated packages.


#### Load base model

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import bitsandbytes as bnb
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM

CACHE_DIR = '/media/tfsservices/DATA/NLP/cache/'
# MODEL_NAME = "facebook/opt-6.7b"
# MODEL_NAME, ADAPTER_PATH = "facebook/opt-30b", "adapters/lora-adapters-30b"
# MODEL_NAME, ADAPTER_PATH = "EleutherAI/gpt-j-6B", "adapters/gpt-j-6B-marketing"
# MODEL_NAME, ADAPTER_PATH = "EleutherAI/gpt-j-6B", "adapters/gpt-j-6B-wm"

MODEL_NAME = "EleutherAI/gpt-neox-20B"

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    load_in_8bit=True,        # bitsandbytes lib required (convert the loaded model into mixed-8bit quantized model.)
    device_map='auto',
    torch_dtype=torch.float16,
    cache_dir=CACHE_DIR,      # path to a directory in which a downloaded pretrained model
    # low_cpu_mem_usage=True,   # loads the model using ~1x model size CPU memory
    # offload_state_dict=True)  # temporarily offload the CPU state dict to the hard drive to avoid getting out of CPU RAM
    )
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, cache_dir=CACHE_DIR)


Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues


  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards: 100%|██████████| 46/46 [00:25<00:00,  1.78it/s]


In [4]:
from transformers import StoppingCriteria, StoppingCriteriaList

class StoppingCriteriaSub(StoppingCriteria):

    def __init__(self, stops = [], encounters=1):
        super().__init__()
        self.stops = stops
        self.ENCOUNTERS = encounters

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor):
        stop_count = 0
        for stop in self.stops:
            stop_count = (stop == input_ids[0]).sum().item()

        if stop_count >= self.ENCOUNTERS:
            return True
        return False

In [5]:
stop_words_ids = [tokenizer(stop_word, return_tensors='pt')['input_ids'].squeeze() for stop_word in ["###"]]
stopping_criteria = StoppingCriteriaList([StoppingCriteriaSub(stops=stop_words_ids, encounters=1)])


In [5]:
# def test_generate_marketing():

#     inputs = tokenizer("Hi {FirstName}", return_tensors="pt")

#     with torch.no_grad():
#         outputs = model.generate(input_ids=inputs["input_ids"].to("cuda"), max_length=200)
#         print(tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True)[0])

In [6]:
def generate(prompt: str, max_length=250):

    inputs = tokenizer(prompt, return_tensors="pt")

    with torch.no_grad():
        # outputs = model.generate(input_ids=inputs["input_ids"].to("cuda"), max_length=max_length, do_sample=True, )
        outputs = model.generate(input_ids=inputs["input_ids"].to("cuda"),
                                # early_stopping=True, 
                                # num_beams=5,
                                temperature=0.7,
                                top_p=0.8,
                                do_sample=True,
                                max_length = max_length,
                                # pad_token_id=tokenizer.eos_token_id,
                                stopping_criteria=stopping_criteria
                                # output_scores=True,
                                # return_dict_in_generate=True).detach()
        )
        print(tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True)[0])

In [26]:
# model.eval()

Generate text using raw LLM.

In [8]:
generate("Hi {FirstName} ")

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
  attn_scores = torch.where(causal_mask, attn_scores, mask_value)


Hi {FirstName} 
{LastName}

{Address}
{City}
{State}
{Zip}

{Phone}
{Email}

{URL}

{Hobbies}
{Website}

{Birthday}
{Gender}
{Height}
{Weight}
{DateOfBirth}
{DateOfDeath}

{Humor}
{HumorLevel}
{HumorSense}
{HumorType}

{FavoriteColor}
{FavoriteColorHex}
{FavoriteColorRGB}
{FavoriteColorName}

{FavoriteFood}
{FavoriteFoodHex}
{FavoriteFoodRGB}
{FavoriteFoodName}

{FavoriteDrink}
{FavoriteDrinkHex}
{FavoriteDrinkRGB}
{FavoriteDrinkName}

{FavoriteSport}
{FavoriteSportHex}
{FavoriteSportRGB}
{FavoriteSportName}

{FavoriteActivity}
{FavoriteActivityHex}
{FavoriteActivityRGB}
{F


#### Load and apply adapter to generate responses for Web Messages.

In [9]:
from peft import PeftModel, PeftConfig

model_id = "adapters/gpt-neox-20B-wm"

config = PeftConfig.from_pretrained(model_id)
model = PeftModel.from_pretrained(model, model_id, device_map={"":0})


In [25]:
# model.eval()

In [11]:
generate("Hi {FirstName} ")

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Hi {FirstName} 
I received your email. I am checking the photos right now. 
I will get back to you within 24 hours. 
{SenderSignature} 
 ###


In [12]:
generate("Hi {FirstName} ")

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Hi {FirstName} 
Thank you for reaching out to us. 
Do you have the unit you are looking to sell? 
I am {Sender} from CAE. I will be looking at the unit you are listing. 
Please let me know if you have any questions. 
Best regards, 
{SendersSignature} 
 ###


In [13]:
generate("Hi {FirstName} ")

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Hi {FirstName} 
I received your inquiry for {MakeModel} 
Do you have a photo of the machine? 
Also, can you send me a list of what you have available and the asking price? 
I will review and let you know if I have any questions. 
Thanks 
{SenderSignature} 
 ###


#### Change Adapter. Apply adapter to generate text for ARMM campaigns.

In [14]:
# model.parameters()

<generator object Module.parameters at 0x7f622a734f20>

In [14]:
model_id = "adapters/gpt-neox-20B-armm"
model = PeftModel.from_pretrained(model.base_model.model, model_id, device_map={"":0})

In [27]:
# model.eval()

In [16]:
generate("Hi {FirstName} ")

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Hi {FirstName} 



We have a client that is looking for {MakeModel} for their project. They need the system for the next week, and they have a budget of {Price} for the system.

They are willing to pay up front, so please let me know if you have one for sale.



{Listing}



Best regards,

{SendersSignature} 
 ###


In [17]:
generate("Hi {FirstName} ")

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Hi {FirstName} 



I am working with a client who is looking for a {MakeModel} system to complete a project. 

The seller has decided to release the system to the market and they are now looking for a buyer who is willing to pay a reasonable price.

The seller is asking {Price} for the system.

They are also willing to consider a buyer who is willing to pay {Price} for the system. 

Please review the photos and let me know if you are interested. 

Also, if you have other equipment for sale, please let me know.

Best regards,

{SendersSignature} 
 ###


#### Change Adapter again. Apply adapter to generate respnses for Web Messages .

In [18]:
model_id = "adapters/gpt-neox-20B-wm"
# PeftModel.disable_adapter(model)
model = PeftModel.from_pretrained(model.base_model.model, model_id, device_map={"":0})

In [28]:
# model.eval()

In [20]:
generate("Hi {FirstName} ")
# generate("Hi inquiry ")

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Hi {FirstName} 
Thank you for reaching out.  I'm checking our availability for the {MakeModel} you are looking to sell.  Do you have photos and information about the unit?  
{Listing} 
Best regards, 
{SenderSignature} 
 ###


In [21]:
generate("Hi {FirstName} ")

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Hi {FirstName} 
Hope you are doing well. 
I received your email. 
I am checking availability and price for you. 
Could you please send me the photos of the machine you have for sale? 
I will forward this to our client and they will get back to you. 
Thanks, 
{SendersSignature} 
 ###


#### Change Adapter again. Apply adapter to generate text for ARMM campaigns.

In [22]:
model_id = "adapters/gpt-neox-20B-armm"
PeftModel.disable_adapter(model)

model = PeftModel.from_pretrained(model.base_model.model, model_id, device_map={"":0})

In [23]:
generate("Hi {FirstName} ")

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Hi {FirstName} 



Hope this mail finds you well.

Our client is looking for {MakeModel} for their new project, and they are willing to pay up to {Price} for the complete unit. They are also looking for {MakeModel} as well, but they prefer to buy one complete unit instead of two.

They are looking for the complete unit, so they can start their project immediately. If you have one available, please let me know ASAP. 

Also, if you have any other similar tools for sale, please let me know. I am looking for all kinds of tools.

Best regards,

{SendersSignature} 
 ###


In [24]:
generate("Hi {FirstName} ")

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Hi {FirstName} 



We are currently looking for a {MakeModel} for our client.

They are looking for a new {MakeModel} as they are looking to replace their current {MakeModel} due to the tool being out of production for the last few years.

They are looking for a complete tool with all the necessary components, as they are going to use this tool for their production line.

Please let me know if you have any available for sale or if you know of any coming available.

We are also looking for other equipment, please let me know if you have any other equipment for sale.

Thank you.



{SendersSignature} 
 ###


### Disable adapters

In [36]:
model.base_model.disable_adapter_layers()

In [37]:
model.peft_config

LoraConfig(peft_type='LORA', base_model_name_or_path='EleutherAI/gpt-neox-20B', task_type='CAUSAL_LM', inference_mode=True, r=16, target_modules=['query_key_value'], lora_alpha=32, lora_dropout=0.1, merge_weights=False, fan_in_fan_out=True, enable_lora=[True, False, True], bias='none', modules_to_save=None, init_lora_weights=True)

In [38]:
generate("Hi {FirstName} ")

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Hi {FirstName} 
<br/>
<br/>
<a href='{SiteUrl}'>Go back to {FirstName}'s profile</a>

A:

You can use the following:
{FirstName} {LastName}

You can use the following to get the profile URL:
http://{SiteUrl}/profile/{UserId}




In [41]:
generate("Hi {FirstName}  ")

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Hi {FirstName}  {LastName}",
                        new { @class = "btn btn-default" })
                </div>
            </div>
        }
    </div>
}

@section scripts
{
    <script type="text/javascript">
        $(function () {
            $('.datepicker').datepicker({
                showOn: 'button',
                buttonImage: '~/Content/images/calendar.png',
                buttonImageOnly: true
            });
        });
    </script>
}

The problem is that when I run the application, I get the following error:

The name 'FirstName' does not exist in the current context

I am using a model class named User, and the FirstName and LastName properties are in the User class. I have tried to make the User class public, but it did not help. I have also tried to make the User class internal, but it did not help either.
How can I make the FirstName and LastName properties available to the view?

A:

I am not sure if you are using a view model or not, but if you
