In [5]:
import os
import re
import string
import torch
from torch import nn
import torch.nn.functional as F

from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig
)

from peft import (
    get_peft_config,
    get_peft_model,
    LoraConfig,
    TaskType
)

import urllib3

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
os.environ['TOKENIZER_PARALLELISM'] = 'false'
os.environ['CURL_CA_BUNDLE'] = ''

proxy = 'http://192.168.5.8:3128'
os.environ['HTTP_PROXY'] = proxy
os.environ['HTTPS_PROXY'] = proxy

os.environ['TOKENIZERS_PARALLELISM'] = 'false'

# General Settings

In [6]:
llm_backbone = 'mistralai/Mistral-7B-v0.1'
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Load tokenizer

In [9]:
tokenizer = AutoTokenizer.from_pretrained(llm_backbone, token='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', cache_dir="../cache")

if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Model

In [11]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        
        bnb_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_use_double_quant=True,
                bnb_4bit_quant_type="nf4",
                bnb_4bit_compute_dtype=torch.float16
        )
        
        self.backbone =  AutoModelForCausalLM.from_pretrained(
            llm_backbone,
            quantization_config=bnb_config,
            cache_dir="../cache",
            token='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'
        )
        
        self.peft_config = LoraConfig(
            task_type=TaskType.CAUSAL_LM, 
            inference_mode=False,
            r=8,
            lora_alpha=16, 
            lora_dropout=0.05
        )
        
        self.backbone = get_peft_model(self.backbone, self.peft_config)
        
        self.backbone.print_trainable_parameters()
        
    def forward(self, input_ids, targets):
        logits = self.backbone(input_ids).logits
        
        B, T, C = logits.shape
    
        logits = logits[..., :-1, :]
        logits = logits.reshape(B*T, C)
        
        targets = targets[..., 1:]
        targets = targets.reshape(-1)
        
        loss = F.cross_entropy(logits, targets, ignore_index=-1)
        
        return logits, loss

# Load trained weights

In [13]:
model = Net()

model.load_state_dict(
    torch.load(
        './mistral_7b_dolly.pt', 
        map_location=device
    ),
    strict=True
)

`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

trainable params: 3,407,872 || all params: 7,245,139,968 || trainable%: 0.0470


<All keys matched successfully>

# Chat

In [22]:
def chat():
    while True:
        question = input('You:')
        if question.lower() == 'end chat':
            print('Thank you for chatting with me. I hope you have a great day!')
            break
        else:
            with torch.no_grad():
                model.eval()
                text = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\n{question}\n\n### Response:\n"""
                answer_ids =  model.backbone.generate(
                        **tokenizer(text, return_tensors='pt').to(device),
                        max_new_tokens=1000,
                        pad_token_id=tokenizer.pad_token_id,
                )[0]
                start_idx = torch.where(answer_ids == 12107)[0] + 2
                print('Bot:', tokenizer.decode(answer_ids[start_idx:-1]))
                print('\n')

In [23]:
chat()

You: Write the code use pytorch to training model classify cat and dog.


Bot: 
import torch
import torch.nn as nn
import torch.optim as optim

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(nn.functional.relu(self.conv1(x)))
        x = self.pool(nn.functional.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)
        x = nn.functional.relu(self.fc1(x))
        x = nn.functional.relu(self.fc2(x))
        x = self.fc3(x)
        return x

net = Net()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.5)
loss_function = nn.CrossEntropyLoss()

for epoch in range(2):  # loop over the dataset multiple times
    running_loss = 0.0
    for i in range(len(train_loader)):
        inputs, labels = train_loader.sample()
        optimi

You: Classify each team as playing in the EPL or La Liga: Barcelona, Tottenham, Brighton, Liverpool, Sevilla, Valencia, Everton


Bot: 
EPL: Tottenham, Brighton, Liverpool, Everton
La Liga: Barcelona, Sevilla, Valencia




You: end_chat


Bot: 
Thank you for chatting with me. I hope you have a great day!




You: end chat


Thank you for chatting with me. I hope you have a great day!
