In [1]:
!pip install -q bitsandbytes
!pip install -q accelerate
!pip install -q peft

In [2]:
import os
import re
import string
import torch
from torch import nn
import torch.nn.functional as F

from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig
)

from peft import (
    get_peft_config,
    get_peft_model,
    LoraConfig,
    TaskType
)


# Set before any tokenizer is created further down in the notebook.
# NOTE(review): presumably this silences the tokenizers parallelism/fork
# warning -- confirm against the tokenizers documentation.
os.environ.update(TOKENIZERS_PARALLELISM='false')

# General Settings

In [3]:
# Model id of the base checkpoint that the tokenizer and backbone are loaded from.
llm_backbone = 'mistralai/Mistral-7B-v0.1'

# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Load tokenizer

In [4]:
tokenizer = AutoTokenizer.from_pretrained(llm_backbone)

# The pad token id is passed to generate() later on, so when the tokenizer
# ships without a pad token, reuse its end-of-sequence token for that role.
needs_pad_token = tokenizer.pad_token is None
if needs_pad_token:
    tokenizer.pad_token = tokenizer.eos_token

Downloading tokenizer_config.json:   0%|          | 0.00/967 [00:00<?, ?B/s]

Downloading tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

Downloading tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

# Model

In [5]:
class Net(nn.Module):
    """Causal language model: a 4-bit quantized backbone wrapped with LoRA adapters.

    The backbone checkpoint is taken from the notebook-level ``llm_backbone``.
    The wrapped model is stored as ``self.backbone`` and the adapter settings
    as ``self.peft_config``.
    """

    def __init__(self):
        super().__init__()

        # NF4 4-bit weights with double quantization; compute runs in float16.
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.float16,
        )

        # 4-bit loading is requested through `quantization_config` only.
        # Passing `load_in_4bit=True` as an extra kwarg is redundant, and newer
        # transformers releases reject the combination with a ValueError.
        self.backbone = AutoModelForCausalLM.from_pretrained(
            llm_backbone,
            quantization_config=bnb_config,
        )

        self.peft_config = LoraConfig(
            task_type=TaskType.CAUSAL_LM,
            inference_mode=False,
            r=8,
            lora_alpha=16,
            lora_dropout=0.05,
        )

        # Replace the plain backbone with its LoRA-wrapped version and report
        # how many parameters remain trainable.
        self.backbone = get_peft_model(self.backbone, self.peft_config)
        self.backbone.print_trainable_parameters()

    def forward(self, input_ids, targets):
        """Compute next-token logits and the shifted cross-entropy loss.

        Args:
            input_ids: token ids fed to the backbone; the code unpacks the
                resulting logits as (batch, time, vocab).
            targets: token ids aligned with ``input_ids``. Positions equal to
                -1 are ignored by the loss.

        Returns:
            A ``(logits, loss)`` tuple. ``logits`` is flattened to
            ``(batch * (time - 1), vocab)`` after dropping the last time step;
            ``loss`` is the scalar cross-entropy against ``targets`` shifted
            left by one position.
        """
        logits = self.backbone(input_ids).logits
        vocab_size = logits.shape[-1]

        # Position t predicts token t+1: drop the final prediction and the
        # first target so the two line up. Flatten with -1 because the number
        # of rows is batch * (time - 1), not batch * time, after the shift.
        logits = logits[..., :-1, :].reshape(-1, vocab_size)
        targets = targets[..., 1:].reshape(-1)

        loss = F.cross_entropy(logits, targets, ignore_index=-1)

        return logits, loss

# Load trained weights

In [6]:
# Location of the fine-tuned checkpoint inside the Kaggle input dataset.
weights_path = '/kaggle/input/mistral-7b-dolly/mistral_7b_dolly.pt'

model = Net()

# NOTE(review): torch.load unpickles the file, so only point this at a
# checkpoint from a trusted source.
# strict=True turns any missing or unexpected key into an error.
model.load_state_dict(torch.load(weights_path, map_location=device), strict=True)

Downloading config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

Downloading (…)fetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

Downloading (…)of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

trainable params: 3,407,872 || all params: 7,245,139,968 || trainable%: 0.04703666202518836


<All keys matched successfully>

# Chat

In [7]:
def chat(max_new_tokens=1000):
    """Run an interactive question/answer loop against the fine-tuned model.

    Each question read from stdin is wrapped in the instruction prompt
    template, passed to ``model.backbone.generate`` and the generated reply is
    printed. Entering ``end chat`` (case-insensitive, surrounding whitespace
    ignored) prints a goodbye message and ends the loop.

    Uses the notebook-level ``model``, ``tokenizer`` and ``device``.

    Args:
        max_new_tokens: Upper bound on the number of tokens generated per
            reply. Defaults to 1000.
    """
    # Inference mode is the same for every turn, so set it once up front.
    model.eval()

    while True:
        question = input('You:')
        if question.strip().lower() == 'end chat':
            print('Thank you for chatting with me. I hope you have a great day!')
            break

        text = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\n{question}\n\n### Response:\n"""
        inputs = tokenizer(text, return_tensors='pt').to(device)
        prompt_len = inputs['input_ids'].shape[1]

        with torch.no_grad():
            answer_ids = model.backbone.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                pad_token_id=tokenizer.pad_token_id,
            )[0]

        # The returned sequence is the prompt followed by the continuation, so
        # the reply starts right after the prompt tokens. Slicing by prompt
        # length avoids searching for a hard-coded token id, which breaks when
        # that id is missing or also occurs inside the user's question.
        # skip_special_tokens removes a trailing EOS without discarding a real
        # token when generation stops at max_new_tokens instead.
        reply = tokenizer.decode(answer_ids[prompt_len:], skip_special_tokens=True)
        print('Bot:', reply)
        print('\n')

In [8]:
# Start the interactive session; it blocks on input() until 'end chat' is entered.
chat()

You: Tell me about AI Winter


Bot: 
AI Winter is a period of time when the field of artificial intelligence (AI) experiences a decline in funding and interest. This can be due to a number of factors, such as a lack of tangible results, a shift in public opinion, or a change in the economic climate. During an AI Winter, researchers and developers may turn to other fields or pursue other interests, and the progress of AI may slow down or even stall. However, AI Winters are not permanent, and they are often followed by periods of renewed interest and investment in the field.




You: Give me code example for image classification in PyTorch 


Bot: 
import torch
import torchvision
import torchvision.transforms as transforms

# Load the dataset
train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transforms.ToTensor())
test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transforms.ToTensor())

# Define the model
model = torchvision.models.resnet18(pretrained=True)

# Define the loss function and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Train the model
num_epochs = 10
for epoch in range(num_epochs):
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

# Evaluate the model
correct = 0
total = 0
with torch.no_grad():
    for data, target in test_loader:
     

You: end chat


Thank you for chatting with me. I hope you have a great day!
