In [None]:
%%time
## 5mins
!mamba install --force-reinstall aiohttp -y
!pip install -U "xformers<0.0.26" --index-url https://download.pytorch.org/whl/cu121
!pip install "unsloth[kaggle-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install datasets==2.16.0 fsspec==2023.10.0 gcsfs==2023.10.0

In [7]:
from unsloth import FastLanguageModel
import torch
max_seq_length = 4096 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

# 4bit pre quantized models we support for 4x faster downloading + no OOMs.
fourbit_models = [
    "unsloth/mistral-7b-bnb-4bit",
    "unsloth/mistral-7b-instruct-v0.2-bnb-4bit",
    "unsloth/llama-2-7b-bnb-4bit",
    "unsloth/llama-2-13b-bnb-4bit",
    "unsloth/codellama-34b-bnb-4bit",
    "unsloth/tinyllama-bnb-4bit",
    "unsloth/llama-3-8b-bnb-4bit",
    "unsloth/llama-3-70b-bnb-4bit",
] # More models at https://huggingface.co/unsloth

def init_model(model_name="unsloth/llama-3-8b-Instruct-bnb-4bit"):
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = model_name, # Choose ANY! eg teknium/OpenHermes-2.5-Mistral-7B
        max_seq_length = max_seq_length,
        dtype = dtype,
        load_in_4bit = load_in_4bit,
        # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
    )

    ## LoRA 
    ## 超参
    ## r: 8 or 16
    ## lora_dropout 0.2 or 0
    model = FastLanguageModel.get_peft_model(
        model,
        r = 8, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
        target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                          "gate_proj", "up_proj", "down_proj",],
        lora_alpha = 8,
        lora_dropout = 0.2, # Supports any, but = 0 is optimized
        bias = "none",    # Supports any, but = "none" is optimized
        use_gradient_checkpointing = "unsloth", # 4x longer contexts auto supported!
        random_state = 3407,
        use_rslora = False,  # We support rank stabilized LoRA
        loftq_config = None, # And LoftQ
    )
    return model,tokenizer

def save_checkpoint(dir='lora_model'):
    model.save_pretrained(dir)
    # model.push_to_hub("your_name/lora_model", token = "...") # Online saving

def load_checkpoint(dir='lora_model'):
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = dir, # YOUR MODEL YOU USED FOR TRAINING
        max_seq_length = max_seq_length,
        dtype = dtype,
        load_in_4bit = load_in_4bit,
    )
    return model,tokenizer

mode='load'## 'build'
model_name='unsloth/llama-3-8b-Instruct-bnb-4bit'
checkpoint_dir='/kaggle/input/llama-3-qg-checkpoint1/best_lora_model' ## 'best_lora_model'
if mode=='build':
    model,tokenizer=init_model(model_name)
else:
    model,tokenizer=load_checkpoint(checkpoint_dir)

config.json:   0%|          | 0.00/1.15k [00:00<?, ?B/s]

==((====))==  Unsloth: Fast Llama patching release 2024.6
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.2.2+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. Xformers = 0.0.25.post1. FA = False.
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth


model.safetensors:   0%|          | 0.00/5.70G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/131 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/51.1k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/459 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Unsloth 2024.6 patched 32 layers with 0 QKV layers, 0 O layers and 0 MLP layers.


In [31]:
import logging    
logging.getLogger().setLevel(logging.ERROR)
logging.getLogger("transformers").setLevel(logging.ERROR) 
def prepare_batch(texts,batch_size=1):
    i=0
    batch_texts=[]
    while i<len(texts):
        j=min(i+batch_size,len(texts))
        batch_texts.append(texts[i:j])
        i+=batch_size
    return batch_texts
def batch_generator(batch_input):
    inputs = tokenizer(
    [
        *batch_input
    ],padding=True,truncation=True,max_length=4096, return_tensors = "pt").to("cuda")
    
    outputs = model.generate(**inputs, 
                             max_new_tokens = 64, 
                             do_sample=False,
                             #num_beams=3,
                             use_cache = True,
                            )
    outputs=tokenizer.batch_decode(outputs,skip_special_tokens=True)
    return [output.split('### Response:\n')[-1].replace('\n','') for output in outputs]

from unsloth import FastLanguageModel
from tqdm.notebook import tqdm
def inference(inputs,batch_size=4):
    FastLanguageModel.for_inference(model)
    batch_texts=prepare_batch(inputs,batch_size)
    results=[]
    for batch in tqdm(batch_texts):
        results.extend(batch_generator(batch))
    return results

In [15]:
## 单个样本推理

answer='the tenth season'
context='''
Bigg Boss 10. Bigg Boss 10 is the tenth season of the Indian reality TV series "Bigg Boss".  It began airing on 16 October 2016 on Colors.  The show is also available after the original telecast on Viacom 18's digital platform – Voot.  A new element called ‘Unseen-Undekha’ was introduced by way of unseen footage uploaded on Voot. This footage showed parts of the day that weren’t included in the episode, from ‘wake-up call’ to ‘lights out’. 
Lopamudra Raut. Lopamudra Raut is an Indian model and beauty queen from the state of Maharashtra.  She represented India at Miss United Continents 2016 pageant and was crowned 2nd runner up.  She also won the third "Best National Costume" award for India.  Previous representatives of India, Gail Nicole Da Silva in 2014 and Sushrii Shreya Mishraa in 2015 also won the award.  She was a contestant of Bigg Boss 10. 
Bigg Boss 11. Bigg Boss 11 is the eleventh season of Indian reality TV series "Bigg Boss" that will be premiered on Colors TV.  Salman Khan will host this season for the seventh time in "Bigg Boss" history and third time in a row.  It is scheduled to premiere on 1 October 2017 Mon - Fri 10.30pm and SAT - SUN lun with the finale set in 2018 
Bigg Boss 9. Bigg Boss 9, also known as Bigg Boss: Double Trouble, (stylized as Bigg Boss: Nau), was the ninth season of the Indian reality TV series "Bigg Boss" that premiered on 11 October 2015 on Colors TV.  Salman Khan returned to host the ninth season. 
Bigg Boss 1. Bigg Boss in 2006 was the first season of the Indian reality TV programme "Bigg Boss".  It aired on Sony Entertainment Television from 3 November 2006 to 26 January 2007, a total of 86 days.  Unlike other versions of "Big Brother", the Indian version uses celebrities as housemates, not members of the general public.  It was hosted by the Bollywood Actor "Arshad Warsi". 
Bigg Boss 3. Bigg Boss 3 in 2009 was the third season of the Indian reality TV programme "Bigg Boss".  It began airing on 4 October 2009 on Colors with Amitabh Bachchan as the host and aired for 84 days concluding on 26 December 2009.  Vindu Dara Singh won the show while Pravesh Rana was declared the first runner-up and Poonam Dhillon was declared the second runner-up.  Vindu was awarded with a prize money of INR 10 million.  He was also announced the most stylish and bold contestant and won a Chevrolet Cruze.  This season, the house was located the city of Lonavla in the Indian state of Maharashtra. 
Bigg Boss 4. Bigg Boss 4 in 2010 was the fourth season of Indian reality TV show "Bigg Boss", which aired on Colors from 3 October 2010.  This season was longer than its predecessor, "Bigg Boss 3" and lasted for 14 weeks (96 days) ending on 8 January 2011.  The show was hosted by Salman Khan. 
Bigg Boss 2. Bigg Boss 2 was the 2008 second season of the Indian reality TV programme "Bigg Boss".  It began airing on 21 August 2008 on Colors.  Shilpa Shetty replaced Arshad Warsi as host of the show.  Fourteen handpicked housemates entered during the launch and were described "newsmakers" rather than celebrities, though the majority of the contestants were associated with Bollywood or Indian TV channels and other realities shows.  The housemates, considered strangers for each other, spent 98 days or nearly 14 weeks locked out together under one roof under the 24×7 supervision of 32 cameras fitted around the "Bigg Boss" house at Lonavala, a hill station about 100 km east of Mumbai. 
Bigg Boss 6. Bigg Boss 6 was the sixth season of the Indian reality TV show Bigg Boss, which is telecast on the TV channel Colors.  "Bigg Boss" is the Indian edition of "Big Brother" TV series.  The season started from 7 October 2012.  Salman Khan, who was the host of the previous two seasons, returned as the host for the show.  The sixth season was launched as a "Parivarik" season with a Gujarati tagline- "Alag che!"  (English: It's different).  The producers claimed that the contestants on "Bigg Boss 6" will be presented with a cleaner, more "family like image".  The prize money was reduced to million () with an amount of 500,000s awarded to the "most entertaining" housemate each week from week 6 onwards.  The award was discontinued after four weeks for unknown reasons. 
Bigg Boss 7. Bigg Boss 7 (tagline: "Jannat Ka Wow Aur Jahannam Ka Aaw Dekhege Saath Saath") is the seventh season of the Indian reality TV series "Bigg Boss" which aired on TV channel Colors TV from 15 September 2013, with Salman Khan returning as the host for the fourth time and this season is longer than its predecessor, "Bigg Boss 6" and lasted for 15 weeks (104 days) concluding on Saturday, 28 December 2013.  The seventh season was launched with the tagline- 'Jannat Ka Wow Aur Jahannam Ka Aaw Dekhege Saath Saath'.  The show started airing at 9:00 everyday from 15 September. 

'''
inputs=[
    f'''Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction:
The following is an answer and ten paragraphs. Please think about which two paragraphs are most relevant to the given answer,then generate a question according to the answer and two paragraphs.
Please note that your Response needs to be in the following format.
## Question: 


### Input:
Answer: {answer}
Context:{context}



 ### Response:
 '''
]
inference(inputs,1)

['## Question: What season of Bigg Boss did Lopamudra Raut participate in?']

In [19]:
import datasets
import random
dataset=datasets.load_from_disk('/kaggle/input/llama-3-qg-checkpoint1')
dataset.save_to_disk(dataset_path='./')
dataset=datasets.load_from_disk('./')

alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

 ### Response:
{}"""

EOS_TOKEN = '<|end_of_text|>' # Must add EOS_TOKEN
## 训练数据要加结束符，验证和测试不用
training=True
def formatting_prompts_func(examples):
    instructions = examples["instruction"]
    inputs       = examples["input"]
    outputs      = examples["output"]
    texts = []
    for instruction, input, output in zip(instructions, inputs, outputs):
        # Must add EOS_TOKEN, otherwise your generation will go on forever!
        if training:
            output=f'## Question: {output}\n{EOS_TOKEN}'
        else:
            output=''
        text = alpaca_prompt.format(instruction, input, output) #+ EOS_TOKEN
        
        texts.append(text)
    return { "text" : texts, }



training=False
dataset = dataset.map(formatting_prompts_func, batched = True,)
dataset.shuffle()



Map:   0%|          | 0/25272 [00:00<?, ? examples/s]

Dataset({
    features: ['instruction', 'input', 'output', 'text'],
    num_rows: 25272
})

In [33]:
%%time
## 批量推理
nums=20
inputs=dataset['text'][:nums]
real_outputs=['## Question: '+out for out in dataset['output'][:nums]]
outputs=inference(inputs,4)
for i in range(nums):
    print('real:',outputs[i])
    print('predict:',real_outputs[i])

  0%|          | 0/5 [00:00<?, ?it/s]

real: ## Question: Which breed was recognized by the kennel club first, the Welsh Springer Spaniel or the English Springer Spaniel?
predict: ## Question: Which of the two dog breeds, the English Springer Spaniel or the Landseer, recognized by all the kennel clubs?
real: ## Question: The main character of Bells Are Ringing was based on whom?
predict: ## Question: The song "Just in Time" was introduced in a musical whose main character is based on what woman?
real: ## Question: What was the cast member of Saturday Night Live known for?
predict: ## Question: In the early years of "Saturday Night Live" an archetypal samurai was portrayed by an American comedian best known for what?
real: ## Question: Which band was formed first, Kiwi Time or Giant?
predict: ## Question: Which band has more founding members, Fuel or Kiwi Time?
real: ## Question: What linguistic group is the people who use the De-No-To Cultural District from?
predict: ## Question: What linguistic group are the Native America