In [1]:
%%capture
# Install / upgrade the fine-tuning stack; %%capture hides pip's output for this cell.
# NOTE(review): every package except `datasets` is unpinned, so a fresh run may
# pull different versions than the ones this notebook was developed with.
%pip install -U bitsandbytes
%pip install -U transformers
%pip install -U peft
%pip install -U accelerate
# trl is installed from the GitHub repository (next line) instead of the PyPI release:
# %pip install -U trl 
%pip install git+https://github.com/huggingface/trl.git
# datasets is the only dependency pinned to an exact version.
%pip install datasets==2.16.0

In [2]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig,HfArgumentParser,TrainingArguments,pipeline, logging, TextStreamer
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training, get_peft_model
import os, torch, wandb, platform, warnings
from datasets import load_dataset
from trl import SFTTrainer
from huggingface_hub import notebook_login
# Use a sharded model so fine-tuning fits in the free version of Google Colab.
# base_model = "/kaggle/input/mistral/pytorch/7b-instruct-v0.1-hf/1" #bn22/Mistral-7B-Instruct-v0.1-sharded
base_model = "alexsherstinsky/Mistral-7B-v0.1-sharded"
# Path handed to `load_dataset` for the training data.
dataset_name = "/kaggle/input/games-from-reddit"
# Directory the fine-tuned LoRA adapter is saved to after training and
# reloaded from when merging it into the base model. Without this definition
# those cells fail with NameError on a fresh kernel.
new_model = "mistral_game_7b"

2024-04-10 09:31:51.840975: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-10 09:31:51.841111: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-10 09:31:52.009828: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [None]:
# Load the training split of the dataset found at `dataset_name`.
dataset = load_dataset(dataset_name, split="train")

# Optional: keep only the first third of the data.
# split_size = len(whole_dataset["text"]) // 3
# dataset = whole_dataset.select(range(split_size))

# Summary of the loaded dataset (features and row count).
print(dataset)

# Peek at a single example. Index the row first and the column second:
# `dataset["text"][32000]` would decode the entire "text" column into a Python
# list just to read one element, whereas row-first indexing reads one record.
print(dataset[32000]["text"])


In [None]:
# Quantization settings: 4-bit NF4 weights, bfloat16 compute, no double quantization.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=False,
)

# Base model (Mistral 7B), quantized while loading; the device map sends the
# whole model ("" = root module) to device 0.
model = AutoModelForCausalLM.from_pretrained(
    base_model, quantization_config=bnb_config, device_map={"": 0}
)

# Training-time settings: no KV cache, gradient checkpointing on.
model.config.use_cache = False
model.config.pretraining_tp = 1
model.gradient_checkpointing_enable()

# Tokenizer: reuse EOS as the padding token and append EOS to encoded text.
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.add_eos_token = True

# Cell output: show the BOS/EOS flags as a sanity check.
(tokenizer.add_bos_token, tokenizer.add_eos_token)

In [None]:
# Authenticate with Weights & Biases and start the tracking run.
#
# SECURITY: never embed the API key in the notebook. A key that was previously
# hard-coded in this cell must be treated as leaked and revoked in the W&B
# account settings. Provide the key through the WANDB_API_KEY environment
# variable (e.g. from the platform's secret store); when it is unset, `key` is
# None and wandb falls back to its own credential lookup.
# !wandb login --relogin
wandb.login(key=os.environ.get("WANDB_API_KEY"))
run = wandb.init(project='Game Fine tuning mistral 7B', job_type="training", anonymous="allow")

In [None]:
# Prepare the quantized model for k-bit training before adapters are attached.
model = prepare_model_for_kbit_training(model)

# LoRA adapter settings: rank 16, alpha 16, 5% dropout, no bias terms, applied
# to the attention projections and the MLP gate projection.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj"],
)

# Wrap the model so the LoRA adapters defined above are added to it.
model = get_peft_model(model, peft_config)

In [None]:
# Training arguments (hyperparameters)
training_arguments = TrainingArguments(
    # Output, checkpointing and logging
    output_dir="./results",
    save_steps=25,
    logging_steps=25,
    report_to="wandb",
    # Run length and batching
    num_train_epochs=1,
    max_steps=-1,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,
    group_by_length=True,
    # Optimizer and learning-rate schedule
    # NOTE(review): with lr_scheduler_type="constant" the warmup_ratio is
    # probably ignored ("constant_with_warmup" would apply it) - confirm intent.
    optim="paged_adamw_32bit",
    learning_rate=2e-4,
    lr_scheduler_type="constant",
    warmup_ratio=0.03,
    weight_decay=0.001,
    max_grad_norm=0.3,
    # Mixed precision is disabled in both flavours
    fp16=False,
    bf16=False,
)


# Supervised fine-tuning trainer: trains on the "text" column, no packing.
trainer = SFTTrainer(
    model=model,
    args=training_arguments,
    train_dataset=dataset,
    tokenizer=tokenizer,
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=None,
    packing=False,
)


In [None]:
# Continue training from the previously saved checkpoint-600 directory.
trainer.train(
    resume_from_checkpoint="/kaggle/input/mistral_game_7b/pytorch/checkpoint-600/1/checkpoint-600"
)

# Persist the fine-tuned model under the name/path held in `new_model`.
trainer.model.save_pretrained(new_model)

# Close the Weights & Biases run.
wandb.finish()

# Switch the model back to inference settings: KV cache on, eval mode.
model.config.use_cache = True
model.eval()

# TESTING

In [4]:
# Inference setup: adapter checkpoint path and the base model to evaluate.
adapters_name = "/kaggle/input/mistral_game_7b/pytorch/checkpoint-600/1/checkpoint-600"
# model_name = "alexsherstinsky/Mistral-7B-v0.1-sharded" # for 2nd version
model_name = "/kaggle/input/mistral/pytorch/7b-instruct-v0.1-hf/1" # for 1st version
device = "cuda" # the device to load the model onto

# Load base model (Mistral 7B) with 4-bit NF4 quantization and bfloat16 compute.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=False,
)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map={"": 0}
)
# This section only generates text, so the KV cache stays enabled and gradient
# checkpointing is not turned on: both of the training-time settings
# (use_cache=False, gradient checkpointing) only slow down or do nothing for
# inference.
model.config.use_cache = True
model.config.pretraining_tp = 1

# Load tokenizer; EOS doubles as the padding token.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
# NOTE(review): add_eos_token=True also appends EOS to prompts passed to
# generate(), which is presumably what triggers the "right-padding was
# detected" warning seen during generation - confirm this is intended for
# inference before changing it, since it affects the generated answers.
tokenizer.add_eos_token = True
tokenizer.add_bos_token, tokenizer.add_eos_token




Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

  return self.fget.__get__(instance, owner)()


(True, True)

In [6]:
import kagglehub

# kagglehub.login()

In [7]:
# Fetch the latest version of the published adapter through kagglehub; the
# adapter files live in the "checkpoint-300" sub-directory of the download path.
adapters_name = f"{kagglehub.model_download('damiron4/mistral_game_7b/pyTorch/checkpoint-300')}/checkpoint-300"

print(f"Path to model files: {adapters_name}")

Attaching model 'damiron4/mistral_game_7b/pyTorch/checkpoint-300' to your Kaggle notebook...


Path to model files: /kaggle/input/mistral_game_7b/pytorch/checkpoint-300/1/checkpoint-300


In [8]:
# Attach the adapter at `adapters_name` to the loaded base model and put the
# resulting model into evaluation mode.
model = PeftModel.from_pretrained(model, adapters_name).eval()
print(f"Successfully loaded the model {model_name} into memory")

Successfully loaded the model /kaggle/input/mistral/pytorch/7b-instruct-v0.1-hf/1 into memory


In [None]:
# for the 2nd model

def stream(user_prompt, model, tokenizer):
    """Stream a game recommendation for ``user_prompt`` as it is generated.

    The prompt is the system instruction followed by the stripped user text
    wrapped in ``[INST]`` ... ``[/INST]``. Generated tokens (at most 128) are
    pushed through a ``TextStreamer`` that skips the prompt and special tokens.

    Args:
        user_prompt: Free-text request from the user.
        model: Causal language model exposing ``generate``.
        tokenizer: Tokenizer matching ``model``.

    Returns:
        None. The generated text is only emitted by the streamer.
    """
    B_INST, E_INST = "[INST]", "[/INST]"
    system_prompt = 'You are a helpful AI game recommender. Your task is to suggest a few games and justify your choice based on the given information '
    prompt = f"{system_prompt}{B_INST}{user_prompt.strip()}\n{E_INST}"

    # Send the inputs to wherever the model lives instead of assuming GPU 0;
    # "cuda:0" remains the fallback for objects without a `device` attribute.
    target_device = getattr(model, "device", "cuda:0")
    inputs = tokenizer([prompt], return_tensors="pt").to(target_device)

    streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    # Passing pad_token_id explicitly selects the same value generate() would
    # fall back to (EOS) but avoids the warning it logs on every call.
    _ = model.generate(
        **inputs,
        streamer=streamer,
        max_new_tokens=128,
        pad_token_id=tokenizer.eos_token_id,
    )
    


In [9]:
# for the 1st model
def stream(user_prompt, model, tokenizer):
    """Generate a reply for ``user_prompt`` and return it as a string.

    The stripped user text is wrapped as ``"[INST] <text> [/INST] "`` and
    passed to ``model.generate`` with a budget of 128 new tokens.

    Args:
        user_prompt: Free-text request from the user.
        model: Causal language model exposing ``generate``.
        tokenizer: Tokenizer matching ``model``.

    Returns:
        The first generated sequence decoded with special tokens skipped. The
        decoded sequence is whatever ``generate`` returns, so callers split on
        ``"[/INST] "`` to isolate the answer.
    """
    B_INST, E_INST = "[INST] ", " [/INST] "
    prompt = f"{B_INST}{user_prompt.strip()}{E_INST}"

    # Send the inputs to wherever the model lives instead of assuming GPU 0;
    # "cuda:0" remains the fallback for objects without a `device` attribute.
    target_device = getattr(model, "device", "cuda:0")
    inputs = tokenizer([prompt], return_tensors="pt").to(target_device)

    # Generate without streaming. Passing pad_token_id explicitly selects the
    # same value generate() would fall back to (EOS) but avoids the warning it
    # logs on every call.
    output = model.generate(
        **inputs,
        max_new_tokens=128,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Decode the first (only) sequence back to text.
    return tokenizer.decode(output[0], skip_special_tokens=True)


In [10]:
# Example prompt.
# The text contains double quotes and a line break, so it has to be a
# triple-quoted string; as a plain "..." literal the cell does not parse.
user_prompt = """I beat Mass Effect 1 and 2 not too long ago, Fable 2 a few days ago (It was alright, I have a few gripes about it though - story was lacking big time, "moral choices" were too black and white, etc.), Deus Ex:HR a few weeks ago (It was great, but too easy), I tried out DX1 but can't quite get over the dated graphics, UI, and the fact it doesn't like working in Win7 (Even with all of the proper updating).  I've played (And used to love) Everquest, Asheron's Call, TFC (But not TF2), Borderlands, Oblivion + Morrowind + Fallout 3 (All feel so dead with poor art direction to me, but are fun for awhile), Amnesia (Such an unnerving game), Wurm Online (Gets way too grindy with such an awful.... Everything), and the list goes on... Can't think of much else that really "defines" me or my taste in gaming.  I'm looking for some type of game to kill time until 11/11/11 (Sorry for using DD/MM/YY, I just prefer it), and honestly I think I may want that game to be an MMO, a free to play MMO. I've been playing around with Champion's Online and it's not awful, but it's not really all that great either. I also have been **trying** to get into Forsaken World, but it's so poorly done that I just can't force myself into it. Maybe MMOs are too cookie cutter, trying to become carbon copies of WoW - forgetting what made people love them to begin with - for me to bother... So any type of game advice is welcome. Although if you agree with that statement and are currently enjoying an MMO, please do share.  Anyways, I found that this sub-reddit just opened and decided to be the first poster... I'll be sticking around and probably offering up some advice for games if anyone else posts in the near future. 
Also a tip for the mods: The description is a little confusing, it starts off "To give advice on games" - making it seem like you are meant to make posts supporting one game or another, but the description ends with "you want helpful suggestions on" - which is what I took from the description, but it feels contradictory What's the point of that?"""
# Call the stream function with the loaded model and tokenizer
print(stream(user_prompt, model, tokenizer))


#

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


[INST] Recommend a game like Dota 2 [/INST]  1. League of Legends 2. Smite 3. Heroes of the Storm 4. Dota 2 5. Hearthstone 6. Overwatch 7. Starcraft 2 8. Team Fortress 2 9. Warframe 10. World of Warcraft 11. Counter Strike: Global Offensive 12. Path of Exile 13. Diablo 3 14. Hearthstone 15. Dota 2 16. League of Legends 17. Hearthstone


Generate answers for evaluation on test data

In [None]:


# Generate one prediction per test example.
# Input: one "<labels>\t<prompt>" record per line.
# Output: "<labels>\t<prediction>" lines appended to output_first_model_1.txt.
# Requires the `stream` variant that returns the decoded text.
with open("/kaggle/input/games-reddit/test_data_1.txt", encoding='utf-8') as f:
    test_file = f.readlines()

with open("output_first_model_1.txt", 'a', encoding='utf-8') as f2:
    for line in test_file:
        fields = line.split('\t')
        if len(fields) < 2:
            # Blank or malformed line (no tab): skip it instead of aborting
            # the whole run with an IndexError.
            continue
        labels, prompt = fields[0], str(fields[1])

        # Quotes are removed from the prompt before it is sent to the model.
        processed_prompt = prompt.replace("'", "").replace('"', '')
        full_ans = stream(processed_prompt, model, tokenizer)

        # The answer is the text after the prompt's closing tag. If the tag is
        # missing from the decoded text (e.g. nothing was generated), record an
        # empty prediction rather than crashing part-way through the test set.
        answer_parts = full_ans.split('[/INST] ')
        predict = answer_parts[1] if len(answer_parts) > 1 else ''

        f2.write(labels + '\t' + predict + '\n')


    
    

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
Setting `pad_token_id` to `eos_token

In [None]:
# Check the generated file: read back the predictions written by the
# generation loop. The path and encoding match the ones used for writing
# ("output_first_model_1.txt", UTF-8).
with open("output_first_model_1.txt", encoding='utf-8') as f:
    data = f.read()
print(data)

In [None]:
# Release the fine-tuned model and the trainer, then clear the CUDA cache.
del model
del trainer
torch.cuda.empty_cache()

# Load the base model again, this time in bfloat16 without a quantization
# config, entirely on device 0.
base_model_reload = AutoModelForCausalLM.from_pretrained(
    base_model,
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
    return_dict=True,
    device_map={"": 0},
)

# Attach the adapter stored under `new_model` and merge it into the base model.
model = PeftModel.from_pretrained(base_model_reload, new_model).merge_and_unload()

# Fresh tokenizer: EOS doubles as the padding token, padding on the right.
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token