## notebook

In [None]:
import os
# Route Hugging Face Hub traffic through the hf-mirror.com mirror.
# Kept in the very first cell so it is set before the HF libraries are imported.
os.environ.update({'HF_ENDPOINT': 'https://hf-mirror.com'})

In [None]:
import warnings

import pandas as pd
import torch
from datasets import Dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    DataCollatorForSeq2Seq,
    GenerationConfig,
    Trainer,
    TrainingArguments,
    get_scheduler,
)

# Silence every Python warning for the rest of the session.
warnings.filterwarnings('ignore')
# Limit PyTorch to 12 CPU threads.
torch.set_num_threads(12)

In [None]:
# Read the raw records from JSON and wrap them in a Hugging Face Dataset.
DATA_JSON_PATH = '../Data/data.json'
df = pd.read_json(DATA_JSON_PATH)
ds = Dataset.from_pandas(df)

In [None]:
# Load the slow (pure-Python) tokenizer from the local Qwen2 checkpoint directory.
# `torch_dtype` and `device_map` are model-loading options: a tokenizer has no
# weights to cast or place on a device, so they are not passed here.
tokenizer = AutoTokenizer.from_pretrained(
    './qwen/Qwen2-0___5B-Instruct',
    use_fast=False,
    trust_remote_code=True,
)
# Display the tokenizer summary as the cell output.
tokenizer

In [None]:
from datasets import load_dataset
def process_func(example, max_length=100000):
    """Convert one raw record into a causal-LM training example.

    Uses the notebook-level ``tokenizer``. The prompt (system instruction plus
    ``example['prompt']``) is masked out of the loss with ``-100`` labels; the
    response (built from ``example['emotion_label']`` and
    ``example['treatment']``) and a trailing ``tokenizer.pad_token_id`` used as
    the end-of-sequence marker are the supervised targets.

    Args:
        example: Mapping with the keys ``'prompt'``, ``'emotion_label'`` and
            ``'treatment'``.
        max_length: Maximum number of tokens in the returned sequences. The
            prompt may use at most ``max_length // 2`` of them.

    Returns:
        dict with equally long lists ``input_ids``, ``attention_mask`` and
        ``labels``.
    """
    # Tokenize the prompt; truncation happens here so it never uses more than
    # half of the length budget.
    prompt = tokenizer(
        f"system\nYou are an EEG emotion analyzer. I will input the patient's personal information and the EEG signals collected from some electrode positions. Please help me infer the patient's current emotion based on this signal. \nuser\n{example['prompt']}\nassistant\n",
        add_special_tokens=False,
        truncation=True,
        max_length=max_length // 2,
    )

    response = tokenizer(
        f"Emotion: {example['emotion_label']}. Treatment: {example['treatment']}.",
        add_special_tokens=False,
    )

    prompt_ids = prompt["input_ids"]

    # Reserve one slot for the terminator and trim the response (not the
    # finished sequence) to fit. Slicing the finished sequence would cut off
    # the terminator, leaving an over-long example with no end-of-sequence
    # target to learn from.
    response_budget = max(max_length - len(prompt_ids) - 1, 0)
    response_ids = response["input_ids"][:response_budget]
    response_mask = response["attention_mask"][:response_budget]
    terminator = [tokenizer.pad_token_id]

    return {
        "input_ids": prompt_ids + response_ids + terminator,
        # The terminator is a real target token, so it is attended to.
        "attention_mask": prompt["attention_mask"] + response_mask + [1],
        # -100 is ignored by the loss, so only response + terminator are learned.
        "labels": [-100] * len(prompt_ids) + response_ids + terminator,
    }
# Tokenize every record with 12 worker processes (adjust num_proc as needed);
# the original columns are dropped so only the fields returned by process_func remain.
tokenized_id = ds.map(
    process_func,
    num_proc=12,
    remove_columns=ds.column_names,
)

In [None]:
# Sanity check: decode the first tokenized example back to text to inspect its formatting.
tokenizer.decode(tokenized_id[0]['input_ids'])

In [None]:
# Sanity check: decode only the supervised label tokens of the second example
# (-100 marks positions that are excluded from the loss).
tokenizer.decode([token_id for token_id in tokenized_id[1]["labels"] if token_id != -100])

In [None]:
import torch
from datasets import Dataset
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM, DataCollatorForSeq2Seq, TrainingArguments, Trainer, GenerationConfig, BitsAndBytesConfig
# Load the base model in bfloat16 from the ModelScope cache.
# NOTE(review): `model` is reassigned by the next cell, which loads from
# './qwen/Qwen2-0___5B-Instruct', so this load looks redundant, and the
# absolute path only exists on the original machine. Confirm which cell is intended.
model = AutoModelForCausalLM.from_pretrained(
    '/root/.cache/modelscope/hub/qwen/Qwen2-0.5B-Instruct',
    torch_dtype=torch.bfloat16,
    device_map="balanced_low_0",
)

In [None]:
# Load the base model in bfloat16 from the local checkpoint onto the first GPU.
# NOTE(review): ignore_mismatched_sizes=True can hide a checkpoint/config
# shape mismatch. Confirm it is actually needed for this checkpoint.
model = AutoModelForCausalLM.from_pretrained(
    './qwen/Qwen2-0___5B-Instruct',
    trust_remote_code=True,
    device_map='cuda:0',
    torch_dtype=torch.bfloat16,
    ignore_mismatched_sizes=True,
)
from collections import defaultdict

def _module_group(param_name):
    """Map a parameter name to the coarse group it is reported under.

    Examples of returned groups: "mlp.gate_proj", "self_attn.q_proj",
    "layernorm", "embed_tokens", "final_norm", "others".
    """
    pieces = param_name.split(".")
    if "mlp" in pieces:
        wanted = {"mlp", "gate_proj", "up_proj", "down_proj"}
        return ".".join(piece for piece in pieces if piece in wanted)
    if "self_attn" in pieces:
        wanted = {"self_attn", "q_proj", "k_proj", "v_proj", "o_proj"}
        return ".".join(piece for piece in pieces if piece in wanted)
    # Substring checks: "layernorm" must be tested before the broader "norm".
    if "layernorm" in param_name:
        return "layernorm"
    if "embed_tokens" in param_name:
        return "embed_tokens"
    if "norm" in param_name:
        return "final_norm"
    return "others"


# Sum parameter counts per group and overall.
param_counts = defaultdict(int)
total_params = 0
for param_name, tensor in model.named_parameters():
    n_elements = tensor.numel()
    param_counts[_module_group(param_name)] += n_elements
    total_params += n_elements

# Report the groups, largest first.
print(f"{'Module Type':<30} : Parameters")
print("-" * 50)
for module, count in sorted(param_counts.items(), key=lambda item: -item[1]):
    print(f"{module:<30} : {count:,}")

print(f"\nTotal parameters: {total_params:,}")


In [None]:
model.enable_input_require_grads() # Must be called when gradient checkpointing is enabled (gradient_checkpointing=True in the TrainingArguments cell).

In [None]:
# Show the dtype of the loaded model (it was loaded with torch_dtype=torch.bfloat16).
model.dtype

In [None]:
# Display the module tree of the loaded model.
model

In [None]:
from peft import LoraConfig, TaskType, get_peft_model

# Attention and MLP projection layers that receive LoRA adapters.
LORA_TARGET_MODULES = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    inference_mode=False,  # training mode
    target_modules=LORA_TARGET_MODULES,
    r=8,  # LoRA rank
    lora_alpha=32,  # LoRA alpha
    # NOTE(review): 0.5 is a very high adapter dropout; confirm it is intentional.
    lora_dropout=0.5,
)
# Display the resulting config as the cell output.
config

In [None]:
# Wrap the base model with the LoRA adapters described by `config`.
# NOTE(review): this rebinds `model`, so re-running the cell would wrap an
# already wrapped model; reload the base model before running it again.
model = get_peft_model(model, config)
# Displays the LoRA config again (not the wrapped model).
config

In [None]:
# Print how many parameters are trainable after wrapping the model with LoRA.
model.print_trainable_parameters()

In [None]:
args = TrainingArguments(
    # Output and logging
    output_dir="./qwem2_0.5b_new/exp1",
    logging_steps=10,
    save_steps=3850,
    save_on_each_node=True,
    # Batching: 1 example per device, accumulated over 4 steps per update
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    dataloader_num_workers=16,
    # Optimisation
    num_train_epochs=6,
    learning_rate=1e-5,
    lr_scheduler_type="cosine",
    weight_decay=0.01,
    max_grad_norm=2.0,
    # Gradient checkpointing
    gradient_checkpointing=True,
)

In [None]:
# Collator that pads every batch via the tokenizer (padding=True).
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, padding=True)

trainer = Trainer(
    model=model,
    args=args,
    data_collator=data_collator,
    train_dataset=tokenized_id,
)

In [None]:
# Create a learning rate scheduler with 100 warmup steps.
import math

# The scheduler advances once per optimizer update, not once per batch, so the
# step count must be divided by gradient_accumulation_steps. Without that the
# cosine schedule is several times too long and the learning rate never
# finishes decaying.
# Assumes a single training process/device; scale the batch size by the
# number of devices for multi-GPU runs.
batches_per_epoch = math.ceil(len(tokenized_id) / args.per_device_train_batch_size)
updates_per_epoch = max(math.ceil(batches_per_epoch / args.gradient_accumulation_steps), 1)
num_training_steps = math.ceil(args.num_train_epochs * updates_per_epoch)

# Build the optimizer so the custom scheduler can be attached to it.
trainer.create_optimizer_and_scheduler(num_training_steps=num_training_steps)

scheduler = get_scheduler(
    name=args.lr_scheduler_type,
    optimizer=trainer.optimizer,
    num_warmup_steps=100,  # Set the number of learning rate warmup steps.
    num_training_steps=num_training_steps,
)

# Replace the scheduler created above with the warmup-enabled one.
trainer.lr_scheduler = scheduler

In [None]:
# Turn off the memory-efficient scaled-dot-product-attention CUDA backend
# (the reason is not recorded in this notebook).
torch.backends.cuda.enable_mem_efficient_sdp(False)

In [None]:
# Run the LoRA fine-tuning with the trainer configured above.
trainer.train()

In [None]:
# Merge the LoRA adapters into the base model and save the merged weights.
new_model_directory = "./merged_model_qwen2_0.5b_new1/"
merged_model = model.merge_and_unload()
# Weights are written in safetensors format, sharded so that no file
# exceeds 2 GB (2048 MB).
merged_model.save_pretrained(
    new_model_directory,
    safe_serialization=True,
    max_shard_size="2048MB",
)

In [None]:
# Copy the base model's config.json into the merged-model directory.
# NOTE(review): this overwrites any config.json already in that directory
# (save_pretrained presumably wrote one); confirm the copy is still needed.
!cp /root/.cache/modelscope/hub/qwen/Qwen2-0.5B-Instruct/config.json ./merged_model_qwen2_0.5b_new1/

## Restart notebook

In [None]:
import torch
import torch.nn as nn
import torch.quantization as quantization
from transformers import BertModel, BertTokenizer
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from peft import PeftModel

# Post-training INT8 weight-only quantization of the token-embedding layer.
import os  # local import: this cell runs after a kernel restart

# NOTE(review): the merge cell earlier in this notebook writes to
# './merged_model_qwen2_0.5b_new1/' and saves no tokenizer there. Confirm that
# './merged_model' is the intended, complete checkpoint directory.
model_path='./merged_model'
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

# Load the model in float32 on the CPU. No device_map is passed, so the model
# is never dispatched to an accelerator and does not have to be moved back.
model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float, trust_remote_code=True)

# Set model to evaluation mode
model.eval()

# Attach the weight-only qconfig to the input embedding layer. A causal LM has
# no `.embeddings` attribute (that is BERT-specific), so the layer is obtained
# through get_input_embeddings().
embedding_layer = model.get_input_embeddings()
embedding_layer.qconfig = quantization.float_qparams_weight_only_qconfig

# Prepare the model for quantization
model_prepared = quantization.prepare(model, inplace=False)

# Define test input data and ensure it is on CPU
input_text = "This is a test sentence."
inputs = tokenizer(input_text, return_tensors="pt")
inputs = {k: v.to('cpu') for k, v in inputs.items()}

# Calibration forward pass on the prepared model (post-training quantization;
# no training happens here, so this is not QAT).
with torch.no_grad():
    outputs = model_prepared(**inputs)

# Convert the model to its quantized version
model_quantized = quantization.convert(model_prepared, inplace=False)

# Print the structure of the quantized model
print("Quantized model architecture:")
print(model_quantized)

# Test the quantized model
with torch.no_grad():
    quantized_outputs = model_quantized(**inputs)

# A causal-LM forward pass returns logits; it has no `last_hidden_state`.
print("Quantized output:", quantized_outputs.logits)

# Save the quantized model. The quantized embedding keeps packed parameters in
# its state dict that the sharded safetensors writer is not expected to
# handle, so the state dict is written with torch.save instead. The config and
# tokenizer are saved next to it.
int8_model_directory='INT8'
os.makedirs(int8_model_directory, exist_ok=True)
torch.save(
    model_quantized.state_dict(),
    os.path.join(int8_model_directory, "quantized_state_dict.pt"),
)
model_quantized.config.save_pretrained(int8_model_directory)
tokenizer.save_pretrained(int8_model_directory)