# Dual-Space Knowledge Distillation for Large Language Models
- https://github.com/songmzhang/DSKD


## Setup of the corresponding conda environment
- Only needed initially

In [None]:
# Create the environment non-interactively: without --yes conda stops at a
# confirmation prompt, which a notebook cell cannot answer.
# A single "=" selects the newest 3.10.x release, whereas "==3.10" requests
# exactly version 3.10.
conda create --yes --name dskd python=3.10

In [None]:
# Activate the new environment first so the pinned packages are installed into
# it rather than into whichever environment the kernel was started from.
conda activate dskd
# "python -m pip" guarantees the pip of the currently active interpreter is used.
python -m pip install deepspeed==0.14.0 torch==2.0.1 transformers==4.40.2 peft==0.8.2 rouge_score==0.1.2 editdistance==0.8.1

## Activate environment

In [1]:
# Change into the DSKD checkout; the script paths used in later cells are relative to it.
cd "/home/thsch026/masterarbeit/experiment/DSKD"
# Switch the shell session to the dskd conda environment.
conda activate dskd

(/home/thsch026/my-envs/dskd) 
(/home/thsch026/my-envs/dskd) 
(/home/thsch026/my-envs/dskd) 


## Tasks to run

### Example: Finetuning of the Mistral model as a teacher
- IMPORTANT: The script mainly contains the parameters for the run. You need to make sure that the following things have been set correctly:
    - Base path: here "scripts/tinyllama/sft_teacher_mistral.sh"
    - Which GPUs to use
    - Directories in the model_hub where the models used by the script are located
    - Types for variables: bfloat16, for example, is not supported on older CUDA implementations

In [2]:
# Launch the teacher fine-tuning script. The path is relative, so the current
# directory must be the DSKD checkout.
scripts/tinyllama/sft_teacher_mistral.sh

Teacher is Mistral
(/home/thsch026/my-envs/dskd) 


### Find the results of the run in (Example only):
- Depends on the name of the model and the nature of the task
- At this location you find subdirectories whose names consist of the main parameters of the task

In [10]:
# Relative path: this only resolves when the current directory is the DSKD checkout.
cd outputs/mistral/mistral-7b-v0.1/sft/

(/home/thsch026/my-envs/dskd) 


### Start the distillation process using the finetuned Mistral as a teacher and tinyllama as the student model (FEHLERHAFT!)

In [3]:
# Run the distillation script from the DSKD checkout. "&&" prevents the script
# from being launched from some other directory if the cd fails.
cd /home/thsch026/masterarbeit/experiment/DSKD && scripts/tinyllama/dskd_tinyllama.sh

(/home/thsch026/my-envs/dskd) 
(/home/thsch026/my-envs/dskd) 


## Produktiver Lauf für das prune-lora-awq modell

### Nochmal ein Finetuning des Lehrer Modells
- Anpassen der Parameter im Script auf Mistral 7B Instruct v0.2

In [None]:
# Re-run the teacher fine-tuning script. The path is relative, so the current
# directory must be the DSKD checkout.
scripts/tinyllama/sft_teacher_mistral.sh

### Lauf des Lehrer Modells gegen das Prune_lora_awq model
- in dem Script müssen folgende Parameter angepasst werden
    - Pfad zu dem Student Model
    - Pfad zu dem Lehrermodel bzw. zu dem Checkpoint aus dem sft tuning
    - Precision Variable wurde auf fp16 geändert

In [None]:
# Run the distillation script from the DSKD checkout. "&&" prevents the script
# from being launched from some other directory if the cd fails.
cd /home/thsch026/masterarbeit/experiment/DSKD && scripts/toms/dskd_tommodel.sh

## Snippet to use for downloading certain models to the model hub for usage
- Must run in conda "awq" environment

In [3]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Hugging Face repo id to fetch, and the local model_hub directory to store it in.
hf_download = "mistralai/Mistral-7B-Instruct-v0.2"
save_location = "/home/thsch026/masterarbeit/experiment/DSKD/model_hub/mistral/mistral-7b-instruct-v0.2"

print ("Start Download")
model = AutoModelForCausalLM.from_pretrained(hf_download)
tokenizer = AutoTokenizer.from_pretrained(hf_download)

print ("Start saving model locally...")
# `safe_serialization` is the keyword save_pretrained documents for writing
# safetensors files; the previously used `safetensors=True` is not one of its
# parameters.
model.save_pretrained(save_location, safe_serialization=True)
# Store the tokenizer files next to the weights so the directory is self-contained.
tokenizer.save_pretrained(save_location)
print ("Saving complete")

Start Download
Start saving model locally...
Saving complete
LlamaTokenizerFast(name_or_path='mistralai/Mistral-7B-Instruct-v0.2', vocab_size=32000, model_max_length=1000000000000000019884624838656, is_fast=True, padding_side='left', truncation_side='right', special_tokens={'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>'}, clean_up_tokenization_spaces=False),  added_tokens_decoder={
	0: AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	1: AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	2: AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
}


## Snippet für das Merging des resultierenden student models (qlora3 kernel)
- In der Config-Datei des Adapters müssen Einträge wegen Inkompatibilität entfernt werden

In [8]:
from transformers import AutoTokenizer
from peft import AutoPeftModelForCausalLM

# Local path of the trained adapter.
model_id = "/home/thsch026/masterarbeit/experiment/DSKD/outputs/tinyllama/tinyllama-1.1b-3T/dual_space_kd/adapter"
# Output directory for the merged model, and the directory the tokenizer is loaded from.
save_location = "/home/thsch026/masterarbeit/models/generated/dist/tinyllama-1.1-3T_distilled"
tokenizer_path = "/home/thsch026/masterarbeit/experiment/DSKD/model_hub/tinyllama/tinyllama-1.1b-3T"

peft_model = AutoPeftModelForCausalLM.from_pretrained(model_id)
print(type(peft_model))

# Fold the adapter weights into the base model; the printed type shows which
# class merge_and_unload() returned.
merged_model = peft_model.merge_and_unload()
print(type(merged_model))

print ("Start saving the merged model to disc")
# Keep the path string and the loaded tokenizer under separate names so that
# re-running part of the cell never passes a tokenizer object as a path.
tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
# `safe_serialization` is the keyword save_pretrained documents for writing
# safetensors files; the previously used `safetensors=True` is not one of its
# parameters.
merged_model.save_pretrained(save_location, safe_serialization=True)
tokenizer.save_pretrained(save_location)
print ("Saving complete")

<class 'peft.peft_model.PeftModelForCausalLM'>
<class 'transformers.models.llama.modeling_llama.LlamaForCausalLM'>
Start saving the merged model to disc
Saving complete
LlamaTokenizerFast(name_or_path='/home/thsch026/masterarbeit/experiment/DSKD/model_hub/tinyllama/tinyllama-1.1b-3T', vocab_size=32000, model_max_length=1000000000000000019884624838656, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>'}, clean_up_tokenization_spaces=False),  added_tokens_decoder={
	0: AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	1: AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	2: AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
}
