### <font color='red'> DEPENDENCIES </font>

In [1]:
from kaggle_secrets import UserSecretsClient
# Read both access tokens from the Kaggle secrets client so that no credential
# is hardcoded in the notebook.
user_secrets = UserSecretsClient()
github_token = user_secrets.get_secret("github_token")  # interpolated into the git clone URL below
hf_token = user_secrets.get_secret("hf_token")  # passed to huggingface_hub.login below

In [2]:
import subprocess
from pathlib import Path

repo = "llm"
clone_url = f"https://hmzhan:{github_token}@github.com/hmzhan/{repo}.git"

# Skip the clone when the checkout already exists: git refuses to clone into a
# non-empty directory, which made this cell fail on every re-run.
if Path(repo).exists():
    print(f"'{repo}' already exists, skipping clone")
else:
    # An argument list (no shell) keeps the token-bearing URL out of shell parsing.
    result = subprocess.run(["git", "clone", clone_url], capture_output=True, text=True)

    # git may echo the remote URL in its messages; redact the token before
    # anything is written to the (saved) cell output.
    clone_log = result.stdout + result.stderr
    if github_token:
        clone_log = clone_log.replace(github_token, "***")
    print(clone_log)

    # Raise a token-free error instead of CalledProcessError, whose message
    # would include the full command line (and therefore the token).
    if result.returncode != 0:
        raise RuntimeError(f"git clone of '{repo}' failed with exit code {result.returncode}")

Cloning into 'llm'...
remote: Enumerating objects: 154, done.[K
remote: Counting objects: 100% (154/154), done.[K
remote: Compressing objects: 100% (120/120), done.[K
remote: Total 154 (delta 46), reused 121 (delta 28), pack-reused 0 (from 0)[K
Receiving objects: 100% (154/154), 189.34 KiB | 9.02 MiB/s, done.
Resolving deltas: 100% (46/46), done.


In [3]:
from huggingface_hub import login

# Authenticate this session with the Hugging Face token read from the secrets client.
login(token=hf_token)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [6]:
import sys

# Make the cloned repository importable. The membership check keeps the cell
# idempotent: re-running it no longer appends a duplicate entry to sys.path.
REPO_DIR = "/kaggle/working/llm"
if REPO_DIR not in sys.path:
    sys.path.append(REPO_DIR)

In [8]:
from src.efficient_llm.knowledge_distillation import knowledge_distillation

In [5]:
import sys
# Debugging aid: display the interpreter's current module search path.
sys.path

['/kaggle/lib/kagglegym',
 '/kaggle/lib',
 '/opt/conda/lib/python310.zip',
 '/opt/conda/lib/python3.10',
 '/opt/conda/lib/python3.10/lib-dynload',
 '',
 '/root/.local/lib/python3.10/site-packages',
 '/opt/conda/lib/python3.10/site-packages',
 '/root/src/BigQuery_Helper']

In [4]:
import torch
import pandas as pd
import numpy as np
from torch import nn
from transformers import pipeline, AutoTokenizer, AutoConfig, AutoModelForSequenceClassification
from datasets import load_dataset, load_metric
from pathlib import Path
from time import perf_counter

### <font color='red'> MODEL </font>

In [5]:
from llm.src.efficient_llm.constants import MODEL_CKPT

# Text-classification pipeline built from the MODEL_CKPT checkpoint.
pipe = pipeline(task="text-classification", model=MODEL_CKPT)

# Sample utterance used as a quick smoke test here and in later cells.
query = """Hey, I'd like to rent a vehicle from Nov 1st to Nov 15th in Paris and I need a 15 passenger van"""

pipe(query)

config.json:   0%|          | 0.00/8.18k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/252 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]



[{'label': 'car_rental', 'score': 0.549003541469574}]

### <font color='red'> DATA </font>

In [6]:
from llm.src.efficient_llm.data import clinc

Downloading readme:   0%|          | 0.00/24.0k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/312k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/77.8k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/136k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/15250 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/3100 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/5500 [00:00<?, ? examples/s]

### <font color='red'> MODEL PERFORMANCE </font>

In [None]:
from llm.src.efficient_llm.constants import MODEL_CKPT
from llm.src.efficient_llm.model_performance import PerformanceBenchmark

# Benchmark the MODEL_CKPT pipeline on the CLINC test split; the resulting
# perf_metrics object is extended by the later optimization sections.
pipe = pipeline(task="text-classification", model=MODEL_CKPT)
pb = PerformanceBenchmark(pipe, clinc["test"])

perf_metrics = pb.run_benchmark()
print(perf_metrics)

### <font color='red'> KNOWLEDGE DISTILLATION </font>

In [7]:
from llm.src.efficient_llm.knowledge_distillation import knowledge_distillation
from llm.src.efficient_llm.constants import(
    DEVICE,
    STUDENT_CKPT,
    TEACHER_CKPT
)

ModuleNotFoundError: No module named 'llm.src.efficient_llm.src'

In [None]:
# Build the trainer through the repository's knowledge_distillation() helper and
# start training.
# NOTE(review): the import cell above is shown failing with ModuleNotFoundError,
# so this cell cannot run until that import is fixed.
distillbert_trainer = knowledge_distillation()
distillbert_trainer.train()

#### <font color='red'> DISTILLATION: TRAINING </font>

In [None]:
# Student tokenizer, loaded from the STUDENT_CKPT checkpoint
student_tokenizer = AutoTokenizer.from_pretrained(STUDENT_CKPT)

def student_init():
    """Create a fresh student classifier from STUDENT_CKPT and move it to DEVICE.

    The number of labels is taken from the ``intent`` feature of the CLINC test
    split; the id/label mappings are copied from the model config of the
    notebook-level ``pipe``.
    """
    intent_feature = clinc["test"].features["intent"]
    reference_config = pipe.model.config

    student_config = AutoConfig.from_pretrained(
        STUDENT_CKPT,
        num_labels=intent_feature.num_classes,
        id2label=reference_config.id2label,
        label2id=reference_config.label2id,
    )
    student = AutoModelForSequenceClassification.from_pretrained(STUDENT_CKPT, config=student_config)
    return student.to(DEVICE)

# Teacher model: TEACHER_CKPT loaded with num_labels set to the number of CLINC
# intent classes, then moved to DEVICE.
teacher_model = AutoModelForSequenceClassification.from_pretrained(
    TEACHER_CKPT,
    num_labels=clinc["test"].features["intent"].num_classes,
).to(DEVICE)

# Data preparation for training: tokenization
def tokenize_text(batch):
    """Run the student tokenizer over ``batch["text"]`` with truncation enabled."""
    texts = batch["text"]
    return student_tokenizer(texts, truncation=True)

# Tokenize the dataset in batches (dropping the raw "text" column), then rename
# the "intent" column to "labels".
clinc_enc = (
    clinc
    .map(tokenize_text, batched=True, remove_columns=["text"])
    .rename_column("intent", "labels")
)

In [None]:
# Training configuration for the distillation run
training_args = DistillationTrainingArguments(
    # output / reporting
    output_dir="distillbert-base-uncased-finetuned-clinc",
    push_to_hub=True,
    report_to="none",
    # schedule and batch sizes
    num_train_epochs=10,
    evaluation_strategy="epoch",
    per_device_train_batch_size=48,
    per_device_eval_batch_size=48,
    # optimizer settings
    learning_rate=2e-5,
    weight_decay=0.01,
    # distillation-specific settings (their meaning is defined by
    # DistillationTrainingArguments, which is not shown in this notebook)
    alpha=0.12,
    temperature=7,
)

# Assemble the distillation trainer from the pieces defined above.
# NOTE(review): DistillationTrainingArguments, DistillationTrainer and
# compute_metrics are not imported or defined in any visible cell — confirm
# where they come from before running on a fresh kernel.
distillbert_trainer = DistillationTrainer(
    args=training_args,
    model_init=student_init,
    teacher_model=teacher_model,
    tokenizer=student_tokenizer,
    train_dataset=clinc_enc["train"],
    eval_dataset=clinc_enc["validation"],
    compute_metrics=compute_metrics,
)

# Kick off training
distillbert_trainer.train()

#### <font color='red'> INFERENCE </font>

In [None]:
# Benchmark the distilled checkpoint and add its results to perf_metrics.
optim_type = "Distillation"
pipe = pipeline(task="text-classification", model="zhan/distillbert-base-uncased-finetuned-clinc")
pb = PerformanceBenchmark(pipe, clinc["test"], optim_type=optim_type)

# .update() merges the new results into the metrics collected so far
# (presumably a dict, per the original d.update(d2) remark).
perf_metrics.update(pb.run_benchmark())
print(perf_metrics)

plot_metrics(perf_metrics, optim_type)

#### <font color='red'> Optimal Hyperparameters: Optuna </font>
Not enough space to implement Optuna.

In [None]:
def hp_space(trial):
    """Sample one hyperparameter configuration from ``trial``.

    Args:
        trial: object exposing ``suggest_int`` and ``suggest_float``
            (an Optuna trial, per the section heading).

    Returns:
        dict with keys ``num_train_epochs`` (int in 8..10), ``alpha``
        (float in 0..0.2) and ``temperature`` (int in 5..10).
    """
    epochs = trial.suggest_int('num_train_epochs', 8, 10)
    alpha = trial.suggest_float('alpha', 0, 0.2)
    temperature = trial.suggest_int('temperature', 5, 10)
    return dict(num_train_epochs=epochs, alpha=alpha, temperature=temperature)

# Run 20 search trials over the space defined by hp_space, maximizing the objective.
best_run = distillbert_trainer.hyperparameter_search(
    hp_space=hp_space,
    n_trials=20,
    direction='maximize',
)
print(best_run)

### <font color='red'> DYNAMIC QUANTIZATION </font>

In [6]:
from llm.src.efficient_llm.constants import NEW_MODEL_CKPT
from llm.src.efficient_llm.quantization import quantization_model

# Build a pipeline from NEW_MODEL_CKPT via the repository's quantization_model helper.
pipe = quantization_model(model_ckpt=NEW_MODEL_CKPT)
optim_type = "Distillation + Quantization"
# Benchmark on the CLINC test split and merge the results into perf_metrics.
# Relies on PerformanceBenchmark and perf_metrics from the MODEL PERFORMANCE cell.
pb = PerformanceBenchmark(pipe, clinc["test"], optim_type=optim_type)
perf_metrics.update(pb.run_benchmark())
print(perf_metrics)
# NOTE(review): plot_metrics is not imported in any visible cell — confirm its source.
plot_metrics(perf_metrics, optim_type)

### <font color='red'> ONNX and ONNX Runtime </font>

In [None]:
# %pip (rather than !pip) installs into the environment of the running kernel.
# NOTE(review): consider pinning the onnxruntime version for reproducibility.
%pip install onnxruntime

In [None]:
from llm.src.efficient_llm.onnx import convert_model_onnx
from llm.src.efficient_llm.constants import NEW_MODEL_CKPT, ONNX_MODEL_PATH

# Convert the NEW_MODEL_CKPT checkpoint to ONNX at ONNX_MODEL_PATH.
convert_model_onnx(NEW_MODEL_CKPT, ONNX_MODEL_PATH)
# Load from the same path that was just passed to the converter; the previous
# lowercase `onnx_model_path` was never defined and raised NameError.
# NOTE(review): create_model_for_provider is not imported in any visible cell —
# confirm where it comes from.
onnx_model = create_model_for_provider(ONNX_MODEL_PATH)

In [None]:
# `tokenizer` was not defined by any earlier cell, so this cell failed on a fresh
# kernel. Load the tokenizer of the checkpoint the ONNX model was converted from.
# NOTE(review): assumes NEW_MODEL_CKPT ships tokenizer files — confirm.
tokenizer = AutoTokenizer.from_pretrained(NEW_MODEL_CKPT)
# NOTE(review): OnnxPipeline is not imported in any visible cell — confirm its source.
pipe = OnnxPipeline(onnx_model, tokenizer)
pipe(query)

In [None]:
optim_type = "Distillation + ORT"
# Benchmark the ONNX pipeline on the CLINC test split and merge the results into perf_metrics.
# NOTE(review): model_path is a hardcoded string while the export cell uses
# ONNX_MODEL_PATH — confirm both point at the same file. OnnxPerformanceBenchmark
# is not imported in any visible cell.
pb = OnnxPerformanceBenchmark(pipe, clinc["test"], optim_type=optim_type, model_path="onnx/model.onnx")
perf_metrics.update(pb.run_benchmark())
perf_metrics

In [None]:
# Plot the accumulated metrics for the current optim_type.
# NOTE(review): plot_metrics is not imported in any visible cell — confirm its source.
plot_metrics(perf_metrics, optim_type)

### <font color='red'> ONNX runtime + QUANTIZATION </font>

In [None]:
from onnxruntime.quantization import quantize_dynamic, QuantType

model_input = "onnx/model.onnx"
model_output = "onnx/model.quant.onnx"

# Dynamically quantize the exported model with int8 weights, writing the result to model_output.
quantize_dynamic(model_input, model_output, weight_type=QuantType.QInt8)

# Load the quantized file. The previous code passed model_input here, so the
# "quantized" benchmark was actually run against the unquantized model.
onnx_quantized_model = create_model_for_provider(model_output)

In [None]:
optim_type = "Distillation + ORT + Quantization"
# Wrap the quantized ONNX model in a pipeline; relies on `tokenizer` and
# `OnnxPipeline` being defined by earlier cells.
pipe = OnnxPipeline(onnx_quantized_model, tokenizer)
# Benchmark on the CLINC test split, passing the quantized model file as model_path.
pb = OnnxPerformanceBenchmark(pipe, clinc["test"], optim_type=optim_type, model_path=model_output)
perf_metrics.update(pb.run_benchmark())
perf_metrics

In [None]:
# Plot the accumulated metrics for the current optim_type.
# NOTE(review): plot_metrics is not imported in any visible cell — confirm its source.
plot_metrics(perf_metrics, optim_type)