In [1]:
!pip -q install transformers accelerate bitsandbytes trl

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.3/61.3 MB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m544.8/544.8 kB[0m [31m15.5 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
# Import necessary packages for the fine-tuning process
import os                          # Operating system functionalities
import torch                       # PyTorch library for deep learning
from datasets import load_dataset  # Loading datasets for training
from transformers import (
    AutoModelForCausalLM,          # AutoModel for language modeling tasks
    AutoTokenizer,                # AutoTokenizer for tokenization
    BitsAndBytesConfig,           # Configuration for BitsAndBytes
    HfArgumentParser,             # Argument parser for Hugging Face models
    TrainingArguments,            # Training arguments for model training
    pipeline,                     # Creating pipelines for model inference
    logging,                      # Logging information during training
)
from peft import LoraConfig, PeftModel,PeftConfig
from trl import SFTTrainer

from datasets import load_dataset, Dataset

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from google.colab import files
import os
import logging as log
import sqlite3
from typing import Any, Dict, Iterator, List, Mapping, Optional
import re
from pydantic import BaseModel, Field

import warnings
warnings.filterwarnings('ignore')

# Utils

In [3]:
def sql_process(example):
    create_statements = re.findall(r'(?:|\n|^)CREATE\s+TABLE\s+\w+\s*\([^;]+;', example, re.DOTALL)
    schema = " ".join(create_statements)
    data = re.sub(r'CREATE\s+TABLE\s+\w+\s*\([^;]+;', '', example, flags=re.DOTALL).strip()
    return {"schema": schema, "data": data}

In [4]:
def make_question(schema, question):
    system = "You are an SQL analyst with 15 years of experience writing complex SQL queries. \n"
    system += "Consider the following tables with their schemas: \n"
    system += "Write a SQLite SQL query that would help you answer the following question: \n"
    system += "Remember always return sql query answer, do not return any extra information or explain or add text. \n"
    user = question + f" (Tables: {schema})"
    return {"system": system, "user": user}

In [5]:
def preprocess_function(example):
    # Ghép schema và data vào user context
    user_content = f"""{example['user']}"""

    return {
        "messages": [
            {"role": "system", "content": example["system"]},
            {"role": "user", "content": user_content},
            {"role": "assistant", "content": example["sql"]}
        ]
    }

In [6]:
def get_dataset():
    limit = 20000
    ds = load_dataset("gretelai/synthetic_text_to_sql", split="train")
    ds = ds.select(range(limit))
    ds = ds.map(lambda x: sql_process(x["sql_context"]))
    ds = ds.map(lambda x: make_question(x['schema'], x['sql_prompt']))
    ds = ds.map(preprocess_function)
    ds_train = ds.select_columns(['messages'])
    ds_data = ds.select_columns(['schema', 'data', 'sql'])
    return ds_train, ds_data

In [7]:
def setup_logging():
    # Remove all handlers associated with the root logger object.
    for handler in log.root.handlers[:]:
        log.root.removeHandler(handler)

    log.basicConfig(
        level=log.WARNING,
        format="%(asctime)s [%(levelname)s] %(message)s",
        handlers=[log.StreamHandler()],
    )

In [8]:
logger = log.getLogger(__name__)
setup_logging()

In [9]:
def check_sql_commend(query):
    try:
        pd.read_sql(query, con=engine)
    except Exception as e:
        logger.debug(f"Error in query", e)
        return False

    logger.info(f"Query: {query} is valid")
    return True

# Dataset EDA

In [10]:
dataset = load_dataset("gretelai/synthetic_text_to_sql", split="train")
print(dataset[0])

README.md: 0.00B [00:00, ?B/s]

(…)nthetic_text_to_sql_train.snappy.parquet:   0%|          | 0.00/32.4M [00:00<?, ?B/s]

(…)ynthetic_text_to_sql_test.snappy.parquet:   0%|          | 0.00/1.90M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/100000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/5851 [00:00<?, ? examples/s]

{'id': 5097, 'domain': 'forestry', 'domain_description': 'Comprehensive data on sustainable forest management, timber production, wildlife habitat, and carbon sequestration in forestry.', 'sql_complexity': 'single join', 'sql_complexity_description': 'only one join (specify inner, outer, cross)', 'sql_task_type': 'analytics and reporting', 'sql_task_type_description': 'generating reports, dashboards, and analytical insights', 'sql_prompt': 'What is the total volume of timber sold by each salesperson, sorted by salesperson?', 'sql_context': "CREATE TABLE salesperson (salesperson_id INT, name TEXT, region TEXT); INSERT INTO salesperson (salesperson_id, name, region) VALUES (1, 'John Doe', 'North'), (2, 'Jane Smith', 'South'); CREATE TABLE timber_sales (sales_id INT, salesperson_id INT, volume REAL, sale_date DATE); INSERT INTO timber_sales (sales_id, salesperson_id, volume, sale_date) VALUES (1, 1, 120, '2021-01-01'), (2, 1, 150, '2021-02-01'), (3, 2, 180, '2021-01-01');", 'sql': 'SELECT

In [11]:
ds = dataset.select(range(100))
ds = ds.map(lambda x: make_question(x["sql_context"], x["sql_prompt"]))
ds = ds.select_columns(['sql_context', 'system', 'user', 'sql'])
ds[0]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

{'sql_context': "CREATE TABLE salesperson (salesperson_id INT, name TEXT, region TEXT); INSERT INTO salesperson (salesperson_id, name, region) VALUES (1, 'John Doe', 'North'), (2, 'Jane Smith', 'South'); CREATE TABLE timber_sales (sales_id INT, salesperson_id INT, volume REAL, sale_date DATE); INSERT INTO timber_sales (sales_id, salesperson_id, volume, sale_date) VALUES (1, 1, 120, '2021-01-01'), (2, 1, 150, '2021-02-01'), (3, 2, 180, '2021-01-01');",
 'system': 'You are an SQL analyst with 15 years of experience writing complex SQL queries. \nConsider the following tables with their schemas: \nWrite a SQLite SQL query that would help you answer the following question: \nRemember always return sql query answer, do not return any extra information or explain or add text. \n',
 'user': "What is the total volume of timber sold by each salesperson, sorted by salesperson? (Tables: CREATE TABLE salesperson (salesperson_id INT, name TEXT, region TEXT); INSERT INTO salesperson (salesperson_id,

In [12]:
df_train = pd.DataFrame(ds)
df_train.head()

Unnamed: 0,sql_context,system,user,sql
0,"CREATE TABLE salesperson (salesperson_id INT, ...",You are an SQL analyst with 15 years of experi...,What is the total volume of timber sold by eac...,"SELECT salesperson_id, name, SUM(volume) as to..."
1,CREATE TABLE equipment_maintenance (equipment_...,You are an SQL analyst with 15 years of experi...,List all the unique equipment types and their ...,"SELECT equipment_type, SUM(maintenance_frequen..."
2,"CREATE TABLE marine_species (name VARCHAR(50),...",You are an SQL analyst with 15 years of experi...,How many marine species are found in the South...,SELECT COUNT(*) FROM marine_species WHERE loca...
3,"CREATE TABLE trade_history (id INT, trader_id ...",You are an SQL analyst with 15 years of experi...,What is the total trade value and average pric...,"SELECT trader_id, stock, SUM(price * quantity)..."
4,"CREATE TABLE upgrades (id INT, cost FLOAT, typ...",You are an SQL analyst with 15 years of experi...,Find the energy efficiency upgrades with the h...,"SELECT type, cost FROM (SELECT type, cost, ROW..."


# Set up llama 2

In [13]:
import re

sql = """CREATE TABLE salesperson (salesperson_id INT, name TEXT, region TEXT);
INSERT INTO salesperson (salesperson_id, name, region) VALUES (1, 'John Doe', 'North'), (2, 'Jane Smith', 'South');
CREATE TABLE timber_sales (sales_id INT, salesperson_id INT, volume REAL, sale_date DATE);
INSERT INTO timber_sales (sales_id, salesperson_id, volume, sale_date) VALUES (1, 1, 120, '2021-01-01'), (2, 1, 150, '2021-02-01'), (3, 2, 180, '2021-01-01');"""

result = sql_process(sql)
print("Schema:")
print(result["schema"])
print("\nData:")
print(result["data"])

Schema:
CREATE TABLE salesperson (salesperson_id INT, name TEXT, region TEXT); 
CREATE TABLE timber_sales (sales_id INT, salesperson_id INT, volume REAL, sale_date DATE);

Data:
INSERT INTO salesperson (salesperson_id, name, region) VALUES (1, 'John Doe', 'North'), (2, 'Jane Smith', 'South');

INSERT INTO timber_sales (sales_id, salesperson_id, volume, sale_date) VALUES (1, 1, 120, '2021-01-01'), (2, 1, 150, '2021-02-01'), (3, 2, 180, '2021-01-01');


In [14]:
ds = ds.map(lambda x: sql_process(x["sql_context"]))
ds[0]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

{'sql_context': "CREATE TABLE salesperson (salesperson_id INT, name TEXT, region TEXT); INSERT INTO salesperson (salesperson_id, name, region) VALUES (1, 'John Doe', 'North'), (2, 'Jane Smith', 'South'); CREATE TABLE timber_sales (sales_id INT, salesperson_id INT, volume REAL, sale_date DATE); INSERT INTO timber_sales (sales_id, salesperson_id, volume, sale_date) VALUES (1, 1, 120, '2021-01-01'), (2, 1, 150, '2021-02-01'), (3, 2, 180, '2021-01-01');",
 'system': 'You are an SQL analyst with 15 years of experience writing complex SQL queries. \nConsider the following tables with their schemas: \nWrite a SQLite SQL query that would help you answer the following question: \nRemember always return sql query answer, do not return any extra information or explain or add text. \n',
 'user': "What is the total volume of timber sold by each salesperson, sorted by salesperson? (Tables: CREATE TABLE salesperson (salesperson_id INT, name TEXT, region TEXT); INSERT INTO salesperson (salesperson_id,

# Set up  

In [15]:
model_name = "NousResearch/Llama-2-7b-chat-hf"
model_name_finetuned = "llama-2-7b-chat-sql"

In [16]:
################################################################################
# QLoRA parameters
################################################################################

# LoRA attention dimension
lora_r = 64

# Alpha parameter for LoRA scaling
lora_alpha = 16

# Dropout probability for LoRA layers
lora_dropout = 0.1

In [17]:
################################################################################
# bitsandbytes parameters
################################################################################

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

In [18]:
################################################################################
# TrainingArguments parameters
################################################################################

# Output directory where the model predictions and checkpoints will be stored
output_dir = "./results"

# Number of training epochs
num_train_epochs = 2

# Enable fp16/bf16 training (set bf16 to True with an A100)
fp16 = False
bf16 = False

# Batch size per GPU for training
per_device_train_batch_size = 1

# Batch size per GPU for evaluation
per_device_eval_batch_size = 1

# Number of update steps to accumulate the gradients for
gradient_accumulation_steps = 1

# Enable gradient checkpointing
gradient_checkpointing = True

# Maximum gradient normal (gradient clipping)
max_grad_norm = 0.3

# Initial learning rate (AdamW optimizer)
learning_rate = 2e-4

# Weight decay to apply to all layers except bias/LayerNorm weights
weight_decay = 0.001

# Optimizer to use
optim = "paged_adamw_32bit"

# Learning rate schedule (constant a bit better than cosine)
lr_scheduler_type = "constant"

# Number of training steps (overrides num_train_epochs)
max_steps = -1

# Ratio of steps for a linear warmup (from 0 to learning rate)
warmup_ratio = 0.03

# Group sequences into batches with same length
# Saves memory and speeds up training considerably
group_by_length = True

# Save checkpoint every X updates steps
save_steps = 500

# Log every X updates steps
logging_steps = 50

In [19]:
################################################################################
# SFT parameters
################################################################################

# Maximum sequence length to use
max_seq_length = None

# Pack multiple short examples in the same input sequence to increase efficiency
packing = False

# Load the entire model on the GPU 0
device_map = {"": 0}

In [20]:
# Step 2 :Load tokenizer and model with QLoRA configuration
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

In [21]:
# Step 3 :Check GPU compatibility with bfloat16
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

In [22]:
# Step 4 :Load base model
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map=device_map
)
model.config.use_cache = False
model.config.pretraining_tp = 1

config.json:   0%|          | 0.00/583 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/200 [00:00<?, ?B/s]

In [23]:
# Step 5 :Load LLaMA tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.add_special_tokens({'pad_token': '[PAD]'})
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
tokenizer.chat_template = """{% for message in messages %}
{% if message['role'] == 'system' %}{{ '<|system|>\n' + message['content'] + eos_token }}
{% elif message['role'] == 'user' %}{{ '<|user|>\n' + message['content'] + eos_token }}
{% elif message['role'] == 'assistant' %}{{ '<|assistant|>\n' + message['content'] + eos_token }}
{% endif %}
{% if loop.last and add_generation_prompt %}{{ '<|assistant|>' }}
{% endif %}
{% endfor %}"""


tokenizer_config.json:   0%|          | 0.00/746 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/21.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/435 [00:00<?, ?B/s]

In [24]:
# Step 6 :Load LoRA configuration
peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_r,
    bias="none",
    task_type="CAUSAL_LM",
)

In [25]:
# Step 7 :Set training parameters
training_arguments = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    fp16=fp16,
    bf16=bf16,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=group_by_length,
    lr_scheduler_type=lr_scheduler_type,
    report_to="tensorboard"
)

In [26]:
ds_train, ds_data = get_dataset()

Map:   0%|          | 0/20000 [00:00<?, ? examples/s]

Map:   0%|          | 0/20000 [00:00<?, ? examples/s]

Map:   0%|          | 0/20000 [00:00<?, ? examples/s]

In [27]:
message = ds_train[0]['messages']

In [28]:
print(tokenizer.chat_template)

{% for message in messages %}
{% if message['role'] == 'system' %}{{ '<|system|>
' + message['content'] + eos_token }}
{% elif message['role'] == 'user' %}{{ '<|user|>
' + message['content'] + eos_token }}
{% elif message['role'] == 'assistant' %}{{ '<|assistant|>
' + message['content'] + eos_token }}
{% endif %}
{% if loop.last and add_generation_prompt %}{{ '<|assistant|>' }}
{% endif %}
{% endfor %}


In [29]:
message = ds_train[0]['messages']
tokenized_chat = tokenizer.apply_chat_template(message, tokenize=True, add_generation_prompt=True, return_tensors="pt")

In [30]:
print(tokenizer.decode(tokenized_chat[0]))

<|system|>
You are an SQL analyst with 15 years of experience writing complex SQL queries. 
Consider the following tables with their schemas: 
Write a SQLite SQL query that would help you answer the following question: 
Remember always return sql query answer, do not return any extra information or explain or add text. 
</s>
<|user|>
What is the total volume of timber sold by each salesperson, sorted by salesperson? (Tables: CREATE TABLE salesperson (salesperson_id INT, name TEXT, region TEXT); CREATE TABLE timber_sales (sales_id INT, salesperson_id INT, volume REAL, sale_date DATE);)</s>
<|assistant|>
SELECT salesperson_id, name, SUM(volume) as total_volume FROM timber_sales JOIN salesperson ON timber_sales.salesperson_id = salesperson.salesperson_id GROUP BY salesperson_id, name ORDER BY total_volume DESC;</s>
<|assistant|>



In [31]:
dataset = ds_train.map(lambda x: {"formatted_chat": tokenizer.apply_chat_template(x["messages"], tokenize=False, add_generation_prompt=False)}, batched=True)
dataset = dataset.rename_column("formatted_chat", "text").select_columns(["text"])

Map:   0%|          | 0/20000 [00:00<?, ? examples/s]

In [None]:
dataset[0]

In [None]:
# Step 8 :Set supervised fine-tuning parameters
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_config,
    args=training_arguments,
)

In [None]:
# Step 9 :Train model
trainer.train()

# Step 10 :Save trained model
trainer.model.save_pretrained(model_name_finetuned)

In [None]:
%load_ext tensorboard
%tensorboard --logdir /kaggle/input/dashboard-cp/run --port 6007

# Model evalutation

In [None]:
# Đường dẫn checkpoint
path = f"/kaggle/working/merged_model"

model = AutoModelForCausalLM.from_pretrained(
    path,
    device_map=device_map,
)

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(path)
tokenizer.add_special_tokens({'pad_token': '[PAD]'})
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
tokenizer.chat_template = """{% for message in messages %}
{% if message['role'] == 'system' %}{{ '<|system|>\n' + message['content'] + eos_token }}
{% elif message['role'] == 'user' %}{{ '<|user|>\n' + message['content'] + eos_token }}
{% elif message['role'] == 'assistant' %}{{ '<|assistant|>\n' + message['content'] + eos_token }}
{% endif %}
{% if loop.last and add_generation_prompt %}{{ '<|assistant|>' }}
{% endif %}
{% endfor %}"""

print("Model loaded successfully!")

Model loaded successfully!


In [None]:
# Chuyển sang bfloat16 và eval mode
# model.to(torch.bfloat16)
model.eval()

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32000, 4096, padding_idx=0)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=11008, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=11008, bias=False)
          (down_proj): Linear4bit(in_features=11008, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
      )
    )
    (norm): Lla

In [None]:
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device_map="auto"
)

Device set to use cuda:0


In [None]:
messages = [
    {"role": "system", "content": "You are an SQL analyst with 15 years of experience writing complex SQL queries. Consider the following tables with their schemas: Write a SQLite SQL query that would help you answer the following question: Remember always return sql query answer, do not return any extra information or explain or add text. \n"},
    {"role": "user", "content": "Find the name of employees who are working in the department 'project3D' and their marital status (Tables: CREATE TABLE Employees (EmployeeID INT, Name VARCHAR(50), Department VARCHAR(20), MaritalStatus VARCHAR(10));"}
]
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

In [None]:
response = pipe(
    prompt,
    do_sample=False,
    num_beams=4,
    early_stopping=True,  # Dừng sớm khi tìm ra câu trả lời tốt
    repetition_penalty=1.5,  # Tăng để giảm lặp
    max_new_tokens=150,
    eos_token_id=tokenizer.eos_token_id
)

In [None]:
print(response[0]["generated_text"])

<|system|>
You are an SQL analyst with 15 years of experience writing complex SQL queries. Consider the following tables with their schemas: Write a SQLite SQL query that would help you answer the following question: Remember always return sql query answer, do not return any extra information or explain or add text. 
</s>
<|user|>
Find the name of employees who are working in the department 'project3D' and their marital status (Tables: CREATE TABLE Employees (EmployeeID INT, Name VARCHAR(50), Department VARCHAR(20), MaritalStatus VARCHAR(10));)</s>
<|assistant|>
SELECT Employees.Name, Employees.Department, Employees.MaritalStatus FROM Employees WHERE Employees.Department = 'project3D';


# Merge backbone and lora

In [32]:
!unzip /content/checkpoints.zip

Archive:  /content/checkpoints.zip
   creating: checkpoints/
  inflating: checkpoints/adapter_config.json  
  inflating: checkpoints/adapter_model.safetensors  
  inflating: checkpoints/README.md   


In [33]:
path = "/content/checkpoints"
peft_model  = PeftModel.from_pretrained(model, path)
merged_model = peft_model.merge_and_unload()

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.add_special_tokens({'pad_token': '[PAD]'})
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
tokenizer.chat_template = """{% for message in messages %}
{% if message['role'] == 'system' %}{{ '<|system|>\n' + message['content'] + eos_token }}
{% elif message['role'] == 'user' %}{{ '<|user|>\n' + message['content'] + eos_token }}
{% elif message['role'] == 'assistant' %}{{ '<|assistant|>\n' + message['content'] + eos_token }}
{% endif %}
{% if loop.last and add_generation_prompt %}{{ '<|assistant|>' }}
{% endif %}
{% endfor %}"""

# merged_model.resize_token_embeddings(len(tokenizer))

merged_model.save_pretrained("./merged_model", safe_serialization=True)
tokenizer.save_pretrained("./merged_model")

('./merged_model/tokenizer_config.json',
 './merged_model/special_tokens_map.json',
 './merged_model/chat_template.jinja',
 './merged_model/tokenizer.model',
 './merged_model/added_tokens.json',
 './merged_model/tokenizer.json')

In [35]:
!zip -r merged_model.zip /content/merged_model

  adding: content/merged_model/ (stored 0%)
  adding: content/merged_model/added_tokens.json (deflated 15%)
  adding: content/merged_model/tokenizer.json (deflated 85%)
  adding: content/merged_model/tokenizer_config.json (deflated 74%)
  adding: content/merged_model/generation_config.json (deflated 35%)
  adding: content/merged_model/tokenizer.model (deflated 55%)
  adding: content/merged_model/chat_template.jinja (deflated 60%)
  adding: content/merged_model/special_tokens_map.json (deflated 72%)
  adding: content/merged_model/model.safetensors (deflated 10%)
  adding: content/merged_model/config.json (deflated 56%)


In [36]:
from google.colab import files

# Ví dụ tải file model.zip
files.download("/content/merged_model.zip")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>