# Install the needed packages

In [1]:
# Detect the installed torch / CUDA build and print the matching
# `pip install "unsloth[...]"` command for this machine.
try:
    import torch
except ImportError as err:
    # Only a missing package should produce the install hint; any other
    # failure while importing torch propagates unchanged.
    raise ImportError("Install torch via `pip install torch`") from err
from packaging.version import Version as V

v = V(torch.__version__)
cuda = str(torch.version.cuda)
if cuda != "12.1" and cuda != "11.8":
    raise RuntimeError(f"CUDA = {cuda} not supported!")

# Check for a GPU before asking for its capability, so a runtime without a
# usable GPU gets a clear message instead of an error from inside torch.
if not torch.cuda.is_available():
    raise RuntimeError("No CUDA GPU detected - a GPU runtime is required!")
# The first element of the capability tuple is compared against 8 to choose
# the "-ampere" variant of the extra.
is_ampere = torch.cuda.get_device_capability()[0] >= 8

# Map the torch version onto the name template of the unsloth extra.
if v <= V('2.1.0'):
    raise RuntimeError(f"Torch = {v} too old!")
elif v <= V('2.1.1'):
    x = 'cu{}{}-torch211'
elif v <= V('2.1.2'):
    x = 'cu{}{}-torch212'
elif v < V('2.3.0'):
    x = 'cu{}{}-torch220'
elif v < V('2.4.0'):
    x = 'cu{}{}-torch230'
elif v < V('2.5.0'):
    x = 'cu{}{}-torch240'
else:
    raise RuntimeError(f"Torch = {v} too new!")

# Fill in the CUDA version without the dot (e.g. "121") and the optional "-ampere" suffix.
x = x.format(cuda.replace(".", ""), "-ampere" if is_ampere else "")
print(f'pip install --upgrade pip && pip install "unsloth[{x}] @ git+https://github.com/unslothai/unsloth.git"')

pip install --upgrade pip && pip install "unsloth[cu121-torch240] @ git+https://github.com/unslothai/unsloth.git"


In [2]:
%%capture

# Install Unsloth from GitHub with the extra that matches the torch / CUDA build.
# NOTE(review): the extra is hard-coded to `cu121-torch240`; the previous cell prints
# the extra detected for the current runtime - confirm the two agree before running.
!pip install --upgrade pip && pip install "unsloth[cu121-torch240] @ git+https://github.com/unslothai/unsloth.git"

# Install Transformers and required dependencies
!pip install transformers torch datasets

# Accelerate, Peft, and other model fine-tuning libraries
# (--no-deps: these packages are installed without pulling in their own dependencies)
!pip install --no-deps trl peft accelerate bitsandbytes

# FAISS for GPU (vector index used by the RAG section)
!pip install faiss-gpu

# Langchain and related community package
!pip install langchain langchain-community

# Playwright for web automation: the Python package first, then its `install` command
!pip install playwright
!playwright install

# Text and sentence processing tools
!pip install nltk html2text sentence-transformers

# Import Libraries

In [3]:
import os
import re

from datasets import load_dataset

from unsloth import FastLanguageModel
import torch
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported
from unsloth import UnslothTrainer, UnslothTrainingArguments
from unsloth.chat_templates import get_chat_template


from langchain_text_splitters import NLTKTextSplitter
from langchain.document_loaders import AsyncChromiumLoader
from langchain.document_transformers import Html2TextTransformer
from langchain.vectorstores import FAISS
import nest_asyncio
from langchain.embeddings import HuggingFaceEmbeddings
import nltk

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


# Load the dataset

In [4]:
# Configuration name follows the pattern {language}_{type};
# 'all_all' selects every language and every type.
config = 'all_laws'

# Fetch the training split of the MultiLegalSBD corpus for that configuration.
dataset = load_dataset(
    'rcds/MultiLegalSBD',
    config,
    split='train',
    trust_remote_code=True,
)

MultiLegalSBD.py:   0%|          | 0.00/4.02k [00:00<?, ?B/s]

README.md:   0%|          | 0.00/24.1k [00:00<?, ?B/s]

Loading Dataset Infos from /root/.cache/huggingface/modules/datasets_modules/datasets/rcds--MultiLegalSBD/126967b709c3764c7f85cd8b9dc1b14f07c0cf694cb5e09e59bc2f0a9cc96c57
INFO:datasets.info:Loading Dataset Infos from /root/.cache/huggingface/modules/datasets_modules/datasets/rcds--MultiLegalSBD/126967b709c3764c7f85cd8b9dc1b14f07c0cf694cb5e09e59bc2f0a9cc96c57
Generating dataset multi_legal_sbd (/root/.cache/huggingface/datasets/rcds___multi_legal_sbd/all_laws/0.0.0/126967b709c3764c7f85cd8b9dc1b14f07c0cf694cb5e09e59bc2f0a9cc96c57)
INFO:datasets.builder:Generating dataset multi_legal_sbd (/root/.cache/huggingface/datasets/rcds___multi_legal_sbd/all_laws/0.0.0/126967b709c3764c7f85cd8b9dc1b14f07c0cf694cb5e09e59bc2f0a9cc96c57)
Downloading and preparing dataset multi_legal_sbd/all_laws to /root/.cache/huggingface/datasets/rcds___multi_legal_sbd/all_laws/0.0.0/126967b709c3764c7f85cd8b9dc1b14f07c0cf694cb5e09e59bc2f0a9cc96c57...
INFO:datasets.builder:Downloading and preparing dataset multi_legal

fr_laws_0.jsonl.xz:   0%|          | 0.00/1.02M [00:00<?, ?B/s]

Downloading took 0.0 min
INFO:datasets.download.download_manager:Downloading took 0.0 min
Checksum Computation took 0.0 min
INFO:datasets.download.download_manager:Checksum Computation took 0.0 min


fr_laws_1.jsonl.xz:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

Downloading took 0.0 min
INFO:datasets.download.download_manager:Downloading took 0.0 min
Checksum Computation took 0.0 min
INFO:datasets.download.download_manager:Checksum Computation took 0.0 min


fr_laws_2.jsonl.xz:   0%|          | 0.00/136k [00:00<?, ?B/s]

Downloading took 0.0 min
INFO:datasets.download.download_manager:Downloading took 0.0 min
Checksum Computation took 0.0 min
INFO:datasets.download.download_manager:Checksum Computation took 0.0 min


it_laws_0.jsonl.xz:   0%|          | 0.00/1.12M [00:00<?, ?B/s]

Downloading took 0.0 min
INFO:datasets.download.download_manager:Downloading took 0.0 min
Checksum Computation took 0.0 min
INFO:datasets.download.download_manager:Checksum Computation took 0.0 min


it_laws_1.jsonl.xz:   0%|          | 0.00/802k [00:00<?, ?B/s]

Downloading took 0.0 min
INFO:datasets.download.download_manager:Downloading took 0.0 min
Checksum Computation took 0.0 min
INFO:datasets.download.download_manager:Checksum Computation took 0.0 min


it_laws_2.jsonl.xz:   0%|          | 0.00/131k [00:00<?, ?B/s]

Downloading took 0.0 min
INFO:datasets.download.download_manager:Downloading took 0.0 min
Checksum Computation took 0.0 min
INFO:datasets.download.download_manager:Checksum Computation took 0.0 min


es_laws_0.jsonl.xz:   0%|          | 0.00/705k [00:00<?, ?B/s]

Downloading took 0.0 min
INFO:datasets.download.download_manager:Downloading took 0.0 min
Checksum Computation took 0.0 min
INFO:datasets.download.download_manager:Checksum Computation took 0.0 min


es_laws_1.jsonl.xz:   0%|          | 0.00/867k [00:00<?, ?B/s]

Downloading took 0.0 min
INFO:datasets.download.download_manager:Downloading took 0.0 min
Checksum Computation took 0.0 min
INFO:datasets.download.download_manager:Checksum Computation took 0.0 min


es_laws_2.jsonl.xz:   0%|          | 0.00/181k [00:00<?, ?B/s]

Downloading took 0.0 min
INFO:datasets.download.download_manager:Downloading took 0.0 min
Checksum Computation took 0.0 min
INFO:datasets.download.download_manager:Checksum Computation took 0.0 min


de_laws_0.jsonl.xz:   0%|          | 0.00/3.75M [00:00<?, ?B/s]

Downloading took 0.0 min
INFO:datasets.download.download_manager:Downloading took 0.0 min
Checksum Computation took 0.0 min
INFO:datasets.download.download_manager:Checksum Computation took 0.0 min


pt_laws_0.jsonl.xz:   0%|          | 0.00/46.2k [00:00<?, ?B/s]

Downloading took 0.0 min
INFO:datasets.download.download_manager:Downloading took 0.0 min
Checksum Computation took 0.0 min
INFO:datasets.download.download_manager:Checksum Computation took 0.0 min


pt_laws_1.jsonl.xz:   0%|          | 0.00/54.4k [00:00<?, ?B/s]

Downloading took 0.0 min
INFO:datasets.download.download_manager:Downloading took 0.0 min
Checksum Computation took 0.0 min
INFO:datasets.download.download_manager:Checksum Computation took 0.0 min


pt_laws_2.jsonl.xz:   0%|          | 0.00/109k [00:00<?, ?B/s]

Downloading took 0.0 min
INFO:datasets.download.download_manager:Downloading took 0.0 min
Checksum Computation took 0.0 min
INFO:datasets.download.download_manager:Checksum Computation took 0.0 min
Generating train split
INFO:datasets.builder:Generating train split


Generating train split:   0%|          | 0/5789 [00:00<?, ? examples/s]

All the splits matched successfully.
INFO:datasets.utils.info_utils:All the splits matched successfully.
Dataset multi_legal_sbd downloaded and prepared to /root/.cache/huggingface/datasets/rcds___multi_legal_sbd/all_laws/0.0.0/126967b709c3764c7f85cd8b9dc1b14f07c0cf694cb5e09e59bc2f0a9cc96c57. Subsequent calls will reuse this data.
INFO:datasets.builder:Dataset multi_legal_sbd downloaded and prepared to /root/.cache/huggingface/datasets/rcds___multi_legal_sbd/all_laws/0.0.0/126967b709c3764c7f85cd8b9dc1b14f07c0cf694cb5e09e59bc2f0a9cc96c57. Subsequent calls will reuse this data.


# Choose the model (Quantized)
Llama-3.2 3B

In [5]:
max_seq_length = 2048 # max sequence length of the input text.
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

# Load the base model together with its tokenizer.
# The Hugging Face access token is read from the HF_TOKEN environment variable
# instead of an undefined placeholder name, so the cell runs on a fresh kernel
# and no secret is stored in the notebook. When the variable is unset, None is
# passed and no explicit token is sent.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Llama-3.2-3B-Instruct", # or choose "unsloth/Llama-3.2-1B-Instruct"
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    token = os.environ.get("HF_TOKEN"), # Hugging face access token to access the model.
)

==((====))==  Unsloth 2024.10.2: Fast Llama patching. Transformers = 4.44.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.4.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.27.post2. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/2.24G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/184 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/54.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

Unsloth: We fixed a gradient accumulation bug, but it seems like you don't have the latest transformers version!
Please update transformers via:
`pip uninstall transformers -y && pip install --upgrade --no-cache-dir "git+https://github.com/huggingface/transformers.git"`


In [6]:
# Show the "text" field of the first five records, each preceded by a separator line.
for sample_text in dataset[:5]["text"]:
    print("=========================", sample_text, sep="\n")

Code civil
(version consolidée au 15 décembre 2019)
Titre préliminaire : De la publication, des effets et de l'application des lois en général
Article 1
Les lois et, lorsqu'ils sont publiés au Journal officiel de la République française, les actes administratifs entrent en vigueur à la date qu'ils fixent ou, à défaut, le lendemain de leur publication. Toutefois, l'entrée en vigueur de celles de leurs dispositions dont l'exécution nécessite des mesures d'application est reportée à la date d'entrée en vigueur de ces mesures.
En cas d'urgence, entrent en vigueur dès leur publication les lois dont le décret de promulgation le prescrit et les actes administratifs pour lesquels le Gouvernement l'ordonne par une disposition spéciale.
Les dispositions du présent article ne sont pas applicables aux actes individuels.
Article 2
La loi ne dispose que pour l'avenir ; elle n'a point d'effet rétroactif.
Article 3
Les lois de police et de sûreté obligent tous ceux qui habitent le territoire.
Les imme

# Formatting the dataset for Text Completion / Raw Text Training

In [7]:
# End-of-sequence marker taken from the loaded tokenizer; it is appended to every training text below.
EOS_TOKEN = tokenizer.eos_token
def formatting_prompts_func(examples):
    """Terminate every entry of a batch's "text" column with the EOS token.

    Args:
        examples: Batch mapping whose "text" key holds a list of strings.

    Returns:
        dict: ``{"text": [...]}`` where each string ends with ``EOS_TOKEN``.
    """
    # Leave texts that already end with EOS untouched, so re-running the
    # mapping cell does not stack a second terminator onto every sample.
    return {
        "text": [
            text if text.endswith(EOS_TOKEN) else text + EOS_TOKEN
            for text in examples["text"]
        ]
    }

# Apply the formatter batch-wise; `dataset` is rebound to the mapped result,
# so its "text" column now holds the EOS-terminated strings.
dataset = dataset.map(formatting_prompts_func, batched=True)

Map:   0%|          | 0/5789 [00:00<?, ? examples/s]

Caching processed dataset at /root/.cache/huggingface/datasets/rcds___multi_legal_sbd/all_laws/0.0.0/126967b709c3764c7f85cd8b9dc1b14f07c0cf694cb5e09e59bc2f0a9cc96c57/cache-61fc2ee499001ea5.arrow
INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/rcds___multi_legal_sbd/all_laws/0.0.0/126967b709c3764c7f85cd8b9dc1b14f07c0cf694cb5e09e59bc2f0a9cc96c57/cache-61fc2ee499001ea5.arrow


# Reduce the number of parameters to be updated during the fine-tuning process of the model using LoRA (Low-Rank Adaptation)

In [8]:
# Projection layers that receive LoRA adapters.
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

# Wrap the loaded model with LoRA adapters; `model` is rebound to the wrapped model.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,                                  # any value > 0; suggested 8, 16, 32, 64, 128
    target_modules=lora_target_modules,
    lora_alpha=16,
    lora_dropout=0,                        # any value is supported, 0 is the optimized setting
    bias="none",                           # any value is supported, "none" is the optimized setting
    use_gradient_checkpointing="unsloth",  # True, or "unsloth" for very long context
    random_state=3407,
    use_rslora=False,
    loftq_config=None,                     # LoftQ is not used
)

Unsloth 2024.10.2 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.


# Add the training parameters for the model and train on our formatted dataset

In [9]:
# Pick the 16-bit format once: bf16 when the hardware supports it, fp16 otherwise.
use_bf16 = is_bfloat16_supported()

# Hyper-parameters for the run.
training_args = UnslothTrainingArguments(
    # Batching: 2 examples per device, accumulated over 8 steps (16 examples per update).
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,
    # Schedule: one epoch, 10% warmup, cosine learning-rate schedule.
    num_train_epochs=1,
    warmup_ratio=0.1,
    lr_scheduler_type="cosine",
    # Optimizer: 8-bit AdamW, no weight decay, separate learning rate for embeddings.
    optim="adamw_8bit",
    learning_rate=5e-5,
    embedding_learning_rate=5e-6,
    weight_decay=0.00,
    # Precision flags are mutually exclusive.
    fp16=not use_bf16,
    bf16=use_bf16,
    # Log every step, fixed seed, outputs written to "outputs".
    logging_steps=1,
    seed=3407,
    output_dir="outputs",
)

# Trainer over the formatted dataset; training text is read from the "text" field.
trainer = UnslothTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=8,  # worker processes for dataset preprocessing
    args=training_args,
)

Process #0 will write at /root/.cache/huggingface/datasets/rcds___multi_legal_sbd/all_laws/0.0.0/126967b709c3764c7f85cd8b9dc1b14f07c0cf694cb5e09e59bc2f0a9cc96c57/cache-a96a07299a801ddb_00000_of_00008.arrow
INFO:datasets.arrow_dataset:Process #0 will write at /root/.cache/huggingface/datasets/rcds___multi_legal_sbd/all_laws/0.0.0/126967b709c3764c7f85cd8b9dc1b14f07c0cf694cb5e09e59bc2f0a9cc96c57/cache-a96a07299a801ddb_00000_of_00008.arrow
Process #1 will write at /root/.cache/huggingface/datasets/rcds___multi_legal_sbd/all_laws/0.0.0/126967b709c3764c7f85cd8b9dc1b14f07c0cf694cb5e09e59bc2f0a9cc96c57/cache-a96a07299a801ddb_00001_of_00008.arrow
INFO:datasets.arrow_dataset:Process #1 will write at /root/.cache/huggingface/datasets/rcds___multi_legal_sbd/all_laws/0.0.0/126967b709c3764c7f85cd8b9dc1b14f07c0cf694cb5e09e59bc2f0a9cc96c57/cache-a96a07299a801ddb_00001_of_00008.arrow
Process #2 will write at /root/.cache/huggingface/datasets/rcds___multi_legal_sbd/all_laws/0.0.0/126967b709c3764c7f85cd8

Map (num_proc=8):   0%|          | 0/5789 [00:00<?, ? examples/s]

Caching processed dataset at /root/.cache/huggingface/datasets/rcds___multi_legal_sbd/all_laws/0.0.0/126967b709c3764c7f85cd8b9dc1b14f07c0cf694cb5e09e59bc2f0a9cc96c57/cache-a96a07299a801ddb_00000_of_00008.arrow
INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/rcds___multi_legal_sbd/all_laws/0.0.0/126967b709c3764c7f85cd8b9dc1b14f07c0cf694cb5e09e59bc2f0a9cc96c57/cache-a96a07299a801ddb_00000_of_00008.arrow
Caching processed dataset at /root/.cache/huggingface/datasets/rcds___multi_legal_sbd/all_laws/0.0.0/126967b709c3764c7f85cd8b9dc1b14f07c0cf694cb5e09e59bc2f0a9cc96c57/cache-a96a07299a801ddb_00001_of_00008.arrow
INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/rcds___multi_legal_sbd/all_laws/0.0.0/126967b709c3764c7f85cd8b9dc1b14f07c0cf694cb5e09e59bc2f0a9cc96c57/cache-a96a07299a801ddb_00001_of_00008.arrow
Caching processed dataset at /root/.cache/huggingface/datasets/rcds___multi_legal_sbd/all_laws/0.0.0/126

In [10]:
# Run fine-tuning; the object returned by train() is kept in `trainer_stats`.
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 5,789 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 8
\        /    Total batch size = 16 | Total steps = 361
 "-____-"     Number of trainable parameters = 24,313,856


**** Unsloth: Please use our fixed gradient_accumulation_steps by updating transformers and Unsloth!


Step,Training Loss
1,2.224
2,2.3902
3,2.3402
4,2.3299
5,2.4014
6,2.6355
7,2.5602
8,2.1656
9,2.4091
10,2.104


# RAG model

In [11]:
# Sentence-tokenizer data used by NLTKTextSplitter. Newer NLTK releases look up
# 'punkt_tab' instead of 'punkt', so both are fetched to keep the cell working
# regardless of the installed NLTK version.
nltk.download('punkt')
nltk.download('punkt_tab')

# Patch asyncio so the async loader can run inside the notebook's event loop.
nest_asyncio.apply()

# You can add any website URL in English Language that has Law content. These are examples:
articles = ["https://www.justia.com/criminal/offenses/violent-crimes/assault-battery/",
            "https://www.justia.com/criminal/offenses/violent-crimes/domestic-violence",
            "https://www.justia.com/criminal/drunk-driving-dui-dwi/",
            "https://www.justia.com/criminal/offenses/alcohol-related-crimes/public-intoxication",
            "https://law.justia.com/constitution/us/bill-of-rights.html",
            "https://law.justia.com/constitution/us/amendment-01/",
            "https://law.justia.com/constitution/us/amendment-02/",
            "https://law.justia.com/constitution/us/amendment-03/",
            "https://law.justia.com/constitution/us/amendment-13/",
            "https://law.justia.com/constitution/us/amendment-07/",
            "https://law.justia.com/constitution/us/amendment-16/"]

# Load the pages with the Chromium-based async loader.
loader = AsyncChromiumLoader(articles)
docs = loader.load()  # One document per URL, still containing HTML.

# Convert the HTML of each page into plain text.
html2text = Html2TextTransformer()
docs_transformed = html2text.transform_documents(docs)

# Split the text on sentence boundaries and merge sentences into chunks.
# chunk_size / chunk_overlap are measured with the splitter's length function,
# which counts characters by default - not sentences.
# NOTE(review): with a 10-character limit practically every sentence becomes its
# own chunk; confirm whether a larger value (or a sentence-counting
# length_function) was intended.
text_splitter = NLTKTextSplitter(chunk_size=10, chunk_overlap=1)
chunked_documents = text_splitter.split_documents(docs_transformed)  # List of chunked documents.

# Embed the chunks with 'all-mpnet-base-v2' and index them in FAISS for similarity search.
db = FAISS.from_documents(chunked_documents,
                          HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2'))

# Expose the index as a retriever that returns the 4 most similar chunks per query.
retriever = db.as_retriever(
    search_type="similarity",  # Rank by embedding similarity.
    search_kwargs={'k': 4}  # Number of results returned per query.
)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
  HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2'))


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]



1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [69]:
# The question to ask.
# The retrieved law documents are in English only. To get an answer in French, Spanish,
# Italian or Portuguese, write the question (the user message in the `messages` list)
# in that language; the system prompt tells the LLM to answer in the language of the question.
query = "what is the law of human hurting another human?"

In [70]:
# Look up the chunks in the FAISS index that are closest to the query.
docs = db.similarity_search(query)

# Concatenate the text of the retrieved chunks, separated by single spaces.
retrieved_text = " ".join(doc.page_content for doc in docs)

# Display the combined context that will be handed to the model.
print(retrieved_text)

For example, someone might be entitled to punch their significant
other if that person was trying to strangle them. This involves the same
elements as self-defense but applies when the defendant used force to protect
someone else, such as a family member. Defense of Others

A defendant also may be justified in using force to protect someone else. For
example, if a woman sees her boyfriend raising his fist to punch their child,
she likely would be justified in using force to stop him.


# Generation

In [64]:
# Function that uses Regex patterns to extract only the generated response of the assistant.

def extract_assistant_response(raw_output):
    """Print the assistant's reply found in a decoded chat transcript.

    The reply is the text after the first
    ``<|start_header_id|>assistant<|end_header_id|>`` header, up to the next
    ``<|eot_id|>`` marker. If the transcript ends before ``<|eot_id|>``
    appears (for example because generation ran out of tokens), everything
    up to the end of the string is used instead of discarding the answer.

    Args:
        raw_output (str): Decoded model output, special tokens included.

    Returns:
        None when a reply was found (it is printed with surrounding
        whitespace stripped); the string "Assistant response not found"
        when no assistant header is present.
    """
    # The closing marker is optional: fall back to end-of-string (\Z) so a
    # truncated generation is still extracted.
    pattern = r"<\|start_header_id\|>assistant<\|end_header_id\|>(.*?)(?:<\|eot_id\|>|\Z)"

    # re.DOTALL lets the captured reply span multiple lines.
    match = re.search(pattern, raw_output, re.DOTALL)
    if match is None:
        return "Assistant response not found"

    print(match.group(1).strip())

In [73]:
# Attach the "llama-3.1" chat template to the tokenizer.
tokenizer = get_chat_template(tokenizer, chat_template="llama-3.1")

# Switch the model into Unsloth's inference mode.
FastLanguageModel.for_inference(model)

# Conversation: behaviour instructions, the retrieved context, then the user's question.
system_instruction = "You are a knowledgeable and helpful multilingual law assistant. Identify the language of the user's question and respond to their inquiry in the same language."
context_instruction = f"Here is some retrieved context that might be helpful: {retrieved_text}"
messages = [
    {"role": "system", "content": system_instruction},
    {"role": "system", "content": context_instruction},
    {"role": "user", "content": f"{query}"},
]

# Render the conversation with the chat template, tokenize it, append the
# generation prompt, and move the resulting PyTorch tensor to the GPU.
inputs = tokenizer.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt=True,
    return_tensors="pt",
).to("cuda")

# Generation settings: up to 2048 new tokens, KV cache enabled,
# temperature 1.0 and a min_p cutoff of 0.1.
generation_settings = dict(
    max_new_tokens=2048,
    use_cache=True,
    temperature=1.0,
    min_p=0.1,
)
outputs = model.generate(input_ids=inputs, **generation_settings)

# Turn the first generated sequence back into text (special tokens are kept).
output_in_text = tokenizer.batch_decode(outputs)[0]

# Pull the assistant's reply out of the decoded transcript and print it.
extract_assistant_response(output_in_text)

The user's question is in English. I'll provide a response in English.

The law regarding human hurting another human varies depending on the jurisdiction, but I'll provide a general overview of the common principles and laws.

In most countries, harming or injuring another person is considered a criminal offense. The laws and penalties for such offenses vary, but here are some key principles:

1. **Assault and Battery**: Harming or injuring another person can be classified as assault and battery. Assault is the threat of harm, while battery is the actual act of harm.
2. **Criminal Homicide**: If the harm or injury results in the death of the other person, it becomes a more serious offense, known as criminal homicide. This can include murder, manslaughter, or negligent homicide.
3. **Self-Defense**: As I mentioned earlier, self-defense is a defense against charges of harming or injuring another person. However, the laws regarding self-defense vary, and it's often subject to strict inte