In [2]:
import json
import pandas as pd
from datasets import Dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments
from trl import SFTTrainer
from transformers import EarlyStoppingCallback

2025-05-13 17:49:22.030802: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747158562.053174     303 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747158562.060293     303 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [None]:
!pip install trl==0.11.0
! pip install -U bitsandbytes

## Load the dataset

In [3]:
# Read the bilingual instruction/response records from the Kaggle input folder.
# The encoding is passed explicitly because the records include Arabic fields
# (instruction_ar / response_ar); without it, open() falls back to the
# platform's default encoding, which is not guaranteed to be UTF-8.
with open('/kaggle/input/bilingual-construction-dataset/bilingual_construction_dataset_translated.json', 'r', encoding='utf-8') as file:
    data = json.load(file)


### format the json file

In [5]:
# Build one training example per language for every record: the English pair
# first, then the Arabic pair, both rendered with the same
# "### Human: ... / ### Assistant: ..." prompt template.
formatted_bilingual = [
    {"text": f"### Human: {row['instruction_en']}\n### Assistant: {row['response_en']}"} 
    for row in data
] + [
    {"text": f"### Human: {row['instruction_ar']}\n### Assistant: {row['response_ar']}"} 
    for row in data
]

dataset = Dataset.from_pandas(pd.DataFrame(formatted_bilingual))
# Hold out 10% for evaluation. The seed pins the shuffle so the train/test
# membership (and therefore the reported validation loss) is reproducible
# across kernel restarts.
dataset = dataset.train_test_split(test_size=0.1, seed=42)

In [37]:
# Authenticate against the Hugging Face Hub. login() renders an interactive
# widget in the notebook, so no token is stored in the source.
from huggingface_hub import login
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

## Load the tokenizer and the model

In [6]:
# --- Tokenizer ---
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf", trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

In [7]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
# Quantization configuration: load the weights in 4-bit NF4 with double
# quantization, and run the compute in float16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="float16"
)

# Load the base model with the quantization config above; device_map="auto"
# lets the library decide device placement.
# trust_remote_code is intentionally not enabled: the Llama architecture is
# built into transformers, so no code from the Hub repository needs to run.
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",
    quantization_config=bnb_config,
    device_map="auto",
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
pip install -U bitsandbytes

### Configure LoRA

In [8]:
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# Pass the quantized base model through peft's k-bit training preparation.
model = prepare_model_for_kbit_training(model)

# LoRA hyper-parameters: rank-8 adapters (alpha 16, 5% dropout, no bias terms)
# on the q/k/v/o projection modules, for a causal language-modelling task.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj"],
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
)

In [9]:
# Wrap the prepared base model with the adapters described by lora_config, then
# print how many parameters are trainable versus the total.
# NOTE(review): this rebinds `model`, so re-running the cell without reloading
# the base model would wrap an already-wrapped model.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()


trainable params: 8,388,608 || all params: 6,746,804,224 || trainable%: 0.1243


### setting training parameters

In [10]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments

from transformers import TrainingArguments

training_args = TrainingArguments(
    # --- checkpoints: where they go, how often, how many are kept ---
    output_dir="./llama2-7b-egyptian-construction-lora",
    save_steps=100,
    save_total_limit=2,
    load_best_model_at_end=True,
    # --- optimisation: a short, fixed-length run in half precision ---
    max_steps=1000,
    learning_rate=2e-4,
    fp16=True,
    # --- memory: batch sizes kept at 1 to stay within GPU memory ---
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=2,  # drop to 1 if out-of-memory errors persist
    # --- evaluation cadence (same interval as checkpointing) ---
    eval_strategy="steps",
    eval_steps=100,
    # --- logging / experiment tracking ---
    logging_steps=50,
    report_to="wandb",
    run_name="egyptian-construction-chatbot-v1",
)

In [None]:
# Sanity check: show the first formatted training example, then the split's
# column schema, one per line.
print(dataset["train"][0], dataset["train"].features, sep="\n")

In [None]:
!pip install wandb --quiet
!wandb login 771825ea45b66d37e930eaecd1a00b8fe61ccfed


### Initialize the W&B project to track the training

In [None]:
import wandb

# Open the tracking run named "debug-run" in the chatbot project.
wandb.init(name="debug-run", project="egyptian-construction-chatbot-v1")


In [11]:
# Trainer setup
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    args=training_args,
    tokenizer=tokenizer,
    peft_config=lora_config,
    dataset_text_field="text",
    callbacks=[EarlyStoppingCallback(early_stopping_patience=20)],
)


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/10404 [00:00<?, ? examples/s]

Map:   0%|          | 0/1156 [00:00<?, ? examples/s]

  super().__init__(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [12]:
# Run fine-tuning, then write the model and the tokenizer files to ./lora.
# The last expression is left bare so the notebook displays the tuple of
# written tokenizer file paths.
trainer.train()
model.save_pretrained("./lora")
tokenizer.save_pretrained("./lora")

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mahmeddewdar45[0m ([33mahmeddewdar45-alexandria-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
  return fn(*args, **kwargs)


Step,Training Loss,Validation Loss
100,1.4422,1.478307
200,1.3703,1.391276
300,1.3101,1.350127
400,1.2791,1.310227
500,1.1733,1.285482
600,1.2052,1.261849
700,1.1949,1.246149
800,1.1592,1.230777
900,1.199,1.222252
1000,1.1453,1.217482


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


('./lora/tokenizer_config.json',
 './lora/special_tokens_map.json',
 './lora/tokenizer.model',
 './lora/added_tokens.json',
 './lora/tokenizer.json')

In [None]:
  # Number of examples in the training split.
  print(len(dataset["train"]))

### Upload the adapter to the Hugging Face Hub repository

In [39]:
from huggingface_hub import HfApi
api = HfApi()
# Make sure the target repository exists before uploading. In document order
# this cell comes before the one that creates the repository, so a top-to-bottom
# run would otherwise try to upload into a repo that does not exist yet.
# exist_ok=True turns the call into a no-op when the repo is already there.
api.create_repo(
    "AhmedHussein66/llama2-7b-egyptian-construction-lora-lora-adapter",
    repo_type="model",
    private=False,
    exist_ok=True,
)
# Push everything saved under ./lora (adapter weights and tokenizer files).
api.upload_folder(
    folder_path="./lora",
    repo_id="AhmedHussein66/llama2-7b-egyptian-construction-lora-lora-adapter",
    repo_type="model"
)

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/33.6M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/AhmedHussein66/llama2-7b-egyptian-construction-lora-lora-adapter/commit/29a71d13a7029314a7e1dc24a03566ef585b82a4', commit_message='Upload folder using huggingface_hub', commit_description='', oid='29a71d13a7029314a7e1dc24a03566ef585b82a4', pr_url=None, repo_url=RepoUrl('https://huggingface.co/AhmedHussein66/llama2-7b-egyptian-construction-lora-lora-adapter', endpoint='https://huggingface.co', repo_type='model', repo_id='AhmedHussein66/llama2-7b-egyptian-construction-lora-lora-adapter'), pr_revision=None, pr_num=None)

In [38]:
from huggingface_hub import HfApi

api = HfApi()
# Create the public model repository for the adapter. exist_ok=True keeps the
# cell re-runnable: without it the call raises once the repository exists.
api.create_repo(
    "AhmedHussein66/llama2-7b-egyptian-construction-lora-lora-adapter",
    private=False,
    exist_ok=True,
)


RepoUrl('https://huggingface.co/AhmedHussein66/llama2-7b-egyptian-construction-lora-lora-adapter', endpoint='https://huggingface.co', repo_type='model', repo_id='AhmedHussein66/llama2-7b-egyptian-construction-lora-lora-adapter')

### evaluate the model 

In [42]:
# Score the model on the held-out split and show the metrics dictionary.
eval_results = trainer.evaluate()
print(f"Evaluation results: {eval_results}")


Evaluation results: {'eval_loss': 1.2174819707870483, 'eval_runtime': 794.7723, 'eval_samples_per_second': 1.455, 'eval_steps_per_second': 1.455, 'epoch': 0.1922337562475971}


### test the model

In [46]:
from transformers import pipeline

# Arabic <-> English translation pipelines that wrap the chatbot.
translator_ar_to_en = pipeline(task="translation", model="Helsinki-NLP/opus-mt-ar-en")
translator_en_to_ar = pipeline(task="translation", model="Helsinki-NLP/opus-mt-en-ar")

# Text-generation pipeline backed by the fine-tuned model and its tokenizer.
chatbot = pipeline(task="text-generation", model=model, tokenizer=tokenizer)

def translate_and_chat(input_text):
    """Answer an Arabic question with the fine-tuned chatbot, in Arabic.

    The question is translated to English, wrapped in the
    "### Human: ... / ### Assistant:" template used for training, completed by
    the text-generation pipeline, and the reply is translated back to Arabic.

    Relies on the module-level pipelines `translator_ar_to_en`, `chatbot` and
    `translator_en_to_ar`.

    Args:
        input_text: The user's question in Arabic.

    Returns:
        The assistant's reply translated to Arabic, or an empty string when the
        model produced no usable text.
    """
    input_en = translator_ar_to_en(input_text)[0]['translation_text']

    prompt = f"### Human: {input_en}\n### Assistant:"
    response = chatbot(
        prompt,
        max_new_tokens=100,
        do_sample=True,  # temperature / top_p only take effect when sampling
        temperature=0.7,
        top_p=0.9,
        repetition_penalty=1.2,
        return_full_text=False,  # ask for the continuation only, not the prompt
    )

    completion = response[0]['generated_text']
    # Defensive: drop the echoed prompt if the pipeline returned it anyway.
    if completion.startswith(prompt):
        completion = completion[len(prompt):]

    # Keep only the first assistant turn. The model may keep going and invent
    # further "### Human:" / "### Assistant:" turns; taking the last segment (as
    # the code did before) would return one of those instead of the real reply.
    for turn_marker in ("### Human:", "### Assistant:"):
        completion = completion.split(turn_marker)[0]
    assistant_response_en = completion.strip()

    # Nothing to translate: avoid sending an empty string to the translator.
    if not assistant_response_en:
        return ""

    assistant_response_ar = translator_en_to_ar(assistant_response_en)[0]['translation_text']

    return assistant_response_ar

# Smoke test: send one Arabic question through the translate -> chat ->
# translate pipeline and print the Arabic answer.
arabic_question = " ما الفرق بين ترخيص البناء وتصريح التشغيل"
answer = translate_and_chat(arabic_question)
print("🧠 Assistant says in Arabic:", answer)


Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['AriaTextForCausalLM', 'BambaForCausalLM', 'BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'Cohere2ForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'DeepseekV3ForCausalLM', 'DiffLlamaForCausalLM', 'ElectraForCausalLM', 'Emu3ForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FalconMambaForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'Gemma2ForCausalLM', 'Gemma3ForConditionalGeneration', 'Gemma3ForCausalLM', 'GitForCausalLM', 'GlmForCausalLM', 'Glm4ForCausalLM', 'GotOcr2ForConditionalGeneration', 'GPT2LMHeadModel', 'GP

🧠 Assistant says in Arabic: ترخيص البناء يسمح بالبناء، في حين يسمح ترخيص التشغيل (مثلاً للمصنع) للشركات بالعمل بشكل قانوني في مواقع معينة بعد الإنجاز. غالباً ما تطلب من قبل سلطات مثل EMA أو MOPH. بدون ترخيص تشغيلي، لا يمكنك فتح أبوابك! لذا احصل على كلاهما إذا لزم الأمر قبل الشروع في أي مشروع جديد؛ وإلا فإنك تخاطر بدفع غرامات أو أوامر إغلاق بسبب انتهاك قوانين المدينة بدونها.
