In [None]:
!pip install transformers datasets accelerate bitsandbytes peft

Collecting bitsandbytes
  Downloading bitsandbytes-0.46.0-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2

In [None]:
!pip install python-docx pypdf
!pip install torch

Collecting python-docx
  Downloading python_docx-1.1.2-py3-none-any.whl.metadata (2.0 kB)
Collecting pypdf
  Downloading pypdf-5.6.0-py3-none-any.whl.metadata (7.2 kB)
Downloading python_docx-1.1.2-py3-none-any.whl (244 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.3/244.3 kB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pypdf-5.6.0-py3-none-any.whl (304 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m304.2/304.2 kB[0m [31m31.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: python-docx, pypdf
Successfully installed pypdf-5.6.0 python-docx-1.1.2


In [None]:
import os
import re
import json
import gc
import pickle
import torch
from google.colab import files
from datasets import Dataset, concatenate_datasets
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    TrainingArguments,
    Trainer,
    logging as hf_logging,
)
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model
from sentence_transformers import SentenceTransformer
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document as LC_Doc
from docx import Document as DocxDocument

# Restrict Hugging Face logging to errors only (warnings and info are hidden).
hf_logging.set_verbosity_error()

# 1) Mitigate GPU fragmentation: configure the PyTorch CUDA allocator through
#    its environment variable.
os.environ.update({"PYTORCH_CUDA_ALLOC_CONF": "max_split_size_mb:32"})

In [None]:
# 2) Upload and parse instruction JSON
uploaded = files.upload()
if not uploaded:
    # A cancelled upload returns an empty mapping; fail with a clear message
    # instead of a bare StopIteration from next(iter(...)).
    raise ValueError("No instruction file was uploaded.")
instr_fname = next(iter(uploaded))
raw = uploaded[instr_fname].decode("utf-8")

# The file may wrap the JSON array in other text, so keep only the span from
# the first "[" to the last "]".
start, end = raw.find("["), raw.rfind("]") + 1
if start == -1 or end <= start:
    # Without this guard the slice below is empty and json.loads reports an
    # unhelpful "Expecting value" error.
    raise ValueError(f"No JSON array found in uploaded file {instr_fname!r}.")
js = raw[start:end]
js = re.sub(r",\s*\]$", "]", js)  # remove trailing comma before the closing bracket
instr_pairs = json.loads(js)

# One training string per pair, in the "User: ...\nAssistant: ...\n" chat format.
instr_ds = Dataset.from_list([
    {"text": f"User: {ex['user_query']}\nAssistant: {ex['response']}\n"}
    for ex in instr_pairs
])

Saving Industry & Machinery Equipment.txt to Industry & Machinery Equipment.txt


In [None]:
# 3) Upload and read Word document
#    The uploaded file is opened by name; the first (only expected) upload is used.
uploaded = files.upload()
docx_fname = next(iter(uploaded))
doc = DocxDocument(docx_fname)

# Flatten the document to plain text, one paragraph per line.
raw_text = "\n".join([para.text for para in doc.paragraphs])

Saving Industrial Machinery Content.docx to Industrial Machinery Content.docx


In [None]:
def clean_healthcare_text(text: str) -> str:
    """Reduce raw document text to single-space-separated alphanumeric words.

    The text is passed through an ordered list of regex rewrites that fix
    spacing around numbers and abbreviations, after which every character that
    is not an ASCII letter, digit or whitespace is replaced by a space and
    runs of whitespace are collapsed. Despite the name, none of the rules are
    healthcare-specific.

    Args:
        text: Raw text to normalise.

    Returns:
        The cleaned text, with no leading or trailing whitespace.
    """
    # Order matters: each rule operates on the output of the previous one.
    spacing_rules = (
        (r'\xa0(\d+)', r' \1'),                # nbsp before a number -> plain space
        (r'(?<=\D)\$(\d)', r' \1'),            # "$" glued to preceding text -> space (the "$" is dropped)
        (r'\b(\d+)\s+([KM])\b', r'\1\2'),      # "150 K" -> "150K"
        (r'\$(\d+)\s+([KM])\b', r'\1\2'),      # "$10 K" -> "10K" (the "$" is dropped)
        (r'(\d)(vs\.?)(\d)', r'\1 vs \3'),     # "75vs.150" -> "75 vs 150"
        (r'(\d)\s?–\s?(\d)', r'\1-\2'),        # en-dash between digits -> hyphen
        (r'(\d+)([A-Za-z]{2,})', r'\1 \2'),    # "50Kper" -> "50 Kper"
        (r'([a-zA-Z])(\d)', r'\1 \2'),         # letter followed by digit -> add space
        (r'\b(e\.)\s?(g\.)\s?,', r'e.g.,'),    # normalise "e. g. ," -> "e.g.,"
        (r'\b(i\.)\s?(e\.)\s?,', r'i.e.,'),    # normalise "i. e. ," -> "i.e.,"
        (r'\s+', ' '),                         # collapse whitespace
        (r'(?<=\D)\.(?=\S)', '. '),            # space after a "." not preceded by a digit
    )
    for pattern, replacement in spacing_rules:
        text = re.sub(pattern, replacement, text)
    text = text.strip()

    # Every remaining punctuation/symbol run becomes a space; this also splits
    # decimals ("3.5" -> "3 5") and hyphenated ranges ("5-10" -> "5 10").
    text = re.sub(r'[^A-Za-z0-9\s]+', ' ', text)
    return re.sub(r'\s+', ' ', text).strip()

In [None]:
# 4) Normalise the Word document text into plain alphanumeric words.
cleaned = clean_healthcare_text(raw_text)


In [None]:
# Display the cleaned text for a visual sanity check.
# NOTE(review): this echoes the entire string into the cell output; consider
# showing only a slice if the document is large.
cleaned

'Industrial Machinery Equipment Overview The industrial machinery and equipment industry designs manufactures and maintains heavy machinery for construction aerospace and defense This sector powers infrastructure aviation and military operations with equipment like excavators cranes welders and CNC machines Key technologies digital twins predictive maintenance quality assurance QA research and development R D and logistics optimization drive efficiency safety and innovation This document details these pillars drawing from extensive query based insights 600 queries and industry trends Industry Scope Construction Bulldozers graders pavers cranes for roads bridges and buildings Aerospace Welders drills hoists cutters for aircraft and runway construction Defense Loaders rollers CNC machines for vehicles weapons and bases Annual Market 500B globally growing with automation and sustainability demands Core Technologies 1 Digital Twins A digital twin is a virtual replica of a physical machine 

In [None]:
# 5) Chunk into ~800-char slices, with 80 characters of overlap between
#    neighbouring chunks, and wrap each slice as one {"text": ...} record.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=800,
    chunk_overlap=80,
)
chunks = splitter.split_text(cleaned)
content_ds = Dataset.from_list([dict(text=chunk) for chunk in chunks])

In [None]:
# 1) Load tokenizer & base model in FP16
base_checkpoint = "EleutherAI/gpt-neo-1.3B"

tokenizer = AutoTokenizer.from_pretrained(base_checkpoint)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    base_checkpoint,
    torch_dtype=torch.float16,   # 16-bit precision
    device_map="auto",
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/200 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.35k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/90.0 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/5.31G [00:00<?, ?B/s]

In [None]:
# 2) Prepare model for LoRA.
# The generation cache is switched off for checkpointing compatibility, then
# gradient checkpointing is turned on.
model.config.use_cache = False
model.gradient_checkpointing_enable()

# Freeze every base-model weight.
for base_weight in model.parameters():
    base_weight.requires_grad = False

In [None]:
# 3) Inject LoRA adapters (only these will train).
#    No target modules are listed, so the choice of layers is left to PEFT.
lora_cfg = LoraConfig(
    r=16,                   # LoRA rank
    lora_alpha=32,
    lora_dropout=0.1,
    task_type="CAUSAL_LM",
    inference_mode=False,   # adapters are created in training mode
)
model = get_peft_model(model, lora_cfg)

In [None]:
# Verify that only LoRA params are trainable: count all parameter elements and
# the subset that still requires gradients in a single pass.
trainable = 0
total = 0
for weight in model.parameters():
    n_elements = weight.numel()
    total += n_elements
    if weight.requires_grad:
        trainable += n_elements
print(f"Trainable params: {trainable:,} / {total:,}")

Trainable params: 3,145,728 / 1,318,721,536


In [None]:
def _mask_padded_labels(input_ids, attention_mask):
    """Copy one sequence of token ids, replacing padded positions with -100."""
    return [
        token_id if keep else -100
        for token_id, keep in zip(input_ids, attention_mask)
    ]


def tokenize_and_label(ex):
    """Tokenize ``ex["text"]`` to 512 tokens and attach causal-LM labels.

    Works for a single example (``ex["text"]`` is a string) and for a batch
    (``ex["text"]`` is a list of strings, as with ``Dataset.map(batched=True)``).

    An explicit EOS token is appended to every text so the model has a real
    end-of-sequence target. Labels are a copy of ``input_ids`` with every
    padded position (attention mask 0) set to -100, the index ignored by the
    loss, so padding does not contribute to the loss. Padding is detected via
    the attention mask rather than the token id because this notebook uses the
    EOS token as the pad token, which makes the two indistinguishable by id.

    Args:
        ex: Mapping with a ``"text"`` entry (string or list of strings).

    Returns:
        The tokenizer output with an added ``"labels"`` entry.
    """
    texts = ex["text"]
    eos = tokenizer.eos_token or ""
    if isinstance(texts, str):
        texts = texts + eos
    else:
        texts = [text + eos for text in texts]

    tok = tokenizer(
        texts,
        padding="max_length",
        truncation=True,
        max_length=512,
    )

    input_ids = tok["input_ids"]
    attention_mask = tok.get("attention_mask")
    if attention_mask is None:
        # No mask available: fall back to a plain copy of the inputs.
        tok["labels"] = input_ids.copy()
    elif input_ids and isinstance(input_ids[0], (list, tuple)):
        # Batched call: one label row per example.
        tok["labels"] = [
            _mask_padded_labels(ids, mask)
            for ids, mask in zip(input_ids, attention_mask)
        ]
    else:
        tok["labels"] = _mask_padded_labels(input_ids, attention_mask)
    return tok

# Tokenize both corpora the same way (instruction pairs first, then document
# chunks), dropping the raw "text" column from each result.
tok_instr, tok_content = (
    corpus.map(tokenize_and_label, batched=True, remove_columns=["text"])
    for corpus in (instr_ds, content_ds)
)

Map:   0%|          | 0/1405 [00:00<?, ? examples/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

In [None]:
# Combine the tokenized instruction pairs and document chunks into a single
# training set (instruction examples first, then content chunks).
train_dataset = concatenate_datasets([tok_instr, tok_content])

  block_group = [InMemoryTable(cls._concat_blocks(list(block_group), axis=axis))]
  table = cls._concat_blocks(blocks, axis=0)


In [None]:
from transformers import DataCollatorForLanguageModeling
# 5) Build a language-modeling data collator with masked-LM disabled
#    (mlm=False), i.e. configured for causal LM.
# NOTE(review): the Trainer(...) call in this notebook is constructed without
# a data_collator argument, so this object appears to be unused — confirm
# whether that is intentional.
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False,
)


In [None]:
# 5) Training arguments (FP16 + no eval split)
training_args = TrainingArguments(
    # Where checkpoints are written, and how often / how many are kept.
    output_dir="./gpt_neo_fp16_lora",
    save_steps=200,
    save_total_limit=3,
    load_best_model_at_end=False,
    # Schedule: 6 epochs, 2 examples per device x 4 accumulation steps.
    num_train_epochs=6,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    # Optimizer and learning-rate schedule.
    optim="adamw_torch",
    learning_rate=2e-4,
    weight_decay=0.01,
    max_grad_norm=1.0,
    lr_scheduler_type="cosine",
    warmup_ratio=0.1,
    # Mixed precision.
    fp16=True,
    # No evaluation pass; log every 50 steps without an external tracker.
    eval_strategy="no",
    logging_steps=50,
    report_to="none",
)

In [None]:
# 6) Trainer setup
#    Only the model, arguments and training set are supplied; no evaluation
#    set and no custom data collator are passed.
trainer = Trainer(
    train_dataset=train_dataset,
    args=training_args,
    model=model,
)


In [None]:
# Free memory before training. Run the Python garbage collector first so that
# unreachable tensors release their blocks, then ask the CUDA caching
# allocator to hand the now-unused cached blocks back to the device. In the
# opposite order, anything freed by the collector would stay cached.
gc.collect()
torch.cuda.empty_cache()

338

In [None]:
# 7) Train
trainer.train()

# 8) Save LoRA adapters & tokenizer side by side in the same directory
adapter_dir = "./gpt_neo_fp16_lora"
for artifact in (model, tokenizer):
    artifact.save_pretrained(adapter_dir)
print("✅ FP16 + LoRA fine-tuning complete.")

{'loss': 4.6137, 'grad_norm': 5.114451885223389, 'learning_rate': 8.22429906542056e-05, 'epoch': 0.2820874471086037}
{'loss': 0.2688, 'grad_norm': 0.055342692881822586, 'learning_rate': 0.00017570093457943927, 'epoch': 0.5641748942172073}
{'loss': 0.2059, 'grad_norm': 0.0565318800508976, 'learning_rate': 0.0001992693710678105, 'epoch': 0.846262341325811}
{'loss': 0.1599, 'grad_norm': 0.08283542096614838, 'learning_rate': 0.00019598272076749046, 'epoch': 1.1241184767277856}
{'loss': 0.1456, 'grad_norm': 0.09747800976037979, 'learning_rate': 0.00019013737330432506, 'epoch': 1.4062059238363893}
{'loss': 0.1183, 'grad_norm': 0.07924995571374893, 'learning_rate': 0.0001818891533293583, 'epoch': 1.688293370944993}
{'loss': 0.1095, 'grad_norm': 0.08739066123962402, 'learning_rate': 0.00017145794101351794, 'epoch': 1.9703808180535967}
{'loss': 0.1065, 'grad_norm': 0.09350455552339554, 'learning_rate': 0.00015912181050540245, 'epoch': 2.2482369534555713}
{'loss': 0.0999, 'grad_norm': 0.11189005

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel, PeftConfig

# 1) Paths
# Directory the fine-tuning step saved the LoRA adapter and tokenizer into.
MODEL_DIR = "./gpt_neo_fp16_lora"

In [None]:
# 2) Load PEFT config & base model
#    The adapter config records which base checkpoint it was trained on, so
#    the base model name is read from it instead of being hard-coded again.
peft_config = PeftConfig.from_pretrained(MODEL_DIR)
base_checkpoint_name = peft_config.base_model_name_or_path

base_model = AutoModelForCausalLM.from_pretrained(
    base_checkpoint_name,
    torch_dtype=torch.float16,
    device_map="auto",
)

In [None]:
# 3) Wrap with LoRA adapters: attach the adapter weights saved in MODEL_DIR to
#    the freshly loaded base model, then switch to evaluation mode.
# NOTE(review): as the last expression in the cell, model.eval() echoes the
# full module repr into the output; consider suppressing it.
model = PeftModel.from_pretrained(base_model, MODEL_DIR)
model.eval()

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): GPTNeoForCausalLM(
      (transformer): GPTNeoModel(
        (wte): Embedding(50257, 2048)
        (wpe): Embedding(2048, 2048)
        (drop): Dropout(p=0.0, inplace=False)
        (h): ModuleList(
          (0-23): 24 x GPTNeoBlock(
            (ln_1): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
            (attn): GPTNeoAttention(
              (attention): GPTNeoSelfAttention(
                (attn_dropout): Dropout(p=0.0, inplace=False)
                (resid_dropout): Dropout(p=0.0, inplace=False)
                (k_proj): Linear(in_features=2048, out_features=2048, bias=False)
                (v_proj): lora.Linear(
                  (base_layer): Linear(in_features=2048, out_features=2048, bias=False)
                  (lora_dropout): ModuleDict(
                    (default): Dropout(p=0.1, inplace=False)
                  )
                  (lora_A): ModuleDict(
                    (default): Linear(in_f

In [None]:
# 4) Load tokenizer from the directory it was saved to after training.
tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
# Fall back to EOS as the padding token if the saved tokenizer has none.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

In [None]:
# 5) Generation function
def generate_response(query: str, max_new_tokens=100, do_sample: bool = True) -> str:
    """Generate the assistant's reply to a single user query.

    The query is wrapped in the "User: ...\\nAssistant:" format used for
    fine-tuning, and only the newly generated tokens are decoded. If the model
    goes on to invent another "User:" turn, everything from that turn onwards
    is discarded.

    Args:
        query: The user's message.
        max_new_tokens: Upper bound on the number of generated tokens.
        do_sample: When True (default), sample with top_p=0.9 and
            temperature=0.7; those two settings only take effect when
            sampling is enabled. Pass False for deterministic greedy decoding.

    Returns:
        The reply text with surrounding whitespace removed.
    """
    prompt = f"User: {query}\nAssistant:"
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=512,
    ).to(model.device)
    prompt_len = inputs["input_ids"].shape[1]

    gen_kwargs = dict(
        max_new_tokens=max_new_tokens,
        no_repeat_ngram_size=2,
        do_sample=do_sample,
        pad_token_id=tokenizer.eos_token_id,
    )
    if do_sample:
        # Sampling knobs are only passed when sampling is actually on.
        gen_kwargs.update(top_p=0.9, temperature=0.7)

    with torch.no_grad():
        out = model.generate(**inputs, **gen_kwargs)

    # Skip the prompt tokens so text inside the query (e.g. a literal
    # "Assistant:") cannot be mistaken for the start of the reply.
    completion = tokenizer.decode(out[0][prompt_len:], skip_special_tokens=True)
    # Keep only the first assistant turn.
    return completion.split("\nUser:", 1)[0].strip()


In [None]:

# 6) Example usage
if __name__ == "__main__":
    # Scripted smoke test: print each query followed by the model's reply.
    queries = [
        "Hi! How you doing?",
    ]
    for q in queries:
        print(f"> {q}\n{generate_response(q)}\n")

    # Or enter an interactive loop:
    print("Enter 'exit' to quit.")
    while True:
        try:
            q = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Interrupting the kernel (or closing stdin) ends the chat
            # cleanly instead of leaving a traceback in the cell output.
            break
        if q.lower() in {"exit", "quit"}:
            break
        if not q:
            # Nothing typed: ask again rather than sending an empty prompt.
            continue
        print("Bot:", generate_response(q))

> Hi! How you doing?
I’m the Industrial Machinery & Equipment Agent, your gear-tech buddy.

Enter 'exit' to quit.


KeyboardInterrupt: Interrupted by user