# Required Imports & Configuration

In [None]:
import csv
import ast
import pandas as pd
# os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import json
import torch
from datetime import datetime
from datasets import Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
    Trainer,
)
from peft import LoraConfig, get_peft_model
from evaluate import load as load_metric
from transformers import TrainerCallback


class PrintStepCallback(TrainerCallback):
    def on_step_end(self, args, state, control, **kwargs):
        if state.global_step % 10 == 0:
            loss = state.log_history[-1].get('loss', 'N/A') if state.log_history else 'N/A'
            print(f"Step {state.global_step}, Loss: {loss}")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
target_device = ''
if torch.backends.mps.is_available():
    target_device = 'mps'
elif torch .cuda.is_available():
    target_device = 'cuda'
else:
    target_device = 'cpu'

DEVICE = torch.device(target_device)
print("Using DEVICE:", DEVICE)

Using DEVICE: cuda


# Constants/Hyperparameters

In [None]:
# For the incoming .json dataset
TITLE = 'title'
DESCRIPTION = 'description'
SUMMARY = 'summary'
DATA_SLICE = None

# Model hyperparameters
MAX_LENGTH = 512        # len
MAX_TEXT_TOKENS = 350   # tt

MAX_NEW_TOKENS = 1000   # nt
EPOCHS = 30             # ep

OUTPUT_DIR = f'./qwen_len{MAX_LENGTH}_tt{MAX_TEXT_TOKENS}_nt{MAX_NEW_TOKENS}_ep{EPOCHS}'

# Utility Functions

In [4]:
def clean_row_text(s):
    lst = ast.literal_eval(s)
    json_str = json.dumps(lst)
    obj = json.loads(json_str)
    return obj[0]['text']

In [5]:
def truncate_text(text, tokenizer, max_tokens):
    tokens = tokenizer.encode(text, add_special_tokens=False)
    if len(tokens) > max_tokens:
        tokens = tokens[:max_tokens]
        text = tokenizer.decode(tokens, skip_special_tokens=True)
    return text

In [6]:
def preprocess(batch, tokenizer):
    input_ids_list = []
    attention_list = []
    labels_list = []

    for summary, description in zip(batch[SUMMARY], batch[DESCRIPTION]):
        summary = truncate_text(summary, tokenizer, MAX_TEXT_TOKENS)
        
        # prompt = f"Summarize this patent:\n\n{text}\n\nSummary: "
        prompt = f"Generate a full detailed patent document based on this summary: \n\n{summary}\n\n Patent Document:"
        target = description + tokenizer.eos_token
        full_text = prompt + target

        target_ids = tokenizer.encode(target, add_special_tokens=False)
        target_len = len(target_ids)

        tokenized = tokenizer(
            full_text,
            truncation=True,
            max_length=MAX_LENGTH,
            padding="max_length",
            add_special_tokens=True,
        )

        input_ids = tokenized["input_ids"]
        attention_mask = tokenized["attention_mask"]
        seq_len = sum(attention_mask)

        labels = [-100] * MAX_LENGTH
        target_start = seq_len - target_len
        
        for i in range(target_len):
            pos = target_start + i
            if 0 <= pos < MAX_LENGTH:
                labels[pos] = input_ids[pos]

        input_ids_list.append(input_ids)
        attention_list.append(attention_mask)
        labels_list.append(labels)

    return {
        "input_ids": input_ids_list,
        "attention_mask": attention_list,
        "labels": labels_list
    }

In [7]:
def generate_summary(mdl, text, tokenizer):
    text = truncate_text(text, tokenizer, MAX_TEXT_TOKENS)
    # prompt = f"Summarize this patent:\n\n{text}\n\nSummary:"
    prompt = f"Generate a full patent document based on this summary: \n\n{text}\n\n Patent Document:"
    inputs = tokenizer(prompt, return_tensors="pt")#.to("cuda")
    inputs = {k: v.to(DEVICE) for k, v in inputs.items()}  # move to DEVICE (CPU or GPU)

    mdl.eval()
    with torch.no_grad():
        output = mdl.generate(
            **inputs,
            max_new_tokens=MAX_NEW_TOKENS,
            do_sample=False,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )
    
    full_output = tokenizer.decode(output[0], skip_special_tokens=True)
    if "Patent Document:" in full_output:
        return full_output.split("Patent Document:")[-1].strip()
    return full_output

In [8]:
def collate_fn(batch):
    return {
        "input_ids": torch.tensor([x["input_ids"] for x in batch], dtype=torch.long),
        "attention_mask": torch.tensor([x["attention_mask"] for x in batch], dtype=torch.long),
        "labels": torch.tensor([x["labels"] for x in batch], dtype=torch.long),
    }

# Training Data

In [9]:
# ================================================================
# 0. Prepare Data
# ================================================================
FILE_NAME = 'us_smallest_claims_1985_1990_top500'
CSV_PATH = './' + FILE_NAME + '.csv'
JSON_PATH = './' + FILE_NAME + '.json'

df = pd.read_csv(CSV_PATH)
df.drop(columns=['n_claims', 'publication_number', 'publication_date', 'claims_localized_html'], inplace=True)
df = df.map(clean_row_text)
df.to_json(JSON_PATH, orient='records')

In [10]:
# ================================================================
# 1. Load Data
# ================================================================
# DATA_PATH = "./us_smallest_claims_1985_1990_top500.json"
DATA_PATH = "./batch_summarized.json"

with open(DATA_PATH, "r", encoding="utf-8") as f:
    data = json.load(f)

if DATA_SLICE:
    data = data[:DATA_SLICE]

print(f"Loaded {len(data)} patents with real summaries")

Loaded 500 patents with real summaries


In [11]:
records = []
for i, item in enumerate(data):
    if SUMMARY in item and DESCRIPTION in item:
        records.append({
            DESCRIPTION: item[DESCRIPTION],
            TITLE: item[TITLE],
            SUMMARY: item[SUMMARY]
        })
        if i < 3:
            print(f"\nExample {i+1}: {item[TITLE]}")
            print(f"  Summary: {item[SUMMARY][:150]}...")

print(f"\nTotal records: {len(records)}")


Example 1: Vehicular turn signal apparatus
  Summary: Vehicular turn signal apparatus is provided for securement to a rear shelf surface proximate a rear window of a vehicular interior. The apparatus incl...

Example 2: Apparatus for facilitating the machining of workpieces
  Summary: Apparatus for facilitating the machining of workpieces. Means to gain access to the cutting edge of a saw tooth in the direction of both the back and ...

Example 3: Control valve
  Summary: A 3/2 proportional control valve is provided with an actuating piston which is subjected to the control pressure set in a pilot valve. The area relati...

Total records: 500


# Model Loading, Fine-Tuning, and Evaluation

In [12]:
# ================================================================
# 2. Load Model/Tokenizer
# ================================================================
model_name = "Qwen/Qwen3-0.6B-Base"

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    # use bfloat16 only if CUDA is available; otherwise use float32
    torch_dtype=(torch.bfloat16 if torch.cuda.is_available() else torch.float32),
)
model.to(DEVICE)

if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

`torch_dtype` is deprecated! Use `dtype` instead!


In [13]:
# ================================================================
# 3. Create Dataset
# ================================================================

dataset = Dataset.from_list(records)
dataset = dataset.train_test_split(test_size=0.2, seed=42)
print(f"\nTrain: {len(dataset['train'])}, Test: {len(dataset['test'])}")
print(dataset['test'][0])


Train: 400, Test: 100
{'description': 'A protective trim system is provided which protects the surface of an underlying solid body. The system includes an elongated trim member which is attached to a surface and is movable away from the surface to protect the underlying body.', 'title': 'Extendable protective trim', 'summary': 'A protective trim system is provided which protects the surface of an underlying solid body. System includes an elongated trim member which is attached to a surface and is movable away from the surface to protect the underlying body. The system also comprises a deflatable bladder disposed between said mountingflange and said trim member.'}


In [14]:
# ================================================================
# 4. Preprocessing
# ================================================================
tokenized_train = dataset["train"].map(
    lambda batch: preprocess(batch, tokenizer),
    batched=True,
    remove_columns=dataset["train"].column_names
)

example = tokenized_train[0]
valid_count = sum(1 for l in example["labels"] if l != -100)
print(f"Valid label tokens: {valid_count}")

Map: 100%|██████████| 400/400 [00:00<00:00, 946.44 examples/s]

Valid label tokens: 105





In [15]:
# ================================================================
# 5. Baseline Evaluation
# ================================================================
rouge = load_metric("rouge")

# test_refs = [item[SUMMARY] for item in dataset["test"]]
test_refs = [item[DESCRIPTION] for item in dataset["test"]]

print("\nBaseline evaluation...")
baseline_preds = []
for item in dataset["test"]:
    pred = generate_summary(model, item[DESCRIPTION], tokenizer)
    baseline_preds.append(pred)

baseline_rouge = rouge.compute(predictions=baseline_preds, references=test_refs)
print(f"Baseline ROUGE-L: {baseline_rouge['rougeL']:.4f}")


Baseline evaluation...
Baseline ROUGE-L: 0.3224


In [None]:
# ================================================================
# 6. LoRA Fine-tuning
# ================================================================
config = LoraConfig(
    r=64,
    lora_alpha=64,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, config)
model.print_trainable_parameters()

training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    num_train_epochs=EPOCHS,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    learning_rate=1e-4,
    lr_scheduler_type="cosine",
    warmup_ratio=0.1,
    weight_decay=0.01,
    bf16=torch.cuda.is_available(),               # enable bf16 only on CUDA
    dataloader_pin_memory=torch.cuda.is_available(),  # only pin when CUDA available
    logging_steps=10,
    save_steps=999999, # keep this high
    report_to="none",
    disable_tqdm=False,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    data_collator=collate_fn,
    callbacks=[PrintStepCallback()]
)

print("\n=== Training ===")
trainer.train()

trainable params: 40,370,176 || all params: 636,420,096 || trainable%: 6.3433

=== Training ===


Step,Training Loss
10,1.77
20,1.6944
30,1.653
40,1.5664
50,1.5187
60,1.396
70,1.595
80,1.5726
90,1.5656
100,1.5069


Step 10, Loss: N/A
Step 20, Loss: 1.77
Step 30, Loss: 1.6944
Step 40, Loss: 1.653
Step 50, Loss: 1.5664
Step 60, Loss: 1.5187
Step 70, Loss: 1.396
Step 80, Loss: 1.595
Step 90, Loss: 1.5726
Step 100, Loss: 1.5656
Step 110, Loss: 1.5069
Step 120, Loss: 1.4955
Step 130, Loss: 1.3848
Step 140, Loss: 1.3771
Step 150, Loss: 1.3423
Step 160, Loss: 1.4413
Step 170, Loss: 1.1558
Step 180, Loss: 1.2409
Step 190, Loss: 1.1575
Step 200, Loss: 1.1954
Step 210, Loss: 1.2541
Step 220, Loss: 0.9181
Step 230, Loss: 0.9552
Step 240, Loss: 1.0028
Step 250, Loss: 0.9745
Step 260, Loss: 0.9691
Step 270, Loss: 0.7359
Step 280, Loss: 0.7606
Step 290, Loss: 0.6723
Step 300, Loss: 0.7651
Step 310, Loss: 0.7136
Step 320, Loss: 0.5364
Step 330, Loss: 0.5054
Step 340, Loss: 0.4926
Step 350, Loss: 0.5197
Step 360, Loss: 0.5288
Step 370, Loss: 0.3404
Step 380, Loss: 0.3584
Step 390, Loss: 0.3207
Step 400, Loss: 0.3291
Step 410, Loss: 0.3824
Step 420, Loss: 0.2365
Step 430, Loss: 0.1862
Step 440, Loss: 0.2134
Step 

TrainOutput(global_step=1500, training_loss=0.30038514609324435, metrics={'train_runtime': 2023.0427, 'train_samples_per_second': 5.932, 'train_steps_per_second': 0.741, 'total_flos': 1.7725598466048e+16, 'train_loss': 0.30038514609324435, 'epoch': 30.0})

In [17]:
# ================================================================
# 7. Final Evaluation
# ================================================================
print("\nFinal evaluation...")
finetuned_preds = []

model.eval()
for item in dataset["test"]:
    pred = generate_summary(model, item[DESCRIPTION], tokenizer)
    finetuned_preds.append(pred)

finetuned_rouge = rouge.compute(predictions=finetuned_preds, references=test_refs)


Final evaluation...


In [18]:
# ================================================================
# 8. SAVE MODEL PROPERLY
# ================================================================
print("\n" + "="*70)
print("SAVING MODEL")
print("="*70)

# Save LoRA adapters
model.save_pretrained("./qwen_lora_patent_real")
print("✓ LoRA adapters saved")

# Save tokenizer
tokenizer.save_pretrained("./qwen_lora_patent_real")
print("✓ Tokenizer saved")

# Save comprehensive metadata
metadata = {
    "model_info": {
        "base_model": model_name,
        "model_type": "LoRA_fine-tuned",
        "task": "patent_summarization"
    },
    "training_info": {
        "training_date": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "dataset": DATA_PATH,
        "num_train_examples": len(dataset["train"]),
        "num_test_examples": len(dataset["test"]),
        "num_epochs": EPOCHS,
        "batch_size": 2,
        "gradient_accumulation_steps": 4,
        "effective_batch_size": 8,
        "learning_rate": 1e-4,
        "max_length": MAX_LENGTH,
        "max_text_tokens": MAX_TEXT_TOKENS
    },
    "lora_config": {
        "r": 32,
        "lora_alpha": 64,
        "lora_dropout": 0.05,
        "target_modules": ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
    },
    "results": {
        "baseline": {
            "rouge1": float(baseline_rouge['rouge1']),
            "rouge2": float(baseline_rouge['rouge2']),
            "rougeL": float(baseline_rouge['rougeL'])
        },
        "finetuned": {
            "rouge1": float(finetuned_rouge['rouge1']),
            "rouge2": float(finetuned_rouge['rouge2']),
            "rougeL": float(finetuned_rouge['rougeL'])
        },
        "improvement": {
            "rouge1": float(finetuned_rouge['rouge1'] - baseline_rouge['rouge1']),
            "rouge2": float(finetuned_rouge['rouge2'] - baseline_rouge['rouge2']),
            "rougeL": float(finetuned_rouge['rougeL'] - baseline_rouge['rougeL'])
        }
    }
}

with open("./qwen_lora_patent_real/metadata.json", "w") as f:
    json.dump(metadata, f, indent=2)
print("✓ Metadata saved")

# Save sample predictions for reference
samples = []
for i in range(min(5, len(test_refs))):
    samples.append({
        TITLE: dataset['test'][i][TITLE],
        "reference": test_refs[i],
        "baseline": baseline_preds[i],
        "finetuned": finetuned_preds[i]
    })

with open("./qwen_lora_patent_real/sample_predictions.json", "w") as f:
    json.dump(samples, f, indent=2, ensure_ascii=False)
print("✓ Sample predictions saved")

print(f"\n✓ Complete model package saved to: ./qwen_lora_patent_real/")
print("\nSaved files:")
print("  - adapter_model.bin (~13MB)")
print("  - adapter_config.json")
print("  - tokenizer files")
print("  - metadata.json")
print("  - sample_predictions.json")


SAVING MODEL
✓ LoRA adapters saved
✓ Tokenizer saved
✓ Metadata saved
✓ Sample predictions saved

✓ Complete model package saved to: ./qwen_lora_patent_real/

Saved files:
  - adapter_model.bin (~13MB)
  - adapter_config.json
  - tokenizer files
  - metadata.json
  - sample_predictions.json


In [19]:
# ================================================================
# 9. Display Results
# ================================================================
print("\n" + "="*70)
print("FINAL RESULTS")
print("="*70)
print(f"Baseline ROUGE-L:   {baseline_rouge['rougeL']:.4f}")
print(f"Fine-tuned ROUGE-L: {finetuned_rouge['rougeL']:.4f}")
delta = finetuned_rouge['rougeL'] - baseline_rouge['rougeL']
print(f"Change: {'+' if delta >= 0 else ''}{delta:.4f} ({delta*100:+.2f}%)")

print(f"\nDetailed scores:")
print(f"  ROUGE-1: {baseline_rouge['rouge1']:.4f} -> {finetuned_rouge['rouge1']:.4f}")
print(f"  ROUGE-2: {baseline_rouge['rouge2']:.4f} -> {finetuned_rouge['rouge2']:.4f}")
print(f"  ROUGE-L: {baseline_rouge['rougeL']:.4f} -> {finetuned_rouge['rougeL']:.4f}")

print("\n=== Sample Comparisons ===")
for i in range(min(3, len(test_refs))):
    print(f"\n--- {dataset['test'][i][TITLE]} ---")
    print(f"Reference:  {test_refs[i][:]}")
    print(f"Baseline:   {baseline_preds[i][:]}")
    print(f"Fine-tuned: {finetuned_preds[i][:]}")

print("\n" + "="*70)
print("TRAINING COMPLETE!")
print("="*70)


FINAL RESULTS
Baseline ROUGE-L:   0.3224
Fine-tuned ROUGE-L: 0.7598
Change: +0.4374 (+43.74%)

Detailed scores:
  ROUGE-1: 0.3380 -> 0.7890
  ROUGE-2: 0.3064 -> 0.7346
  ROUGE-L: 0.3224 -> 0.7598

=== Sample Comparisons ===

--- Extendable protective trim ---
Reference:  A protective trim system is provided which protects the surface of an underlying solid body. The system includes an elongated trim member which is attached to a surface and is movable away from the surface to protect the underlying body.
Baseline:   [Patent Document]

[Abstract] 

A protective trim system is provided which protects the surface of an underlying solid body. The system includes an elongated trim member which is attached to a surface and is movable away from the surface to protect the underlying body. The trim member is designed to be easily removable and can be used to cover or protect various surfaces, such as windows, doors, and other exposed areas. The system is easy to install and can be used in a va

# Load and Prompt Existing Model

In [20]:
PROMPT_PATH = "./prompt-construction/patent_prompts.json"

with open(PROMPT_PATH, "r", encoding="utf-8") as f:
    prompts = json.load(f)

prompts = prompts[:10]

print(f"Loaded {len(prompts)} prompts for our model!")

Loaded 10 prompts for our model!


In [21]:
# 1. Load the base model
from peft import PeftModel

base_model_name = "Qwen/Qwen3-0.6B-Base"
model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    trust_remote_code=True,
    torch_dtype=torch.float32,  # or torch.bfloat16 if CUDA available
)

# 2. Load LoRA adapters on top of base model
model = PeftModel.from_pretrained(model, "./qwen_lora_patent_real")

# 3. Load tokenizer
tokenizer = AutoTokenizer.from_pretrained("./qwen_lora_patent_real", trust_remote_code=True)

# 4. Move to device
model.to(DEVICE)
model.eval()

# Test
patents = []
for prompt in prompts:
    prompt += "\n\n Patent Document:"
    patents.append(generate_summary(model, prompt, tokenizer))

In [22]:
for patent in patents:
    print(patent)

Vehicular turn signal apparatus is provided for securement to a rear shelf surface proximate a rear window of a vehicular interior in a manner similar to that for securement to a rear window of a vehicle port light unit. The apparatus includes a plurality of modules each containing a pivotally mounted bulb positionable from a first vertical position to a second horizontal position such that when the vehicle enters a traffic cycle the modules are positioned along the length of the rear shelf surface with the bulbs being positioned at predetermined spaced intervals thereafter providing simultaneousaneous illumination upon exit from the traffic cycle.
Apparatus for facilitating the machining of workpieces, particularly in the context of grinding a cutting edge of a saw tooth in the direction of both the back and the front face of a tooth in a single grinding process, comprises a grinding tool having a multi-tooth arrangement and gaining access to the cutting edge of a saw tooth in the dir

In [None]:
# If you leave this running in the background, turn your speakers on (only works locally)

# import platform
# import os

# if platform.system() == "Darwin":
#     while True:
#         os.system("afplay /System/Library/Sounds/Glass.aiff")
# elif platform.system() == "Windows":
#     import winsound
#     duration = 1000  # milliseconds
#     freq = 1000  # Hz
#     while True:
#         winsound.Beep(freq, duration)

KeyboardInterrupt: 