## Finetuning Flan T5 XL for lease agreement data extraction

### Install dependencies

In [1]:
%pip install pandas numpy peft scikit-learn transformers datasets torch accelerate bitsandbytes huggingface-hub -q

Note: you may need to restart the kernel to use updated packages.


### Define the extraction JSON schema

In [2]:
import json

extraction_json_schema = {
  "title": "ExtractedLeaseData",
  "type": "object",
  "properties": {
    "tenant_name": {
      "type": ["string", "null"],
      "description": "The name of the tenant, found in the OCR text."
    },
    "unit_address": {
      "type": ["string", "null"],
      "description": "The unit address found in the OCR text."
    },
    "unit_number": {
      "type": ["string", "null"],
      "description": "The unit number found in the OCR text."
    },
    "unit_type": {
      "type": ["string", "null"],
      "description": "The unit type found in the OCR text."
    },
    "agreement_date": {
      "type": ["string", "null"],
      "format": "date"
    },
    "lease_start": {
      "type": ["string", "null"],
      "format": "date",
      "description": "The date when the lease starts, found in the OCR text."
    },
    "lease_end": {
      "type": ["string", "null"],
      "format": "date",
      "description": "The date when the lease ends, found in the OCR text."
    },
    "lease_auto_renew": {
      "type": ["string", "null"],
      "description": "The type of lease auto renewal, found in the OCR text."
    },
    "hourly_rate": {
      "type": ["number", "null"],
      "description": "The hourly rate found in the OCR text."
    },
    "monthly_rent": {
      "type": ["number", "null"],
      "description": "The monthly rent found in the OCR text."
    },
    "prorated_rent": {
      "type": ["number", "null"],
      "description": "The prorated rent found in the OCR text."
    },
    "security_deposit": {
      "type": ["number", "null"],
      "description": "The security deposit found in the OCR text."
    },
    "lease_rent": {
      "type": ["number", "null"],
      "description": "The security deposit found in the OCR text."
    },
    "monthly_payment_breakdown": {
      "type": ["object", "null"],
      "description": "The monthly payment breakdown data found in the OCR text.",
      "additionalProperties": {}
    },
    "utility_charges": {
      "type": ["object", "null"],
      "description": "The utility charges found in the OCR text. This is a dictionary with utility charges as the key, and their price as the value.",
      "additionalProperties": {
        "type": ["number", "null"]
      }
    }
  },
  "required": ["tenant_name", "unit_address", "unit_number", "unit_type", "agreement_date", "lease_start", "lease_end", "lease_auto_renew", "hourly_rate", "monthly_rent", "prorated_rent", "security_deposit", "lease_rent", "monthly_payment_breakdown", "utility_charges"]
}

extraction_json_schema_str = json.dumps(extraction_json_schema, indent=2)

### Build the Dataset
Build the dataset from the collected data in the Sqlite3 database

In [3]:
import pandas as pd
import sqlite3

# Path to the SQLite database
db_path = "../output/extracted_lease_agreements.db"

# Connect to the SQLite database
conn = sqlite3.connect(db_path)

# Query to select all data from the extracted_data table
query = "SELECT * FROM extracted_data"

# Read the data into a DataFrame
df = pd.read_sql_query(query, conn, index_col="id")

# Close the database connection
conn.close()

df

Unnamed: 0_level_0,extracted_text,extracted_fields
id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,"22.PARKING. We may regulate the time, manner, ...","{""tenant_name"": null, ""unit_address"": null, ""u..."
2,38.MISCELLANEOUS.\nA. Exercising one remedy wo...,"{""tenant_name"": null, ""unit_address"": null, ""u..."
3,"You'll pay for all other utilities, related de...","{""tenant_name"": null, ""unit_address"": null, ""u..."
4,Disposition or Sale. Except for animals and pr...,"{""tenant_name"": null, ""unit_address"": null, ""u..."
5,dupusit: or 2) return the remaining portion (i...,"{""tenant_name"": ""Comunque Bolas"", ""unit_addres..."
...,...,...
651,L HOUSING OPPORTUNITY\nANIMAL ADDENDUM Becomes...,"{""tenant_name"": ""Novy Jezkova"", ""unit_address""..."
652,POURL HOUSIMO OPPORTUNITY\nADDITIONAL SPECIAL ...,"{""tenant_name"": ""Novy Jezkova"", ""unit_address""..."
653,26.CONDITION OF THE PREMISES AND ALTERATIONS. ...,"{""tenant_name"": null, ""unit_address"": null, ""u..."
654,"Severability, Originals and Attachments, and S...","{""tenant_name"": ""Novy Jezkova"", ""unit_address""..."


#### Preprocess the dataset
We need to add in missing fields in the dataset

In [4]:
# Function to ensure all required fields are present in the extracted_fields column
def ensure_all_fields(extracted_fields, required_fields):
    # Convert the extracted_fields string to a dictionary
    extracted_data = json.loads(extracted_fields)
    
    # Check and add any missing fields with a value of None
    for field in required_fields:
        if field not in extracted_data:
            extracted_data[field] = None
    
    # Convert the dictionary back to a JSON string
    return json.dumps(extracted_data)

# List of required fields from the extraction_json_schema
extraction_fields = list(extraction_json_schema['properties'].keys())

# Apply the function to each row in the extracted_fields column
df['extracted_fields'] = df['extracted_fields'].apply(ensure_all_fields, required_fields=extraction_fields)

# Display the updated DataFrame
df

Unnamed: 0_level_0,extracted_text,extracted_fields
id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,"22.PARKING. We may regulate the time, manner, ...","{""tenant_name"": null, ""unit_address"": null, ""u..."
2,38.MISCELLANEOUS.\nA. Exercising one remedy wo...,"{""tenant_name"": null, ""unit_address"": null, ""u..."
3,"You'll pay for all other utilities, related de...","{""tenant_name"": null, ""unit_address"": null, ""u..."
4,Disposition or Sale. Except for animals and pr...,"{""tenant_name"": null, ""unit_address"": null, ""u..."
5,dupusit: or 2) return the remaining portion (i...,"{""tenant_name"": ""Comunque Bolas"", ""unit_addres..."
...,...,...
651,L HOUSING OPPORTUNITY\nANIMAL ADDENDUM Becomes...,"{""tenant_name"": ""Novy Jezkova"", ""unit_address""..."
652,POURL HOUSIMO OPPORTUNITY\nADDITIONAL SPECIAL ...,"{""tenant_name"": ""Novy Jezkova"", ""unit_address""..."
653,26.CONDITION OF THE PREMISES AND ALTERATIONS. ...,"{""tenant_name"": null, ""unit_address"": null, ""u..."
654,"Severability, Originals and Attachments, and S...","{""tenant_name"": ""Novy Jezkova"", ""unit_address""..."


### Split the dataset

In [5]:
from sklearn.model_selection import train_test_split

# First split: 80% for training, 20% for temp (which will be split into eval and test)
train_df, temp_df = train_test_split(df, test_size=0.2, random_state=42)

# Second split: 50% of temp for eval and 50% for test (10% of the original data each)
eval_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=42)

# Check the sizes of the splits
print(f"Training set size: {len(train_df)}")
print(f"Evaluation set size: {len(eval_df)}")
print(f"Test set size: {len(test_df)}")


Training set size: 524
Evaluation set size: 65
Test set size: 66


### Load the base model

In [6]:
from peft import LoraConfig, TaskType

peft_config = LoraConfig(
    lora_alpha=16, # Higher alpha to match larger model capacity
    lora_dropout=0.1, # Consistent dropout rate to prevent overfitting
    r=8, # Rank, kept the same for balance between performance and efficiency
    task_type=TaskType.SEQ_2_SEQ_LM, # Change to SEQ_2_SEQ_LM for seq2seq models
    bias="none", # Keeping bias as none, similar to your original setup
    target_modules=[
        'q', 'v', 'k', 'o', # Attention layers (query, value, key, output projections)
        'wi', 'wo', # Feedforward layers (input, output projections)
        'wq', 'wv', 'wk', 'wo', # Additional T5-specific projection layers
    ], # Target modules relevant to T5 architecture
)


In [7]:

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, BitsAndBytesConfig
from huggingface_hub import notebook_login
from peft import get_peft_model, prepare_model_for_kbit_training
import torch

# login to access gated model
notebook_login()

# Load the tokenizer and model
model_id = "google/flan-t5-xl"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

# empty GPU memory
torch.cuda.empty_cache()

tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'right'

model = AutoModelForSeq2SeqLM.from_pretrained(model_id, quantization_config=bnb_config)

model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…



`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

trainable params: 12,189,696 || all params: 2,861,946,880 || trainable%: 0.4259


### Build the datasets

In [8]:
prompt_template = """
### TASK:
You are a specialized model for extracting specific information from lease agreement text. Your goal is to accurately extract data fields from the provided OCR text of a lease agreement. Additionally, correct any obvious OCR errors you encounter during extraction.

### INPUT TEXT:
Below is the OCR text extracted from a lease agreement. Carefully analyze this text, and extract the relevant data fields.

OCR Text:
```
{extracted_text}
```

### RESPONSE FORMAT:
Return the extracted data as a JSON object, adhering strictly to the following JSON schema:

```json
{extraction_json_schema_str}
```
"""

In [9]:
from datasets import Dataset

def preprocess_function(examples):
    inputs = [prompt_template.format(extracted_text=extracted_text, extraction_json_schema_str=extraction_json_schema_str) for extracted_text in examples["extracted_text"]]
    model_inputs = tokenizer(inputs, max_length=512, truncation=True)

    # The "labels" are the tokenized outputs:
    labels = tokenizer(
        text_target=examples["extracted_fields"], 
        max_length=512,         
        truncation=True
    )

    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

# Create datasets with the extracted text and labels
train_dataset = Dataset.from_pandas(train_df)
tokenized_train_dataset = train_dataset.map(preprocess_function, batched=True)
eval_dataset = Dataset.from_pandas(eval_df)
tokenized_eval_dataset = eval_dataset.map(preprocess_function, batched=True)
test_dataset = Dataset.from_pandas(test_df)
tokenized_test_dataset = test_dataset.map(preprocess_function, batched=True)

train_dataset


Map:   0%|          | 0/524 [00:00<?, ? examples/s]

Map:   0%|          | 0/65 [00:00<?, ? examples/s]

Map:   0%|          | 0/66 [00:00<?, ? examples/s]

Dataset({
    features: ['extracted_text', 'extracted_fields', 'id'],
    num_rows: 524
})

### Train (fine tune) the base model

In [10]:
from transformers import Seq2SeqTrainingArguments, DataCollatorForSeq2Seq, Seq2SeqTrainer

training_args = Seq2SeqTrainingArguments(
    output_dir="./results",
    eval_strategy="steps",  # Changed to evaluate based on steps
    eval_steps=10,  # Evaluate every 10 steps
    learning_rate=1e-3,
    lr_scheduler_type="linear",
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=8,
    gradient_checkpointing=True,
    logging_strategy="steps",
    logging_steps=10,
    disable_tqdm=False,  # Ensure tqdm progress bar is enabled
    max_steps = 2000, # increase max steps
    save_strategy="steps",
    save_steps=50,  # Save a checkpoint every 10 steps
    # predict_with_generate=True, # need this for ROUGE/ BLEU metrics
    optim="paged_adamw_32bit",
)

trainer = Seq2SeqTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_eval_dataset,
    args=training_args,
    data_collator=DataCollatorForSeq2Seq(
        tokenizer=tokenizer,
        model=model,
        return_tensors="pt",  # Return PyTorch tensors
    )
)
model.config.use_cache = False  # silence the warnings. Please re-enable for inference!
trainer.train()

max_steps is given, it will override any value given in num_train_epochs


  return fn(*args, **kwargs)


  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


Step,Training Loss,Validation Loss
10,1.0935,0.353971
20,0.3252,0.154096
30,0.1744,0.133578
40,0.16,0.124584
50,0.1311,0.118901
60,0.1246,0.112071
70,0.1179,0.106649
80,0.1123,0.102662
90,0.0922,0.100231
100,0.0983,0.095224


  return fn(*args, **kwargs)


  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


  return fn(*args, **kwargs)


  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


  return fn(*args, **kwargs)


  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


  return fn(*args, **kwargs)


  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


  return fn(*args, **kwargs)


  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


  return fn(*args, **kwargs)


  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


  return fn(*args, **kwargs)


  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


  return fn(*args, **kwargs)


  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


  return fn(*args, **kwargs)


  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


  return fn(*args, **kwargs)


  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


  return fn(*args, **kwargs)


  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


  return fn(*args, **kwargs)


  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


  return fn(*args, **kwargs)


  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


  return fn(*args, **kwargs)


  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


  return fn(*args, **kwargs)


  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


  return fn(*args, **kwargs)


  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


  return fn(*args, **kwargs)


  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


  return fn(*args, **kwargs)


  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


  return fn(*args, **kwargs)


  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


  return fn(*args, **kwargs)


  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


  return fn(*args, **kwargs)


  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


  return fn(*args, **kwargs)


  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


  return fn(*args, **kwargs)


  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


  return fn(*args, **kwargs)


  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


  return fn(*args, **kwargs)


  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


  return fn(*args, **kwargs)


  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


  return fn(*args, **kwargs)


  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


  return fn(*args, **kwargs)


  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


  return fn(*args, **kwargs)


  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


  return fn(*args, **kwargs)


  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


  return fn(*args, **kwargs)


  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


  return fn(*args, **kwargs)


  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


  return fn(*args, **kwargs)


  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


  return fn(*args, **kwargs)


  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


  return fn(*args, **kwargs)


  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


  return fn(*args, **kwargs)


  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


  return fn(*args, **kwargs)


  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


  return fn(*args, **kwargs)


  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


  return fn(*args, **kwargs)


  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


TrainOutput(global_step=2000, training_loss=0.02121274389605969, metrics={'train_runtime': 152588.4045, 'train_samples_per_second': 0.419, 'train_steps_per_second': 0.013, 'total_flos': 5.49744808034304e+17, 'train_loss': 0.02121274389605969, 'epoch': 122.13740458015268})

### Save the model

In [11]:
peft_model_repo_id = "aryaniyaps/finetuned_flan_t5_xl_lease_data_extraction_40_deals"

In [12]:
# Push the PEFT model (LoRA adapters) to Hugging Face Hub
trainer.model.save_pretrained("./saved_models/finetuned_flan_t5_xl_lease_data_extraction_40_deals")

trainer.model.push_to_hub(peft_model_repo_id)

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/48.9M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/aryaniyaps/finetuned_flan_t5_xl_lease_data_extraction_40_deals/commit/b5c9e46b8e7aee048c5cf39fcaa7b00a4b0b062f', commit_message='Upload model', commit_description='', oid='b5c9e46b8e7aee048c5cf39fcaa7b00a4b0b062f', pr_url=None, pr_revision=None, pr_num=None)

### Evaluate the model

In [13]:
from peft import PeftModel
 
# Load the Lora model
finetuned_model = PeftModel.from_pretrained(model, peft_model_repo_id, device_map={"":0})
finetuned_model.eval()
 
print("PEFT model loaded")

adapter_config.json:   0%|          | 0.00/702 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/48.9M [00:00<?, ?B/s]

PEFT model loaded


Let's try data extraction with a random sample

In [14]:
from random import randrange
from tabulate import tabulate

sample = test_dataset[randrange(len(test_dataset))]
 
input_ids = tokenizer(prompt_template.format(extracted_text=sample["extracted_text"], extraction_json_schema_str=extraction_json_schema_str), return_tensors="pt", truncation=True).input_ids.cuda()
with torch.no_grad():
    outputs = finetuned_model.generate(input_ids=input_ids, max_new_tokens=512)


In [15]:
prediction = tokenizer.decode(outputs[0].detach().cpu().numpy())

table_data = [[sample['extracted_text'], prediction]]

# Define table headers
headers = ["OCR text", "Extracted data (Finetuned model)"]

# Display the table
tabulate(table_data, headers=headers, tablefmt="html", showindex=False)

OCR text,Extracted data (Finetuned model)
"security deposit to: · Compensate us for your payment default; or breach of any other obligation under this Agreement, including the cost of recovering possession of the Residence, rental commissions, advertising expenses and other costs incurred because of your breach of the Agreement and the Rent and other amounts due through the end of the Agreement term, (including Rent due up through the date you vacate the Residence, Rent due through the date of judgment, and Rent due after the date of judgment through the end of the original Agreement term) and any other amount necessary to compensate us for your breach of the Agreement, minus amounts we reasonably could have avoided; . Clean the Residence at the termination of the tenancy, if not returned to us at the same level of cleanliness as received; · Remedy future defaults by you in any obligation to restore, replace or return personal property or appurtenances, exclusive of ordinary wear and tear; or · Repair damages to the Residence and Property, exclusive of ordinary wear and tear, caused by you or your Related Parties. Damage or deterioration of the Residence is not ordinary wear and tear if it could have been prevented by good maintenance practices by you, or if you failed to notify us of a maintenance issue in a timely fashion in writing so that we could prevent the damage or deterioration. You may not use the Security Deposit in lieu of last month's Rent or other amounts due under this Agreement. If we apply any portion of your Security Deposit to amounts due during the term of this Agreement, you must replenish the full amount applied within three days of our demand. If we know you intend to vacate the Residence, we will give you written notice of your right to a pre-move out inspection as required by law. This inspection allows you to identify and correct any deficiencies in the Residence to avoid Security Deposit deductions. If you notify us that you want the inspection, we will inspect the Residence (no earlier than two weeks before termination of the tenancy) and provide you with an itemized statement specifying repairs or cleaning to be made at your expense. Except as otherwise specified in this Agreement, you may make these repairs yourself, or clean the Residence yourself, before you move out to avoid these deductions from your Security Deposit. You have the right to be present during the inspection. Within 21 days after you return possession of the Residence to us, we will refund amounts due to you from the Security Deposit, plus an accounting of how we have used any portion of the Security Deposit that we have retained. If the Security Deposit is insufficient to satisfy the total charges, we will send to you an itemized bill payable on demand. Any Security Deposit refund may be paid by one check jointly payable to all Residents but delivered to only one Resident at the last known address of any Resident. The refund and deductions will be calculated without regard to who paid the Security Deposit or whose conduct resulted in any deductions. 4.4 LATE CHARGES AND DEFAULT INTEREST You will be obligated to pay to us the Late Charge if you fail to pay any amount due under this Agreement within the time specified in the Variable Lease Term section. You agree it would be impractical or extremely difficult to fix the actual damage to us and that the Late Charge is a reasonable estimate of the actual damages that the parties reasonably believe would occur as a result of late payment. In addition to the Late Charge, interest will accrue on any unpaid amount at the legal rate of ten percent (10%) per year beginning on the date on which the delinquent amount was due. Late Charges and interest due are in addition to, and not in lieu of, our other remedies. 4.5 FAILURE TO MAKE ALL PAYMENTS DUE BEFORE THE COMMENCEMENT DATE If you fail to make all payments specified in the section entitled ""Initial Amounts Due"" before the specified date, or if you fail to provide us with proof that required utilities have been transferred into your name, or if you fail to provide us with proof that required utilities have been transferred into your name, or if you fail to provide proof of renter's insurance (if required under this Agreement): · We have no obligation to give you possession of the Residence; and . We may rescind this Agreement and keep any portion of funds that you have paid (if any) necessary to 10","<pad> Security Deposit, Compensate, Remedy, Repair, Damage, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damages, Damage"


### Evaluate the model
(Using F1 score and exact matches)

In [16]:
from tqdm import tqdm

def evaluate_peft_model(sample, max_target_length=512):
    # Generate extracted data
    input_ids = torch.tensor(sample["input_ids"]).unsqueeze(0).cuda()
    with torch.no_grad():
        outputs = model.generate(input_ids=input_ids, max_new_tokens=max_target_length)
    prediction = tokenizer.decode(outputs[0].detach().cpu().numpy())
    
    # Decode eval sample
    labels = tokenizer.decode(sample['labels'])
 
    return prediction, labels

# Run predictions
predictions, references = [], []
for sample in tqdm(tokenized_test_dataset):
    prediction, reference = evaluate_peft_model(sample)
    predictions.append(prediction)
    references.append(reference)


  0%|                                                                                            | 0/66 [00:00<?, ?it/s]


  2%|█▎                                                                                  | 1/66 [00:33<35:51, 33.10s/it]


  3%|██▌                                                                                 | 2/66 [00:34<15:38, 14.66s/it]


  5%|███▊                                                                                | 3/66 [00:44<13:12, 12.57s/it]


  6%|█████                                                                               | 4/66 [01:17<21:19, 20.63s/it]


  8%|██████▎                                                                             | 5/66 [01:50<25:30, 25.08s/it]


  9%|███████▋                                                                            | 6/66 [02:23<27:45, 27.76s/it]


 11%|████████▉                                                                           | 7/66 [02:56<28:57, 29.45s/it]


 12%|██████████▏                                                                         | 8/66 [02:59<20:10, 20.87s/it]


 14%|███████████▍                                                                        | 9/66 [03:22<20:38, 21.73s/it]


 15%|████████████▌                                                                      | 10/66 [03:55<23:31, 25.21s/it]


 17%|█████████████▊                                                                     | 11/66 [04:28<25:17, 27.59s/it]


 18%|███████████████                                                                    | 12/66 [05:01<26:17, 29.21s/it]


 20%|████████████████▎                                                                  | 13/66 [05:12<20:53, 23.65s/it]


 21%|█████████████████▌                                                                 | 14/66 [05:45<22:57, 26.49s/it]


 23%|██████████████████▊                                                                | 15/66 [06:18<24:10, 28.45s/it]


 24%|████████████████████                                                               | 16/66 [06:51<24:51, 29.83s/it]


 26%|█████████████████████▍                                                             | 17/66 [07:24<25:08, 30.79s/it]


 27%|██████████████████████▋                                                            | 18/66 [07:49<23:16, 29.09s/it]


 29%|███████████████████████▉                                                           | 19/66 [07:59<18:09, 23.19s/it]


 30%|█████████████████████████▏                                                         | 20/66 [08:15<16:10, 21.09s/it]


 32%|██████████████████████████▍                                                        | 21/66 [08:18<11:50, 15.80s/it]


 33%|███████████████████████████▋                                                       | 22/66 [08:45<14:01, 19.12s/it]


 35%|████████████████████████████▉                                                      | 23/66 [09:12<15:21, 21.43s/it]


 36%|██████████████████████████████▏                                                    | 24/66 [09:19<12:00, 17.17s/it]


 38%|███████████████████████████████▍                                                   | 25/66 [09:31<10:39, 15.60s/it]


 39%|████████████████████████████████▋                                                  | 26/66 [09:32<07:26, 11.16s/it]


 41%|█████████████████████████████████▉                                                 | 27/66 [09:35<05:36,  8.64s/it]


 42%|███████████████████████████████████▏                                               | 28/66 [10:02<08:55, 14.09s/it]


 44%|████████████████████████████████████▍                                              | 29/66 [10:28<10:52, 17.63s/it]


 45%|█████████████████████████████████████▋                                             | 30/66 [10:29<07:36, 12.67s/it]


 47%|██████████████████████████████████████▉                                            | 31/66 [10:50<08:49, 15.12s/it]


 48%|████████████████████████████████████████▏                                          | 32/66 [11:22<11:35, 20.47s/it]


 50%|█████████████████████████████████████████▌                                         | 33/66 [11:24<08:05, 14.70s/it]


 52%|██████████████████████████████████████████▊                                        | 34/66 [11:57<10:45, 20.16s/it]


 53%|████████████████████████████████████████████                                       | 35/66 [12:30<12:23, 24.00s/it]


 55%|█████████████████████████████████████████████▎                                     | 36/66 [12:34<09:01, 18.04s/it]


 56%|██████████████████████████████████████████████▌                                    | 37/66 [12:35<06:17, 13.02s/it]


 58%|███████████████████████████████████████████████▊                                   | 38/66 [12:46<05:46, 12.39s/it]


 59%|█████████████████████████████████████████████████                                  | 39/66 [13:19<08:20, 18.54s/it]


 61%|██████████████████████████████████████████████████▎                                | 40/66 [13:44<08:57, 20.66s/it]


 62%|███████████████████████████████████████████████████▌                               | 41/66 [14:17<10:08, 24.35s/it]


 64%|████████████████████████████████████████████████████▊                              | 42/66 [14:27<08:01, 20.06s/it]


 65%|██████████████████████████████████████████████████████                             | 43/66 [15:00<09:10, 23.93s/it]


 67%|███████████████████████████████████████████████████████▎                           | 44/66 [15:26<08:54, 24.30s/it]


 68%|████████████████████████████████████████████████████████▌                          | 45/66 [15:27<06:05, 17.40s/it]


 70%|█████████████████████████████████████████████████████████▊                         | 46/66 [15:28<04:09, 12.45s/it]


 71%|███████████████████████████████████████████████████████████                        | 47/66 [15:31<03:02,  9.60s/it]


 73%|████████████████████████████████████████████████████████████▎                      | 48/66 [15:53<04:01, 13.44s/it]


 74%|█████████████████████████████████████████████████████████████▌                     | 49/66 [16:15<04:30, 15.92s/it]


 76%|██████████████████████████████████████████████████████████████▉                    | 50/66 [16:16<03:02, 11.40s/it]


 77%|████████████████████████████████████████████████████████████████▏                  | 51/66 [16:16<02:02,  8.18s/it]


 79%|█████████████████████████████████████████████████████████████████▍                 | 52/66 [16:17<01:24,  6.04s/it]


 80%|██████████████████████████████████████████████████████████████████▋                | 53/66 [16:50<03:03, 14.09s/it]


 82%|███████████████████████████████████████████████████████████████████▉               | 54/66 [17:03<02:44, 13.69s/it]


 83%|█████████████████████████████████████████████████████████████████████▏             | 55/66 [17:36<03:34, 19.47s/it]


 85%|██████████████████████████████████████████████████████████████████████▍            | 56/66 [18:09<03:55, 23.51s/it]


 86%|███████████████████████████████████████████████████████████████████████▋           | 57/66 [18:15<02:45, 18.38s/it]


 88%|████████████████████████████████████████████████████████████████████████▉          | 58/66 [18:25<02:05, 15.70s/it]


 89%|██████████████████████████████████████████████████████████████████████████▏        | 59/66 [18:25<01:18, 11.18s/it]


 91%|███████████████████████████████████████████████████████████████████████████▍       | 60/66 [18:51<01:33, 15.55s/it]


 92%|████████████████████████████████████████████████████████████████████████████▋      | 61/66 [19:24<01:43, 20.75s/it]


 94%|█████████████████████████████████████████████████████████████████████████████▉     | 62/66 [19:31<01:07, 16.76s/it]


 95%|███████████████████████████████████████████████████████████████████████████████▏   | 63/66 [20:04<01:04, 21.58s/it]


 97%|████████████████████████████████████████████████████████████████████████████████▍  | 64/66 [20:29<00:45, 22.61s/it]


 98%|█████████████████████████████████████████████████████████████████████████████████▋ | 65/66 [21:02<00:25, 25.70s/it]


100%|███████████████████████████████████████████████████████████████████████████████████| 66/66 [21:09<00:00, 19.88s/it]


100%|███████████████████████████████████████████████████████████████████████████████████| 66/66 [21:09<00:00, 19.23s/it]




In [17]:
from tabulate import tabulate

# Prepare data for tabulation
table_data = []
for i, (pred, ref) in enumerate(zip(predictions, references), 1):
    table_data.append([i, ref, pred])

# Define table headers
headers = ["#", "Reference data", "Extracted data (Finetuned model)"]

# Display the table
tabulate(table_data, headers=headers, tablefmt="html", showindex=False)


#,Reference data,Extracted data (Finetuned model)
1,"<unk>""tenant_name"": ""Ricquan M Moore"", ""unit_address"": null, ""unit_number"": null, ""unit_type"": null, ""agreement_date"": null, ""lease_start"": null, ""lease_end"": null, ""lease_auto_renew"": null, ""hourly_rate"": null, ""monthly_rent"": null, ""prorated_rent"": null, ""security_deposit"": null, ""lease_rent"": null, ""monthly_payment_breakdown"": null, ""utility_charges"": null<unk></s>","<pad> ANIMALS, ADDITIONAL_FEES, ADDITIONAL_RENT, ADDITIONAL_FEES_OR_CHARGES, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_"
2,"<unk>""tenant_name"": null, ""unit_address"": null, ""unit_number"": null, ""unit_type"": null, ""agreement_date"": null, ""lease_start"": null, ""lease_end"": null, ""lease_auto_renew"": null, ""hourly_rate"": null, ""monthly_rent"": null, ""prorated_rent"": null, ""security_deposit"": null, ""lease_rent"": null, ""monthly_payment_breakdown"": null, ""utility_charges"": null<unk></s>","<pad> 26.CONDITION OF THE PREMISES AND ALTERATIONS, <unk></s>"
3,"<unk>""tenant_name"": ""Samantha Putlak"", ""unit_address"": ""304 Stanhope St."", ""unit_number"": ""#2L"", ""unit_type"": null, ""agreement_date"": null, ""lease_start"": ""2023-08-01"", ""lease_end"": ""2024-07-31"", ""lease_auto_renew"": null, ""hourly_rate"": null, ""monthly_rent"": 2800, ""prorated_rent"": null, ""security_deposit"": 2800, ""lease_rent"": 33600, ""monthly_payment_breakdown"": null, ""utility_charges"": null<unk></s>","<pad> TERM: 12 Months MONTHLY_RENT: $2,800 BEGINNING: 8/1/2023 MONTHLY_RENT: $2,800 SECURITY_DEPOSIT: $2,800 TERM: 12 Months MONTHLY_RENT: $2,800 BEGINNING: 8/1/2023 MONTHLY_RENT: $2,800 SECURITY_DEPOSIT: $2,800 TERM: 12 Months MONTHLY_RENT: $2,800 BEGINNING: 8/1/2023 MONTHLY_RENT: $2,800 SECURITY_DEPOSIT: $2,800</s>"
4,"<unk>""tenant_name"": null, ""unit_address"": null, ""unit_number"": null, ""unit_type"": null, ""agreement_date"": null, ""lease_start"": null, ""lease_end"": null, ""lease_auto_renew"": null, ""hourly_rate"": null, ""monthly_rent"": null, ""prorated_rent"": null, ""security_deposit"": null, ""lease_rent"": null, ""monthly_payment_breakdown"": null, ""utility_charges"": null<unk></s>","<pad> <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>"
5,"<unk>""tenant_name"": null, ""unit_address"": null, ""unit_number"": null, ""unit_type"": null, ""agreement_date"": null, ""lease_start"": null, ""lease_end"": null, ""lease_auto_renew"": null, ""hourly_rate"": null, ""monthly_rent"": null, ""prorated_rent"": null, ""security_deposit"": null, ""lease_rent"": null, ""monthly_payment_breakdown"": null, ""utility_charges"": null<unk></s>","<pad> Damage, Landlord, Damage, Tenant, Damage, Landlord, Damage, Tenant, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage,"
6,"<unk>""tenant_name"": null, ""unit_address"": null, ""unit_number"": null, ""unit_type"": null, ""agreement_date"": null, ""lease_start"": null, ""lease_end"": null, ""lease_auto_renew"": null, ""hourly_rate"": null, ""monthly_rent"": null, ""prorated_rent"": null, ""security_deposit"": null, ""lease_rent"": null, ""monthly_payment_breakdown"": null, ""utility_charges"": null<unk></s>","<pad> PART_A, PART_B, PART_C, PART_D, PART_A, PART_B, PART_C, PART_D, PART_A, PART_C, PART_D, PART_A, PART_C, PART_D, PART_A, PART_C, PART_D, PART_A, PART_C, PART_D, PART_A, PART_C, PART_D, PART_A, PART_C, PART_D, PART_A, PART_C, PART_D, PART_A, PART_C, PART_D, PART_A, PART_C, PART_D, PART_A, PART_C, PART_D, PART_A, PART_C, PART_D, PART_A, PART_C, PART_D, PART_A, PART_C, PART_D, PART_A, PART_C, PART_D, PART_A, PART_C, PART_D, PART_A, PART_C, PART_D, PART_A, PART_C, PART_D, PART_A, PART_C, PART_D, PART_A, PART_C, PART_D, PART_A, PART_C, PART_D, PART_A, PART_C, PART_D, PART_A, PART_C, PART_D, PART_C, PART_D, PART_A, PART_C, PART_D, PART_A, PART_C, PART_D, PART_C, PART_D, PART_A, PART_C, PART_D, PART_C, PART_D, PART_A, PART_C, PART_D, PART_C, PART_D, PART_A, PART_C, PART_D, PART_C, PART_D, PART_C, PART_D, PART_C, PART"
7,"<unk>""tenant_name"": null, ""unit_address"": null, ""unit_number"": null, ""unit_type"": null, ""agreement_date"": null, ""lease_start"": null, ""lease_end"": null, ""lease_auto_renew"": null, ""hourly_rate"": null, ""monthly_rent"": null, ""prorated_rent"": null, ""security_deposit"": null, ""lease_rent"": null, ""monthly_payment_breakdown"": null, ""utility_charges"": null<unk></s>","<pad> <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>, <unk>"
8,"<unk>""tenant_name"": null, ""unit_address"": null, ""unit_number"": null, ""unit_type"": null, ""agreement_date"": null, ""lease_start"": null, ""lease_end"": null, ""lease_auto_renew"": ""month-to-month"", ""hourly_rate"": null, ""monthly_rent"": null, ""prorated_rent"": null, ""security_deposit"": null, ""lease_rent"": null, ""monthly_payment_breakdown"": null, ""utility_charges"": null<unk></s>","<pad> Lessee shall be responsible for the payment of all taxes, license fees, registration fees, insurance, and other assessments associated with the use of the Property.</s>"
9,"<unk>""tenant_name"": null, ""unit_address"": null, ""unit_number"": null, ""unit_type"": null, ""agreement_date"": null, ""lease_start"": null, ""lease_end"": null, ""lease_auto_renew"": null, ""hourly_rate"": null, ""monthly_rent"": null, ""prorated_rent"": null, ""security_deposit"": null, ""lease_rent"": null, ""monthly_payment_breakdown"": null, ""utility_charges"": null<unk></s>","<pad> <unk> Replacements 31.REPLACEMENTS AND SUBLETTING. Replacing a resident, subletting. assignment, or granting a right or license to occupy is allowed only when we expressly consent in writing. If departing or remaining residents find a replacement resident acceptable to us before moving out and we expressly consent, in writing, to the replacement, subletting, assignment, or granting a right or any license to occupy, then (1) a reletting charge will not be due; (2) a reasonable administrative (paperwork) and/or transfer fee will be due, and a rekeying fee is due if rekeying is requested or required; and (3) the departing and remaining residents will remain liable for all lease obligations for the rest of the original lease term. Procedures for Replacement. If we approve a replacement resident, then, at our option: (1) the replacement resident must sign this Lease Contract with or without an increase in the total security deposit; or (2) the remaining and replacement residents must sign an entirely new I.case Contract. Unless we agree otherwise in writing-even if a new L.case Contract is signed. Responsibilities of Owner and Resident 32.RESPONSIBILITIES OF OWNER. We'll act with customary diligence to: (1) keep common areas reasonably clean, subject to paragraph 26 (Condition of the Premises and Alterations); (2) maintain fixtures, furniture, hot water, heating and A/C. equipment. (3) comply with applicable federal, state, and local laws regarding safety, sanitation, and fair housing; and (4) make all repairs and replacements as may be necessary.</s>"
10,"<unk>""tenant_name"": null, ""unit_address"": null, ""unit_number"": null, ""unit_type"": null, ""agreement_date"": null, ""lease_start"": null, ""lease_end"": null, ""lease_auto_renew"": null, ""hourly_rate"": null, ""monthly_rent"": null, ""prorated_rent"": null, ""security_deposit"": null, ""lease_rent"": null, ""monthly_payment_breakdown"": null, ""utility_charges"": null<unk></s>","<pad> RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILIT"


In [18]:
# TODO: calculate accuracy using metrics