## Finetuning Gemma 2B for lease agreement data extraction

### Install dependencies

In [1]:
%pip install pandas numpy peft scikit-learn transformers datasets torch accelerate bitsandbytes huggingface-hub trl -q

Note: you may need to restart the kernel to use updated packages.


### Define the extraction JSON schema

In [2]:
import json

extraction_json_schema = {
  "title": "ExtractedLeaseData",
  "type": "object",
  "properties": {
    "tenant_name": {
      "type": ["string", "null"],
      "description": "The name of the tenant, found in the OCR text."
    },
    "unit_address": {
      "type": ["string", "null"],
      "description": "The unit address found in the OCR text."
    },
    "unit_number": {
      "type": ["string", "null"],
      "description": "The unit number found in the OCR text."
    },
    "unit_type": {
      "type": ["string", "null"],
      "description": "The unit type found in the OCR text."
    },
    "agreement_date": {
      "type": ["string", "null"],
      "format": "date"
    },
    "lease_start": {
      "type": ["string", "null"],
      "format": "date",
      "description": "The date when the lease starts, found in the OCR text."
    },
    "lease_end": {
      "type": ["string", "null"],
      "format": "date",
      "description": "The date when the lease ends, found in the OCR text."
    },
    "lease_auto_renew": {
      "type": ["string", "null"],
      "description": "The type of lease auto renewal, found in the OCR text."
    },
    "hourly_rate": {
      "type": ["number", "null"],
      "description": "The hourly rate found in the OCR text."
    },
    "monthly_rent": {
      "type": ["number", "null"],
      "description": "The monthly rent found in the OCR text."
    },
    "prorated_rent": {
      "type": ["number", "null"],
      "description": "The prorated rent found in the OCR text."
    },
    "security_deposit": {
      "type": ["number", "null"],
      "description": "The security deposit found in the OCR text."
    },
    "lease_rent": {
      "type": ["number", "null"],
      "description": "The security deposit found in the OCR text."
    },
    "monthly_payment_breakdown": {
      "type": ["object", "null"],
      "description": "The monthly payment breakdown data found in the OCR text.",
      "additionalProperties": {}
    },
    "utility_charges": {
      "type": ["object", "null"],
      "description": "The utility charges found in the OCR text. This is a dictionary with utility charges as the key, and their price as the value.",
      "additionalProperties": {
        "type": ["number", "null"]
      }
    }
  },
  "required": ["tenant_name", "unit_address", "unit_number", "unit_type", "agreement_date", "lease_start", "lease_end", "lease_auto_renew", "hourly_rate", "monthly_rent", "prorated_rent", "security_deposit", "lease_rent", "monthly_payment_breakdown", "utility_charges"]
}

extraction_json_schema_str = json.dumps(extraction_json_schema, indent=2)

### Build the Dataset
Build the dataset from the collected data in the Sqlite3 database

In [3]:
import pandas as pd
import sqlite3

# Path to the SQLite database
db_path = "../output/extracted_lease_agreements.db"

# Connect to the SQLite database
conn = sqlite3.connect(db_path)

# Query to select all data from the extracted_data table
query = "SELECT * FROM extracted_data"

# Read the data into a DataFrame
df = pd.read_sql_query(query, conn, index_col="id")

# Close the database connection
conn.close()

df

Unnamed: 0_level_0,extracted_text,extracted_fields
id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,"22.PARKING. We may regulate the time, manner, ...","{""tenant_name"": null, ""unit_address"": null, ""u..."
2,38.MISCELLANEOUS.\nA. Exercising one remedy wo...,"{""tenant_name"": null, ""unit_address"": null, ""u..."
3,"You'll pay for all other utilities, related de...","{""tenant_name"": null, ""unit_address"": null, ""u..."
4,Disposition or Sale. Except for animals and pr...,"{""tenant_name"": null, ""unit_address"": null, ""u..."
5,dupusit: or 2) return the remaining portion (i...,"{""tenant_name"": ""Comunque Bolas"", ""unit_addres..."
...,...,...
651,L HOUSING OPPORTUNITY\nANIMAL ADDENDUM Becomes...,"{""tenant_name"": ""Novy Jezkova"", ""unit_address""..."
652,POURL HOUSIMO OPPORTUNITY\nADDITIONAL SPECIAL ...,"{""tenant_name"": ""Novy Jezkova"", ""unit_address""..."
653,26.CONDITION OF THE PREMISES AND ALTERATIONS. ...,"{""tenant_name"": null, ""unit_address"": null, ""u..."
654,"Severability, Originals and Attachments, and S...","{""tenant_name"": ""Novy Jezkova"", ""unit_address""..."


#### Preprocess the dataset
We need to add in missing fields in the dataset

In [4]:
# Function to ensure all required fields are present in the extracted_fields column
def ensure_all_fields(extracted_fields, required_fields):
    # Convert the extracted_fields string to a dictionary
    extracted_data = json.loads(extracted_fields)
    
    # Check and add any missing fields with a value of None
    for field in required_fields:
        if field not in extracted_data:
            extracted_data[field] = None
    
    # Convert the dictionary back to a JSON string
    return json.dumps(extracted_data)

# List of required fields from the extraction_json_schema
extraction_fields = list(extraction_json_schema['properties'].keys())

# Apply the function to each row in the extracted_fields column
df['extracted_fields'] = df['extracted_fields'].apply(ensure_all_fields, required_fields=extraction_fields)

# Display the updated DataFrame
df

Unnamed: 0_level_0,extracted_text,extracted_fields
id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,"22.PARKING. We may regulate the time, manner, ...","{""tenant_name"": null, ""unit_address"": null, ""u..."
2,38.MISCELLANEOUS.\nA. Exercising one remedy wo...,"{""tenant_name"": null, ""unit_address"": null, ""u..."
3,"You'll pay for all other utilities, related de...","{""tenant_name"": null, ""unit_address"": null, ""u..."
4,Disposition or Sale. Except for animals and pr...,"{""tenant_name"": null, ""unit_address"": null, ""u..."
5,dupusit: or 2) return the remaining portion (i...,"{""tenant_name"": ""Comunque Bolas"", ""unit_addres..."
...,...,...
651,L HOUSING OPPORTUNITY\nANIMAL ADDENDUM Becomes...,"{""tenant_name"": ""Novy Jezkova"", ""unit_address""..."
652,POURL HOUSIMO OPPORTUNITY\nADDITIONAL SPECIAL ...,"{""tenant_name"": ""Novy Jezkova"", ""unit_address""..."
653,26.CONDITION OF THE PREMISES AND ALTERATIONS. ...,"{""tenant_name"": null, ""unit_address"": null, ""u..."
654,"Severability, Originals and Attachments, and S...","{""tenant_name"": ""Novy Jezkova"", ""unit_address""..."


### Split the dataset

In [5]:
from sklearn.model_selection import train_test_split

# First split: 80% for training, 20% for temp (which will be split into eval and test)
train_df, temp_df = train_test_split(df, test_size=0.2, random_state=42)

# Second split: 50% of temp for eval and 50% for test (10% of the original data each)
eval_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=42)

# Check the sizes of the splits
print(f"Training set size: {len(train_df)}")
print(f"Evaluation set size: {len(eval_df)}")
print(f"Test set size: {len(test_df)}")


Training set size: 524
Evaluation set size: 65
Test set size: 66


### Load the base model

In [6]:
from peft import LoraConfig, TaskType

peft_config = LoraConfig(
    lora_alpha = 16, # increased alpha
    lora_dropout=0.1, # decreased dropout
    r=8, # increased rank
    task_type=TaskType.CAUSAL_LM,
    bias="none",
    target_modules=['k_proj', 'q_proj', 'gate_proj', 'o_proj', 'v_proj', 'down_proj', 'up_proj'], # target as many layers as possible
)

In [7]:

from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from huggingface_hub import notebook_login
import torch

# login to access gated model
notebook_login()

# Load the tokenizer and model
model_id = "google/gemma-2b-it"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

# empty GPU memory
torch.cuda.empty_cache()

tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'right'

model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=bnb_config)


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…



`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

trainable params: 12,189,696 || all params: 2,861,946,880 || trainable%: 0.4259


### Build the datasets

In [None]:
from datasets import Dataset

# Create datasets with the extracted text and labels
train_dataset = Dataset.from_pandas(train_df)
eval_dataset = Dataset.from_pandas(eval_df)
test_dataset = Dataset.from_pandas(test_df)

train_dataset

### Define the prompt template

In [8]:
prompt_template = """
## USER:
You are a specialized model for extracting specific information from lease agreement text. Your goal is to accurately extract data fields from the provided OCR text of a lease agreement. Additionally, correct any obvious OCR errors you encounter during extraction.

### INPUT TEXT:
Below is the OCR text extracted from a lease agreement. Carefully analyze this text, and extract the relevant data fields.

OCR Text:
```
{extracted_text}
```

### RESPONSE FORMAT:
Return the extracted data as a JSON object, adhering strictly to the following JSON schema:

```json
{extraction_json_schema_str}
```

## ASSISTANT:
```json
{extracted_fields}
```
"""

### Train (fine tune) the base model

In [None]:
from transformers import TrainingArguments, DataCollatorForLanguageModeling
from trl import SFTTrainer

def formatting_func(example):
    output_texts = []
    for i in range(len(example['extracted_text'])):
        text = prompt_template.format(extracted_text=example['extracted_text'][i], extraction_json_schema_str=extraction_json_schema_str, extracted_fields=example['extracted_fields'][i])
        output_texts.append(text)
    return output_texts

training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="steps",  # Changed to evaluate based on steps
    eval_steps=10,  # Evaluate every 10 steps
    learning_rate=1e-3,
    lr_scheduler_type="linear",
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=8,
    gradient_checkpointing=True,
    logging_steps=2,
    warmup_steps = 2,
    max_steps = 200,
    save_strategy="steps",
    save_steps=10,  # Save a checkpoint every 10 steps
    optim="paged_adamw_32bit",
)

trainer = SFTTrainer(
    model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    peft_config=peft_config,
    neftune_noise_alpha=5,
    max_seq_length=512,
    args=training_args,
    formatting_func=formatting_func,
    data_collator=DataCollatorForLanguageModeling(
        tokenizer=tokenizer,
        mlm=False,  # No masked language modeling, suitable for CausalLM
        return_tensors="pt"  # Return PyTorch tensors
    )
)
model.config.use_cache = False  # silence the warnings. Please re-enable for inference!
trainer.train()

### Save the model

In [11]:
peft_model_repo_id = "aryaniyaps/finetuned_gemma_2b_lease_data_extraction_40_deals"
local_model_id = "./saved_models/finetuned_gemma_2b_lease_data_extraction_40_deals"

In [12]:
# Push the PEFT model (LoRA adapters) to Hugging Face Hub
trainer.model.save_pretrained(local_model_id)

trainer.model.push_to_hub(peft_model_repo_id)

adapter_model.safetensors:   0%|          | 0.00/48.9M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/aryaniyaps/finetuned_flan_t5_xl_lease_data_extraction_40_deals/commit/eb308e54306e20a5b90edc7ebeccc662b18878f8', commit_message='Upload model', commit_description='', oid='eb308e54306e20a5b90edc7ebeccc662b18878f8', pr_url=None, pr_revision=None, pr_num=None)

### Evaluate the model

In [13]:
from peft import PeftModel
 
# Load the Lora model
finetuned_model = PeftModel.from_pretrained(model, peft_model_repo_id, device_map={"":0})
finetuned_model.eval()
 
print("PEFT model loaded")

adapter_config.json:   0%|          | 0.00/702 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/48.9M [00:00<?, ?B/s]

PEFT model loaded


### Evaluate the model
(Using F1 score and exact matches)

In [None]:
evaluation_prompt_template = """
## USER:
You are a specialized model for extracting specific information from lease agreement text. Your goal is to accurately extract data fields from the provided OCR text of a lease agreement. Additionally, correct any obvious OCR errors you encounter during extraction.

### INPUT TEXT:
Below is the OCR text extracted from a lease agreement. Carefully analyze this text, and extract the relevant data fields.

OCR Text:
```
{extracted_text}
```

### RESPONSE FORMAT:
Return the extracted data as a JSON object, adhering strictly to the following JSON schema:

```json
{extraction_json_schema_str}
```

## ASSISTANT:
"""

In [16]:
from peft import PeftModelForCausalLM

finetuned_model = PeftModelForCausalLM.from_pretrained(model=model, model_id=local_model_id)

# Set the model to evaluation mode
finetuned_model.eval()

# Prepare lists to store outputs and true labels
predictions = []
references = []

def format_prompt(message):
    return evaluation_prompt_template.format(extracted_text=message['extracted_text'], extraction_json_schema_str=extraction_json_schema_str)

# Disable gradient calculation for evaluation
with torch.inference_mode():
    for message in test_dataset:
        # Prepare input using apply_chat_template
        input_ids = tokenizer(format_prompt(message), return_tensors="pt").to("cuda")
        # Get the true output
        true_output = message['extracted_fields']
        references.append(true_output)

        # Generate output from the fine-tuned model
        outputs_finetuned = finetuned_model.generate(**input_ids, max_new_tokens=1024, do_sample=False)
        finetuned_output = tokenizer.decode(outputs_finetuned[0])
        predictions.append(finetuned_output)



  0%|                                                                                            | 0/66 [00:00<?, ?it/s]


  2%|█▎                                                                                  | 1/66 [00:33<35:50, 33.09s/it]


  3%|██▌                                                                                 | 2/66 [00:34<15:38, 14.66s/it]


  5%|███▊                                                                                | 3/66 [00:44<13:12, 12.58s/it]


  6%|█████                                                                               | 4/66 [01:18<21:21, 20.67s/it]


  8%|██████▎                                                                             | 5/66 [01:51<25:35, 25.17s/it]


  9%|███████▋                                                                            | 6/66 [02:24<27:51, 27.86s/it]


 11%|████████▉                                                                           | 7/66 [02:57<29:04, 29.58s/it]


 12%|██████████▏                                                                         | 8/66 [02:59<20:15, 20.96s/it]


 14%|███████████▍                                                                        | 9/66 [03:23<20:42, 21.80s/it]


 15%|████████████▌                                                                      | 10/66 [03:56<23:35, 25.28s/it]


 17%|█████████████▊                                                                     | 11/66 [04:29<25:22, 27.68s/it]


 18%|███████████████                                                                    | 12/66 [05:02<26:23, 29.33s/it]


 20%|████████████████▎                                                                  | 13/66 [05:13<20:57, 23.74s/it]


 21%|█████████████████▌                                                                 | 14/66 [05:46<23:01, 26.56s/it]


 23%|██████████████████▊                                                                | 15/66 [06:19<24:14, 28.52s/it]


 24%|████████████████████                                                               | 16/66 [06:52<24:54, 29.89s/it]


 26%|█████████████████████▍                                                             | 17/66 [07:26<25:12, 30.87s/it]


 27%|██████████████████████▋                                                            | 18/66 [07:51<23:20, 29.18s/it]


 29%|███████████████████████▉                                                           | 19/66 [08:00<18:13, 23.26s/it]


 30%|█████████████████████████▏                                                         | 20/66 [08:17<16:13, 21.15s/it]


 32%|██████████████████████████▍                                                        | 21/66 [08:20<11:53, 15.85s/it]


 33%|███████████████████████████▋                                                       | 22/66 [08:47<14:04, 19.20s/it]


 35%|████████████████████████████▉                                                      | 23/66 [09:14<15:26, 21.55s/it]


 36%|██████████████████████████████▏                                                    | 24/66 [09:21<12:04, 17.26s/it]


 38%|███████████████████████████████▍                                                   | 25/66 [09:33<10:43, 15.69s/it]


 39%|████████████████████████████████▋                                                  | 26/66 [09:34<07:28, 11.22s/it]


 41%|█████████████████████████████████▉                                                 | 27/66 [09:37<05:38,  8.69s/it]


 42%|███████████████████████████████████▏                                               | 28/66 [10:04<08:57, 14.15s/it]


 44%|████████████████████████████████████▍                                              | 29/66 [10:30<10:54, 17.70s/it]


 45%|█████████████████████████████████████▋                                             | 30/66 [10:31<07:37, 12.72s/it]


 47%|██████████████████████████████████████▉                                            | 31/66 [10:52<08:51, 15.18s/it]


 48%|████████████████████████████████████████▏                                          | 32/66 [11:25<11:38, 20.55s/it]


 50%|█████████████████████████████████████████▌                                         | 33/66 [11:26<08:06, 14.76s/it]


 52%|██████████████████████████████████████████▊                                        | 34/66 [11:59<10:48, 20.25s/it]


 53%|████████████████████████████████████████████                                       | 35/66 [12:32<12:27, 24.13s/it]


 55%|█████████████████████████████████████████████▎                                     | 36/66 [12:37<09:04, 18.14s/it]


 56%|██████████████████████████████████████████████▌                                    | 37/66 [12:38<06:19, 13.09s/it]


 58%|███████████████████████████████████████████████▊                                   | 38/66 [12:49<05:49, 12.46s/it]


 59%|█████████████████████████████████████████████████                                  | 39/66 [13:22<08:23, 18.65s/it]


 61%|██████████████████████████████████████████████████▎                                | 40/66 [13:48<08:59, 20.77s/it]


 62%|███████████████████████████████████████████████████▌                               | 41/66 [14:21<10:11, 24.48s/it]


 64%|████████████████████████████████████████████████████▊                              | 42/66 [14:31<08:03, 20.16s/it]


 65%|██████████████████████████████████████████████████████                             | 43/66 [15:04<09:12, 24.02s/it]


 67%|███████████████████████████████████████████████████████▎                           | 44/66 [15:29<08:56, 24.40s/it]


 68%|████████████████████████████████████████████████████████▌                          | 45/66 [15:30<06:06, 17.47s/it]


 70%|█████████████████████████████████████████████████████████▊                         | 46/66 [15:31<04:10, 12.51s/it]


 71%|███████████████████████████████████████████████████████████                        | 47/66 [15:34<03:03,  9.64s/it]


 73%|████████████████████████████████████████████████████████████▎                      | 48/66 [15:57<04:02, 13.49s/it]


 74%|█████████████████████████████████████████████████████████████▌                     | 49/66 [16:19<04:31, 15.97s/it]


 76%|██████████████████████████████████████████████████████████████▉                    | 50/66 [16:19<03:03, 11.44s/it]


 77%|████████████████████████████████████████████████████████████████▏                  | 51/66 [16:20<02:03,  8.21s/it]


 79%|█████████████████████████████████████████████████████████████████▍                 | 52/66 [16:21<01:24,  6.06s/it]


 80%|██████████████████████████████████████████████████████████████████▋                | 53/66 [16:54<03:04, 14.16s/it]


 82%|███████████████████████████████████████████████████████████████████▉               | 54/66 [17:07<02:45, 13.76s/it]


 83%|█████████████████████████████████████████████████████████████████████▏             | 55/66 [17:40<03:34, 19.54s/it]


 85%|██████████████████████████████████████████████████████████████████████▍            | 56/66 [18:13<03:55, 23.58s/it]


 86%|███████████████████████████████████████████████████████████████████████▋           | 57/66 [18:20<02:45, 18.44s/it]


 88%|████████████████████████████████████████████████████████████████████████▉          | 58/66 [18:29<02:05, 15.74s/it]


 89%|██████████████████████████████████████████████████████████████████████████▏        | 59/66 [18:30<01:18, 11.22s/it]


 91%|███████████████████████████████████████████████████████████████████████████▍       | 60/66 [18:55<01:33, 15.61s/it]


 92%|████████████████████████████████████████████████████████████████████████████▋      | 61/66 [19:29<01:44, 20.83s/it]


 94%|█████████████████████████████████████████████████████████████████████████████▉     | 62/66 [19:36<01:07, 16.82s/it]


 95%|███████████████████████████████████████████████████████████████████████████████▏   | 63/66 [20:09<01:05, 21.67s/it]


 97%|████████████████████████████████████████████████████████████████████████████████▍  | 64/66 [20:34<00:45, 22.69s/it]


 98%|█████████████████████████████████████████████████████████████████████████████████▋ | 65/66 [21:07<00:25, 25.80s/it]


100%|███████████████████████████████████████████████████████████████████████████████████| 66/66 [21:13<00:00, 19.96s/it]


100%|███████████████████████████████████████████████████████████████████████████████████| 66/66 [21:13<00:00, 19.30s/it]




In [17]:
from tabulate import tabulate

# Prepare data for tabulation
table_data = []
for i, (pred, ref) in enumerate(zip(predictions, references), 1):
    table_data.append([i, ref, pred])

# Define table headers
headers = ["#", "Reference data", "Extracted data (Finetuned model)"]

# Display the table
tabulate(table_data, headers=headers, tablefmt="html", showindex=False)


#,Reference data,Extracted data (Finetuned model)
1,"""tenant_name"": ""Ricquan M Moore"", ""unit_address"": null, ""unit_number"": null, ""unit_type"": null, ""agreement_date"": null, ""lease_start"": null, ""lease_end"": null, ""lease_auto_renew"": null, ""hourly_rate"": null, ""monthly_rent"": null, ""prorated_rent"": null, ""security_deposit"": null, ""lease_rent"": null, ""monthly_payment_breakdown"": null, ""utility_charges"": null","ANIMALS, ADDITIONAL_FEES, ADDITIONAL_RENT, ADDITIONAL_FEES_OR_CHARGES, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_CHARGES_TO_PAID, ADDITIONAL_FEES_OR_"
2,"""tenant_name"": null, ""unit_address"": null, ""unit_number"": null, ""unit_type"": null, ""agreement_date"": null, ""lease_start"": null, ""lease_end"": null, ""lease_auto_renew"": null, ""hourly_rate"": null, ""monthly_rent"": null, ""prorated_rent"": null, ""security_deposit"": null, ""lease_rent"": null, ""monthly_payment_breakdown"": null, ""utility_charges"": null","26.CONDITION OF THE PREMISES AND ALTERATIONS,"
3,"""tenant_name"": ""Samantha Putlak"", ""unit_address"": ""304 Stanhope St."", ""unit_number"": ""#2L"", ""unit_type"": null, ""agreement_date"": null, ""lease_start"": ""2023-08-01"", ""lease_end"": ""2024-07-31"", ""lease_auto_renew"": null, ""hourly_rate"": null, ""monthly_rent"": 2800, ""prorated_rent"": null, ""security_deposit"": 2800, ""lease_rent"": 33600, ""monthly_payment_breakdown"": null, ""utility_charges"": null","TERM: 12 Months MONTHLY_RENT: $2,800 BEGINNING: 8/1/2023 MONTHLY_RENT: $2,800 SECURITY_DEPOSIT: $2,800 TERM: 12 Months MONTHLY_RENT: $2,800 BEGINNING: 8/1/2023 MONTHLY_RENT: $2,800 SECURITY_DEPOSIT: $2,800 TERM: 12 Months MONTHLY_RENT: $2,800 BEGINNING: 8/1/2023 MONTHLY_RENT: $2,800 SECURITY_DEPOSIT: $2,800"
4,"""tenant_name"": null, ""unit_address"": null, ""unit_number"": null, ""unit_type"": null, ""agreement_date"": null, ""lease_start"": null, ""lease_end"": null, ""lease_auto_renew"": null, ""hourly_rate"": null, ""monthly_rent"": null, ""prorated_rent"": null, ""security_deposit"": null, ""lease_rent"": null, ""monthly_payment_breakdown"": null, ""utility_charges"": null",",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,"
5,"""tenant_name"": null, ""unit_address"": null, ""unit_number"": null, ""unit_type"": null, ""agreement_date"": null, ""lease_start"": null, ""lease_end"": null, ""lease_auto_renew"": null, ""hourly_rate"": null, ""monthly_rent"": null, ""prorated_rent"": null, ""security_deposit"": null, ""lease_rent"": null, ""monthly_payment_breakdown"": null, ""utility_charges"": null","Damage, Landlord, Damage, Tenant, Damage, Landlord, Damage, Tenant, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage, Damage,"
6,"""tenant_name"": null, ""unit_address"": null, ""unit_number"": null, ""unit_type"": null, ""agreement_date"": null, ""lease_start"": null, ""lease_end"": null, ""lease_auto_renew"": null, ""hourly_rate"": null, ""monthly_rent"": null, ""prorated_rent"": null, ""security_deposit"": null, ""lease_rent"": null, ""monthly_payment_breakdown"": null, ""utility_charges"": null","PART_A, PART_B, PART_C, PART_D, PART_A, PART_B, PART_C, PART_D, PART_A, PART_C, PART_D, PART_A, PART_C, PART_D, PART_A, PART_C, PART_D, PART_A, PART_C, PART_D, PART_A, PART_C, PART_D, PART_A, PART_C, PART_D, PART_A, PART_C, PART_D, PART_A, PART_C, PART_D, PART_A, PART_C, PART_D, PART_A, PART_C, PART_D, PART_A, PART_C, PART_D, PART_A, PART_C, PART_D, PART_A, PART_C, PART_D, PART_A, PART_C, PART_D, PART_A, PART_C, PART_D, PART_A, PART_C, PART_D, PART_A, PART_C, PART_D, PART_A, PART_C, PART_D, PART_A, PART_C, PART_D, PART_A, PART_C, PART_D, PART_A, PART_C, PART_D, PART_A, PART_C, PART_D, PART_C, PART_D, PART_A, PART_C, PART_D, PART_A, PART_C, PART_D, PART_C, PART_D, PART_A, PART_C, PART_D, PART_C, PART_D, PART_A, PART_C, PART_D, PART_C, PART_D, PART_A, PART_C, PART_D, PART_C, PART_D, PART_C, PART_D, PART_C, PART"
7,"""tenant_name"": null, ""unit_address"": null, ""unit_number"": null, ""unit_type"": null, ""agreement_date"": null, ""lease_start"": null, ""lease_end"": null, ""lease_auto_renew"": null, ""hourly_rate"": null, ""monthly_rent"": null, ""prorated_rent"": null, ""security_deposit"": null, ""lease_rent"": null, ""monthly_payment_breakdown"": null, ""utility_charges"": null",",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,"
8,"""tenant_name"": null, ""unit_address"": null, ""unit_number"": null, ""unit_type"": null, ""agreement_date"": null, ""lease_start"": null, ""lease_end"": null, ""lease_auto_renew"": ""month-to-month"", ""hourly_rate"": null, ""monthly_rent"": null, ""prorated_rent"": null, ""security_deposit"": null, ""lease_rent"": null, ""monthly_payment_breakdown"": null, ""utility_charges"": null","Lessee shall be responsible for the payment of all taxes, license fees, registration fees, insurance, and other assessments associated with the use of the Property."
9,"""tenant_name"": null, ""unit_address"": null, ""unit_number"": null, ""unit_type"": null, ""agreement_date"": null, ""lease_start"": null, ""lease_end"": null, ""lease_auto_renew"": null, ""hourly_rate"": null, ""monthly_rent"": null, ""prorated_rent"": null, ""security_deposit"": null, ""lease_rent"": null, ""monthly_payment_breakdown"": null, ""utility_charges"": null","Replacements 31.REPLACEMENTS AND SUBLETTING. Replacing a resident, subletting. assignment, or granting a right or license to occupy is allowed only when we expressly consent in writing. If departing or remaining residents find a replacement resident acceptable to us before moving out and we expressly consent, in writing, to the replacement, subletting, assignment, or granting a right or any license to occupy, then (1) a reletting charge will not be due; (2) a reasonable administrative (paperwork) and/or transfer fee will be due, and a rekeying fee is due if rekeying is requested or required; and (3) the departing and remaining residents will remain liable for all lease obligations for the rest of the original lease term. Procedures for Replacement. If we approve a replacement resident, then, at our option: (1) the replacement resident must sign this Lease Contract with or without an increase in the total security deposit; or (2) the remaining and replacement residents must sign an entirely new I.case Contract. Unless we agree otherwise in writing-even if a new L.case Contract is signed. Responsibilities of Owner and Resident 32.RESPONSIBILITIES OF OWNER. We'll act with customary diligence to: (1) keep common areas reasonably clean, subject to paragraph 26 (Condition of the Premises and Alterations); (2) maintain fixtures, furniture, hot water, heating and A/C. equipment. (3) comply with applicable federal, state, and local laws regarding safety, sanitation, and fair housing; and (4) make all repairs and replacements as may be necessary."
10,"""tenant_name"": null, ""unit_address"": null, ""unit_number"": null, ""unit_type"": null, ""agreement_date"": null, ""lease_start"": null, ""lease_end"": null, ""lease_auto_renew"": null, ""hourly_rate"": null, ""monthly_rent"": null, ""prorated_rent"": null, ""security_deposit"": null, ""lease_rent"": null, ""monthly_payment_breakdown"": null, ""utility_charges"": null","RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILITIES OF OWNER, RESPONSIBILIT"


In [18]:
# TODO: calculate accuracy using metrics