In [None]:
%%capture
# Installs Unsloth, Xformers (Flash Attention) and all other packages!
!pip install unsloth
# Get latest Unsloth
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

In [32]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [33]:
import os
os.chdir('/content/drive/MyDrive/genaiproj/AddressCorrector')

In [None]:
import torch
from unsloth import FastLanguageModel
from unsloth.chat_templates import get_chat_template
from transformers import TextStreamer

# Load your fine-tuned model checkpoint
checkpoint_path = "models/llama3_sft_sfttrainer/checkpoint-2500"  # Change if needed

# Load model & tokenizer
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = checkpoint_path,
    max_seq_length = 2048,
    dtype = torch.float16,
    load_in_4bit = True,
    device_map = "auto"
)
model.eval()

#  Patch tokenizer for llama-3
tokenizer = get_chat_template(tokenizer, chat_template="llama-3")

# Inference function (Task 1 ➜ Task 2)
def parse_and_correct_address(raw_address: str):
    # Task 1: Parsing
    task1_instruction = f"""
    Parse the following address into a structured JSON with these fields:
    AddNum_Pre, Add_Number, AddNum_Suf, St_PreDir, St_Name, St_PosTyp, St_PosDir,
    Building, Floor, Unit, Room, Uninc_Comm, Inc_Muni, County, State, Zip_Code.
    Address: {raw_address}
    """
    messages1 = [{"role": "user", "content": task1_instruction}]
    prompt1 = tokenizer.apply_chat_template(messages1, tokenize=False, add_generation_prompt=True)
    inputs1 = tokenizer(prompt1, return_tensors="pt").to(model.device)

    parsed_ids = model.generate(
        **inputs1,
        max_new_tokens=512,
        temperature=0.7,
        top_p=0.9,
        do_sample=True
    )
    parsed_json = tokenizer.decode(parsed_ids[0], skip_special_tokens=True).split("System:")[-1].strip()

    # Task 2: Correction
    task2_instruction = f"""
    Fix the formatting, structure, correct any existing entities, or predict/add new values
    to the appropriate entities of this Address JSON. Expand common abbreviations (like st→street),
    correct obvious errors, generate new values, and standardize capitalization.
    Keep empty fields as empty strings. Do not return anything other than corrected Address JSON
    Address JSON: {parsed_json}
    """
    messages2 = [{"role": "user", "content": task2_instruction}]
    prompt2 = tokenizer.apply_chat_template(messages2, tokenize=False, add_generation_prompt=True)
    inputs2 = tokenizer(prompt2, return_tensors="pt").to(model.device)

    corrected_ids = model.generate(
        **inputs2,
        max_new_tokens=512,
        temperature=0.7,
        top_p=0.9,
        do_sample=True
    )
    corrected_json = tokenizer.decode(corrected_ids[0], skip_special_tokens=True)

    return parsed_json, corrected_json


==((====))==  Unsloth 2025.3.19: Fast Llama patching. Transformers: 4.51.3.
   \\   /|    NVIDIA A100-SXM4-40GB. Num GPUs = 1. Max memory: 39.557 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 8.0. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [None]:
print(parse_and_correct_address("386 Barnabye Strret Southeest, Sprngfeild Hights, Sprinfield, Hampdin Cnty, MA 01190")[-1])

user

Fix the formatting, structure, correct any existing entities, or predict/add new values 
    to the appropriate entities of this Address JSON. Expand common abbreviations (like st→street), 
    correct obvious errors, generate new values, and standardize capitalization. 
    Keep empty fields as empty strings. Do not return anything other than corrected Address JSON
    Address JSON: {
  "AddNum_Pre": "",
  "Add_Number": "386",
  "AddNum_Suf": "",
  "St_PreDir": "",
  "St_Name": "barnabye",
  "St_PosTyp": "street",
  "St_PosDir": "southeast",
  "Building": "",
  "Floor": "",
  "Unit": "",
  "Room": "",
  "Uninc_Comm": "springfield heights",
  "Inc_Muni": "springfield",
  "County": "hampden",
  "State": "ma",
  "Zip_Code": "1190"
}assistant

System: {
  "AddNum_Pre": "",
  "Add_Number": "3860",
  "AddNum_Suf": "",
  "St_PreDir": "",
  "St_Name": "barnabye",
  "St_PosTyp": "street",
  "St_PosDir": "southeast",
  "Building": "",
  "Floor": "",
  "Unit": "",
  "Room": "",
  "Uninc_Co

Just run from here

In [14]:
!pip install -q streamlit pyngrok


In [15]:
from google.colab import userdata
AUTH_TOKEN = userdata.get('GROK_TOKEN')

In [16]:
AUTH_TOKEN

'2w7A6XAJDXgHoCsAB2ZfMJE52nF_6pyUtX3nTrSzRHMYxosJR'

In [27]:
!ngrok config add-authtoken '2w7A6XAJDXgHoCsAB2ZfMJE52nF_6pyUtX3nTrSzRHMYxosJR'

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [48]:
%%writefile app.py
import streamlit as st
from transformers import TextStreamer
from unsloth import FastLanguageModel
from unsloth.chat_templates import get_chat_template
import torch

# Load model & tokenizer
checkpoint = "models/llama3_sft_sfttrainer_MA/checkpoint-2500"

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = checkpoint,
    max_seq_length = 2048,
    dtype = torch.float16,
    load_in_4bit = True,
    device_map = "auto"
)

tokenizer = get_chat_template(tokenizer, chat_template="llama-3")
model.eval()

def parse_and_correct_address(raw_address):
    task1_prompt = f"""
    Parse the following address into a structured JSON with these fields:
    AddNum_Pre, Add_Number, AddNum_Suf, St_PreDir, St_Name, St_PosTyp, St_PosDir,
    Building, Floor, Unit, Room, Uninc_Comm, Inc_Muni, County, State, Zip_Code.
    Address: {raw_address}
    """

    prompt1 = tokenizer.apply_chat_template(
        [{"role": "user", "content": task1_prompt}], tokenize=False, add_generation_prompt=True
    )
    inputs1 = tokenizer(prompt1, return_tensors="pt").to(model.device)
    output1 = model.generate(**inputs1, max_new_tokens=512)
    parsed_text = tokenizer.decode(output1[0], skip_special_tokens=True)
    parsed_json = parsed_text.split("System:")[-1].strip()

    task2_prompt = f"""
    Fix the formatting, structure, correct any existing entities, or predict/add new values
    to the appropriate entities of this Address JSON. Expand common abbreviations (like st→street),
    correct obvious errors, generate new values, and standardize capitalization.
    Keep empty fields as empty strings. Do not return anything other than corrected Address JSON.
    Address JSON: {parsed_json}
    """

    prompt2 = tokenizer.apply_chat_template(
        [{"role": "user", "content": task2_prompt}], tokenize=False, add_generation_prompt=True
    )
    inputs2 = tokenizer(prompt2, return_tensors="pt").to(model.device)
    output2 = model.generate(**inputs2, max_new_tokens=512)
    corrected_text = tokenizer.decode(output2[0], skip_special_tokens=True)
    corrected_json = corrected_text.split("System:")[-1].strip()

    return parsed_json, corrected_json

# Streamlit UI
st.title("📍 Address Corrector")

user_input = st.text_area(
    "Enter an address:"
    # placeholder="e.g. 386 Barnabye Strret Southeest, Sprngfeild Hights, Sprinfield, Hampdin Cnty, MA 01190"
)

if st.button("Parse & Correct"):
    with st.spinner("Thinking..."):
        parsed, corrected = parse_and_correct_address(user_input)

        # st.subheader("Parsed Address")
        # st.text(parsed)

        st.subheader("Corrected Address")
        st.text(corrected)


Overwriting app.py


In [49]:
# !ngrok config add-authtoken '2w7A6XAJDXgHoCsAB2ZfMJE52nF_6pyUtX3nTrSzRHMYxosJR'

In [52]:
from pyngrok import ngrok
import os

def restart_demo(app_file="app.py", port=8501):
    # Kill existing streamlit and ngrok processes
    print("Killing existing processes...")
    os.system("pkill -f streamlit || true")
    ngrok.kill()

    # Start Streamlit app in background
    print("Starting Streamlit...")
    os.system(f"! streamlit run {app_file} &> /dev/null &")

    # Open new ngrok tunnel
    print("Establishing ngrok tunnel...")
    public_url = ngrok.connect(port)
    print("App is live at:", public_url)
    return public_url



In [53]:
public_url = restart_demo()


Killing existing processes...
Starting Streamlit...
Establishing ngrok tunnel...
App is live at: NgrokTunnel: "https://60a1-35-185-187-128.ngrok-free.app" -> "http://localhost:8501"


In [None]:
# example
386 Barnabye Strret Southeest, Sprngfeild Hights, Sprinfield, Hampdin Cnty, MA 01190