<a href="https://colab.research.google.com/github/Vaibhav-Shastri/Census-Field-Companion-PoC/blob/main/PoC_ORGI_Census_Field_Companion.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install core libraries
!pip install -q sentence-transformers faiss-cpu pandas fastapi uvicorn openai requests beautifulsoup4 streamlit spacy pdfplumber python-docx pytesseract
# Download spaCy model
!python -m spacy download en_core_web_sm


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.8/42.8 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m915.9 kB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.2/48.2 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.3/31.3 MB[0m [31m25.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.9/9.9 MB[0m [31m71.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.2/60.2 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.6/5.6 MB[0m [31m33.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.3/244.3 kB[0m [31m12.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
import os
BASE = "/content/census_field_companion"
SUBFOLDERS = ["manuals", "chunks", "embeddings", "models"]
for d in SUBFOLDERS:
    os.makedirs(os.path.join(BASE, d), exist_ok=True)
print("Workspace set up at", BASE)


Workspace set up at /content/census_field_companion


In [3]:
import requests, os, zipfile

MANUALS = os.path.join(BASE, "manuals")
manual_urls = {
  "HouseListing_Housing_Census_2011.pdf":
    "https://catalog.ihsn.org//catalog/4161/download/55469",
  "Abridged_Houselist_Household_Schedule.pdf":
    "https://catalog.ihsn.org//catalog/4161/download/55467",
  "Supervisor_Handbook_Flowcharts.pdf":
    "https://catalog.ihsn.org//catalog/4161/download/55468",
  "Household_Schedule_Manual.pdf":
    "https://catalog.ihsn.org//catalog/4161/download/55461",
  "Houselisting_Housing_Census_Schedule.pdf":
    "https://catalog.ihsn.org//catalog/4161/download/55462",
  "Urban_Frame_Jurisdiction.zip":
    "https://catalog.ihsn.org//catalog/4161/download/55464"
}

for name,url in manual_urls.items():
    out = os.path.join(MANUALS, name)
    if not os.path.exists(out):
        print("↓", name)
        r = requests.get(url, timeout=60); r.raise_for_status()
        with open(out,"wb") as f: f.write(r.content)
        print("✔ saved")
        if name.endswith(".zip"):
            with zipfile.ZipFile(out,"r") as z:
                z.extractall(MANUALS)
            print("✔ unzipped")
    else:
        print("✓ exists:", name)

print("\nManuals now in:", os.listdir(MANUALS))


↓ HouseListing_Housing_Census_2011.pdf
✔ saved
↓ Abridged_Houselist_Household_Schedule.pdf
✔ saved
↓ Supervisor_Handbook_Flowcharts.pdf
✔ saved
↓ Household_Schedule_Manual.pdf
✔ saved
↓ Houselisting_Housing_Census_Schedule.pdf
✔ saved
↓ Urban_Frame_Jurisdiction.zip
✔ saved
✔ unzipped

Manuals now in: ['HouseListing_Housing_Census_2011.pdf', 'Supervisor_Handbook_Flowcharts.pdf', 'Household_Schedule_Manual.pdf', 'Houselisting_Housing_Census_Schedule.pdf', 'Analytical Documents', 'Abridged_Houselist_Household_Schedule.pdf', 'Urban_Frame_Jurisdiction.zip']


In [5]:
import pdfplumber, pytesseract
from PIL import Image
import os

TXT_DIR = os.path.join(BASE, "manuals_txt")
os.makedirs(TXT_DIR, exist_ok=True)

for f in os.listdir(MANUALS):
    path = os.path.join(MANUALS, f)
    text_out = os.path.join(TXT_DIR, f.rsplit(".",1)[0] + ".txt")

    if f.lower().endswith(".pdf"):
        with pdfplumber.open(path) as pdf, open(text_out,"w",encoding="utf-8") as fout:
            for page in pdf.pages:
                txt = page.extract_text()
                if txt:
                    fout.write(txt+"\n")
                else:
                    # fallback to OCR
                    im = page.to_image(resolution=150).original
                    ocr = pytesseract.image_to_string(im)
                    fout.write(ocr+"\n")
        print("→ extracted to", text_out)
    elif f.lower().endswith(".txt"):
        os.system(f"cp {path} {text_out}")




→ extracted to /content/census_field_companion/manuals_txt/HouseListing_Housing_Census_2011.txt
→ extracted to /content/census_field_companion/manuals_txt/Supervisor_Handbook_Flowcharts.txt
→ extracted to /content/census_field_companion/manuals_txt/Household_Schedule_Manual.txt
→ extracted to /content/census_field_companion/manuals_txt/Houselisting_Housing_Census_Schedule.txt
→ extracted to /content/census_field_companion/manuals_txt/Abridged_Houselist_Household_Schedule.txt


In [6]:
import spacy, json
nlp = spacy.load("en_core_web_sm")
CHUNK_DIR = os.path.join(BASE,"chunks")

for txt in os.listdir(TXT_DIR):
    doc = nlp(open(os.path.join(TXT_DIR,txt),"r",encoding="utf-8").read())
    chunks = []; current_h="GENERAL"
    for sent in doc.sents:
        s=sent.text.strip()
        if s.isupper() and len(s.split())<10:
            current_h=s
        else:
            chunks.append({"heading":current_h,"text":s})
    out = os.path.join(CHUNK_DIR, txt.replace(".txt",".json"))
    with open(out,"w",encoding="utf-8") as f:
        json.dump(chunks,f,ensure_ascii=False,indent=2)
    print("→ chunked", txt, "→", len(chunks), "chunks")


→ chunked Household_Schedule_Manual.txt → 34 chunks
→ chunked HouseListing_Housing_Census_2011.txt → 1230 chunks
→ chunked Abridged_Houselist_Household_Schedule.txt → 3141 chunks
→ chunked Houselisting_Housing_Census_Schedule.txt → 43 chunks
→ chunked Supervisor_Handbook_Flowcharts.txt → 9 chunks


In [7]:
from sentence_transformers import SentenceTransformer
import numpy as np, faiss, pickle, glob

model = SentenceTransformer("all-MiniLM-L6-v2")
all_chunks=[]; texts=[]

for jf in glob.glob(CHUNK_DIR+"/*.json"):
    data=json.load(open(jf,encoding="utf-8"))
    all_chunks += data
    texts += [c["text"] for c in data]

# 1) Compute embeddings
embs = model.encode(texts, batch_size=32, convert_to_numpy=True).astype("float32")
faiss.normalize_L2(embs)

# 2) Build & save index
d = embs.shape[1]
index = faiss.IndexFlatIP(d)
index.add(embs)
faiss.write_index(index, os.path.join(BASE,"models","faiss.idx"))

# 3) Save chunks+embs for quick load
with open(os.path.join(BASE,"models","embeds.pkl"),"wb") as f:
    pickle.dump((all_chunks,embs),f)

print("Indexed", index.ntotal, "chunks")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Indexed 4457 chunks


In [8]:
%%bash
cat > backend.py << 'EOF'
from fastapi import FastAPI
from pydantic import BaseModel
import openai, faiss, pickle, numpy as np
from sentence_transformers import SentenceTransformer

# Load assets
all_chunks, _ = pickle.load(open("models/embeds.pkl","rb"))
index = faiss.read_index("models/faiss.idx")
model = SentenceTransformer("all-MiniLM-L6-v2")
openai.api_key = "YOUR_OPENAI_KEY"

app = FastAPI()

class Query(BaseModel):
    question: str
    role: str = "enumerator"  # default role

@app.post("/chat")
async def chat(q: Query):
    # 1) Embed question
    q_emb = model.encode([q.question], convert_to_numpy=True).astype("float32")
    faiss.normalize_L2(q_emb)
    D,I = index.search(q_emb,5)
    # 2) Collect top-5 chunks
    ctx = [all_chunks[i] for i in I[0]]
    # 3) Build prompt with role and sample Qs
    sample = {
      "enumerator":["What if a house is locked?","How to record vacant dwelling?"],
      "supervisor":["How many locked houses today?","Show me hotspots of issues."],
      "manager":["Overall compliance rate?","Aggregate data entry errors?"]
    }[q.role]
    prompt = (
      f"You are Census Field Companion for role: {q.role}.\n"
      f"Sample questions for you: {sample}.\n"
      "Answer using only these excerpts (cite heading):\n"
    )
    for c in ctx:
        prompt+=f"[{c['heading']}] {c['text']}\n"
    prompt+=f"\nUser: {q.question}\nAnswer:"
    # 4) Call GPT
    res = openai.ChatCompletion.create(
      model="gpt-3.5-turbo",
      messages=[{"role":"system","content":prompt}],
      temperature=0.2
    )
    return {"answer":res.choices[0].message.content}

EOF


In [9]:
!pip install -q nest-asyncio pyngrok


In [10]:
import nest_asyncio, os
nest_asyncio.apply()


In [11]:
from pyngrok import ngrok
import subprocess, time

# 1) Launch Uvicorn in the background
#    Make sure working directory is /content/census_field_companion
import os
os.chdir("/content/census_field_companion")

uvicorn_proc = subprocess.Popen(
    ["uvicorn", "backend:app", "--host", "0.0.0.0", "--port", "8000"],
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE
)

# 2) Give it a moment to start
time.sleep(2)

# 3) Open an ngrok tunnel to port 8000
public_url = ngrok.connect(8000).public_url
print("✅ FastAPI server is live at:", public_url)
print("→ Chat endpoint:", public_url + "/chat")




ERROR:pyngrok.process.ngrok:t=2025-06-08T07:46:14+0000 lvl=eror msg="failed to reconnect session" obj=tunnels.session err="authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your authtoken: https://dashboard.ngrok.com/get-started/your-authtoken\r\n\r\nERR_NGROK_4018\r\n"
ERROR:pyngrok.process.ngrok:t=2025-06-08T07:46:14+0000 lvl=eror msg="session closing" obj=tunnels.session err="authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your authtoken: https://dashboard.ngrok.com/get-started/your-authtoken\r\n\r\nERR_NGROK_4018\r\n"
ERROR:pyngrok.process.ngrok:t=2025-06-08T07:46:14+0000 lvl=eror msg="terminating with error" obj=app err="authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your aut

PyngrokNgrokError: The ngrok process errored on start: authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your authtoken: https://dashboard.ngrok.com/get-started/your-authtoken\r\n\r\nERR_NGROK_4018\r\n.

In [12]:
# List everything in manuals/, including unzipped Urban Frame docs
import os
manuals_dir = "/content/census_field_companion/manuals"
print("\nManuals folder contains:")
for fn in sorted(os.listdir(manuals_dir)):
    print(" ", fn)



Manuals folder contains:
  Abridged_Houselist_Household_Schedule.pdf
  Analytical Documents
  HouseListing_Housing_Census_2011.pdf
  Household_Schedule_Manual.pdf
  Houselisting_Housing_Census_Schedule.pdf
  Supervisor_Handbook_Flowcharts.pdf
  Urban_Frame_Jurisdiction.zip


In [13]:
import subprocess, nest_asyncio, os
nest_asyncio.apply()

# Change to project root
os.chdir("/content/census_field_companion")

# Start the FastAPI app via Uvicorn in the background
uvicorn_proc = subprocess.Popen(
    ["uvicorn", "backend:app", "--host", "0.0.0.0", "--port", "8000"],
    stdout=subprocess.PIPE, stderr=subprocess.PIPE
)
print("🚀 FastAPI launched on port 8000")


🚀 FastAPI launched on port 8000


In [14]:
# Install localtunnel
!npm install -g localtunnel


[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K
added 22 packages in 2s
[1G[0K⠋[1G[0K
[1G[0K⠋[1G[0K3 packages are looking for funding
[1G[0K⠋[1G[0K  run `npm fund` for details
[1G[0K⠋[1G[0K

In [15]:
import subprocess, time

# Give Uvicorn a moment
time.sleep(2)

# Launch localtunnel on port 8000
lt = subprocess.Popen(["lt", "--port", "8000"], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

# Read the first line of stdout to get the public URL
time.sleep(2)
out = lt.stdout.readline().strip()
print("🌐 LocalTunnel URL:", out)
print("→ Chat endpoint:", out + "/chat")


🌐 LocalTunnel URL: your url is: https://upset-bushes-sleep.loca.lt
→ Chat endpoint: your url is: https://upset-bushes-sleep.loca.lt/chat


In [20]:
import requests

PUBLIC_URL = "https://upset-bushes-sleep.loca.lt"  # <-- replace here

payload = {"question":"What if a house is locked?","role":"enumerator"}
r = requests.post(PUBLIC_URL + "/chat", json=payload, timeout=20)
print("Status:", r.status_code)
print("Answer:", r.json().get("answer"))


ReadTimeout: HTTPSConnectionPool(host='upset-bushes-sleep.loca.lt', port=443): Read timed out. (read timeout=20)

In [21]:
!ps aux | grep uvicorn

root       10050  0.0  0.0      0     0 ?        Z    07:49   0:00 [uvicorn] <defunct>
root       11413  0.0  0.0   7376  3524 ?        S    07:54   0:00 /bin/bash -c ps aux | grep uvicorn
root       11415  0.0  0.0   6484  2320 ?        S    07:54   0:00 grep uvicorn


In [22]:
import subprocess, nest_asyncio, os, time
nest_asyncio.apply()
os.chdir("/content/census_field_companion")
# Kill any old uvicorn (optional)
!pkill -f uvicorn
# Restart it
uvicorn_proc = subprocess.Popen(
    ["uvicorn", "backend:app", "--host", "0.0.0.0", "--port", "8000"],
    stdout=subprocess.PIPE, stderr=subprocess.PIPE
)
time.sleep(2)
print("✅ Uvicorn restarted on port 8000")


✅ Uvicorn restarted on port 8000


In [23]:
# Install if not already
!npm install -g localtunnel

[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K
changed 22 packages in 2s
[1G[0K⠏[1G[0K
[1G[0K⠏[1G[0K3 packages are looking for funding
[1G[0K⠏[1G[0K  run `npm fund` for details
[1G[0K⠏[1G[0K

In [24]:
import subprocess, time

# Kill any previous localtunnel
!pkill -f localtunnel

# Start a new tunnel to port 8000
lt_proc = subprocess.Popen(
    ["lt", "--port", "8000", "--print-requests"],
    stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
)
# Give it a couple of seconds
time.sleep(3)

# Read the URL line from stdout
line = lt_proc.stdout.readline().strip()
print("🌐 New LocalTunnel URL:", line)
public_url = line  # store for testing


🌐 New LocalTunnel URL: your url is: https://yellow-books-carry.loca.lt


In [25]:
import requests

PUBLIC_URL = "https://yellow-books-carry.loca.lt"  # replace with what you just saw
payload = {"question":"What if a house is locked?","role":"enumerator"}

try:
    r = requests.post(f"{PUBLIC_URL}/chat", json=payload, timeout=20)
    print("Status:", r.status_code)
    print("Answer:", r.json().get("answer"))
except Exception as e:
    print("Request failed:", e)


Request failed: HTTPSConnectionPool(host='yellow-books-carry.loca.lt', port=443): Read timed out. (read timeout=20)


In [26]:
print(uvicorn_proc.stderr.read().decode())

ERROR:    Error loading ASGI app. Could not import module "backend".



In [27]:
import os
os.chdir("/content/census_field_companion")
print("CWD:", os.getcwd())
print("Contents:", os.listdir())


CWD: /content/census_field_companion
Contents: ['embeddings', 'manuals', 'chunks', 'models', 'manuals_txt']


In [28]:
# 1) Add project root to path
import sys
sys.path.insert(0, "/content/census_field_companion")

# 2) Import dependencies
import pickle, faiss, numpy as np
from sentence_transformers import SentenceTransformer
import openai, json
import pandas as pd


In [29]:
# Load chunk metadata + embeddings
with open("models/embeds.pkl","rb") as f:
    all_chunks, embs = pickle.load(f)

# Build or load FAISS index
index = faiss.read_index("models/faiss.idx")

# Load embedding model
embed_model = SentenceTransformer("all-MiniLM-L6-v2")

# (Optional) configure OpenAI key
openai.api_key = "YOUR_OPENAI_KEY"


In [30]:
def chat_local(question: str, role: str="enumerator", top_k: int=5):
    # 1) Embed the question
    q_emb = embed_model.encode([question], convert_to_numpy=True).astype("float32")
    faiss.normalize_L2(q_emb)
    D, I = index.search(q_emb, top_k)

    # 2) Gather the top-k chunks
    context = [all_chunks[i] for i in I[0]]

    # 3) Build the system prompt (including role & sample Qs)
    samples = {
      "enumerator": ["What if a house is locked?", "How to record a vacant dwelling?"],
      "supervisor": ["Show me hotspots of locked houses today.", "What’s the compliance rate?"],
      "manager":    ["Aggregate data entry errors?", "Overall completion percentage?"]
    }[role]

    prompt = (
      f"You are Census Field Companion for role: {role}.\n"
      f"Sample questions for you: {samples}\n"
      "Answer using only these excerpts (cite heading):\n\n"
    )
    for c in context:
        prompt += f"[{c['heading']}] {c['text']}\n"
    prompt += f"\nUser: {question}\nAnswer:"

    # 4) Call the LLM
    resp = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[{"role":"system","content":prompt}],
        temperature=0.2,
        max_tokens=200
    )
    return resp.choices[0].message.content.strip()


In [31]:
print(chat_local("What if a house is locked?", role="enumerator"))
print(chat_local("How many locked houses might a supervisor see?", role="supervisor"))
print(chat_local("Give me the overall data entry error guidelines.", role="manager"))


APIRemovedInV1: 

You tried to access openai.ChatCompletion, but this is no longer supported in openai>=1.0.0 - see the README at https://github.com/openai/openai-python for the API.

You can run `openai migrate` to automatically upgrade your codebase to use the 1.0.0 interface. 

Alternatively, you can pin your installation to the old version, e.g. `pip install openai==0.28`

A detailed migration guide is available here: https://github.com/openai/openai-python/discussions/742


In [32]:
import openai, faiss, pickle
import numpy as np
from sentence_transformers import SentenceTransformer

# --- (re)load models & data if needed ---
# with open("models/embeds.pkl","rb") as f:
#     all_chunks, embs = pickle.load(f)
# index = faiss.read_index("models/faiss.idx")
# embed_model = SentenceTransformer("all-MiniLM-L6-v2")

openai.api_key = "YOUR_OPENAI_KEY"

def chat_local(question: str, role: str="enumerator", top_k: int=5):
    # 1) Embed the question
    q_emb = embed_model.encode([question], convert_to_numpy=True).astype("float32")
    faiss.normalize_L2(q_emb)
    D, I = index.search(q_emb, top_k)

    # 2) Gather top-k chunks
    context = [all_chunks[i] for i in I[0]]

    # 3) Build prompt
    samples = {
      "enumerator": ["What if a house is locked?", "How to record a vacant dwelling?"],
      "supervisor": ["Show me hotspots of locked houses today.", "What’s the compliance rate?"],
      "manager":    ["Aggregate data entry errors?", "Overall completion percentage?"]
    }[role]

    prompt = (
        f"You are Census Field Companion for role: {role}.\n"
        f"Sample questions for you: {samples}\n"
        "Answer using only these excerpts (cite heading):\n\n"
    )
    for c in context:
        prompt += f"[{c['heading']}] {c['text']}\n"
    prompt += f"\nUser: {question}\nAnswer:"

    # 4) Call the new Chat Completions API
    resp = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role":"system","content":prompt}],
        temperature=0.2,
        max_tokens=200
    )
    return resp.choices[0].message.content.strip()


In [33]:
print(chat_local("What if a house is locked?", role="enumerator"))
print(chat_local("How many locked houses might a supervisor see?", role="supervisor"))
print(chat_local("Give me the overall data entry error guidelines.", role="manager"))


AuthenticationError: Error code: 401 - {'error': {'message': 'Incorrect API key provided: YOUR_OPE***_KEY. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}

In [34]:
import os
from getpass import getpass

# Prompt you to paste your key (it won’t echo)
OPENAI_KEY = getpass("Paste your OpenAI API key: ")
os.environ["OPENAI_API_KEY"] = OPENAI_KEY


Paste your OpenAI API key: ··········


In [35]:
import openai
openai.api_key = os.environ["OPENAI_API_KEY"]


In [36]:
print(chat_local("What if a house is locked?", role="enumerator"))
print(chat_local("How many locked houses might a supervisor see?", role="supervisor"))
print(chat_local("Give me the overall data entry error guidelines.", role="manager"))


RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}

In [38]:
from getpass import getpass
import os

# Paste your key when prompted (it won’t echo)
os.environ["OPENAI_API_KEY"] = getpass("Paste your OpenAI API key: ")


Paste your OpenAI API key: ··········


In [39]:
import openai
openai.api_key = os.environ["OPENAI_API_KEY"]


In [40]:
print(chat_local("What if a house is locked?", role="enumerator"))
print(chat_local("How many locked houses might a supervisor see?", role="supervisor"))
print(chat_local("Give me the overall data entry error guidelines.", role="manager"))


If the Census house was found locked because the occupants have gone on a journey or pilgrimage, then it should not be treated as 'vacant' but the use to which it is put should be recorded here and the fact that the occupants have gone on journey or pilgrimage may be noted as 'House locked - occupants on journey/pilgrimage'. If the census house was found locked during your first visit, obviously the date of your subsequent visit to the census house will be recorded.
Supervisors may see locked houses while visiting each and every house without exception and filling up the Houselisting Schedule.
[A S . . . .] Data entry on computer


In [41]:
#@title 🗺️ Census Field Companion Demo
from IPython.display import Markdown

question = "What if a house is locked?"  #@param {type:"string"}
role = "enumerator"                       #@param ["enumerator","supervisor","manager"]

answer = chat_local(question, role)

display(Markdown(f"**Role:** {role.capitalize()}  \n**Q:** {question}  \n**A:** {answer}"))


**Role:** Enumerator  
**Q:** What if a house is locked?  
**A:** If the Census house was found locked because the occupants have gone on a journey or pilgrimage, then it should not be treated as 'vacant' but the use to which it is put should be recorded here and the fact that the occupants have gone on journey or pilgrimage may be noted as 'House locked - occupants on journey/pilgrimage'. If the census house was found locked during your first visit, obviously the date of your subsequent visit to the census house will be recorded.