In [1]:
%pip install --upgrade --quiet google-genai

In [2]:
import sys

if "google.colab" in sys.modules:
    from google.colab import auth

    auth.authenticate_user()

In [3]:
from IPython.display import HTML, Markdown, display
from google import genai
from google.genai.types import (
    FunctionDeclaration,
    GenerateContentConfig,
    GoogleSearch,
    HarmBlockThreshold,
    HarmCategory,
    MediaResolution,
    Part,
    Retrieval,
    SafetySetting,
    Tool,
    ToolCodeExecution,
    VertexAISearch,
)

In [4]:

from google import genai
from google.genai import types
client = genai.Client(
  vertexai=True, project="ai-patient-snapshot-001", location="us-central1",
)

In [5]:
!pip -q install fastapi uvicorn nest_asyncio python-multipart pypdf google-generativeai pyngrok

In [6]:
from google.colab import userdata
from pyngrok import ngrok
token = userdata.get("Grok2")    # validate
ngrok.set_auth_token(token)

In [19]:
# ==============================
# Imports
# ==============================
from fastapi import FastAPI, Request, HTTPException
from fastapi.responses import JSONResponse
from pyngrok import ngrok
import json
from IPython.display import Markdown
from google import genai
from google.genai import types
import asyncio
import time

# Assume 'client' is already initialized above:
# client = genai.Client(api_key="...", project="...", location="...")

# ==============================
# Constants, helpers, and globals
# ==============================
MODEL_NAME = "gemini-2.0-flash-001"

# ✅ UPDATED: Added ".xml"
ALLOWED_AUX_EXTS = {".json", ".ndjson", ".hl7", ".txt", ".xml"}

def _ext(name: str) -> str:
    name = (name or "").lower()
    dot = name.rfind(".")
    return name[dot:] if dot != -1 else ""

def _is_allowed_aux(filename: str) -> bool:
    return _ext(filename) in ALLOWED_AUX_EXTS

# Colab globals (hold last uploaded non-PDF file and model summary)
last_aux_filename = None   # e.g., "patient.hl7" / "data.xml"
last_aux_bytes = None      # raw bytes of that file
last_summary = None        # Python dict parsed from model JSON

# ==============================
# FastAPI app
# ==============================
app = FastAPI()

# ------------------------------
# Background task (optional local persistence)
# ------------------------------
async def long_processing(
    pdf_bytes: bytes,
    aux_bytes: bytes,
    aux_filename: str,
    json_summary: dict,
):
    import os, time, json as _json
    os.makedirs("inbox", exist_ok=True)
    ts = int(time.time())  # timestamp for unique filenames

    with open(f"inbox/{ts}-report.pdf", "wb") as f:
        f.write(pdf_bytes)

    with open(f"inbox/{ts}-{aux_filename}", "wb") as f:
        f.write(aux_bytes)

    with open(f"inbox/{ts}-summary.json", "w", encoding="utf-8") as f:
        _json.dump(json_summary, f, ensure_ascii=False, indent=2)

    print("✅ Background processing finished & saved locally:", ts)

# ------------------------------
# /process endpoint
# ------------------------------
@app.post("/process")
async def process(request: Request):
    # Validate content-type
    if not (request.headers.get("content-type", "").lower().startswith("multipart/")):
        raise HTTPException(415, "Expected multipart/form-data")

    # Parse form & files
    form = await request.form()

    pdf = form.get("pdf")  # required
    # Accept one of these for the non-PDF file
    aux = form.get("fhir") or form.get("aux") or form.get("file2")

    if pdf is None or aux is None:
        raise HTTPException(422, "Two files required: 'pdf' and one of ['fhir','aux','file2'].")

    pdf_bytes = await pdf.read()
    aux_bytes = await aux.read()

    if not pdf_bytes:
        raise HTTPException(400, "Empty 'pdf' file received")
    if not aux_bytes:
        raise HTTPException(400, "Empty auxiliary file received")

    # Enforce allowed extensions for the auxiliary file (now includes .xml)
    if not _is_allowed_aux(aux.filename or ""):
        raise HTTPException(
            415,
            "Unsupported auxiliary file type. Allowed: .json, .ndjson, .hl7, .txt, .xml",
        )

    print(
        f"✅ Received: pdf={pdf.filename} ({len(pdf_bytes)} bytes), "
        f"aux={aux.filename} ({len(aux_bytes)} bytes)"
    )

    # Build model input (PDF Part)
    pdf_part = types.Part(
        inline_data=types.Blob(
            mime_type="application/pdf",
            data=pdf_bytes,
        )
    )

    prompt = f"""
    You will receive a medical report as a PDF.

    Extract the items below and return a STRICT JSON object.

    Rules:
    - Return ONLY JSON (no prose, no Markdown, no code fences).
    - Every item listed must be a top-level key.
    - If a value is missing, use null.
    - Use the coding systems as indicated (SNOMED CT, RxNorm, LOINC, CVX/MVX, UCUM).

    Required schema (keys always present; values can be null):
    {{
      "patient_name": string|null,
      "sex": {{"code": string, "display": string}}|null,
      "date_of_birth": string|null,

      "race": [{{"code": string, "display": string, "omb_min_category": string}}]|null,
      "ethnicity": [{{"code": string, "display": string, "omb_min_category": string}}]|null,
      "preferred_language": {{"code": string, "display": string}}|null,
      "smoking_status": string|null,

      "problems": [{{"code": string, "system": "SNOMED CT", "display": string}}]|null,
      "medications": [{{"rxnorm_code": string, "display": string}}]|null,
      "medication_allergies": [{{"substance_code": string, "system": "RxNorm", "display": string}}]|null,

      "laboratory_tests": [{{"loinc_code": string, "display": string}}]|null,
      "laboratory_results": [
        {{"test_loinc": string|null, "value": string|number, "unit": string|null, "display": string|null}}
      ]|null,

      "vital_signs": {{
        "systolic_bp": {{"value": number, "unit": string}}|null,
        "diastolic_bp": {{"value": number, "unit": string}}|null,
        "height": {{"value": number, "unit": string}}|null,
        "weight": {{"value": number, "unit": string}}|null,
        "heart_rate": {{"value": number, "unit": string}}|null,
        "resp_rate": {{"value": number, "unit": string}}|null,
        "temperature": {{"value": number, "unit": string}}|null,
        "spo2": {{"value": number, "unit": string}}|null,
        "fio2": {{"value": number, "unit": string}}|null,
        "pediatrics_optional": {{
          "bmi_percentile": number|null,
          "weight_for_length_percentile": number|null,
          "head_circumference": {{"value": number, "unit": string}}|null,
          "growth_reference": string|null
        }}|null
      }}|null,

      "procedures": [{{"code": string, "system": string, "display": string}}]|null,
      "care_team": [{{"name": string|null, "role": string|null, "npi": string|null}}]|null,
      "immunizations": [{{"cvx": string, "mvx": string|null, "display": string, "date": string|null}}]|null,
      "udi_implants": [{{"udi": string, "description": string|null}}]|null,

      "assessment_and_plan": string|null,
      "goals": [string]|null,
      "health_concerns": [string]|null
    }}
    """

    generation_config = types.GenerateContentConfig(
        response_mime_type="application/json"  # force pure JSON
    )

    # Call model and parse JSON
    resp = client.models.generate_content(
        model=MODEL_NAME,
        contents=[pdf_part, prompt],
        config=generation_config,
    )

    try:
        summary = json.loads(resp.text)
    except Exception:
        cleaned = resp.text.strip()
        if cleaned.startswith("```"):
            cleaned = cleaned.strip("`")
            first_nl = cleaned.find("\n")
            if first_nl != -1:
                cleaned = cleaned[first_nl + 1 :]
        summary = json.loads(cleaned)

    # Stash into Colab globals
    global last_aux_filename, last_aux_bytes, last_summary
    last_aux_filename = aux.filename
    last_aux_bytes = aux_bytes
    last_summary = summary

    # 6) Kick off background persistence (optional)
    asyncio.create_task(
        long_processing(
            pdf_bytes=pdf_bytes,
            aux_bytes=aux_bytes,
            aux_filename=aux.filename or "aux_file",
            json_summary=summary,
        )
    )

    # Return the same UI contract
    return JSONResponse({"status": "received"}, status_code=202)


# ==============================
# Expose via ngrok and run server
# ==============================
public_url = ngrok.connect(8000).public_url
print("Gemini endpoint:", public_url)

import nest_asyncio
nest_asyncio.apply()

import uvicorn
uvicorn.run(app, host="0.0.0.0", port=8000)


ERROR:asyncio:Task exception was never retrieved
future: <Task finished name='Task-7' coro=<Server.serve() done, defined at /usr/local/lib/python3.12/dist-packages/uvicorn/server.py:69> exception=KeyboardInterrupt()>
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/uvicorn/main.py", line 580, in run
    server.run()
  File "/usr/local/lib/python3.12/dist-packages/uvicorn/server.py", line 67, in run
    return asyncio.run(self.serve(sockets=sockets))
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/nest_asyncio.py", line 30, in run
    return loop.run_until_complete(task)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/nest_asyncio.py", line 92, in run_until_complete
    self._run_once()
  File "/usr/local/lib/python3.12/dist-packages/nest_asyncio.py", line 133, in _run_once
    handle._run()
  File "/usr/lib/python3.12/asyncio/events.py", line 88, in _run
    se

Gemini endpoint: https://6fd1d4e6f44d.ngrok-free.app


INFO:     Started server process [8012]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


✅ Received: pdf=lab.pdf (320302 bytes), aux=Patient_Marco.bundle (1).json (29269 bytes)
INFO:     2607:f140:400:4f:751e:1c7d:dc81:1f53:0 - "POST /process HTTP/1.1" 202 Accepted
✅ Background processing finished & saved locally: 1759126596


INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [8012]


In [20]:
import pprint, json

# Dict summary
print("Summary:")
pprint.pprint(last_summary)

print("\nFile name:")
print(last_aux_filename)

print("\nBytes (pretty JSON):")
print(json.dumps(json.loads(last_aux_bytes.decode()), indent=4)[:1000])


Summary:
{'assessment_and_plan': None,
 'care_team': [{'name': 'Kathryn Pebanco', 'npi': None, 'role': 'NP'},
               {'name': 'Vannessa Cestino', 'npi': None, 'role': 'NP'}],
 'date_of_birth': '5/28/2004',
 'ethnicity': None,
 'goals': None,
 'health_concerns': None,
 'immunizations': [],
 'laboratory_results': [{'display': 'Induration',
                         'test_loinc': '3134-4',
                         'unit': 'mm',
                         'value': 0},
                        {'display': 'Negative',
                         'test_loinc': '3134-4',
                         'unit': None,
                         'value': None}],
 'laboratory_tests': [{'display': 'TB skin test', 'loinc_code': '3134-4'}],
 'medication_allergies': None,
 'medications': None,
 'patient_name': 'Marco Rotundo',
 'preferred_language': None,
 'problems': None,
 'procedures': [{'code': None,
                 'display': 'Tuberculin Skin Test',
                 'system': 'TST'}],
 'race': None,
 's

In [21]:
#for creating ids, shoutout to chatgpt
import secrets, uuid, hashlib

def generate_entry(prefix):
   return {
      "resource": {
          "resourceType": None,
          "id": f"{prefix}-{secrets.token_hex(4)}"
      }
  }

summary = last_summary     #nombre mas bonito
fhir_bundle = json.loads(last_aux_bytes.decode())  # bytes -> str -> dict

In [22]:
types = []
for i in fhir_bundle['entry']:
  types.append(i['resource']['resourceType'])

set(types)

{'Claim',
 'Condition',
 'Coverage',
 'DiagnosticReport',
 'DocumentReference',
 'Encounter',
 'ExplanationOfBenefit',
 'Observation',
 'Organization',
 'Patient',
 'Practitioner'}

Marco tiene que cambiar los keys de dic, para que sean como los de un archivo fhir real y que no hayan ningun NONE en los entries

In [23]:
# lee bien lo que te envie en whatsaap
# si no lo entiendes eres gay :v

#make a dict of all possible entry types and what summary keys can go in them
#skip patient for the demo

dic = {
    "Observation": ["smoking_status","laboratory_results","vital_signs"],

    "Condition": ["problems","health_concerns"],

    "MedicationRequest": ["medications"],

    "AllergyIntolerance": ["medication_allergies"],

    "ServiceRequest/Procedure": ["laboratory_tests","procedures"],

    "CareTeam": ["care_team"],

    "Immunization": ["immunizations"],

    "Device": ["udi_implants"],

    "CarePlan": ["assessment_and_plan"],

    "Goal": ["goals"]
}

# make a list of entries
entries = []

# loop through all possible entry types
for key in dic.keys():
  # generate an entry and give it an id
  entry = generate_entry(key[:3].lower())
  entry['resource']['resourceType'] = key
  # loop through all possible summary keys for the entry type
  for i in dic[key]:
    # if the value of the key isnt empty then add it
    if summary[i] is not None:
      entry['resource'][i] = summary[i]
  # if the new entry actual has stuff in it add it to entries
  if len(entry['resource'].keys()) > 2:
    # just for reference
    print(f"Generated a bundle of type: {key} \n")
    entries.append(entry)
    pprint.pprint(entry)
    print('\n \n \n')



Generated a bundle of type: Observation 

{'resource': {'id': 'obs-6305282d',
              'laboratory_results': [{'display': 'Induration',
                                      'test_loinc': '3134-4',
                                      'unit': 'mm',
                                      'value': 0},
                                     {'display': 'Negative',
                                      'test_loinc': '3134-4',
                                      'unit': None,
                                      'value': None}],
              'resourceType': 'Observation'}}

 
 

Generated a bundle of type: ServiceRequest/Procedure 

{'resource': {'id': 'ser-fc4a0f46',
              'laboratory_tests': [{'display': 'TB skin test',
                                    'loinc_code': '3134-4'}],
              'procedures': [{'code': None,
                              'display': 'Tuberculin Skin Test',
                              'system': 'TST'}],
              'resourceType': 'Service

In [24]:
# add new entries to fhir file
print(len(fhir_bundle['entry']))
for entry in entries:
  fhir_bundle['entry'].append(entry)
print(len(fhir_bundle['entry']))

25
30


Ahora lo que tienes que hacer Juan Diego es mandar el fhir_bundle al Ui y highlight los entries nuevos. Tambien mandar la nueva version del fhir file y borra la vieja.

Borra toda la data que entro por streamlit

In [16]:
!pip install flask





In [25]:
from flask import Flask, jsonify
from pyngrok import ngrok

app = Flask(__name__)

@app.route("/data", methods=["GET"])
def get_data():
    return jsonify(fhir_bundle), 200

if __name__ == "__main__":
    # Open a public ngrok tunnel on port 5000
    public_url = ngrok.connect(5000)
    print(" * ngrok tunnel URL:", public_url)

    # Run Flask app
    app.run(port=5000)

 * ngrok tunnel URL: NgrokTunnel: "https://ce5f010883be.ngrok-free.app" -> "http://localhost:5000"
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [29/Sep/2025 06:17:36] "GET /data HTTP/1.1" 200 -
