<a href="https://colab.research.google.com/github/guilhermebispo/nih-chestxray-label-validation/blob/main/app_web.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# NIH Chest X-ray Labeler — Colab (Public URL only)

This notebook sets up a **minimal Flask web app** to label NIH Chest X-ray images with **Portuguese labels**, collect **expert name + CRM**, and save annotations incrementally to CSV. Access is provided via a **public Cloudflared URL** (no account required).

**Output file:** `/content/web_labeler/data/labels_experts.csv`


## 1) Install dependencies

In [None]:
!pip -q install flask==3.0.3 pandas==2.2.2
!wget -q https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64.deb
!dpkg -i cloudflared-linux-amd64.deb >/dev/null 2>&1 || true
print('✅ Dependencies installed')

✅ Dependencies installed


## 2) Fetch sample CSV and images from your GitHub repo

In [None]:
!rm -rf nih-chestxray-label-validation
!git clone https://github.com/guilhermebispo/nih-chestxray-label-validation.git

import os, shutil
os.makedirs('/content/web_labeler/data', exist_ok=True)
os.makedirs('/content/web_labeler/static/images', exist_ok=True)

shutil.copy('/content/nih-chestxray-label-validation/sample_labels.csv', '/content/web_labeler/data/sample_labels.csv')

for f in os.listdir('/content/nih-chestxray-label-validation/images'):
    src = f'/content/nih-chestxray-label-validation/images/{f}'
    dst = f'/content/web_labeler/static/images/{f}'
    shutil.copy(src, dst)
print('✅ Copied CSV and images into /content/web_labeler/...')

Cloning into 'nih-chestxray-label-validation'...
remote: Enumerating objects: 20, done.
remote: Counting objects: 100% (20/20), done.
remote: Compressing objects: 100% (19/19), done.
remote: Total 20 (delta 1), reused 11 (delta 0), pack-reused 0 (from 0)
Receiving objects: 100% (20/20), 3.12 MiB | 9.04 MiB/s, done.
Resolving deltas: 100% (1/1), done.
✅ Copied CSV and images into /content/web_labeler/...


## 3) Create Flask app (PT-BR labels, name & CRM, autosave)

In [None]:
from pathlib import Path

# Destination file for the Flask application source defined in `app_code` below.
app_path = Path('/content/web_labeler_app.py')

app_code = '''from flask import Flask, request, render_template_string, redirect, url_for
import os, csv
from pathlib import Path
from datetime import datetime
import pandas as pd

# Filesystem layout of the labeler, all rooted at APP_DIR.
APP_DIR = Path('/content/web_labeler')
DATA_DIR = APP_DIR / 'data'
IMG_DIR  = APP_DIR / 'static' / 'images'
# CSV_NIH is read to obtain the list of image names ('Image Index' column);
# CSV_OUT is the file the submitted annotations are appended to.
CSV_NIH  = DATA_DIR / 'sample_labels.csv'
CSV_OUT  = DATA_DIR / 'labels_experts.csv'

# Make sure both directories exist as soon as the module is loaded.
DATA_DIR.mkdir(parents=True, exist_ok=True)
IMG_DIR.mkdir(parents=True, exist_ok=True)

# Portuguese label shown in the UI -> English label written to the
# 'rotulos_nih' column of the output CSV.
LABEL_MAP_PT2EN = {
    'Sem achado': 'No Finding',
    'Atelectasia': 'Atelectasis',
    'Cardiomegalia': 'Cardiomegaly',
    'Derrame pleural': 'Effusion',
    'Infiltração': 'Infiltration',
    'Massa': 'Mass',
    'Nódulo': 'Nodule',
    'Pneumonia': 'Pneumonia',
    'Pneumotórax': 'Pneumothorax',
    'Consolidação': 'Consolidation',
    'Edema': 'Edema',
    'Enfisema': 'Emphysema',
    'Fibrose': 'Fibrosis',
    'Espessamento pleural': 'Pleural_Thickening',
    'Hérnia': 'Hernia',
}
# Checkbox order in the UI follows the insertion order of the mapping above.
LABELS_PT = list(LABEL_MAP_PT2EN.keys())

# === Define the specialists and their CRMs here (edit as needed) ===
# Each entry is a (display name, CRM) pair offered in the specialist dropdown.
SPECIALISTS = [
    ('Dr. Ana Silva', 'CRM_ANA_000'),
    ('Dr. Bruno Souza', 'CRM_BRUNO_111'),
    ('Dra. Carla Lima', 'CRM_CARLA_222'),
]
# ========================================================================

# Flask application; static files (including the images) are served from
# APP_DIR/static under the /static URL prefix.
app = Flask(
    __name__,
    static_folder=str(APP_DIR / 'static'),
    static_url_path='/static'
)

def load_available_images():
    """Return the file names of the images that can be labeled.

    When the sample CSV exists, the names come from its 'Image Index' column
    (in CSV order), keeping only those that are present in IMG_DIR.
    Otherwise every file in IMG_DIR with a known image extension is returned,
    sorted by name.
    """
    if CSV_NIH.exists():
        listed = pd.read_csv(CSV_NIH)['Image Index'].astype(str).tolist()
        return [name for name in listed if (IMG_DIR / name).exists()]

    # No CSV available: fall back to scanning the image directory.
    image_suffixes = {'.png','.jpg','.jpeg','.bmp','.tif','.tiff'}
    found = [entry.name for entry in IMG_DIR.glob('*') if entry.suffix.lower() in image_suffixes]
    found.sort()
    return found

def ensure_out_header():
    """Create the output CSV with its header row when it is missing or empty.

    An existing, non-empty file is left untouched so earlier annotations
    are preserved.
    """
    if CSV_OUT.exists() and CSV_OUT.stat().st_size > 0:
        return
    header = ['timestamp','especialista','crm','imagem','rotulos_pt','rotulos_nih','acao']
    with open(CSV_OUT, 'w', newline='', encoding='utf-8') as out_file:
        csv.writer(out_file).writerow(header)

@app.route('/')
def index():
    """Render the labeling page for the image at position ``i``.

    Query parameters:
        i: zero-based position in the list of available images (default 0).
        especialista, crm: specialist name and CRM carried over from the
            previous submission so the selection persists between pages.

    Returns a short HTML message when there are no images or when ``i`` is
    past the end of the list; otherwise the rendered labeling form.
    """
    # A malformed ``i`` falls back to 0 instead of raising ValueError
    # (HTTP 500); negatives are clamped so ``files[i]`` cannot wrap around
    # to the end of the list and show "(0 / N)".
    i = max(request.args.get('i', 0, type=int), 0)
    # values used to pre-fill the specialist selection (persistence)
    selected_esp = request.args.get('especialista', '') or ''
    selected_crm = request.args.get('crm', '') or ''
    files = load_available_images()
    total = len(files)
    if total == 0:
        return '<h3>Nenhuma imagem encontrada em static/images. Coloque as imagens lá (nomes iguais ao \"Image Index\").</h3>'
    if i >= total:
        return '<h3>✅ Fim da lista. <a href=\"/?i=0\">Reiniciar</a></h3>'
    img_name = files[i]

    # known names; anything else is treated as the "OUTRO" (other) option
    specialist_names = [name for name, _ in SPECIALISTS]

    html = """
<!doctype html>
<html lang='pt-br'>
<head>
  <meta charset='utf-8'>
  <title>Rotulador de Raios-X</title>
  <style>
    :root { --gap:18px; --card-pad:18px; }
    html,body { height:100%; margin:0; font-family: Arial, sans-serif; background:#f5f7fb; color:#222; }
    .page { padding:18px; box-sizing:border-box; width:100%; }
    .full-card { width:100%; box-sizing:border-box; border-radius:10px; background:#eef6ff; border-left:6px solid #2563eb; padding:18px; margin-bottom:16px; box-shadow:0 0 8px rgba(0,0,0,0.06); }
    .card { width:100%; box-sizing:border-box; border-radius:10px; background:#fff; padding:var(--card-pad); box-shadow:0 0 8px rgba(0,0,0,0.06); margin-bottom:12px; }
    .header-title { margin:0; font-size:1.8rem; }
    .meta { margin-top:6px; font-size:1rem; color:#333; }
    .content-row { display:flex; gap:var(--gap); margin-top:12px; align-items:flex-start; }
    .left { flex:0 0 55%; }
    .right { flex:1; max-width:43%; }
    .image-wrap img { width:100%; height:auto; border-radius:8px; display:block; margin:0 auto; }
    .image-name { text-align:center; margin-top:8px; font-weight:600; color:#333; }
    .labels { padding:10px; max-height:68vh; overflow:auto; }
    .labels label { display:block; margin-bottom:8px; font-size:0.98rem; }
    .controls { margin-top:14px; display:flex; gap:8px; flex-wrap:wrap; justify-content:flex-start; }
    button { margin:0; padding:10px 14px; border:none; border-radius:8px; cursor:pointer; }
    .save { background:#16a34a; color:#fff; }
    .skip { background:#f59e0b; color:#fff; }
    .unknown { background:#6b7280; color:#fff; }
    @media (max-width:900px){
      .content-row { flex-direction:column; }
      .right { max-width:none; }
      .left { flex:0 0 auto; }
      .image-wrap img { width:100%; }
    }
  </style>
</head>
<body>
  <div class="page">
    <div class="full-card">
      <h1 class="header-title">NIH Chest X-ray Labeler</h1>
      <div class="meta">
        <div><strong>Autor:</strong> <span style="font-size:16px;">Guilherme Dantas Bispo</span></div>
        <div><strong>Afiliação:</strong> Doutorado, Departamento de Engenharia Elétrica (PPGEE), UnB</div>
      </div>
    </div>

    <!-- card exclusivo para identificação do médico -->
    <div class="card">
      <div style="font-weight:600;margin-bottom:8px;">Identificação do Médico</div>
      <label><b>Especialista:</b></label><br>
      <select id="esp_select_card" style="width:100%;padding:8px;border-radius:6px;border:1px solid #ccc;">
        <option value=''>-- selecione --</option>
        {% for name, crm in specialists %}
          <option value='{{name}}' data-crm='{{crm}}' {% if selected_esp == name %}selected{% endif %}>{{name}} — {{crm}}</option>
        {% endfor %}
        <option value='OUTRO' data-crm='' {% if selected_esp and selected_esp not in specialist_names %}selected{% endif %}>Outro...</option>
      </select>
      <input type="text" id="esp_other_card" placeholder="Digite nome do especialista" style="width:100%;padding:8px;margin-top:8px;border:1px solid #ccc;border-radius:6px; {% if not (selected_esp and selected_esp not in specialist_names) %}display:none;{% endif %}" value="{% if selected_esp and selected_esp not in specialist_names %}{{selected_esp}}{% endif %}">
      <div style="margin-top:8px;color:#666;font-size:0.9rem;">Seleção obrigatória — será mantida ao avançar.</div>
    </div>

    <div class="card">
      <form id="labelForm" method="POST" action='{{ url_for("submit") }}'>
        <input type='hidden' name='i' value='{{i}}'>
        <input type='hidden' name='img' value='{{img_name}}'>
        <!-- hidden inputs para receber especialista/CRM do card acima -->
        <input type='hidden' name='especialista' id='especialista_hidden' value='{{selected_esp}}'>
        <input type='hidden' name='crm' id='crm_hidden' value='{{selected_crm}}'>

        <h2 style="margin-top:0;">Rotulagem de Raios-X ({{i+1}} / {{total}})</h2>

        <div class="content-row">
          <div class="left image-wrap">
            <img src='{{ url_for("static", filename="images/" + img_name) }}' alt="imagem">
            <div class="image-name">{{img_name}}</div>
          </div>
          <div class="right">
            <div style="font-weight:600;margin-bottom:8px;">Patologias (em português)</div>
            <div class="labels">
              {% for lbl in labels %}
                <label><input type="checkbox" name="labels" value="{{lbl}}"> {{lbl}}</label>
              {% endfor %}
            </div>
            <div style="font-size:0.9rem;color:#666;margin-top:8px;"><i>'Sem achado' é exclusivo.</i></div>

            <!-- botões abaixo das patologias -->
            <div class="controls">
              <button class="save" name="action" value="SAVE" type="submit">Salvar e Próxima</button>
              <button class="skip" name="action" value="SKIP" formmethod="post">Pular</button>
              <button class="unknown" name="action" value="UNKNOWN" formmethod="post">Indefinido</button>
            </div>

          </div>
        </div>

      </form>
    </div>

  </div>

  <script>
    const espSelectCard = document.getElementById('esp_select_card');
    const espOtherCard = document.getElementById('esp_other_card');
    const espHidden = document.getElementById('especialista_hidden');
    const crmHidden = document.getElementById('crm_hidden');
    const form = document.getElementById('labelForm');

    espSelectCard.addEventListener('change', ()=>{
      const sel = espSelectCard.options[espSelectCard.selectedIndex];
      const crmVal = sel ? sel.getAttribute('data-crm') || '' : '';
      if(espSelectCard.value === 'OUTRO'){
        espOtherCard.style.display = 'block';
        espOtherCard.required = true;
        espHidden.value = '';
        crmHidden.value = '';
      } else {
        espOtherCard.style.display = 'none';
        espOtherCard.required = false;
        espHidden.value = espSelectCard.value;
        crmHidden.value = crmVal;
      }
    });

    form.addEventListener('submit', (e)=>{
      // verifica seleção do médico (obrigatória)
      if(espSelectCard.value === 'OUTRO'){
        if(!espOtherCard.value.trim()){
          alert('Por favor informe o nome do especialista.');
          e.preventDefault();
          return;
        }
        espHidden.value = espOtherCard.value.trim();
        crmHidden.value = '';
      } else {
        // se usuário não selecionou nada
        if(!espHidden.value || espHidden.value.trim()===''){
          alert('Por favor selecione o especialista.');
          e.preventDefault();
          return;
        }
      }
      // antes de enviar, anexa especialista/crm como query params na próxima página (serão adicionados pelo servidor)
    });

    // 'Sem achado' exclusivo
    const boxes = document.querySelectorAll('input[type=checkbox]');
    const noneBox = Array.from(boxes).find(b => b.value === 'Sem achado');
    const others = Array.from(boxes).filter(b => b.value !== 'Sem achado');
    if(noneBox){
      noneBox.addEventListener('change', ()=>{ if(noneBox.checked){ others.forEach(o=>o.checked=false); }});
      others.forEach(o=>o.addEventListener('change', ()=>{ if(o.checked && noneBox.checked){ noneBox.checked=false; }}));
    }
  </script>
</body></html>
"""
    return render_template_string(
        html,
        img_name=img_name,
        labels=LABELS_PT,
        specialists=SPECIALISTS,
        i=i,
        total=total,
        selected_esp=selected_esp,
        selected_crm=selected_crm,
        specialist_names=specialist_names
    )

@app.post('/submit')
def submit():
    """Append one annotation row to the output CSV and redirect to the next image.

    Form fields read: ``i`` (current image position), ``img`` (image name),
    ``especialista``, ``crm``, ``action`` (defaults to SAVE) and the repeated
    ``labels`` checkboxes. The specialist name and CRM are forwarded as
    query parameters so the selection persists on the next page.
    """
    # Local import: the module header only imports ``datetime`` itself.
    from datetime import timezone

    ensure_out_header()
    # A malformed or negative index falls back to 0 rather than raising
    # ValueError (HTTP 500) on a hand-crafted request.
    i = max(request.form.get('i', 0, type=int), 0)
    img = request.form.get('img')
    esp = request.form.get('especialista','').strip()
    crm = request.form.get('crm','').strip()
    action = request.form.get('action','SAVE').upper()
    # Keep only labels the app knows about: an unexpected value in the POST
    # body would otherwise raise KeyError in the PT -> EN lookup below.
    labels_pt = [lbl for lbl in request.form.getlist('labels') if lbl in LABEL_MAP_PT2EN]
    # 'Sem achado' (no finding) is exclusive and overrides any other label.
    if 'Sem achado' in labels_pt:
        labels_pt = ['Sem achado']
    labels_nih = [LABEL_MAP_PT2EN[lbl] for lbl in labels_pt]
    # Naive UTC timestamp in the same format datetime.utcnow().isoformat()
    # produced, without calling the deprecated utcnow().
    timestamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
    with open(CSV_OUT, 'a', newline='', encoding='utf-8') as f:
        w = csv.writer(f)
        w.writerow([timestamp, esp, crm, img, '|'.join(labels_pt), '|'.join(labels_nih), action])
    # Carry the specialist selection over to the next page via query params.
    if esp:
        return redirect(url_for('index', i=i+1, especialista=esp, crm=crm))
    return redirect(url_for('index', i=i+1))

# Run Flask in background thread (port 7860). Cloudflared will expose it publicly.
def run_flask():
    """Serve the app on all interfaces at port 7860, with debugger and reloader off."""
    server_options = dict(host='0.0.0.0', port=7860, debug=False, use_reloader=False)
    app.run(**server_options)

if __name__ == '__main__':
    import threading, time
    # Serve from a daemon thread so executing this file returns control to
    # the caller instead of blocking on the server loop.
    threading.Thread(target=run_flask, daemon=True).start()
    # Brief pause to give the server time to start before announcing it.
    time.sleep(2)
    print('✅ Flask started on http://127.0.0.1:7860')
'''

# Write the Flask application source to disk as UTF-8 (it contains accented
# Portuguese text) so a later cell can execute it from that path.
app_path.write_text(app_code, encoding='utf-8')
print('✅ Saved Flask app to', str(app_path))

✅ Saved Flask app to /content/web_labeler_app.py


## 4) Start public URL (Cloudflared)

In [None]:
import subprocess, re, time, threading, runpy

# Start Flask in background by executing the app file
def _run_app():
    """Execute the saved app file as ``__main__`` so its startup block runs."""
    runpy.run_path('/content/web_labeler_app.py', run_name='__main__')
threading.Thread(target=_run_app, daemon=True).start()
time.sleep(2)

print('🚇 Starting cloudflared tunnel...')
# stderr is merged into stdout so the tunnel URL is found on either stream.
proc = subprocess.Popen(
    ['cloudflared', 'tunnel', '--url', 'http://localhost:7860', '--no-autoupdate'],
    stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True
)
public_url = None
tunnel_log = []  # lines read so far, shown to the user if no URL is found
for _ in range(1200):
    line = proc.stdout.readline()
    if not line:
        # An empty read means EOF. Once cloudflared has exited no URL can
        # arrive any more, so stop instead of sleeping through every
        # remaining iteration.
        if proc.poll() is not None:
            break
        time.sleep(0.1)
        continue
    tunnel_log.append(line)
    m = re.search(r'(https://[a-z0-9-]+\.trycloudflare\.com)', line.strip())
    if m:
        public_url = m.group(1)
        print('🌐 Public URL:', public_url)
        break

if public_url:
    # Keep consuming cloudflared's output in the background: with nobody
    # reading the pipe, the process would block on its next write once the
    # OS pipe buffer fills up.
    def _drain_tunnel_output():
        """Discard the remaining cloudflared output until the pipe closes."""
        for _ in proc.stdout:
            pass
    threading.Thread(target=_drain_tunnel_output, daemon=True).start()
else:
    # Print what cloudflared wrote so the message below has logs to refer to.
    print(''.join(tunnel_log), end='')
    print('⚠️ Could not capture public URL. Check logs above.')

 * Serving Flask app 'web_labeler_app'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:7860
 * Running on http://172.28.0.12:7860
INFO:werkzeug:Press CTRL+C to quit


🚇 Starting cloudflared tunnel...
✅ Flask started on http://127.0.0.1:7860
🌐 Public URL: https://stranger-judy-hope-pull.trycloudflare.com
