In [None]:
! pip install fastapi "uvicorn[standard]" scikit-learn numpy
! pip install httpx


Collecting fastapi
  Using cached fastapi-0.116.1-py3-none-any.whl.metadata (28 kB)
Collecting scikit-learn
  Using cached scikit_learn-1.7.1-cp312-cp312-win_amd64.whl.metadata (11 kB)
Collecting numpy
  Using cached numpy-2.3.2-cp312-cp312-win_amd64.whl.metadata (60 kB)
Collecting uvicorn[standard]
  Using cached uvicorn-0.35.0-py3-none-any.whl.metadata (6.5 kB)
Collecting starlette<0.48.0,>=0.40.0 (from fastapi)
  Using cached starlette-0.47.2-py3-none-any.whl.metadata (6.2 kB)
Collecting pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4 (from fastapi)
  Using cached pydantic-2.11.7-py3-none-any.whl.metadata (67 kB)
Collecting typing-extensions>=4.8.0 (from fastapi)
  Using cached typing_extensions-4.14.1-py3-none-any.whl.metadata (3.0 kB)
Collecting click>=7.0 (from uvicorn[standard])
  Using cached click-8.2.1-py3-none-any.whl.metadata (2.5 kB)
Collecting h11>=0.8 (from uvicorn[standard])
  Using cached h11-0.16.0-py3-none-any.whl.metadata (8.3 kB)
Collecting httptools>=


[notice] A new release of pip is available: 25.0.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
from __future__ import annotations
from typing import List, Dict, Optional, Any
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, Field
from uuid import uuid4
from datetime import datetime


class PetitionPreferences(BaseModel):
    # Optional preferences attached to a petition; every field defaults to None.
    field: Optional[str] = Field(
        default=None,
        description="Primary technical field, e.g., 'Artificial Intelligence'",
    )
    priority: Optional[str] = Field(
        default=None,
        description="High/Medium/Low",
    )
    review_style: Optional[str] = Field(
        default=None,
        description="Brief / Detailed / Line-by-line",
    )

# Request body accepted by the POST /submit-petition endpoint.
class SubmitPetitionRequest(BaseModel):
    client_id: str  # identifier of the submitting client; petitions are grouped under it
    petition_text: str  # petition body; the endpoint rejects blank text with HTTP 400
    preferences: Optional[PetitionPreferences] = None  # optional preferences; None when omitted

# One ranked reviewer entry inside a MatchResponse.
class MatchItem(BaseModel):
    reviewer_id: str  # key of the reviewer in the REVIEWERS store
    name: str  # reviewer display name
    expertise: List[str]  # the reviewer's expertise tags, as stored
    score: float  # combined weighted score (the match endpoint rounds it to 4 decimals)
    breakdown: Dict[str, float]  # per-component scores: content_similarity, expertise_match, availability

# Response body of GET /match/{client_id}.
class MatchResponse(BaseModel):
    client_id: str  # client the matches were computed for
    top_k: int  # number of matches actually returned (the endpoint sets it to len(matches), or 0 when there are none)
    matches: List[MatchItem]  # reviewers ordered by descending score

# A reviewer that petitions can be matched against.
class Reviewer(BaseModel):
    reviewer_id: str  # unique id; used as the key in the REVIEWERS store
    name: str  # display name
    expertise_tags: List[str]  # tags used for the tag-overlap score and included in the TF-IDF text
    notes: Optional[str] = None  # optional free text appended to the reviewer's TF-IDF text
    max_capacity: int = 5  # denominator of the availability factor; values <= 0 yield availability 0.0
    current_load: int = 0  # numerator of the availability factor (1 - current_load / max_capacity, clamped to 0..1)

# In-memory stores shared by the endpoints (module-level dictionaries, nothing is persisted).

# reviewer_id -> Reviewer
REVIEWERS: Dict[str, Reviewer] = {}

# petition_id -> stored petition record
PETITIONS: Dict[str, Dict[str, Any]] = {}

# client_id -> list of that client's petition_ids
CLIENT_TO_PETITIONS: Dict[str, List[str]] = {}


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import re


def normalize_tags(tags: List[str]) -> List[str]:
    """Return the tags trimmed, lower-cased, and with each whitespace run collapsed to one space."""
    cleaned: List[str] = []
    for raw_tag in tags:
        lowered = raw_tag.strip().lower()
        cleaned.append(re.sub(r"\s+", " ", lowered))
    return cleaned

def reviewer_corpus_text(r: Reviewer) -> str:
    """Build a reviewer's text document: normalized expertise tags followed by the notes.

    The tags are joined with single spaces, the notes (empty string when missing)
    are appended after one space, and the result is stripped of surrounding whitespace.
    """
    tag_text = " ".join(normalize_tags(r.expertise_tags))
    notes_text = r.notes if r.notes else ""
    return f"{tag_text} {notes_text}".strip()

def petition_doc_text(petition_text: str, prefs: Optional[PetitionPreferences]) -> str:
    """Return the petition text followed by a newline and the preferred field.

    The field part is empty when `prefs` is missing or has no field set, so the
    result then ends with the bare newline.
    """
    if prefs and prefs.field:
        field_part = prefs.field
    else:
        field_part = ""
    return f"{petition_text}\n{field_part}"

def tag_overlap_score(petition_text: str, reviewer_tags: List[str]) -> float:
    """Return the fraction of reviewer tags that occur in the petition text (0.0 to 1.0).

    Matching is case-insensitive and phrase-based rather than a raw substring test:
      * a tag only matches when it is not embedded in a longer word, so the tag
        "AI" does not match inside "maintain";
      * the words of a multi-word tag may be separated by any run of whitespace
        in the text (spaces, tabs, newlines);
      * blank tags are ignored: they are neither counted as hits nor included in
        the denominator.

    Args:
        petition_text: Text to search in.
        reviewer_tags: Tags to look for; they may contain arbitrary case and spacing.

    Returns:
        hits / number of non-blank tags, or 0.0 when there are no non-blank tags.
    """
    text = petition_text.lower()
    hits = 0
    counted = 0
    for tag in reviewer_tags:
        words = tag.lower().split()
        if not words:
            # An empty pattern would match everywhere, so skip blank tags entirely.
            continue
        counted += 1
        # Lookarounds are used instead of \b so that tags which start or end with
        # a non-word character (e.g. "c++") can still match.
        pattern = r"(?<!\w)" + r"\s+".join(re.escape(word) for word in words) + r"(?!\w)"
        if re.search(pattern, text):
            hits += 1
    if counted == 0:
        return 0.0
    return hits / counted

def availability_factor(current_load: int, max_capacity: int) -> float:
    """Return the free share of a reviewer's capacity, clamped to the range 0.0..1.0.

    A non-positive `max_capacity` yields 0.0.
    """
    if max_capacity <= 0:
        return 0.0
    free_share = 1.0 - (current_load / max_capacity)
    if free_share < 0.0:
        return 0.0
    if free_share > 1.0:
        return 1.0
    return free_share

def compute_scores_for_petition(petition_text: str, prefs: Optional[PetitionPreferences]) -> List[Dict[str, Any]]:
    """Score every reviewer in REVIEWERS against a petition and rank them.

    The petition document and one document per reviewer are vectorized together
    with TF-IDF (English stop words, unigrams and bigrams, at most 5000 features).
    Each reviewer's final score is
        0.5 * content_similarity + 0.3 * expertise_match + 0.2 * availability

    Args:
        petition_text: Raw petition text.
        prefs: Optional preferences; only used through `petition_doc_text`.

    Returns:
        One dict per reviewer with keys "reviewer_id", "name", "expertise",
        "score" and "breakdown", sorted by descending score. Empty list when
        REVIEWERS is empty.
    """
    reviewer_items = list(REVIEWERS.items())
    if not reviewer_items:
        return []

    query_doc = petition_doc_text(petition_text, prefs)
    documents = [query_doc]
    documents.extend(reviewer_corpus_text(reviewer) for _, reviewer in reviewer_items)

    tfidf = TfidfVectorizer(stop_words="english", ngram_range=(1, 2), max_features=5000)
    matrix = tfidf.fit_transform(documents)

    # Row 0 is the petition; the remaining rows follow the order of reviewer_items.
    similarities = cosine_similarity(matrix[0], matrix[1:]).flatten()

    ranked = []
    for (reviewer_id, reviewer), similarity in zip(reviewer_items, similarities):
        content_similarity = float(similarity)
        exp_match = tag_overlap_score(query_doc, reviewer.expertise_tags)
        avail = availability_factor(reviewer.current_load, reviewer.max_capacity)

        score = (0.5 * content_similarity) + (0.3 * exp_match) + (0.2 * avail)

        breakdown = {
            "content_similarity": float(content_similarity),
            "expertise_match": float(exp_match),
            "availability": float(avail),
        }
        ranked.append({
            "reviewer_id": reviewer_id,
            "name": reviewer.name,
            "expertise": reviewer.expertise_tags,
            "score": float(score),
            "breakdown": breakdown,
        })

    # Highest score first; the sort is stable, so ties keep REVIEWERS order.
    return sorted(ranked, key=lambda entry: entry["score"], reverse=True)


In [14]:
import datetime

# Application object; the route decorators in this cell register their handlers on it.
app = FastAPI(
    version="0.1.0",
    title="Profile matching",
)

@app.post("/submit-petition")
def submit_petition(req: SubmitPetitionRequest):
    """Store a new petition in memory and return its generated id.

    The petition is saved in PETITIONS under a fresh UUID and that id is appended
    to the client's list in CLIENT_TO_PETITIONS.

    Args:
        req: Validated request body.

    Returns:
        A dict with "status", "message" and the new "petition_id".

    Raises:
        HTTPException: 400 when client_id or petition_text is empty or whitespace-only.
    """
    # Imported locally so the timestamp does not depend on whether the notebook-level
    # name `datetime` currently refers to the module or to the class.
    from datetime import datetime, timezone

    # Basic validation: both values must contain something other than whitespace.
    if not req.client_id.strip() or not req.petition_text.strip():
        raise HTTPException(status_code=400, detail="client_id and petition_text are required.")

    petition_id = str(uuid4())
    PETITIONS[petition_id] = {
        "petition_id": petition_id,
        "client_id": req.client_id,
        "petition_text": req.petition_text,
        # model_dump() is the Pydantic v2 replacement for the deprecated .dict().
        "preferences": req.preferences.model_dump() if req.preferences else None,
        "created_at": datetime.now(timezone.utc),
    }
    CLIENT_TO_PETITIONS.setdefault(req.client_id, []).append(petition_id)
    return {"status": "success", "message": "Petition submitted successfully", "petition_id": petition_id}

@app.get("/match/{client_id}", response_model=MatchResponse)
def match_reviewers(client_id: str, top_k: int = 5):
    """Return the best-scoring reviewers for the client's most recent petition.

    Args:
        client_id: Client whose latest petition is matched.
        top_k: Maximum number of matches to return; values below 1 are treated as 1.

    Returns:
        A MatchResponse whose `top_k` is the number of matches actually returned
        (0 when no reviewer scores are available).

    Raises:
        HTTPException: 404 when the client has no stored petition.
    """
    client_petitions = CLIENT_TO_PETITIONS.get(client_id)
    if not client_petitions:
        raise HTTPException(status_code=404, detail="No petition found for this client_id.")

    # The most recently submitted petition is the last id in the client's list.
    latest = PETITIONS[client_petitions[-1]]
    stored_prefs = latest["preferences"]
    prefs = PetitionPreferences(**stored_prefs) if stored_prefs else None

    ranked = compute_scores_for_petition(petition_text=latest["petition_text"], prefs=prefs)
    if not ranked:
        return MatchResponse(client_id=client_id, top_k=0, matches=[])

    limit = top_k if top_k > 1 else 1
    matches = []
    for entry in ranked[:limit]:
        rounded_breakdown = {}
        for component, value in entry["breakdown"].items():
            rounded_breakdown[component] = round(value, 4)
        matches.append(
            MatchItem(
                reviewer_id=entry["reviewer_id"],
                name=entry["name"],
                expertise=entry["expertise"],
                score=round(entry["score"], 4),
                breakdown=rounded_breakdown,
            )
        )
    return MatchResponse(client_id=client_id, top_k=len(matches), matches=matches)


In [None]:
def seed_reviewers():
    """Empty REVIEWERS and fill it with the four sample reviewers, keyed by reviewer_id."""
    # Columns: reviewer_id, name, expertise_tags, notes, max_capacity, current_load
    sample_rows = [
        (
            "rev001",
            "Dr. Amina Rahman",
            ["NIW", "Immigration Law", "Artificial Intelligence", "Machine Learning", "National Security"],
            "NIW focus for AI/ML; published guidance on substantial merit & national importance.",
            6,
            2,
        ),
        (
            "rev002",
            "John Carter",
            ["NIW", "Biomedical", "Public Health", "USCIS", "Evidence Synthesis"],
            "Experienced with Letters of Recommendation and policy impact framing.",
            5,
            4,
        ),
        (
            "rev003",
            "Sophia Lee",
            ["NIW", "Civil Engineering", "Infrastructure", "Transportation", "Environmental Impact"],
            "Strong with national importance narratives for infrastructure and resilience.",
            4,
            1,
        ),
        (
            "rev004",
            "Miguel Alvarez",
            ["NIW", "Economics", "Entrepreneurship", "Startup Policy", "Commercialization"],
            "Focus on market impact, job creation, and commercialization arguments.",
            3,
            0,
        ),
    ]

    REVIEWERS.clear()
    # Build every reviewer before inserting any of them into the store.
    sample_reviewers = [
        Reviewer(
            reviewer_id=reviewer_id,
            name=name,
            expertise_tags=tags,
            notes=notes,
            max_capacity=capacity,
            current_load=load,
        )
        for reviewer_id, name, tags, notes, capacity, load in sample_rows
    ]
    for reviewer in sample_reviewers:
        REVIEWERS[reviewer.reviewer_id] = reviewer

# Load the sample reviewers, then display the ids now present in the store.
seed_reviewers()
list(REVIEWERS)

dict_values([Reviewer(reviewer_id='rev001', name='Dr. Amina Rahman', expertise_tags=['NIW', 'Immigration Law', 'Artificial Intelligence', 'Machine Learning', 'National Security'], notes='NIW focus for AI/ML; published guidance on substantial merit & national importance.', max_capacity=6, current_load=2), Reviewer(reviewer_id='rev002', name='John Carter', expertise_tags=['NIW', 'Biomedical', 'Public Health', 'USCIS', 'Evidence Synthesis'], notes='Experienced with Letters of Recommendation and policy impact framing.', max_capacity=5, current_load=4), Reviewer(reviewer_id='rev003', name='Sophia Lee', expertise_tags=['NIW', 'Civil Engineering', 'Infrastructure', 'Transportation', 'Environmental Impact'], notes='Strong with national importance narratives for infrastructure and resilience.', max_capacity=4, current_load=1), Reviewer(reviewer_id='rev004', name='Miguel Alvarez', expertise_tags=['NIW', 'Economics', 'Entrepreneurship', 'Startup Policy', 'Commercialization'], notes='Focus on mark

In [16]:
import httpx
from fastapi.testclient import TestClient

# Exercise both endpoints in-process through FastAPI's TestClient.
client = TestClient(app)

demo_petition_text = (
    "My research in artificial intelligence focuses on robust perception for autonomous systems "
    "with applications in national security and critical infrastructure resilience. "
    "I collaborate with US defense labs and have publications at NeurIPS and ICRA. "
    "My work enables safer autonomous navigation and situational awareness."
)
demo_preferences = {
    "field": "Artificial Intelligence, Autonomous Systems, National Security",
    "priority": "High",
    "review_style": "Detailed",
}
payload = {
    "client_id": "client_123",
    "petition_text": demo_petition_text,
    "preferences": demo_preferences,
}

# 1) Submit the petition.
resp_submit = client.post("/submit-petition", json=payload)
print("Submit status:", resp_submit.status_code)
print(resp_submit.json())

# 2) Request the three best reviewer matches for the same client.
resp_match = client.get(f"/match/{payload['client_id']}", params={"top_k": 3})
print("\nMatch status:", resp_match.status_code)
print(resp_match.json())


Submit status: 200
{'status': 'success', 'message': 'Petition submitted successfully', 'petition_id': '091e6511-0ce3-49fd-8a17-9a0919246614'}

Match status: 200
{'client_id': 'client_123', 'top_k': 3, 'matches': [{'reviewer_id': 'rev001', 'name': 'Dr. Amina Rahman', 'expertise': ['NIW', 'Immigration Law', 'Artificial Intelligence', 'Machine Learning', 'National Security'], 'score': 0.3355, 'breakdown': {'content_similarity': 0.1643, 'expertise_match': 0.4, 'availability': 0.6667}}, {'reviewer_id': 'rev003', 'name': 'Sophia Lee', 'expertise': ['NIW', 'Civil Engineering', 'Infrastructure', 'Transportation', 'Environmental Impact'], 'score': 0.2522, 'breakdown': {'content_similarity': 0.0844, 'expertise_match': 0.2, 'availability': 0.75}}, {'reviewer_id': 'rev004', 'name': 'Miguel Alvarez', 'expertise': ['NIW', 'Economics', 'Entrepreneurship', 'Startup Policy', 'Commercialization'], 'score': 0.2, 'breakdown': {'content_similarity': 0.0, 'expertise_match': 0.0, 'availability': 1.0}}]}


C:\Users\WALTON\AppData\Local\Temp\ipykernel_37268\3309269642.py:16: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  "preferences": req.preferences.dict() if req.preferences else None,
