In [1]:
! pip install fastapi "uvicorn[standard]" scikit-learn numpy

! pip install httpx

from __future__ import annotations

from typing import List, Dict, Optional, Any

from fastapi import FastAPI, HTTPException

from pydantic import BaseModel, Field

from uuid import uuid4

from datetime import datetime




[notice] A new release of pip is available: 25.0.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip





[notice] A new release of pip is available: 25.0.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:

class SOPPreferences(BaseModel):
    """Optional preferences a client may attach to an SOP submission.

    Every field is optional and defaults to ``None``.
    """

    # Free-text field of study. sop_doc_text() appends it to the SOP text, so
    # it takes part in both the TF-IDF similarity and the tag-overlap score.
    field: Optional[str] = Field(None, description="Primary technical field, e.g., 'Artificial Intelligence'")

    # Stored with the submission; not read by the scoring helpers visible in this notebook.
    priority: Optional[str] = Field(None, description="High/Medium/Low")

    # Stored with the submission; not read by the scoring helpers visible in this notebook.
    review_style: Optional[str] = Field(None, description="Brief / Detailed / Line-by-line")



class SubmitSOPRequest(BaseModel):
    """Request body accepted by ``POST /submit-sop``."""

    # Caller-supplied identifier; submissions are grouped under it in CLIENT_TO_SOP.
    client_id: str

    # The SOP text that reviewers are matched against; the endpoint rejects blank text.
    sop_text: str

    # Optional matching preferences; stored as a plain dict (or None) alongside the text.
    preferences: Optional[SOPPreferences] = None



class MatchItem(BaseModel):
    """One ranked reviewer inside a match response."""

    reviewer_id: str

    name: str

    # The reviewer's expertise tags, copied from Reviewer.expertise_tags.
    expertise: List[str]

    # Combined weighted score; the /match endpoint rounds it to 4 decimals.
    score: float

    # Per-component scores keyed by "content_similarity", "expertise_match"
    # and "availability" (also rounded to 4 decimals by the endpoint).
    breakdown: Dict[str, float]



class MatchResponse(BaseModel):
    """Response body returned by ``GET /match/{client_id}``."""

    client_id: str

    # Number of matches actually returned (len(matches)), which can be smaller
    # than the top_k value the caller asked for.
    top_k: int

    # Reviewers ordered by descending score.
    matches: List[MatchItem]



class Reviewer(BaseModel):
    """A reviewer that SOP submissions can be matched against."""

    # Key under which the reviewer is stored in REVIEWERS.
    reviewer_id: str

    name: str

    # Expertise labels; used for the reviewer's TF-IDF text and for tag-overlap scoring.
    expertise_tags: List[str]

    # Optional free-text description appended to the tags when building the reviewer's text.
    notes: Optional[str] = None

    # Capacity and load feed availability_factor(): 1 - current_load / max_capacity,
    # clamped to [0, 1]; a max_capacity <= 0 yields an availability of 0.
    max_capacity: int = 5

    current_load: int = 0  # number of reviews currently assigned; lowers the availability factor



# In-memory stores shared by the endpoints. They are plain module-level dicts,
# so their contents live only as long as the kernel/process.

# sop_id -> stored submission record; submit_sop writes the keys
# "sop_id", "client_id", "sop_text", "preferences" and "created_at".

SOP: Dict[str, Dict[str, Any]] = {}



# client_id -> list of that client's sop_ids in submission order (newest last)

CLIENT_TO_SOP: Dict[str, List[str]] = {}



# reviewer_id -> Reviewer; iteration order of this dict is the order reviewers are scored in

REVIEWERS: Dict[str, Reviewer] = {}

from sklearn.feature_extraction.text import TfidfVectorizer

from sklearn.metrics.pairwise import cosine_similarity

import re

In [None]:
def normalize_tags(tags: List[str]) -> List[str]:
    """Return the tags trimmed, lower-cased and with whitespace runs collapsed.

    Args:
        tags: Raw tag strings.

    Returns:
        A new list of the same length and order; each tag has leading/trailing
        whitespace removed, is lower-cased, and has every internal run of
        whitespace replaced by a single space.
    """
    cleaned = []
    for raw_tag in tags:
        lowered = raw_tag.strip().lower()
        cleaned.append(re.sub(r"\s+", " ", lowered))
    return cleaned



def reviewer_corpus_text(r: Reviewer) -> str:
    """Build the text that represents a reviewer in the TF-IDF corpus.

    Args:
        r: The reviewer to describe.

    Returns:
        The reviewer's normalized expertise tags joined by spaces, followed by
        a space and the reviewer's notes (if any), with surrounding whitespace
        stripped.
    """
    tag_text = " ".join(normalize_tags(r.expertise_tags))
    notes_text = r.notes if r.notes else ""
    return f"{tag_text} {notes_text}".strip()



def sop_doc_text(sop_text: str, prefs: Optional[SOPPreferences]) -> str:
    """Build the document that represents an SOP when scoring reviewers.

    Args:
        sop_text: The submitted SOP text.
        prefs: Optional preferences; only ``prefs.field`` is used.

    Returns:
        ``sop_text`` followed by a newline and the preferred field. When no
        preferences or no field are given, the part after the newline is empty.
    """
    field_part = ""
    if prefs and prefs.field:
        field_part = prefs.field
    return f"{sop_text}\n{field_part}"



def tag_overlap_score(sop_text: str, reviewer_tags: List[str]) -> float:
    """Return the fraction of a reviewer's tags that occur in the SOP text.

    Matching is case-insensitive and whitespace-insensitive, and a tag only
    counts when it appears as a whole word or phrase. A tag such as "AI" does
    not match inside "maintain", and "ML" does not match inside "HTML".
    Inflected forms are not matched either ("network" does not match
    "networks").

    Args:
        sop_text: Text to search (the SOP document).
        reviewer_tags: The reviewer's expertise tags.

    Returns:
        A value in [0, 1]: matched tags divided by the number of non-blank
        tags. Returns 0.0 when there are no non-blank tags.
    """
    # Blank tags carry no information; left in, a plain substring test would
    # count each of them as a hit because "" is contained in every string.
    tags = [tag for tag in normalize_tags(reviewer_tags) if tag]
    if not tags:
        return 0.0

    # Collapse whitespace the same way the tags are normalized, so a multi-word
    # tag still matches when the text breaks the phrase across lines.
    text = re.sub(r"\s+", " ", sop_text.lower())

    # Lookarounds rather than \b so tags that start or end with a non-word
    # character (e.g. "c++") can still match.
    hits = sum(
        1
        for tag in tags
        if re.search(rf"(?<!\w){re.escape(tag)}(?!\w)", text)
    )
    return hits / len(tags)



def availability_factor(current_load: int, max_capacity: int) -> float:
    """Return how much spare capacity a reviewer has, as a value in [0, 1].

    Args:
        current_load: Number of reviews currently assigned.
        max_capacity: Maximum number of reviews the reviewer can take.

    Returns:
        ``1 - current_load / max_capacity`` clamped to [0, 1], or 0.0 when
        ``max_capacity`` is zero or negative.
    """
    if not max_capacity > 0:
        return 0.0

    remaining = 1.0 - (current_load / max_capacity)
    # Clamp: first cap at 1.0 (negative load), then floor at 0.0 (overload).
    capped = remaining if remaining < 1.0 else 1.0
    return capped if capped > 0.0 else 0.0



def compute_scores_for_sop(sop_text: str, prefs: Optional[SOPPreferences]) -> List[Dict[str, Any]]:
    """Score every registered reviewer against an SOP and rank them.

    The combined score is ``0.5 * content_similarity + 0.3 * expertise_match
    + 0.2 * availability``, where content similarity is the TF-IDF cosine
    similarity between the SOP document and the reviewer's text.

    Args:
        sop_text: The submitted SOP text.
        prefs: Optional preferences; forwarded to sop_doc_text().

    Returns:
        One dict per reviewer in REVIEWERS with the keys "reviewer_id",
        "name", "expertise", "score" and "breakdown", sorted by descending
        score. Returns an empty list when no reviewers are registered.
    """
    if not REVIEWERS:
        return []

    # Snapshot once so corpus rows and similarity columns share one ordering.
    roster = list(REVIEWERS.items())

    query_doc = sop_doc_text(sop_text, prefs)
    documents = [query_doc]
    documents.extend(reviewer_corpus_text(reviewer) for _, reviewer in roster)

    tfidf = TfidfVectorizer(stop_words="english", ngram_range=(1, 2), max_features=5000)
    matrix = tfidf.fit_transform(documents)

    # Row 0 is the SOP document; the remaining rows are the reviewers.
    similarities = cosine_similarity(matrix[0], matrix[1:]).flatten()

    ranked = []
    for (reviewer_id, reviewer), raw_similarity in zip(roster, similarities):
        # TF-IDF weights are non-negative, so the cosine is already in 0..1.
        content_similarity = float(raw_similarity)
        exp_match = tag_overlap_score(query_doc, reviewer.expertise_tags)
        avail = availability_factor(reviewer.current_load, reviewer.max_capacity)

        score = (0.5 * content_similarity) + (0.3 * exp_match) + (0.2 * avail)

        breakdown = {
            "content_similarity": float(content_similarity),
            "expertise_match": float(exp_match),
            "availability": float(avail),
        }
        ranked.append({
            "reviewer_id": reviewer_id,
            "name": reviewer.name,
            "expertise": reviewer.expertise_tags,
            "score": float(score),
            "breakdown": breakdown,
        })

    # Highest score first; ties keep registration order (stable sort).
    return sorted(ranked, key=lambda entry: entry["score"], reverse=True)

import datetime



# FastAPI application instance; the @app.post / @app.get decorators below
# register the /submit-sop and /match/{client_id} endpoints on it.
app = FastAPI(title="Profile matching", version="0.1.0")



@app.post("/submit-sop")
def submit_sop(req: SubmitSOPRequest):
    """Store a submitted SOP in memory and return its generated id.

    Args:
        req: The submission (client id, SOP text, optional preferences).

    Returns:
        A dict with "status", "message" and the new "sop_id".

    Raises:
        HTTPException: 400 when ``client_id`` or ``sop_text`` is empty or
            contains only whitespace.
    """
    # Imported locally under unambiguous names. The notebook binds the global
    # name ``datetime`` twice (``from datetime import datetime`` in the first
    # cell, ``import datetime`` later), so what the global refers to depends
    # on which cell ran last. ``timezone.utc`` is the same object as
    # ``datetime.UTC`` and also exists on Python versions before 3.11.
    from datetime import datetime as _datetime, timezone as _timezone

    # Basic validation: both fields must contain something other than whitespace.
    if not req.client_id.strip() or not req.sop_text.strip():
        raise HTTPException(status_code=400, detail="client_id and sop_text are required.")

    sop_id = str(uuid4())
    SOP[sop_id] = {
        "sop_id": sop_id,
        "client_id": req.client_id,
        "sop_text": req.sop_text,
        # Stored as a plain dict so the record holds only built-in types.
        "preferences": req.preferences.model_dump() if req.preferences else None,
        "created_at": _datetime.now(_timezone.utc),
    }
    # Append so the client's newest submission is always last in the list.
    CLIENT_TO_SOP.setdefault(req.client_id, []).append(sop_id)

    return {"status": "success", "message": "sop submitted successfully", "sop_id": sop_id}



@app.get("/match/{client_id}", response_model=MatchResponse)
def match_reviewers(client_id: str, top_k: int = 5):
    """Return the best-matching reviewers for a client's most recent SOP.

    Args:
        client_id: Client whose latest submission is matched.
        top_k: Maximum number of reviewers to return; values below 1 are
            treated as 1.

    Returns:
        A MatchResponse whose ``top_k`` is the number of matches actually
        returned. When no reviewers are registered the match list is empty.

    Raises:
        HTTPException: 404 when the client has no stored SOP.
    """
    submitted_ids = CLIENT_TO_SOP.get(client_id)
    if not submitted_ids:
        raise HTTPException(status_code=404, detail="No sop found for this client_id.")

    # Submissions are appended in order, so the last id is the newest one.
    latest = SOP[submitted_ids[-1]]
    latest_text = latest["sop_text"]
    stored_prefs = latest["preferences"]
    prefs = SOPPreferences(**stored_prefs) if stored_prefs else None

    ranked = compute_scores_for_sop(sop_text=latest_text, prefs=prefs)
    if not ranked:
        return MatchResponse(client_id=client_id, top_k=0, matches=[])

    limit = max(1, top_k)
    matches = []
    for entry in ranked[:limit]:
        rounded_breakdown = {}
        for component, value in entry["breakdown"].items():
            rounded_breakdown[component] = round(value, 4)
        matches.append(
            MatchItem(
                reviewer_id=entry["reviewer_id"],
                name=entry["name"],
                expertise=entry["expertise"],
                score=round(entry["score"], 4),
                breakdown=rounded_breakdown,
            )
        )

    return MatchResponse(client_id=client_id, top_k=len(matches), matches=matches)

In [4]:
def seed_reviewers():
    """Reset REVIEWERS to the four built-in demo reviewers (rev001 to rev004).

    Any reviewers already registered are removed first, so calling this
    repeatedly always leaves the same four entries, in the same order.
    """
    REVIEWERS.clear()

    demo_roster = [
        Reviewer(
            reviewer_id="rev001",
            name="Dr. Amina Rahman",
            expertise_tags=["SOP Evaluation", "Academic Writing", "Artificial Intelligence", "Machine Learning", "Technical Research"],
            notes="Expert in evaluating AI/ML SOPs; specializes in technical narrative coherence and research impact assessment.",
            max_capacity=6,
            current_load=2,
        ),
        Reviewer(
            reviewer_id="rev002",
            name="John Carter",
            expertise_tags=["SOP Evaluation", "Biomedical Research", "Public Health", "Academic Writing", "Evidence Analysis"],
            notes="Experienced with research narrative evaluation and academic impact assessment for STEM applications.",
            max_capacity=5,
            current_load=4,
        ),
        Reviewer(
            reviewer_id="rev003",
            name="Sophia Lee",
            expertise_tags=["SOP Evaluation", "Civil Engineering", "Infrastructure", "Transportation", "Environmental Research"],
            notes="Strong with technical project narratives and infrastructure research impact evaluation.",
            max_capacity=4,
            current_load=1,
        ),
        Reviewer(
            reviewer_id="rev004",
            name="Miguel Alvarez",
            expertise_tags=["SOP Evaluation", "Economics", "Entrepreneurship", "Business Research", "Commercial Impact"],
            notes="Focus on market impact narratives, business case analysis, and commercial potential assessment.",
            max_capacity=3,
            current_load=0,
        ),
    ]

    # Register in roster order; dict insertion order is the scoring order.
    REVIEWERS.update({reviewer.reviewer_id: reviewer for reviewer in demo_roster})



# Populate the in-memory reviewer store; the matching endpoint returns an
# empty match list while REVIEWERS is empty.
seed_reviewers()

# Show the registered reviewer ids as the cell output.
list(REVIEWERS.keys())

['rev001', 'rev002', 'rev003', 'rev004']

In [5]:
import httpx

from fastapi.testclient import TestClient



# End-to-end smoke test of both endpoints through FastAPI's TestClient, which
# calls `app` in-process, so no server has to be started.
client = TestClient(app)



# Example submission. "field" is the only preference the scoring helpers read;
# "priority" and "review_style" are stored with the submission.
payload = {

    "client_id": "client_123",

    "sop_text": (

        "My research in artificial intelligence focuses on robust perception for autonomous systems "

        "with applications in national security and critical infrastructure resilience. "

        "I collaborate with US defense labs and have publications at NeurIPS and ICRA. "

        "My work enables safer autonomous navigation and situational awareness."

    ),

    "preferences": {

        "field": "Artificial Intelligence, Autonomous Systems, National Security",

        "priority": "High",

        "review_style": "Detailed"

    }

}

# Every run of this cell stores one more SOP for client_123; /match only uses
# the most recent one.
resp_submit = client.post("/submit-sop", json=payload)

print("Submit status:", resp_submit.status_code)

print(resp_submit.json())



# Ask for the three best-scoring reviewers for the SOP submitted above.
resp_match = client.get("/match/client_123", params={"top_k": 3})

print("\nMatch status:", resp_match.status_code)

print(resp_match.json())

Submit status: 200
{'status': 'success', 'message': 'sop submitted successfully', 'sop_id': '47dbab91-2285-499d-bbd3-2adbb4114ea5'}

Match status: 200
{'client_id': 'client_123', 'top_k': 3, 'matches': [{'reviewer_id': 'rev001', 'name': 'Dr. Amina Rahman', 'expertise': ['SOP Evaluation', 'Academic Writing', 'Artificial Intelligence', 'Machine Learning', 'Technical Research'], 'score': 0.2338, 'breakdown': {'content_similarity': 0.0809, 'expertise_match': 0.2, 'availability': 0.6667}}, {'reviewer_id': 'rev003', 'name': 'Sophia Lee', 'expertise': ['SOP Evaluation', 'Civil Engineering', 'Infrastructure', 'Transportation', 'Environmental Research'], 'score': 0.2288, 'breakdown': {'content_similarity': 0.0377, 'expertise_match': 0.2, 'availability': 0.75}}, {'reviewer_id': 'rev004', 'name': 'Miguel Alvarez', 'expertise': ['SOP Evaluation', 'Economics', 'Entrepreneurship', 'Business Research', 'Commercial Impact'], 'score': 0.2022, 'breakdown': {'content_similarity': 0.0043, 'expertise_match