In [None]:
import time
import uuid
from datetime import datetime
from typing import List, Optional

from fastapi import FastAPI, Header, HTTPException
from pydantic import BaseModel

# Application object; the @app.post route further down registers its handler on it.
app = FastAPI()

# Pydantic models for input and output

class Message(BaseModel):
    """One entry in the conversation sent with a chat request."""
    # Speaker of the message. The endpoint in this file only picks out entries
    # whose role equals "user"; other values are accepted but not interpreted.
    role: str
    # Text of the message.
    content: str

class ChatRequest(BaseModel):
    """Request body accepted by the chat completion endpoint."""
    # Model name supplied by the caller; copied unchanged into the response.
    model: str
    # Conversation so far, in the order the caller sent it.
    messages: List[Message]
    # NOTE(review): accepted for request compatibility, but nothing visible in
    # this file reads it, so a streamed reply is never produced - confirm intent.
    stream: Optional[bool] = False

class ChatCompletionResponse(BaseModel):
    """Response body built and returned by the chat completion endpoint."""
    # Identifier of the form "chatcmpl-<uuid4>" as populated by the endpoint.
    id: str
    # Object type tag; the endpoint sets it to "chat.completion".
    object: str
    # Creation time as integer seconds.
    created: int
    # Model name echoed from the request.
    model: str
    # List of choice dicts; the endpoint emits one dict with the keys
    # "index", "message" ({"role", "content"}) and "finish_reason".
    choices: List[dict]
    # Dict with "prompt_tokens", "completion_tokens" and "total_tokens";
    # the endpoint fills these with whitespace-separated word counts.
    usage: dict

# Your custom LLM inference class/function
class YourLLMWrapper:
    """Placeholder adapter for a custom LLM backend.

    Stores the caller's SSO token and exposes one question/answer method.
    """

    def __init__(self, sso):
        """Keep the SSO token on the instance for later use."""
        self.sso = sso

    def conversations_with_question(self, question):
        """Return a dict whose ``"answer"`` key holds the reply to ``question``.

        This is a mock: it formats the question and the stored token into a
        canned string instead of contacting a model.
        """
        # Replace this mock with your actual call to LLM using self.sso.
        # NOTE(review): the mock echoes the token back in the answer text; a
        # real implementation should not return credentials to the caller.
        reply = f"Answer to '{question}' with token '{self.sso}'"
        return dict(answer=reply)

def _extract_bearer_token(authorization: Optional[str]) -> str:
    """Return the credential carried by an ``Authorization: Bearer <token>`` value.

    Args:
        authorization: Raw header value, or ``None`` when the header is absent.

    Returns:
        The token with surrounding whitespace removed.

    Raises:
        HTTPException: 401 when the header is absent, uses a scheme other than
            Bearer, or carries an empty token.
    """
    # Split only on the first space so a token that itself contains the text
    # "Bearer " is returned whole instead of being cut at that point.
    scheme, _, token = (authorization or "").partition(" ")
    token = token.strip()
    # HTTP authentication scheme names are case-insensitive.
    if scheme.lower() != "bearer" or not token:
        raise HTTPException(status_code=401, detail="Authorization header missing or invalid.")
    return token


@app.post("/v1/chat/completions")
async def chat_completion(
    request: ChatRequest,
    authorization: Optional[str] = Header(None)
):
    """Answer the latest user message in an OpenAI-style chat completion format.

    Args:
        request: Parsed request body (model name, messages, stream flag).
        authorization: ``Authorization`` header; must be ``Bearer <sso token>``.

    Returns:
        A ``ChatCompletionResponse`` with a single assistant choice and
        word-count based usage figures.

    Raises:
        HTTPException: 401 for a missing/invalid Authorization header,
            400 when the request contains no message with role "user".
    """
    sso_token = _extract_bearer_token(authorization)

    # Only the most recent user message is forwarded; system messages and
    # earlier turns are not passed to the wrapper.
    user_messages = [msg.content for msg in request.messages if msg.role == "user"]
    if not user_messages:
        raise HTTPException(status_code=400, detail="No user message provided.")
    question = user_messages[-1]

    # NOTE(review): this is a synchronous call inside an async handler. If the
    # real wrapper performs blocking I/O it will stall the event loop; consider
    # offloading it to a worker thread once the mock is replaced.
    # NOTE(review): request.stream is not honoured; a normal JSON body is
    # always returned.
    llm = YourLLMWrapper(sso=sso_token)
    llm_response = llm.conversations_with_question(question)
    # Treat a missing or None answer as empty text so the counts below work.
    answer = llm_response.get("answer") or ""

    # Whitespace word counts stand in for real tokenizer counts.
    prompt_tokens = len(question.split())
    completion_tokens = len(answer.split())

    # Construct OpenAI-compatible response
    return ChatCompletionResponse(
        id=f"chatcmpl-{uuid.uuid4()}",
        object="chat.completion",
        # time.time() is true epoch seconds. Calling .timestamp() on the naive
        # result of datetime.utcnow() would be interpreted as local time and
        # shift the value by the host's UTC offset.
        created=int(time.time()),
        model=request.model,
        choices=[{
            "index": 0,
            "message": {
                "role": "assistant",
                "content": answer
            },
            "finish_reason": "stop"
        }],
        usage={
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": prompt_tokens + completion_tokens
        }
    )


All preferences:
{
  "user_id": "user123",
  "display": {
    "theme": "dark",
    "colors": {
      "primary": "#6699CC",
      "secondary": "#CC6633",
      "background": "#FFFFFF",
      "text": "#333333"
    },
    "font_size": 14,
    "show_avatar": true
  },
  "notifications": {
    "email": true,
    "push": true,
    "frequency": "daily",
    "quiet_hours": {
      "start": "22:00",
      "end": "08:00"
    }
  },
  "language": "en",
  "timezone": "UTC",
  "experimental_features": [
    "beta_ui",
    "voice_commands"
  ]
}

All preferences after updates:
{
  "user_id": "user123",
  "display": {
    "theme": "dark",
    "colors": {
      "primary": "#FF5500",
      "secondary": "#CC6633",
      "background": "#FFFFFF",
      "text": "#333333"
    },
    "font_size": 16,
    "show_avatar": true
  },
  "notifications": {
    "email": true,
    "push": false,
    "frequency": "daily",
    "quiet_hours": {
      "start": "22:00",
      "end": "08:00"
    }
  },
  "language": "es",


In [None]:
import os

from openai import OpenAI

# The SSO token travels as the client's API key. Read it from the environment
# so a real token is never committed with the notebook; the placeholder is
# used only when SSO_TOKEN is unset.
sso_token = os.environ.get("SSO_TOKEN", "user_sso_token_here")

client = OpenAI(
    base_url="http://localhost:8000/v1",
    api_key=sso_token,
)

# The client already sends "Authorization: Bearer <api_key>" on every request,
# so no explicit Authorization header is needed here.
completion = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {"role": "system", "content": "You are helpful."},
        {"role": "user", "content": "What's the weather today?"}
    ],
)

print(completion.choices[0].message.content)
