In [4]:
!ngrok config add-authtoken 2vGMWXrxn69OfjnRRq4urGRPXu1_XCvRa2nVXNtmtnas4gir

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [9]:
!pip install -q fastapi uvicorn sentence-transformers transformers nest_asyncio pyngrok

from fastapi import FastAPI
from pydantic import BaseModel
from sentence_transformers import SentenceTransformer, util
from transformers import pipeline
from typing import List, Dict
import nest_asyncio
import uvicorn

# Required for Colab
nest_asyncio.apply()

app = FastAPI()

# Load sentence transformer model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Load summarization model
summarizer = pipeline("summarization")

class QueryRequest(BaseModel):
    query: str
    questions: List[str]

class SummaryRequest(BaseModel):
    answers: List[str]

@app.post("/similar")
def get_similar(data: QueryRequest):
    query_emb = model.encode(data.query, convert_to_tensor=True)
    question_embs = model.encode(data.questions, convert_to_tensor=True)
    scores = util.cos_sim(query_emb, question_embs)[0]
    top_results = sorted(zip(data.questions, scores), key=lambda x: x[1], reverse=True)[:5]
    return {"results": [{"question": q, "score": float(s)} for q, s in top_results]}

@app.post("/summarize")
def summarize(data: SummaryRequest):
    meaningful_answers = [ans.strip() for ans in data.answers if len(ans.strip().split()) > 3]

    if not meaningful_answers:
        return {"summary": "Not enough information to summarize."}

    combined = " ".join(meaningful_answers)

    summary = summarizer(
        combined,
        max_length=50,
        min_length=10,
        do_sample=False
    )[0]['summary_text']

    return {"summary": summary}



No model was supplied, defaulted to sshleifer/distilbart-cnn-12-6 and revision a4f8f3e (https://huggingface.co/sshleifer/distilbart-cnn-12-6).
Using a pipeline without specifying a model name and revision in production is not recommended.


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Device set to use cpu


In [None]:

# Tunnel via ngrok
from pyngrok import ngrok
public_url = ngrok.connect(8082)
print(f"Your public Colab API URL: {public_url}")

uvicorn.run(app, port=8082)


Your public Colab API URL: NgrokTunnel: "https://76ef-34-48-86-201.ngrok-free.app" -> "http://localhost:8082"


INFO:     Started server process [1429]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:8082 (Press CTRL+C to quit)


INFO:     203.145.49.160:0 - "POST /similar HTTP/1.1" 200 OK


Your max_length is set to 100, but your input_length is only 50. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=25)


INFO:     203.145.49.160:0 - "POST /summarize HTTP/1.1" 200 OK
INFO:     203.145.49.160:0 - "POST /similar HTTP/1.1" 200 OK


Your max_length is set to 100, but your input_length is only 50. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=25)


INFO:     203.145.49.160:0 - "POST /summarize HTTP/1.1" 200 OK
INFO:     203.145.49.160:0 - "POST /similar HTTP/1.1" 200 OK
INFO:     203.145.49.160:0 - "POST /summarize HTTP/1.1" 200 OK
