<a href="https://colab.research.google.com/github/jigyanshrathore/MedicassistantAI/blob/main/MedialAssistant.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q langchain langchain-community langchainhub langchain-google-genai chromadb beautifulsoup4
!pip install -q fastapi uvicorn nest-asyncio pyngrok scikit-learn pandas


[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/67.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m39.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.9/18.9 MB[0m [31m90.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m94.9/94.9 kB[0m [31m7.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m284.2/284.2 kB[0m [31m20.0 MB/s[0m eta [36m0:00:00

In [None]:
import os
# Set before the LangChain loaders are imported so web requests carry this identifier.
os.environ["USER_AGENT"] = "medical-insurance-app"
import warnings
from getpass import getpass

import nest_asyncio
import pandas as pd
import uvicorn
from fastapi import FastAPI
from fastapi import HTTPException
from pydantic import BaseModel
from pyngrok import ngrok
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

warnings.filterwarnings("ignore")

# Langchain
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain_core.prompts import ChatPromptTemplate


In [None]:
# Credentials must never be committed inside the notebook. Each key is taken from
# the environment when already present; otherwise it is requested interactively
# with getpass so the value is not echoed or stored in the cell source/output.
# NOTE: any key that was previously committed in this cell should be treated as
# leaked and revoked in the provider's console.
for _secret_name in ("GOOGLE_API_KEY", "LANGCHAIN_API_KEY"):
    if not os.environ.get(_secret_name):
        os.environ[_secret_name] = getpass(f"Enter {_secret_name}: ")

In [None]:
# Model identifiers are named up front so they are easy to spot and swap.
CHAT_MODEL_NAME = "gemini-1.5-pro"
EMBEDDING_MODEL_NAME = "models/embedding-001"

# Chat model used to generate answers, and embedding model used to index documents.
llm = ChatGoogleGenerativeAI(model=CHAT_MODEL_NAME)
embeddings = GoogleGenerativeAIEmbeddings(model=EMBEDDING_MODEL_NAME)

In [None]:
# Source page that provides the assistant's background knowledge.
url = "https://en.wikipedia.org/wiki/Health_insurance"

# Fetch the page and turn it into LangChain documents.
loader = WebBaseLoader(web_path=url)
docs = loader.load()

In [None]:
# Chunking parameters: 1000-character chunks that overlap by 200 characters.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
splits = splitter.split_documents(docs)

# Embed the chunks into a Chroma vector store and expose it as a retriever.
vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)
retriever = vectorstore.as_retriever()

In [None]:
# System message template; the "{context}" placeholder is left in the string
# so the prompt template can substitute it later.
system_prompt = "\n".join(
    [
        "You are a helpful medical insurance assistant.",
        "Use the following context to answer the question briefly.",
        "{context}",
    ]
)


In [None]:
# Two-message prompt: the system instructions followed by the user's question,
# which is substituted for "{input}".
prompt_messages = [
    ("system", system_prompt),
    ("human", "{input}"),
]
chat_prompt = ChatPromptTemplate.from_messages(prompt_messages)

In [None]:
# First build the chain that feeds documents plus the prompt to the LLM,
# then wrap it with the retriever to form the question-answering chain.
combine_docs_chain = create_stuff_documents_chain(llm, chat_prompt)
qa_chain = create_retrieval_chain(retriever, combine_docs_chain)

In [None]:
# Location of the insurance dataset used to train the cost model.
INSURANCE_CSV_URL = "https://raw.githubusercontent.com/stedy/Machine-Learning-with-R-datasets/master/insurance.csv"

# Download the CSV into a DataFrame.
data = pd.read_csv(INSURANCE_CSV_URL)


In [None]:
# Encode each categorical column with its OWN LabelEncoder.
# Re-fitting a single encoder on every column leaves it knowing only the labels
# of the last column fitted ("region"), so it cannot later transform "sex" or
# "smoker" values. Keeping one fitted encoder per column preserves every mapping.
CATEGORICAL_COLUMNS = ["sex", "smoker", "region"]

encoders = {}
for column in CATEGORICAL_COLUMNS:
    encoders[column] = LabelEncoder()
    data[column] = encoders[column].fit_transform(data[column])

# Backward-compatible alias: as before, `le` is the encoder fitted on "region".
# It must not be used for any other column.
le = encoders["region"]

In [None]:
# "charges" is the regression target; every remaining column is a feature.
TARGET_COLUMN = "charges"

y = data[TARGET_COLUMN]
X = data.drop(columns=[TARGET_COLUMN])

In [None]:
# Hold out 20% of the rows; the split is seeded so it is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Seed the forest as well: without random_state the bootstrap samples and feature
# choices differ on each run, so predictions would not be reproducible.
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

In [None]:
# Register the ngrok auth token without embedding it in the notebook.
# The token is read from NGROK_AUTHTOKEN when set, otherwise requested with
# getpass so it is neither echoed nor saved in the cell source/output.
# NOTE: a token that was previously committed in this cell should be revoked.
_ngrok_token = os.environ.get("NGROK_AUTHTOKEN") or getpass("Enter ngrok authtoken: ")
ngrok.set_auth_token(_ngrok_token)


Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [None]:
# FastAPI application object; the /predict and /ask routes are registered on it.
app = FastAPI(title="Medical Insurance API")

# Input data models
# Request body for POST /predict: one field per feature placed in the model's input row.
class PredictInput(BaseModel):
    age: int
    sex: str  # categorical label; converted to an integer code inside `predict`
    bmi: float
    children: int
    smoker: str  # categorical label; converted to an integer code inside `predict`
    region: str  # categorical label; converted to an integer code inside `predict`

# Request body for POST /ask.
class AskInput(BaseModel):
    query: str  # forwarded to the retrieval chain as its "input" value

# Prediction endpoint
# Integer codes for the categorical features. They mirror what LabelEncoder
# assigns during training (labels numbered in sorted order). The shared `le`
# object cannot be used here: it was last fitted on the "region" column, so it
# only knows region labels and raised KeyError: 'male' for "sex".
# NOTE(review): keep in sync with the training data's categories.
CATEGORY_CODES = {
    "sex": {"female": 0, "male": 1},
    "smoker": {"no": 0, "yes": 1},
    "region": {"northeast": 0, "northwest": 1, "southeast": 2, "southwest": 3},
}


def _encode_category(field: str, value: str) -> int:
    """Return the integer code for `value` of categorical `field`.

    Matching ignores surrounding whitespace and letter case.

    Raises:
        HTTPException: status 422 when `value` is not a known label for `field`.
    """
    codes = CATEGORY_CODES[field]
    label = value.strip().lower()
    if label not in codes:
        allowed = ", ".join(sorted(codes))
        raise HTTPException(
            status_code=422,
            detail=f"Invalid value {value!r} for '{field}'; expected one of: {allowed}",
        )
    return codes[label]


@app.post("/predict")
def predict(data: PredictInput):
    """Predict the insurance cost for one person.

    Args:
        data: validated request body (this parameter shadows the module-level
            `data` DataFrame inside the function only).

    Returns:
        A dict with key "predicted_cost" holding the prediction rounded to 2 decimals.

    Raises:
        HTTPException: status 422 when sex, smoker or region is not a known label.
    """
    # Column order matches the feature order used to build the training frame.
    input_df = pd.DataFrame([{
        "age": data.age,
        "sex": _encode_category("sex", data.sex),
        "bmi": data.bmi,
        "children": data.children,
        "smoker": _encode_category("smoker", data.smoker),
        "region": _encode_category("region", data.region),
    }])
    prediction = model.predict(input_df)[0]
    # Convert the NumPy scalar to a built-in float before it is serialised.
    return {"predicted_cost": round(float(prediction), 2)}

# Assistant endpoint
# Runs the user's query through the retrieval QA chain and returns only the
# generated answer text.
@app.post("/ask")
def ask_question(data: AskInput):
    chain_inputs = {"input": data.query}
    chain_output = qa_chain.invoke(chain_inputs)
    answer_text = chain_output["answer"]
    return {"answer": answer_text}

# =========================
# 🌐 STEP 7: Run app with ngrok
# =========================
# Single source of truth for the port shared by the tunnel and the server.
PORT = 8000

# Let uvicorn start its event loop inside the notebook's already-running loop.
nest_asyncio.apply()

# ngrok.connect() returns a tunnel object; interpolating it directly printed its
# whole repr ('NgrokTunnel: "https://..." -> "http://localhost:8000"/docs').
# Use its public_url attribute so the printed link is a usable URL.
tunnel = ngrok.connect(PORT)
public_url = tunnel.public_url
print(f"Your API is live at: {public_url}/docs")

# Serves requests until the cell is interrupted.
uvicorn.run(app, host="0.0.0.0", port=PORT)


Your API is live at: NgrokTunnel: "https://1b69-34-59-106-93.ngrok-free.app" -> "http://localhost:8000"/docs


INFO:     Started server process [326]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


INFO:     2401:4900:1b26:e1e8:48f2:ac10:d686:8c4d:0 - "GET /docs HTTP/1.1" 200 OK
INFO:     2401:4900:1b26:e1e8:48f2:ac10:d686:8c4d:0 - "GET /openapi.json HTTP/1.1" 200 OK


ERROR:asyncio:Task exception was never retrieved
future: <Task finished name='Task-1' coro=<Server.serve() done, defined at /usr/local/lib/python3.11/dist-packages/uvicorn/server.py:68> exception=KeyboardInterrupt()>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/uvicorn/main.py", line 580, in run
    server.run()
  File "/usr/local/lib/python3.11/dist-packages/uvicorn/server.py", line 66, in run
    return asyncio.run(self.serve(sockets=sockets))
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/nest_asyncio.py", line 30, in run
    return loop.run_until_complete(task)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/nest_asyncio.py", line 92, in run_until_complete
    self._run_once()
  File "/usr/local/lib/python3.11/dist-packages/nest_asyncio.py", line 133, in _run_once
    handle._run()
  File "/usr/lib/python3.11/asyncio/events.py", line 84, in _run
    se

INFO:     2401:4900:1b26:e1e8:48f2:ac10:d686:8c4d:0 - "POST /predict HTTP/1.1" 500 Internal Server Error


ERROR:    Exception in ASGI application
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/sklearn/utils/_encode.py", line 235, in _encode
    return _map_to_integer(values, uniques)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/utils/_encode.py", line 174, in _map_to_integer
    return xp.asarray([table[v] for v in values], device=device(values))
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/utils/_encode.py", line 174, in <listcomp>
    return xp.asarray([table[v] for v in values], device=device(values))
                       ~~~~~^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/utils/_encode.py", line 167, in __missing__
    raise KeyError(key)
KeyError: 'male'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/uvicorn/protocols

INFO:     2401:4900:1b26:e1e8:48f2:ac10:d686:8c4d:0 - "POST /ask HTTP/1.1" 200 OK
