import

In [12]:
# === Cell 1: Imports ===

import warnings
# Blanket filter: every warning category is ignored from this point on.
# It is installed before the imports below, so warnings raised while those
# packages load are suppressed as well.
warnings.filterwarnings("ignore")

from sentence_transformers import SentenceTransformer
import hopsworks
# Settings module; this notebook reads EMBEDDING_MODEL_NAME and
# HOPSWORKS_API_KEY from it.
import config


In [None]:
import os

# os.environ["HF_API_KEY"] = ""

# Paste a key between the quotes for local runs only; never commit a real key.
# An empty literal must not wipe a key that is already exported in the
# environment, so the variable is overwritten only when a key is pasted here
# or when no non-empty value exists yet (in which case it is set to "").
_pasted_key = ""
if _pasted_key or not os.environ.get("SILICONFLOW_API_KEY"):
    os.environ["SILICONFLOW_API_KEY"] = _pasted_key

In [14]:
# === Cell 2: Connect to Hopsworks ===

from config import HOPSWORKS_API_KEY

# Log in with the API key taken from config (no explicit project argument is
# passed), then fetch the feature store handle that the following cells use.
project = hopsworks.login(api_key_value=HOPSWORKS_API_KEY)
fs = project.get_feature_store()


2026-01-11 21:06:33,974 INFO: Closing external client and cleaning up certificates.
Connection closed.
2026-01-11 21:06:33,978 INFO: Initializing external client
2026-01-11 21:06:33,978 INFO: Base URL: https://c.app.hopsworks.ai:443
2026-01-11 21:06:35,383 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1286333


In [15]:
# === Cell 3: Load Feature Views ===

# Both feature views are pinned to version 3.
metadata_fv = fs.get_feature_view(name="paper_metadata_fv_2", version=3)
chunk_fv = fs.get_feature_view(name="paper_chunk_fv_2", version=3)

# Prepare both views for online serving.
# NOTE(review): the positional 1 is presumably the training dataset version;
# confirm against the Hopsworks init_serving signature.
metadata_fv.init_serving(1)
chunk_fv.init_serving(1)


In [16]:
# === Cell 4: Load Embedding Model ===
# The model name comes from config.EMBEDDING_MODEL_NAME rather than being hard-coded here.
sentence_transformer = SentenceTransformer(config.EMBEDDING_MODEL_NAME)

2026-01-11 21:06:44,745 INFO: Use pytorch device_name: cpu
2026-01-11 21:06:44,745 INFO: Load pretrained SentenceTransformer: all-MiniLM-L6-v2


In [17]:
# === Cell 5: Similarity Search Engine ===

from functions.similarity_search import SimilaritySearchEngine

# The engine receives the embedding model together with both feature views
# loaded in the earlier cells.
_search_engine_kwargs = dict(
    embedding_model=sentence_transformer,
    metadata_feature_view=metadata_fv,
    chunk_feature_view=chunk_fv,
)
search_engine = SimilaritySearchEngine(**_search_engine_kwargs)


In [18]:
# === Cell 6: Context Builder ===

from functions.context_builder import ContextBuilder

# Limits handed to the builder: 2000 tokens and 8 chunks.
context_builder = ContextBuilder(max_tokens=2000, max_chunks=8)


In [19]:
# === Cell 7: Prompt Synthesizer ===

# The debug variant is active in this notebook. The previously used
# alternative was PromptSynthesizer from functions.prompt_synthesis,
# instantiated with no arguments.

from functions.prompt_synthesis_debug import DebugPromptSynthesizer

# Instantiated with its defaults; passed to AgenticInference later on.
prompt_synthesizer = DebugPromptSynthesizer()



In [20]:
# === Cell 8: MCP Dispatcher ===

from functions.mcp_dispatcher import MCPDispatcher

# The dispatcher shares the same search engine instance built above.
mcp_dispatcher = MCPDispatcher(search_engine=search_engine)


In [21]:
# === Cell 9: Agentic Inference ===

from functions.agent_loop import AgenticInference
from functions.llm_wrapper import LLMWrapper

# Remote LLM reached through the SiliconFlow API endpoint.
# The base URL uses HTTPS: an API key is handed to the client below and is
# presumably sent with each request, so it must not travel over plain HTTP.
# `os` is imported in an earlier cell; the key is read from the environment.
# (A local model was configured previously via
#  LLMWrapper(model_name_or_path="mistralai/Mistral-7B-v0.1").)
llm = LLMWrapper(
    model="Qwen/Qwen3-8B",
    base_url="https://api.siliconflow.cn/v1/",
    api_key=os.getenv("SILICONFLOW_API_KEY"),
    temperature=0.2,
    max_tokens=1024,
)

# Hand the LLM plus the retrieval, context, prompt and dispatch components
# built in the previous cells to the agent.
agent = AgenticInference(
    llm=llm,
    search_engine=search_engine,
    context_builder=context_builder,
    prompt_synthesizer=prompt_synthesizer,
    mcp_dispatcher=mcp_dispatcher,
)


In [22]:
# === Cell 10: Run Agent ===

from functions.agent_ui import launch_agent_ui

# Build the UI object around the agent assembled in the previous cell.
ui = launch_agent_ui(agent)
# inline=True asks for the interface to be embedded in the notebook output;
# the captured log shows it being served on a local URL (127.0.0.1).
ui.launch(inline=True)


* Running on local URL:  http://127.0.0.1:7861
2026-01-11 21:06:48,503 INFO: HTTP Request: GET http://127.0.0.1:7861/gradio_api/startup-events "HTTP/1.1 200 OK"
2026-01-11 21:06:48,517 INFO: HTTP Request: HEAD http://127.0.0.1:7861/ "HTTP/1.1 200 OK"
* To create a public link, set `share=True` in `launch()`.




2026-01-11 21:06:48,869 INFO: HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Traceback (most recent call last):
  File "e:\anaconda3\envs\aq\lib\site-packages\opensearchpy\connection\http_urllib3.py", line 264, in perform_request
    response = self.pool.urlopen(
  File "e:\anaconda3\envs\aq\lib\site-packages\urllib3\connectionpool.py", line 1712, in urlopen
    conn = self._get_conn(timeout=pool_timeout, heb_timeout=timeout_obj)
  File "e:\anaconda3\envs\aq\lib\site-packages\urllib3\connectionpool.py", line 684, in _get_conn
    raise ClosedPoolError(self, "Pool is closed.")
urllib3.exceptions.ClosedPoolError: HTTPSConnectionPool(host='15.235.49.70', port=9200): Pool is closed.
Traceback (most recent call last):
  File "e:\anaconda3\envs\aq\lib\site-packages\opensearchpy\connection\http_urllib3.py", line 264, in perform_request
    response = self.pool.urlopen(
  File "e:\anaconda3\envs\aq\lib\site-packages\urllib3\connectionpool.py", line 1712, in urlopen
    conn = self._get_conn(timeout=pool_timeout, heb_timeout=timeout_obj)
  File "e:\anaconda3\envs\aq\lib

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

PAPER METADATA: {'IZB6XZRC': {'title': 'Synthetic Time Series Data Generation for Healthcare Applications: A PCG Case Study', 'abstract': 'The generation of high-quality medical time series data is essential for advancing healthcare diagnostics and safeguarding patient privacy. Specifically, synthesizing realistic phonocardiogram (PCG) signals offers significant potential as a cost-effective and efficient tool for cardiac disease pre-screening. Despite its potential, the synthesis of PCG signals for this specific application received limited attention in research. In this study, we employ and compare three state-of-the-art generative models from different categories — WaveNet, DoppelGANger, and DiffWave — to generate high-quality PCG data. We use data from the George B. Moody PhysioNet Challenge 2022. Our methods are evaluated using various metrics widely used in the previous literature in the domain of time series data generation, such as mean absolute error and maximum mean discrepan