# Co-STORM (Ollama + Local Embeddings)

This notebook mirrors `examples/costorm_examples/run_costorm_gpt.py` so you can run Co-STORM from a notebook while keeping the same behavior (Ollama LLM, local Nemotron embeddings, artifact saving). Edit the argument list in the last cell to fit your environment (model paths, retriever keys, etc.).


In [1]:

import os
import sys
import json
import traceback
from argparse import ArgumentParser
from typing import Optional
from pathlib import Path
import importlib

# Pin imports to the local checkout so `knowledge_storm` resolves to the
# implementation under REPO_ROOT rather than any other copy on the path.
REPO_ROOT = Path('/data/coscientist/storm')
if sys.path.count(str(REPO_ROOT)) == 0:
    # Front of the list: entries are searched in order.
    sys.path.insert(0, str(REPO_ROOT))
# Export PYTHONPATH for child processes; an existing value is left untouched.
os.environ.setdefault('PYTHONPATH', str(REPO_ROOT))

# Drop every already-imported knowledge_storm module so the imports below are
# resolved again (useful when the cell is re-run in a live kernel).
for stale_name in [name for name in sys.modules if name.startswith('knowledge_storm')]:
    sys.modules.pop(stale_name)

from knowledge_storm.collaborative_storm.engine import (
    CollaborativeStormLMConfigs,
    RunnerArgument,
    CoStormRunner,
)
from knowledge_storm.collaborative_storm.modules.callback import (
    LocalConsolePrintCallBackHandler,
)
from knowledge_storm.lm import LitellmModel, OpenAIModel, AzureOpenAIModel
from knowledge_storm.logging_wrapper import LoggingWrapper
from knowledge_storm.rm import (
    YouRM,
    BingSearch,
    BraveRM,
    SerperRM,
    DuckDuckGoSearchRM,
    TavilySearchRM,
    SearXNG,
)
from knowledge_storm.encoder import Encoder
from knowledge_storm.utils import load_api_key


  from .autonotebook import tqdm as notebook_tqdm


In [2]:

def build_base_url(url: str, port: Optional[int] = None) -> str:
    """Normalize a service base URL and optionally add a port.

    The URL gets an ``http://`` scheme when it has neither ``http://`` nor
    ``https://``, and trailing slashes are stripped. When *port* is truthy
    and the URL does not already carry an explicit port, the port is added
    to the host part (before any path), so ``http://host/v1`` becomes
    ``http://host:PORT/v1``.

    Args:
        url: Host name or URL, with or without scheme, port and path.
        port: Port to add when the URL has none. ``None`` or ``0`` leaves
            the URL without a port.

    Returns:
        The normalized URL. An explicit port already present in *url* is
        kept as-is, even when it differs from *port*.
    """
    # Function-scope import keeps this cell self-contained.
    from urllib.parse import urlsplit, urlunsplit

    if not url.startswith(("http://", "https://")):
        url = f"http://{url}"
    url = url.rstrip("/")
    if not port:
        return url

    parts = urlsplit(url)
    try:
        has_explicit_port = parts.port is not None
    except ValueError:
        # Something non-numeric (or out of range) follows the host's colon;
        # leave the URL alone rather than stacking a second port onto it.
        has_explicit_port = True
    if has_explicit_port:
        return url

    # Attach the port to the network location so paths and IPv6 literals
    # (e.g. "[::1]") are handled correctly.
    return urlunsplit(parts._replace(netloc=f"{parts.netloc}:{port}"))


def main(args):
    load_api_key(toml_file_path=args.secrets_file)
    lm_config: CollaborativeStormLMConfigs = CollaborativeStormLMConfigs()
    if args.llm_provider == "ollama" and args.ollama_model_dir:
        os.environ.setdefault("OLLAMA_MODELS", args.ollama_model_dir)

    if args.encoder_type == "hf_local" and args.embedding_cache_dir:
        os.environ.setdefault("HF_HOME", args.embedding_cache_dir)

    embedding_base_url = (
        build_base_url(args.embedding_base_url, args.embedding_port)
        if args.encoder_type == "ollama"
        else None
    )
    encoder_device = None if args.embedding_device == "auto" else args.embedding_device
    # Pass model via env for compatibility with older Encoder versions
    if args.embedding_model:
        os.environ["ENCODER_MODEL_NAME"] = args.embedding_model
    encoder = Encoder(
        encoder_type=args.encoder_type,
        api_base=embedding_base_url,
    )

    llm_provider = args.llm_provider.lower()
    if llm_provider == "ollama":
        llm_base_url = build_base_url(args.llm_url, args.llm_port)
        model_name = args.llm_model
        if not model_name.startswith("ollama/"):
            model_name = f"ollama/{model_name}"
        ollama_kwargs = {
            "base_url": llm_base_url,
            "temperature": args.llm_temperature,
            "top_p": args.llm_top_p,
            "model_type": "chat",
        }

        def build_lm(max_tokens: int):
            return LitellmModel(
                model=model_name,
                max_tokens=max_tokens,
                **ollama_kwargs,
            )

    elif llm_provider == "openai":
        openai_kwargs = {
            "api_key": os.getenv("OPENAI_API_KEY"),
            "api_provider": "openai",
            "temperature": args.llm_temperature,
            "top_p": args.llm_top_p,
            "api_base": None,
        }
        ModelClass = OpenAIModel
        gpt_4o_model_name = "gpt-4o"

        def build_lm(max_tokens: int):
            return ModelClass(
                model=gpt_4o_model_name, max_tokens=max_tokens, **openai_kwargs
            )

    elif llm_provider == "azure":
        openai_kwargs = {
            "api_key": os.getenv("AZURE_API_KEY"),
            "temperature": args.llm_temperature,
            "top_p": args.llm_top_p,
            "api_base": os.getenv("AZURE_API_BASE"),
            "api_version": os.getenv("AZURE_API_VERSION"),
        }
        ModelClass = AzureOpenAIModel
        gpt_4o_model_name = "gpt-4o"

        def build_lm(max_tokens: int):
            return ModelClass(
                model=gpt_4o_model_name, max_tokens=max_tokens, **openai_kwargs
            )

    else:
        raise ValueError(
            f'Invalid llm provider: {args.llm_provider}. Choose either "ollama", "openai", or "azure".'
        )

    question_answering_lm = build_lm(1000)
    discourse_manage_lm = build_lm(500)
    utterance_polishing_lm = build_lm(2000)
    warmstart_outline_gen_lm = build_lm(500)
    question_asking_lm = build_lm(300)
    knowledge_base_lm = build_lm(1000)

    lm_config.set_question_answering_lm(question_answering_lm)
    lm_config.set_discourse_manage_lm(discourse_manage_lm)
    lm_config.set_utterance_polishing_lm(utterance_polishing_lm)
    lm_config.set_warmstart_outline_gen_lm(warmstart_outline_gen_lm)
    lm_config.set_question_asking_lm(question_asking_lm)
    lm_config.set_knowledge_base_lm(knowledge_base_lm)

    topic = input("Topic: ")
    runner_argument = RunnerArgument(
        topic=topic,
        retrieve_top_k=args.retrieve_top_k,
        max_search_queries=args.max_search_queries,
        total_conv_turn=args.total_conv_turn,
        max_search_thread=args.max_search_thread,
        max_search_queries_per_turn=args.max_search_queries_per_turn,
        warmstart_max_num_experts=args.warmstart_max_num_experts,
        warmstart_max_turn_per_experts=args.warmstart_max_turn_per_experts,
        warmstart_max_thread=args.warmstart_max_thread,
        max_thread_num=args.max_thread_num,
        max_num_round_table_experts=args.max_num_round_table_experts,
        moderator_override_N_consecutive_answering_turn=args.moderator_override_N_consecutive_answering_turn,
        node_expansion_trigger_count=args.node_expansion_trigger_count,
    )
    logging_wrapper = LoggingWrapper(lm_config)
    callback_handler = (
        LocalConsolePrintCallBackHandler() if args.enable_log_print else None
    )

    match args.retriever:
        case "bing":
            rm = BingSearch(
                bing_search_api=os.getenv("BING_SEARCH_API_KEY"),
                k=runner_argument.retrieve_top_k,
            )
        case "you":
            rm = YouRM(
                ydc_api_key=os.getenv("YDC_API_KEY"), k=runner_argument.retrieve_top_k
            )
        case "brave":
            rm = BraveRM(
                brave_search_api_key=os.getenv("BRAVE_API_KEY"),
                k=runner_argument.retrieve_top_k,
            )
        case "duckduckgo":
            rm = DuckDuckGoSearchRM(
                k=runner_argument.retrieve_top_k, safe_search="On", region="us-en"
            )
        case "serper":
            rm = SerperRM(
                serper_search_api_key=os.getenv("SERPER_API_KEY"),
                query_params={"autocorrect": True, "num": 10, "page": 1},
            )
        case "tavily":
            rm = TavilySearchRM(
                tavily_search_api_key=os.getenv("TAVILY_API_KEY"),
                k=runner_argument.retrieve_top_k,
                include_raw_content=True,
            )
        case "searxng":
            rm = SearXNG(
                searxng_api_key=os.getenv("SEARXNG_API_KEY"),
                k=runner_argument.retrieve_top_k,
            )
        case _:
            raise ValueError(
                f'Invalid retriever: {args.retriever}. Choose either "bing", "you", "brave", "duckduckgo", "serper", "tavily", or "searxng"'
            )

    os.makedirs(args.output_dir, exist_ok=True)
    costorm_runner = CoStormRunner(
        lm_config=lm_config,
        runner_argument=runner_argument,
        logging_wrapper=logging_wrapper,
        rm=rm,
        encoder=encoder,
        callback_handler=callback_handler,
    )

    article = None
    instance_copy = None
    log_dump = None
    error_payload = None
    error_exc = None

    try:
        costorm_runner.warm_start()

        for _ in range(1):
            conv_turn = costorm_runner.step()
            print(f"**{conv_turn.role}**: {conv_turn.utterance}\n")

        your_utterance = input("Your utterance: ")
        costorm_runner.step(user_utterance=your_utterance)

        conv_turn = costorm_runner.step()
        print(f"**{conv_turn.role}**: {conv_turn.utterance}\n")

        costorm_runner.knowledge_base.reorganize()
        article = costorm_runner.generate_report()
    except Exception as exc:
        error_payload = {
            "error": str(exc),
            "traceback": traceback.format_exc(),
        }
        print(f"Run failed: {exc}")
        error_exc = exc
    finally:
        try:
            instance_copy = costorm_runner.to_dict()
        except Exception as e:
            instance_copy = instance_copy or {"error": f"instance_dump_failed: {e}"}
        try:
            log_dump = costorm_runner.dump_logging_and_reset()
        except Exception as e:
            log_dump = log_dump or {"error": f"log_dump_failed: {e}"}

        if article is not None:
            with open(os.path.join(args.output_dir, "report.md"), "w") as f:
                f.write(article)

        if instance_copy is not None:
            with open(os.path.join(args.output_dir, "instance_dump.json"), "w") as f:
                json.dump(instance_copy, f, indent=2)

        if log_dump is not None:
            with open(os.path.join(args.output_dir, "log.json"), "w") as f:
                json.dump(log_dump, f, indent=2)

        if error_payload is not None:
            with open(os.path.join(args.output_dir, "error.json"), "w") as f:
                json.dump(error_payload, f, indent=2)
            if error_exc is not None:
                raise error_exc


In [3]:

# Command-line style arguments for this run; edit to match your environment.
# Any option left out falls back to the default declared in the table below.
arg_list = [
    "--retriever", "tavily",
    "--llm-provider", "ollama",
    "--llm-model", "gpt-oss:120b",
    "--llm-url", "http://localhost",
    "--llm-port", "11434",
    "--encoder-type", "hf_local",
    "--embedding-model", "/data/models/nvidia-llama-embed-nemotron-8b",
    "--secrets-file", "/data/coscientist/secrets.toml",
    "--output-dir", "./results/co-storm-notebook",
    "--enable_log_print",
]

# Option table: (flag, add_argument keyword arguments), registered in order.
_ARGUMENT_SPECS = [
    # --- output ---
    (
        "--output-dir",
        {
            "type": str,
            "default": "./results/co-storm",
            "help": "Directory to store the outputs.",
        },
    ),
    # --- language model ---
    (
        "--llm-provider",
        {
            "type": str,
            "choices": ["ollama", "openai", "azure"],
            "default": "ollama",
            "help": "LLM provider to use.",
        },
    ),
    (
        "--llm-model",
        {
            "type": str,
            "default": "gpt-oss:120b",
            "help": "Model name for the selected LLM provider (for Ollama, omit the 'ollama/' prefix).",
        },
    ),
    (
        "--llm-url",
        {
            "type": str,
            "default": "http://localhost",
            "help": "Base URL for the LLM service (used for Ollama).",
        },
    ),
    (
        "--llm-port",
        {
            "type": int,
            "default": 11434,
            "help": "Port for the LLM service (used for Ollama).",
        },
    ),
    (
        "--ollama-model-dir",
        {
            "type": str,
            "default": "/data/ollama/models",
            "help": "Directory where Ollama should store models.",
        },
    ),
    (
        "--llm-temperature",
        {
            "type": float,
            "default": 1.0,
            "help": "Sampling temperature for the LLM.",
        },
    ),
    (
        "--llm-top-p",
        {
            "type": float,
            "default": 0.9,
            "help": "Top-p for nucleus sampling.",
        },
    ),
    # --- embeddings ---
    (
        "--encoder-type",
        {
            "type": str,
            "choices": ["hf_local", "ollama", "openai", "azure"],
            "default": "hf_local",
            "help": "Embedding backend to use.",
        },
    ),
    (
        "--embedding-model",
        {
            "type": str,
            "default": "/data/models/nvidia-llama-embed-nemotron-8b",
            "help": "Embedding model name or local path.",
        },
    ),
    (
        "--embedding-base-url",
        {
            "type": str,
            "default": "http://localhost",
            "help": "Base URL for embedding service when encoder-type is ollama.",
        },
    ),
    (
        "--embedding-port",
        {
            "type": int,
            "default": 11434,
            "help": "Port for embedding service when encoder-type is ollama.",
        },
    ),
    (
        "--embedding-device",
        {
            "type": str,
            "default": "auto",
            "help": "Device for local embeddings (auto, cpu, cuda).",
        },
    ),
    (
        "--embedding-cache-dir",
        {
            "type": str,
            "default": "/data/models",
            "help": "Cache directory / HF_HOME for local embedding models.",
        },
    ),
    # --- secrets and retrieval ---
    (
        "--secrets-file",
        {
            "type": str,
            "default": "/data/coscientist/secrets.toml",
            "help": "Path to secrets.toml for API keys.",
        },
    ),
    (
        "--retriever",
        {
            "type": str,
            "choices": ["bing", "you", "brave", "serper", "duckduckgo", "tavily", "searxng"],
            "default": "duckduckgo",
            "help": "The search engine API to use for retrieving information.",
        },
    ),
    # --- runner arguments ---
    (
        "--retrieve_top_k",
        {
            "type": int,
            "default": 10,
            "help": "Retrieve top k results for each query in retriever.",
        },
    ),
    (
        "--max_search_queries",
        {
            "type": int,
            "default": 2,
            "help": "Maximum number of search queries to consider for each question.",
        },
    ),
    (
        "--total_conv_turn",
        {
            "type": int,
            "default": 20,
            "help": "Maximum number of turns in conversation.",
        },
    ),
    (
        "--max_search_thread",
        {
            "type": int,
            "default": 5,
            "help": "Maximum number of parallel threads for retriever.",
        },
    ),
    (
        "--max_search_queries_per_turn",
        {
            "type": int,
            "default": 3,
            "help": "Maximum number of search queries to consider in each turn.",
        },
    ),
    (
        "--warmstart_max_num_experts",
        {
            "type": int,
            "default": 3,
            "help": "Max number of experts in perspective-guided QA during warm start.",
        },
    ),
    (
        "--warmstart_max_turn_per_experts",
        {
            "type": int,
            "default": 2,
            "help": "Max number of turns per perspective during warm start.",
        },
    ),
    (
        "--warmstart_max_thread",
        {
            "type": int,
            "default": 3,
            "help": "Max number of threads for parallel perspective-guided QA during warm start.",
        },
    ),
    (
        "--max_thread_num",
        {
            "type": int,
            "default": 10,
            "help": (
                "Maximum number of threads to use. "
                "Consider reducing it if you keep getting 'Exceed rate limit' errors when calling the LM API."
            ),
        },
    ),
    (
        "--max_num_round_table_experts",
        {
            "type": int,
            "default": 2,
            "help": "Max number of active experts in round table discussion.",
        },
    ),
    (
        "--moderator_override_N_consecutive_answering_turn",
        {
            "type": int,
            "default": 3,
            "help": (
                "Number of consecutive expert answering turns before the moderator overrides the conversation."
            ),
        },
    ),
    (
        "--node_expansion_trigger_count",
        {
            "type": int,
            "default": 10,
            "help": "Trigger node expansion for nodes that contain more than N snippets.",
        },
    ),
    # --- logging ---
    (
        "--enable_log_print",
        {
            "action": "store_true",
            "help": "If set, enable console log print.",
        },
    ),
]

parser = ArgumentParser()
for _flag, _options in _ARGUMENT_SPECS:
    parser.add_argument(_flag, **_options)

# Parse the list above (not sys.argv) and start the run.
args = parser.parse_args(arg_list)
main(args)


sentence_transformers.SentenceTransformer : INFO     : Load pretrained SentenceTransformer: /data/models/nvidia-llama-embed-nemotron-8b
Loading checkpoint shards: 100%|██████████████████| 4/4 [00:01<00:00,  2.08it/s]
sentence_transformers.SentenceTransformer : INFO     : 1 prompt is loaded, with the key: query


Topic:  거대 언어모델에 대해서 설명해줘. 기술 특징에 대해서 조사해야해.


[92m08:39:46 - LiteLLM:INFO[0m: utils.py:3427 - 
LiteLLM completion() model= gpt-oss:120b; provider = ollama
LiteLLM : INFO     : 
LiteLLM completion() model= gpt-oss:120b; provider = ollama


Warm start update: Start getting familiar with the topic by chatting with multiple LLM experts (Step 1 / 4)


  PydanticSerializationUnexpectedValue(Expected 10 fields but got 6: Expected `Message` - serialized value may not be as expected [field_name='message', input_value=Message(content='Topic co... reasoning_content=None), input_type=Message])
  PydanticSerializationUnexpectedValue(Expected `StreamingChoices` - serialized value may not be as expected [field_name='choices', input_value=Choices(finish_reason='st...reasoning_content=None)), input_type=Choices])
  return self.__pydantic_serializer__.to_json(
[92m08:40:32 - LiteLLM:INFO[0m: utils.py:1307 - Wrapper: Completed Call, calling success_handler
LiteLLM : INFO     : Wrapper: Completed Call, calling success_handler
  PydanticSerializationUnexpectedValue(Expected 10 fields but got 6: Expected `Message` - serialized value may not be as expected [field_name='message', input_value=Message(content='Topic co... reasoning_content=None), input_type=Message])
  PydanticSerializationUnexpectedValue(Expected `StreamingChoices` - serialized value

Error processing expert AI 연구자 (Transformer 및 모델 구조 전문가): 대규모 언어 모델의 핵심인 Transformer 아키텍처와 self‑attention 메커니즘, 파라미터 규모가 성능에 미치는 영향 등을 과학적 근거와 최신 연구 결과를 바탕으로 설명하고, 기술적 한계와 향후 발전 방향을 제시합니다.: Query is invalid.
Error processing expert AI 연구자 (Transformer 및 모델 구조 전문가): 대규모 언어 모델의 핵심인 Transformer 아키텍처와 self‑attention 메커니즘, 파라미터 규모가 성능에 미치는 영향 등을 과학적 근거와 최신 연구 결과를 바탕으로 설명하고, 기술적 한계와 향후 발전 방향을 제시합니다.: Query is invalid.


  PydanticSerializationUnexpectedValue(Expected 10 fields but got 6: Expected `Message` - serialized value may not be as expected [field_name='message', input_value=Message(content='대규... reasoning_content=None), input_type=Message])
  PydanticSerializationUnexpectedValue(Expected `StreamingChoices` - serialized value may not be as expected [field_name='choices', input_value=Choices(finish_reason='st...reasoning_content=None)), input_type=Choices])
  return self.__pydantic_serializer__.to_json(
[92m08:41:40 - LiteLLM:INFO[0m: utils.py:1307 - Wrapper: Completed Call, calling success_handler
LiteLLM : INFO     : Wrapper: Completed Call, calling success_handler
  PydanticSerializationUnexpectedValue(Expected 10 fields but got 6: Expected `Message` - serialized value may not be as expected [field_name='message', input_value=Message(content='대규... reasoning_content=None), input_type=Message])
  PydanticSerializationUnexpectedValue(Expected `StreamingChoices` - serialized value may not be 

Warm start update: Finish browsing https://aws.amazon.com/ko/blogs/tech/hyperaccel-fpga-on-aws/
Finish browsing https://velog.io/@hbcho/LLM-%EC%B6%94%EB%A1%A0-%EB%B9%84%EC%9A%A9-%EA%B5%AC%EC%A1%B0-%EC%99%84%EB%B2%BD-%EC%9D%B4%ED%95%B4-%EC%9D%B8%ED%92%8B%EA%B3%BC-%EC%95%84%EC%9B%83%ED%92%8B%EC%9D%80-%EC%99%9C-%EB%8B%A4%EB%A5%B4%EA%B2%8C-%EA%B3%BC%EA%B8%88%EB%90%A0%EA%B9%8C
Finish browsing https://blog.scatterlab.co.kr/serving-architecture-3
Finish browsing https://dytis.tistory.com/68
Finish browsing https://www.reddit.com/r/LocalLLaMA/comments/1cqu4zf/cloud_gpus_for_llm_finetuning_storage_cost_seems/?tl=ko
Finish browsing https://www.akamai.com/ko/glossary/what-is-a-large-language-model
Finish browsing https://www.oreilly.com/library/view/daegyumo-eoneo-modeleul/0642572313845/ch05.html
Finish browsing https://www.snowflake.com/ko/fundamentals/large-language-model/
Finish browsing https://gptskorea.com/BLOG/?idx=93998061&bmode=view
Finish browsing https://www.elastic.co/kr/what-is/large

[92m08:41:50 - LiteLLM:INFO[0m: utils.py:1307 - Wrapper: Completed Call, calling success_handler
LiteLLM : INFO     : Wrapper: Completed Call, calling success_handler
[92m08:41:51 - LiteLLM:INFO[0m: utils.py:3427 - 
LiteLLM completion() model= gpt-oss:120b; provider = ollama
LiteLLM : INFO     : 
LiteLLM completion() model= gpt-oss:120b; provider = ollama


Warm start update: Finish browsing https://culture.kookmin.ac.kr/gulmal/contests/1/applicants/179/attach
Finish browsing http://www.unfuture.org/1528
Finish browsing http://www.docdocdoc.co.kr/news/articleView.html?idxno=3024859
Finish browsing https://www.genetec.com/ko/blog/cybersecurity/the-implications-of-large-language-models-in-physical-security
Finish browsing https://blog.naver.com/wisestone2007/223398205862?viewType=pc


  PydanticSerializationUnexpectedValue(Expected 10 fields but got 6: Expected `Message` - serialized value may not be as expected [field_name='message', input_value=Message(content='', role=... reasoning_content=None), input_type=Message])
  PydanticSerializationUnexpectedValue(Expected `StreamingChoices` - serialized value may not be as expected [field_name='choices', input_value=Choices(finish_reason='st...reasoning_content=None)), input_type=Choices])
  return self.__pydantic_serializer__.to_json(
[92m08:41:54 - LiteLLM:INFO[0m: utils.py:1307 - Wrapper: Completed Call, calling success_handler
LiteLLM : INFO     : Wrapper: Completed Call, calling success_handler
  PydanticSerializationUnexpectedValue(Expected 10 fields but got 6: Expected `Message` - serialized value may not be as expected [field_name='message', input_value=Message(content='', role=... reasoning_content=None), input_type=Message])
  PydanticSerializationUnexpectedValue(Expected `StreamingChoices` - serialized value

Error processing expert 윤리·정책 전문가 (사회·법적 영향 분석가): 거대 언어 모델의 윤리적 위험성, 데이터 프라이버시, 편향 및 책임 문제를 조명하고, 규제 프레임워크와 기업·정부 차원의 책임 있는 AI 활용 방안을 제시합니다.: Query is invalid.


[92m08:42:26 - LiteLLM:INFO[0m: utils.py:1307 - Wrapper: Completed Call, calling success_handler
LiteLLM : INFO     : Wrapper: Completed Call, calling success_handler
[92m08:42:26 - LiteLLM:INFO[0m: utils.py:3427 - 
LiteLLM completion() model= gpt-oss:120b; provider = ollama
LiteLLM : INFO     : 
LiteLLM completion() model= gpt-oss:120b; provider = ollama


Warm start update: Finish browsing https://medium.com/@hugmanskj/%EA%B1%B0%EB%8C%80-%EC%96%B8%EC%96%B4-%EB%AA%A8%EB%8D%B8-large-language-model-%EC%97%90-%EB%8C%80%ED%95%9C-%EC%9D%B4%ED%95%B4-llm%EC%9D%98-%EA%B8%B0%EC%A4%80%EA%B3%BC-%ED%8A%B9%EC%A7%95%EC%9D%80-%EB%AC%B4%EC%97%87%EC%9D%BC%EA%B9%8C-0551b7b9d3bd
Finish browsing https://aws.amazon.com/ko/what-is/large-language-model/
Finish browsing https://www.lgresearch.ai/blog/view?seq=351
Finish browsing https://blog.naver.com/jack0604/223476644379
Finish browsing https://ko.wikipedia.org/wiki/%EB%8C%80%ED%98%95_%EC%96%B8%EC%96%B4_%EB%AA%A8%EB%8D%B8
Finish browsing https://imasoftwareengineer.tistory.com/106
Finish browsing https://medium.com/@hugmanskj/%EA%B1%B0%EB%8C%80-%EC%96%B8%EC%96%B4-%EB%AA%A8%EB%8D%B8-large-language-model-%EC%97%90-%EB%8C%80%ED%95%9C-%EC%9D%B4%ED%95%B4-llm%EC%9D%98-%EA%B8%B0%EC%A4%80%EA%B3%BC-%ED%8A%B9%EC%A7%95%EC%9D%80-%EB%AC%B4%EC%97%87%EC%9D%BC%EA%B9%8C-0551b7b9d3bd
Finish browsing https://aws.amazon.com/ko/w

  PydanticSerializationUnexpectedValue(Expected 10 fields but got 6: Expected `Message` - serialized value may not be as expected [field_name='message', input_value=Message(content='Write th... reasoning_content=None), input_type=Message])
  PydanticSerializationUnexpectedValue(Expected `StreamingChoices` - serialized value may not be as expected [field_name='choices', input_value=Choices(finish_reason='st...reasoning_content=None)), input_type=Choices])
  return self.__pydantic_serializer__.to_json(
[92m08:42:33 - LiteLLM:INFO[0m: utils.py:1307 - Wrapper: Completed Call, calling success_handler
LiteLLM : INFO     : Wrapper: Completed Call, calling success_handler
  PydanticSerializationUnexpectedValue(Expected 10 fields but got 6: Expected `Message` - serialized value may not be as expected [field_name='message', input_value=Message(content='Write th... reasoning_content=None), input_type=Message])
  PydanticSerializationUnexpectedValue(Expected `StreamingChoices` - serialized value

Warm start update: Inserting collected information into knowledge base (Step 3 / 4)


[92m08:42:41 - LiteLLM:INFO[0m: utils.py:3427 - 
LiteLLM completion() model= gpt-oss:120b; provider = ollama
LiteLLM : INFO     : 
LiteLLM completion() model= gpt-oss:120b; provider = ollama
[92m08:42:41 - LiteLLM:INFO[0m: utils.py:3427 - 
LiteLLM completion() model= gpt-oss:120b; provider = ollama
LiteLLM : INFO     : 
LiteLLM completion() model= gpt-oss:120b; provider = ollama
  PydanticSerializationUnexpectedValue(Expected 10 fields but got 6: Expected `Message` - serialized value may not be as expected [field_name='message', input_value=Message(content='Best pla... reasoning_content=None), input_type=Message])
  PydanticSerializationUnexpectedValue(Expected `StreamingChoices` - serialized value may not be as expected [field_name='choices', input_value=Choices(finish_reason='st...reasoning_content=None)), input_type=Choices])
  return self.__pydantic_serializer__.to_json(
[92m08:42:43 - LiteLLM:INFO[0m: utils.py:1307 - Wrapper: Completed Call, calling success_handler
LiteLLM :

Warm start update: Synthesizing background information discussion utterances (Step 4 / 4)


  PydanticSerializationUnexpectedValue(Expected 10 fields but got 6: Expected `Message` - serialized value may not be as expected [field_name='message', input_value=Message(content='대규... reasoning_content=None), input_type=Message])
  PydanticSerializationUnexpectedValue(Expected `StreamingChoices` - serialized value may not be as expected [field_name='choices', input_value=Choices(finish_reason='st...reasoning_content=None)), input_type=Choices])
  return self.__pydantic_serializer__.to_json(
[92m08:43:19 - LiteLLM:INFO[0m: utils.py:1307 - Wrapper: Completed Call, calling success_handler
LiteLLM : INFO     : Wrapper: Completed Call, calling success_handler
  PydanticSerializationUnexpectedValue(Expected 10 fields but got 6: Expected `Message` - serialized value may not be as expected [field_name='message', input_value=Message(content='대규... reasoning_content=None), input_type=Message])
  PydanticSerializationUnexpectedValue(Expected `StreamingChoices` - serialized value may not be 

Start planning next expert; inspect mind map; inspect system state.


  PydanticSerializationUnexpectedValue(Expected 10 fields but got 6: Expected `Message` - serialized value may not be as expected [field_name='message', input_value=Message(content='**Brief ... reasoning_content=None), input_type=Message])
  PydanticSerializationUnexpectedValue(Expected `StreamingChoices` - serialized value may not be as expected [field_name='choices', input_value=Choices(finish_reason='st...reasoning_content=None)), input_type=Choices])
  return self.__pydantic_serializer__.to_json(
[92m08:44:08 - LiteLLM:INFO[0m: utils.py:1307 - Wrapper: Completed Call, calling success_handler
LiteLLM : INFO     : Wrapper: Completed Call, calling success_handler
  PydanticSerializationUnexpectedValue(Expected 10 fields but got 6: Expected `Message` - serialized value may not be as expected [field_name='message', input_value=Message(content='**Brief ... reasoning_content=None), input_type=Message])
  PydanticSerializationUnexpectedValue(Expected `StreamingChoices` - serialized value

Start inserting information into mind map.
Finish inserting information into mind map.
**Moderator**: Building on that foundation, I’m curious—how are emerging tricks such as model sparsity, quantization, and retrieval‑augmented generation being used to trim inference costs while still preserving the performance gains we see with these



  PydanticSerializationUnexpectedValue(Expected 10 fields but got 6: Expected `Message` - serialized value may not be as expected [field_name='message', input_value=Message(content='Building... reasoning_content=None), input_type=Message])
  PydanticSerializationUnexpectedValue(Expected `StreamingChoices` - serialized value may not be as expected [field_name='choices', input_value=Choices(finish_reason='st...reasoning_content=None)), input_type=Choices])
  return self.__pydantic_serializer__.to_python(


Your utterance:  좋은 방향입니다.


[92m08:45:33 - LiteLLM:INFO[0m: utils.py:3427 - 
LiteLLM completion() model= gpt-oss:120b; provider = ollama
LiteLLM : INFO     : 
LiteLLM completion() model= gpt-oss:120b; provider = ollama


Start planning next expert; inspect mind map; inspect system state.
Reviewing discourse history; Deciding utterance intent.
Start searching with the search engine; browsing collected information.


  PydanticSerializationUnexpectedValue(Expected 10 fields but got 6: Expected `Message` - serialized value may not be as expected [field_name='message', input_value=Message(content='Topic co... reasoning_content=None), input_type=Message])
  PydanticSerializationUnexpectedValue(Expected `StreamingChoices` - serialized value may not be as expected [field_name='choices', input_value=Choices(finish_reason='st...reasoning_content=None)), input_type=Choices])
  return self.__pydantic_serializer__.to_json(
[92m08:45:38 - LiteLLM:INFO[0m: utils.py:1307 - Wrapper: Completed Call, calling success_handler
LiteLLM : INFO     : Wrapper: Completed Call, calling success_handler
  PydanticSerializationUnexpectedValue(Expected 10 fields but got 6: Expected `Message` - serialized value may not be as expected [field_name='message', input_value=Message(content='Topic co... reasoning_content=None), input_type=Message])
  PydanticSerializationUnexpectedValue(Expected `StreamingChoices` - serialized value

Finish browsing https://www.snowflake.com/ko/fundamentals/large-language-model/
Finish browsing https://ahha.ai/2024/07/17/llm/
Finish browsing https://www.mfitlab.com/solutions/blog/llm
Finish browsing https://www.bureauworks.com/ko/blog/daegyumo-eoneo-modeli-mueosingayo
Finish browsing https://pmc.ncbi.nlm.nih.gov/articles/PMC11473987/
Finish browsing https://medium.com/@hugmanskj/%EA%B1%B0%EB%8C%80-%EC%96%B8%EC%96%B4-%EB%AA%A8%EB%8D%B8-large-language-model-%EC%97%90-%EB%8C%80%ED%95%9C-%EC%9D%B4%ED%95%B4-llm%EC%9D%98-%EA%B8%B0%EC%A4%80%EA%B3%BC-%ED%8A%B9%EC%A7%95%EC%9D%80-%EB%AC%B4%EC%97%87%EC%9D%BC%EA%B9%8C-0551b7b9d3bd
Finish browsing https://aws.amazon.com/ko/what-is/large-language-model/
Finish browsing https://www.lgresearch.ai/blog/view?seq=351
Finish browsing https://blog.naver.com/jack0604/223476644379
Finish browsing https://ko.wikipedia.org/wiki/%EB%8C%80%ED%98%95_%EC%96%B8%EC%96%B4_%EB%AA%A8%EB%8D%B8


  PydanticSerializationUnexpectedValue(Expected 10 fields but got 6: Expected `Message` - serialized value may not be as expected [field_name='message', input_value=Message(content='거대 ... reasoning_content=None), input_type=Message])
  PydanticSerializationUnexpectedValue(Expected `StreamingChoices` - serialized value may not be as expected [field_name='choices', input_value=Choices(finish_reason='st...reasoning_content=None)), input_type=Choices])
  return self.__pydantic_serializer__.to_json(
[92m08:45:53 - LiteLLM:INFO[0m: utils.py:1307 - Wrapper: Completed Call, calling success_handler
LiteLLM : INFO     : Wrapper: Completed Call, calling success_handler
  PydanticSerializationUnexpectedValue(Expected 10 fields but got 6: Expected `Message` - serialized value may not be as expected [field_name='message', input_value=Message(content='거대 ... reasoning_content=None), input_type=Message])
  PydanticSerializationUnexpectedValue(Expected `StreamingChoices` - serialized value may not b

Finish generating utterance from collected information.
Start polishing utterance.


  PydanticSerializationUnexpectedValue(Expected 10 fields but got 6: Expected `Message` - serialized value may not be as expected [field_name='message', input_value=Message(content='맞아... reasoning_content=None), input_type=Message])
  PydanticSerializationUnexpectedValue(Expected `StreamingChoices` - serialized value may not be as expected [field_name='choices', input_value=Choices(finish_reason='st...reasoning_content=None)), input_type=Choices])
  return self.__pydantic_serializer__.to_json(
[92m08:46:01 - LiteLLM:INFO[0m: utils.py:1307 - Wrapper: Completed Call, calling success_handler
LiteLLM : INFO     : Wrapper: Completed Call, calling success_handler
  PydanticSerializationUnexpectedValue(Expected 10 fields but got 6: Expected `Message` - serialized value may not be as expected [field_name='message', input_value=Message(content='맞아... reasoning_content=None), input_type=Message])
  PydanticSerializationUnexpectedValue(Expected `StreamingChoices` - serialized value may not be 

Start inserting information into mind map.


  PydanticSerializationUnexpectedValue(Expected 10 fields but got 6: Expected `Message` - serialized value may not be as expected [field_name='message', input_value=Message(content='', role=... reasoning_content=None), input_type=Message])
  PydanticSerializationUnexpectedValue(Expected `StreamingChoices` - serialized value may not be as expected [field_name='choices', input_value=Choices(finish_reason='st...reasoning_content=None)), input_type=Choices])
  return self.__pydantic_serializer__.to_python(
  PydanticSerializationUnexpectedValue(Expected 10 fields but got 6: Expected `Message` - serialized value may not be as expected [field_name='message', input_value=Message(content='Best pla... reasoning_content=None), input_type=Message])
  PydanticSerializationUnexpectedValue(Expected `StreamingChoices` - serialized value may not be as expected [field_name='choices', input_value=Choices(finish_reason='st...reasoning_content=None)), input_type=Choices])
  return self.__pydantic_serializ

Finish inserting information into mind map.
**General Knowledge Provider**: 맞아요, 거대 언어모델(Large Language Model, LLM)은 수십 ~ 수조 파라미터와 수천억 ~ 수조 토큰 규모의 텍스트 코퍼스를 사전 학습해 다음 토큰을 예측함으로써 맥락·문맥을 깊이 이해하고 고급 추론까지 수행합니다[26][31].  

- **전이·Few‑shot·Zero‑shot**: 사전 학습된 지식을 그대로 다양한 다운스트림 작업에 적용하고, 별도 미세조정 없이도 몇 개의 예시만으로 새로운 작업을 해결합니다[28].  
- **다목적·유연성**: 하나의 모델이 질의응답, 요약, 번역, 문장 완성 등 서로 다른 작업을 동일 파라미터 집합으로 처리합니다[29].  
- **규모**: 예를 들어 Google PaLM 2는 340 billion 파라미터·3.6 trillion 토큰, Meta LLaMA는 65 billion 파라미터·1.4 trillion 토큰으로 학습되었습니다[32].  

**산업 적용**  
- 데이터 사이언스 → 감성 분석·NLU·비구조화 데이터 구조화 등 자동화·고도화[23][25].  
- 제조업 → 예측 유지보수·품질 관리·프로세스 최적화[24].  
- 고객 서비스·교육·의료·창작 → 맞춤형 챗봇·자동 번역·콘텐츠 생성 등 혁신적 서비스 제공[26][27].  

요약하면, LLM은 대규모 파라미터·토큰 기반 다중 작업 학습을 통해 문맥 이해·추론·전이 학습을 구현하고, 다양한 산업에서 업무 효율성과 혁신을 촉진하는 핵심 기술이라 할 수 있습니다[23][28][31].



  PydanticSerializationUnexpectedValue(Expected 10 fields but got 6: Expected `Message` - serialized value may not be as expected [field_name='message', input_value=Message(content='Scaling ... reasoning_content=None), input_type=Message])
  PydanticSerializationUnexpectedValue(Expected `StreamingChoices` - serialized value may not be as expected [field_name='choices', input_value=Choices(finish_reason='st...reasoning_content=None)), input_type=Choices])
  return self.__pydantic_serializer__.to_json(
[92m08:46:23 - LiteLLM:INFO[0m: utils.py:1307 - Wrapper: Completed Call, calling success_handler
LiteLLM : INFO     : Wrapper: Completed Call, calling success_handler
  PydanticSerializationUnexpectedValue(Expected 10 fields but got 6: Expected `Message` - serialized value may not be as expected [field_name='message', input_value=Message(content='Scaling ... reasoning_content=None), input_type=Message])
  PydanticSerializationUnexpectedValue(Expected `StreamingChoices` - serialized value