In [1]:
from typing_extensions import TypedDict
from langgraph.graph import StateGraph, START, END
from IPython.display import Image, display
from langchain_community.chat_models.tongyi import ChatTongyi

llm = ChatTongyi( # type: ignore
    model="qwen3-1.7b", # 这里用qwen3-0.6b似乎不支持structed_llm
    model_kwargs={
        "temperature": 0.0,
        "enable_thinking": False,
    },
)

In [2]:
from typing import Annotated, List
import operator
from pydantic import BaseModel, Field

# Schema for structured output to use in planning
class Section(BaseModel):
    name: str = Field(
        description="Name for this section of the report.",
    )
    description: str = Field(
        description="Brief overview of the main topics and concepts to be covered in this section.",
    )


class Sections(BaseModel):
    sections: List[Section] = Field(
        description="Sections of the report.",
    )


# Augment the LLM with schema for structured output
planner = llm.with_structured_output(Sections)

编排器-工作节点工作流很常见，LangGraph 内置了对它们的支持。Send Send 允许您动态创建工作节点并向其发送特定的输入。每个工作节点都有自己的状态，所有工作节点的输出都会写入一个共享的状态键，编排器图可以访问该状态键。这使得编排器能够访问所有工作节点的输出，并将它们合成为最终输出。下面的示例遍历一个节列表，并使用 Send API 将节发送给每个工作节点。

In [3]:
from langgraph.types import Send

In [4]:
# Graph state
class State(TypedDict):
    topic: str  # Report topic
    sections: list[Section]  # List of report sections
    completed_sections: Annotated[
        list, operator.add
    ]  # All workers write to this key in parallel
    final_report: str  # Final report


# Worker state
class WorkerState(TypedDict):
    section: Section
    completed_sections: Annotated[list, operator.add]

In [5]:
from langchain.messages import SystemMessage, HumanMessage
# Nodes
def orchestrator(state: State):
    """Orchestrator that generates a plan for the report"""

    # Generate queries
    report_sections = planner.invoke(
        [
            SystemMessage(content="Generate a plan for the report."),
            HumanMessage(content=f"Here is the report topic: {state['topic']}"),
        ]
    )

    return {"sections": report_sections.sections}


def llm_call(state: WorkerState):
    """Worker writes a section of the report"""

    # Generate section
    section = llm.invoke(
        [
            SystemMessage(
                content="Write a report section following the provided name and description. Include no preamble for each section. Use markdown formatting."
            ),
            HumanMessage(
                content=f"Here is the section name: {state['section'].name} and description: {state['section'].description}"
            ),
        ]
    )

    # Write the updated section to completed sections
    return {"completed_sections": [section.content]}


def synthesizer(state: State):
    """Synthesize full report from sections"""

    # List of completed sections
    completed_sections = state["completed_sections"]

    # Format completed section to str to use as context for final sections
    completed_report_sections = "\n\n---\n\n".join(completed_sections)

    return {"final_report": completed_report_sections}


# Conditional edge function to create llm_call workers that each write a section of the report
def assign_workers(state: State):
    """Assign a worker to each section in the plan"""

    # Kick off section writing in parallel via Send() API
    return [Send("llm_call", {"section": s}) for s in state["sections"]]



In [6]:
# Build workflow
orchestrator_worker_builder = StateGraph(State)

In [7]:
# Add the nodes
orchestrator_worker_builder.add_node("orchestrator", orchestrator)
orchestrator_worker_builder.add_node("llm_call", llm_call)
orchestrator_worker_builder.add_node("synthesizer", synthesizer)

# Add edges to connect nodes
orchestrator_worker_builder.add_edge(START, "orchestrator")
orchestrator_worker_builder.add_conditional_edges(
    "orchestrator", assign_workers, ["llm_call"]
)
orchestrator_worker_builder.add_edge("llm_call", "synthesizer")
orchestrator_worker_builder.add_edge("synthesizer", END)

# Compile the workflow
orchestrator_worker = orchestrator_worker_builder.compile()

In [8]:
# Show the workflow
display(Image(orchestrator_worker.get_graph().draw_mermaid_png()))

ValueError: Failed to reach https://mermaid.ink API while trying to render your graph after 1 retries. To resolve this issue:
1. Check your internet connection and try again
2. Try with higher retry settings: `draw_mermaid_png(..., max_retries=5, retry_delay=2.0)`
3. Use the Pyppeteer rendering method which will render your graph locally in a browser: `draw_mermaid_png(..., draw_method=MermaidDrawMethod.PYPPETEER)`

In [9]:
# Invoke
state = orchestrator_worker.invoke({"topic": "Create a report on LLM scaling laws in chinese"})

from IPython.display import Markdown
Markdown(state["final_report"])


The introduction to Large Language Models (LLMs) provides a foundational understanding of their architecture, capabilities, and the role they play in modern natural language processing. LLMs are trained on vast amounts of text data, allowing them to understand and generate human-like text across a wide range of topics and contexts. These models are typically based on transformer architectures, which enable efficient parallel processing of large volumes of data, leading to improved performance and scalability.

Scaling laws are central to the study of LLMs and their performance characteristics. These laws describe how model performance, such as accuracy, efficiency, and generalization ability, changes with increasing model size. Generally, there is a trend where larger models achieve better performance, but this improvement often comes at the cost of increased computational resources and training time. The relationship between model size and performance is not linear, and factors such as training data, optimization techniques, and model architecture significantly influence the outcomes.

Understanding scaling laws is crucial for researchers and practitioners aiming to develop more efficient and effective LLMs. It helps in making informed decisions about model size, training strategies, and resource allocation, ensuring that the benefits of larger models are realized without excessive computational overhead.

---

**Key Scaling Laws in LLMs**

Scaling laws describe how the performance and efficiency of large language models (LLMs) change with increasing model size, training data, and computational resources. These laws are critical for understanding the trade-offs involved in deploying and optimizing LLMs.

1. **Training Data Scaling**: The performance of LLMs typically scales with the amount of training data. As more data is added, the model becomes better at capturing complex patterns, improving its ability to generate coherent and contextually relevant responses. However, the rate of improvement diminishes over time, a phenomenon known as the "curse of dimensionality." This means that while more data leads to better performance, the marginal gains decrease as the dataset grows larger.

2. **Model Size Scaling**: The performance of LLMs often scales with the number of parameters in the model. Larger models generally have better accuracy and generalization capabilities, but they also require more computational resources and memory. The relationship between model size and performance is not linear; instead, it follows a non-linear trend. For example, doubling the number of parameters may result in a significant increase in performance, but the gains become less pronounced as the model size increases further.

3. **Computational Resource Scaling**: The computational requirements for training and inference scale with the model size and the complexity of the tasks being performed. Training larger models requires more time, energy, and hardware resources. This has led to the development of efficient training techniques, such as model parallelism, quantization, and knowledge distillation, which aim to reduce the computational burden while maintaining or improving performance.

These scaling laws highlight the importance of balancing model size, training data, and computational resources to achieve optimal performance. Understanding these relationships is essential for researchers, developers, and practitioners working with large language models.

---

```markdown
# Empirical Studies on LLM Scaling

This section summarizes empirical studies and experiments that have investigated the scaling laws of Large Language Models (LLMs), highlighting trends and patterns observed in real-world applications. Key findings include:

- **Training Data and Model Performance**: Studies have shown a strong correlation between the amount of training data and model performance, with larger models generally achieving higher accuracy on tasks such as text generation, reasoning, and multi-task learning. However, the gains diminish as the model size increases beyond a certain point.

- **Parameter Count and Efficiency**: There is evidence that increasing the number of parameters in an LLM does not always lead to proportional improvements in performance. Some models with more parameters exhibit diminishing returns, suggesting that efficiency in parameter usage and training cost is critical.

- **Task-Specific Scaling**: The scaling behavior varies across different tasks. For example, models trained on tasks requiring long-range dependencies (e.g., reading comprehension) show different scaling patterns compared to those focused on short-term reasoning or generation.

- **Real-World Applications**: Empirical studies have demonstrated that LLMs deployed in real-world scenarios often exhibit consistent scaling trends, with performance improvements aligning with the expected growth in model size and training data. This supports the notion that scaling laws are not only theoretical but also observable in practical deployment settings.

- **Generalization and Transfer Learning**: Research has indicated that while larger models may outperform smaller ones on specific tasks, they can sometimes struggle with generalization and transfer learning across diverse domains. This suggests that scaling laws must be considered alongside other factors like domain adaptation and fine-tuning.

These findings underscore the importance of understanding the empirical relationship between model size, training data, and performance, and how these relationships evolve in real-world applications.
```

---

**Challenges and Limitations of Scaling Laws**

Scaling laws in the context of Large Language Models (LLMs) refer to the observed relationship between model size and performance, typically expressed as $ P \propto N^\alpha $, where $ P $ is performance and $ N $ is model size. However, applying these laws to LLMs presents several challenges and limitations that hinder their effective scaling.

One major challenge is **generalization**. While larger models may perform better on training data, they often fail to generalize well to new tasks or domains. This is due to the complexity of real-world scenarios, which are not fully captured by the training data. The increased model size can lead to overfitting, where the model becomes too specialized to the training data and performs poorly on unseen data.

Another significant limitation is **bias**. Scaling laws suggest that larger models should be more accurate and less biased, but in practice, biases present in the training data can persist and even amplify with model size. This results in models that may exhibit unfair or discriminatory behavior in their outputs, which is a critical concern in AI ethics and fairness.

Additionally, **resource constraints** play a crucial role. Scaling LLMs requires substantial computational resources, including powerful GPUs and large amounts of memory. These requirements make it difficult to scale models beyond a certain point, especially for smaller organizations or institutions with limited infrastructure. The cost of training and deploying large models is also prohibitive, limiting their accessibility and practical application.

In summary, while scaling laws offer a theoretical framework for understanding model performance, they are not without limitations. The challenges of generalization, bias, and resource constraints necessitate careful consideration and alternative approaches to model development and deployment.

---

**Future Directions in LLM Scaling Research**

The future of Large Language Models (LLMs) lies in the continued exploration of scaling laws and their implications for model efficiency, performance, and generalization. While current research has made significant strides in understanding how model size impacts performance, there remains a need for deeper insights into the underlying mechanisms that govern these relationships. Future research should focus on:

1. **Enhanced Understanding of Scaling Laws**: Investigating the precise relationship between model size, training data, and performance metrics, particularly in diverse tasks and domains. This includes exploring how different components of the model—such as attention mechanisms, parameter counts, and training strategies—interact with scaling behaviors.

2. **Efficient Scaling Strategies**: Developing techniques to scale models without excessive computational or memory costs. This could involve innovations in quantization, pruning, and knowledge distillation that allow for larger models to be trained and deployed more efficiently.

3. **Generalization and Robustness**: Studying how scaling affects model robustness, adaptability, and generalization across different tasks and datasets. This is crucial for ensuring that larger models maintain performance and reliability in real-world applications.

4. **Interpretable Scaling Analysis**: Creating tools and frameworks to analyze and interpret scaling patterns in a more transparent manner. This would aid in identifying which aspects of the model contribute most to performance gains and guide future design decisions.

5. **Cross-Domain and Cross-Task Scaling**: Examining how scaling laws vary across different domains (e.g., text, code, images) and tasks (e.g., reasoning, generation, translation). This will help in tailoring scaling strategies to specific use cases and environments.

6. **Long-Term Memory and Knowledge Retention**: Exploring how scaling impacts the model's ability to retain and utilize long-term memory and prior knowledge, which is critical for tasks requiring contextual understanding and reasoning.

7. **Ethical and Fairness Considerations**: Addressing the ethical implications of scaling, including bias, fairness, and the potential for unintended consequences in large-scale models. This involves developing frameworks to ensure that scaling efforts align with ethical standards and societal needs.

By advancing these research directions, the field of LLMs can move toward more sustainable, efficient, and ethically responsible scaling strategies that maximize benefits while minimizing risks.

In [11]:
print(state["final_report"])

The introduction to Large Language Models (LLMs) provides a foundational understanding of their architecture, capabilities, and the role they play in modern natural language processing. LLMs are trained on vast amounts of text data, allowing them to understand and generate human-like text across a wide range of topics and contexts. These models are typically based on transformer architectures, which enable efficient parallel processing of large volumes of data, leading to improved performance and scalability.

Scaling laws are central to the study of LLMs and their performance characteristics. These laws describe how model performance, such as accuracy, efficiency, and generalization ability, changes with increasing model size. Generally, there is a trend where larger models achieve better performance, but this improvement often comes at the cost of increased computational resources and training time. The relationship between model size and performance is not linear, and factors such a