In [2]:
import re

In [None]:
https://www.transparenttextures.com/

https://xinzhang-ops.github.io/daily_paper/

In [3]:
content = """
1. 📘 Topic and Domain: The paper focuses on improving the temporal quality of generated videos, specifically addressing temporal coherence and diversity, within the domain of video generation. 2. 💡 Previous Research and New Ideas: The paper builds upon existing video generation models (U-Net, DiT, AR-based) and temporal refinement techniques (architecture-centric modeling, physics-informed regularization, training dynamics optimization) and proposes FLUX FLOW, a novel data-level temporal augmentation strategy. 3. ❓ Problem: The paper aims to solve the problem of temporal artifacts (flickering, discontinuous motion, repetitive dynamics) and limited temporal diversity in videos produced by existing video generation models. 4. 🛠️ Methods: The authors used FLUX FLOW, which introduces controlled temporal perturbations at the data level through frame-level (random shuffling of frames) and block-level (reordering of frame blocks) operations during training. 5. 📊 Results and Evaluation: FLUX FLOW significantly improved temporal coherence and diversity across various video generation models on UCF-101 and VBench benchmarks, while maintaining or improving spatial fidelity, as evaluated using metrics like FVD, IS, and various VBench dimensions, supported by user studies.
"""

In [5]:
content_formatted = re.sub(r'\*\*(.*?)\*\*', r'<strong>\1</strong>', content)
content_formatted

'\n1. 📘 Topic and Domain: The paper focuses on improving the temporal quality of generated videos, specifically addressing temporal coherence and diversity, within the domain of video generation. 2. 💡 Previous Research and New Ideas: The paper builds upon existing video generation models (U-Net, DiT, AR-based) and temporal refinement techniques (architecture-centric modeling, physics-informed regularization, training dynamics optimization) and proposes FLUX FLOW, a novel data-level temporal augmentation strategy. 3. ❓ Problem: The paper aims to solve the problem of temporal artifacts (flickering, discontinuous motion, repetitive dynamics) and limited temporal diversity in videos produced by existing video generation models. 4. 🛠️ Methods: The authors used FLUX FLOW, which introduces controlled temporal perturbations at the data level through frame-level (random shuffling of frames) and block-level (reordering of frame blocks) operations during training. 5. 📊 Results and Evaluation: FLU

In [6]:
re.split(r'(?=\d+\.\s*[📘💡❓🛠️📊])', content_formatted)

['\n',
 '1. 📘 Topic and Domain: The paper focuses on improving the temporal quality of generated videos, specifically addressing temporal coherence and diversity, within the domain of video generation. ',
 '2. 💡 Previous Research and New Ideas: The paper builds upon existing video generation models (U-Net, DiT, AR-based) and temporal refinement techniques (architecture-centric modeling, physics-informed regularization, training dynamics optimization) and proposes FLUX FLOW, a novel data-level temporal augmentation strategy. ',
 '3. ❓ Problem: The paper aims to solve the problem of temporal artifacts (flickering, discontinuous motion, repetitive dynamics) and limited temporal diversity in videos produced by existing video generation models. ',
 '4. 🛠️ Methods: The authors used FLUX FLOW, which introduces controlled temporal perturbations at the data level through frame-level (random shuffling of frames) and block-level (reordering of frame blocks) operations during training. ',
 '5. 📊 R

In [32]:
import os
import re
import json
import uuid
import time
import subprocess
from string import Template 
from tqdm import tqdm
from loguru import logger
from datetime import date


# HTML 模板：主页面，使用 $ 作为占位符
INDEX_TEMPLATE = """
<!DOCTYPE html>
<html lang="zh-CN">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Daily Paper</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            max-width: 800px;
            margin: 0 auto;
            padding: 20px;
            line-height: 1.6;
        }
        h1 {
            text-align: center;
            color: #333;
        }
        ul {
            list-style: none;
            padding: 0;
        }
        li {
            margin: 10px 0;
        }
        a {
            text-decoration: none;
            color: #1a73e8;
        }
        a:hover {
            text-decoration: underline;
        }
    </style>
</head>
<body>
    <h1>Daily Paper</h1>
    <ul>
        $date_links
    </ul>
</body>
</html>
"""

# HTML 模板：子页面，使用 $ 作为占位符
SUBPAGE_TEMPLATE = """
<!DOCTYPE html>
<html lang="zh-CN">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>$date 论文推送</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            max-width: 800px;
            margin: 0 auto;
            padding: 20px;
            line-height: 1.6;
        }
        h1 {
            color: #333;
        }
        .paper-card {
            background-color: #f9f9f9;
            border: 1px solid #ddd;
            border-radius: 5px;
            padding: 15px;
            margin-bottom: 20px;
            transition: transform 0.2s, box-shadow 0.2s; /* Smooth transition for hover effect */
        }
        .paper-card:hover {
            transform: translateY(-5px); /* Lift effect on hover */
            box-shadow: 0 5px 15px rgba(0, 0, 0, 0.2); /* Shadow on hover */
        }
        .paper-card h2 {
            margin: 0 0 10px;
            font-size: 1.2em;
        }
        .paper-card p {
            margin: 5px 0;
        }
        .paper-card a {
            color: #1a73e8;
            text-decoration: none;
        }
        .paper-card a:hover {
            text-decoration: underline;
        }
        .category-chunk {
            padding: 10px;
            margin: 5px 0;
            border-radius: 5px;
            transition: transform 0.2s, box-shadow 0.2s; /* Smooth transition for hover effect */
        }
        .category-chunk:hover {
            transform: translateY(-3px); /* Slightly smaller lift for categories */
            box-shadow: 0 3px 10px rgba(0, 0, 0, 0.15); /* Slightly smaller shadow for categories */
        }
        .category-chunk:nth-child(1) { /* 1. Topic and Domain */
            background-color: #d3e3fd; /* Blue */
        }
        .category-chunk:nth-child(2) { /* 2. Previous Research and New Ideas */
            background-color: #e6d6fa; /* Purple */
        }
        .category-chunk:nth-child(3) { /* 3. Problem */
            background-color: #d4f8d9; /* Green */
        }
        .category-chunk:nth-child(4) { /* 4. Methods */
            background-color: #ffd7d5; /* Pink */
        }
        .category-chunk:nth-child(5) { /* 5. Results and Evaluation */
            background-color: #d3e3fd; /* Reuse Blue */
        }
    </style>
</head>
<body>
    <h1>$date 论文推送</h1>
    $paper_content
</body>
</html>
"""


def extract_categories(text):
    """
    Extract the 5 categories and their content from a formatted text string.
    
    Each category is identified by its unique emoji (📘, 💡, ❓, 🛠️, 📊) regardless of the
    exact title text. The function maps these to standardized category names.
    
    Args:
        text (str): The input text containing the 5 categories
        
    Returns:
        dict: A dictionary with standardized category titles as keys and their content as values
    """
    # Define patterns based on emojis only, not the category titles
    patterns = [
        (r'\d+\.\s+\*\*📘.*?\*\*\s+(.*?)(?=\n\n\d+\.|\Z)', "📘 Topic and Domain",),
        (r'\d+\.\s+\*\*💡.*?\*\*\s+(.*?)(?=\n\n\d+\.|\Z)', "💡 Previous Research and New Ideas"),
        (r'\d+\.\s+\*\*❓.*?\*\*\s+(.*?)(?=\n\n\d+\.|\Z)', "❓ Problem"),
        (r'\d+\.\s+\*\*🛠️.*?\*\*\s+(.*?)(?=\n\n\d+\.|\Z)', "🛠️ Methods"),
        (r'\d+\.\s+\*\*📊.*?\*\*\s+(.*?)(?=\n\n|\Z)', "📊 Results and Evaluation")
    ]
    
    # Create a dictionary to store results
    results = []
    
    # Apply each pattern and store results with standardized category names
    for pattern, category_name in patterns:
        match = re.search(pattern, text, re.DOTALL)
        if match:
            results.append((category_name, match.group(1).strip()))    
    return results



def generate_paper_html(articles):
    """生成子页面的论文内容 HTML，与 Google Chat 推送内容一致"""
    # logger.debug(articles)
    paper_html = ""
    for idx, article in enumerate(articles):
        title = article.get('title', 'No Title')
        published_at = article.get('published_at', 'No Date')
        url = article.get('url', '#')
        content = article.get('content', 'No Content')
        categories = extract_categories(content)
        # 为每个类别添加 div 和样式
        content_html = ""
        for idx, (cat, cat_content) in enumerate(categories):
            content_html += f"""<div class="category-chunk">{idx+1}.  <strong>{cat}:</strong> {cat_content}</div>"""

        paper_html += f"""
        <div class="paper-card">
            <h2>Paper: {idx+1}</h2>
            <p><strong>{title}</strong></p>
            <p><strong>Published: </strong>{published_at}</p>
            <p><strong>Link: </strong><a href="{url}" target="_blank">{url}</a></p>
            <div>{content_html}</div>
        </div>
        """
    return paper_html

In [33]:
articles = [{'title': 'Temporal Regularization Makes Your Video Generator Stronger', 'published_at': '2025-03-19', 'url': 'http://arxiv.org/pdf/2503.15417', 'content': '1.  **📘 Topic and Domain:** The paper focuses on temporal data augmentation for video generation, specifically within the domain of computer vision and deep learning.\n\n2.  **💡 Previous Research and New Ideas:** The paper builds on existing video generation models (U-Net, DiT, AR-based) and proposes FLUX FLOW, a novel temporal augmentation strategy that perturbs frame order during training.\n\n3.  **❓ Problem:** The paper aims to solve the problem of temporal inconsistency and limited temporal diversity in generated videos, such as flickering and unnatural motion.\n\n4.  **🛠️ Methods:** The authors used FLUX FLOW, which includes frame-level and block-level temporal perturbations, applied as a pre-processing step during the training of video generation models.\n\n5.  **📊 Results and Evaluation:** FLUX FLOW significantly improved temporal coherence and diversity across various video generation models, as evaluated on UCF-101 and VBench benchmarks using metrics like FVD, IS, and user studies.\n'}, {'title': 'Optimizing Decomposition for Optimal Claim Verification', 'published_at': '2025-03-19', 'url': 'http://arxiv.org/pdf/2503.15354', 'content': 'Here\'s a concise analysis of the paper based on your requested format:\n\n1.  **📘 Topic and Domain:** The paper focuses on fact-checking of long-form text, specifically optimizing the decomposition stage within the "Decompose-Then-Verify" paradigm in the domain of Natural Language Processing.\n\n2.  **💡 Previous Research and New Ideas:** The paper builds upon existing "Decompose-Then-Verify" fact-checking methods that use handcrafted prompts, and proposes a novel reinforcement learning framework (dynamic decomposition) to learn a decomposition policy tailored to the verifier, introducing the concept of "atomicity" to quantify information density.\n\n3.  **❓ Problem:** The paper aims to solve the misalignment between decomposers and verifiers in existing fact-checking systems, where static decomposition policies don\'t generate subclaims with optimal atomicity for downstream verification.\n\n4.  **🛠️ Methods:** The authors used a reinforcement learning (RL) framework, specifically Proximal Policy Optimization (PPO) in an Advantage Actor-Critic (A2C) style, to train a dynamic decomposition policy that interacts with a verifier and receives feedback.\n\n5.  **📊 Results and Evaluation:** The results, evaluated on verification confidence and accuracy across various datasets and verifiers, show that dynamic decomposition outperforms existing static decomposition policies, improving verification confidence by 0.07 and accuracy by 0.12 on average.\n'}]

In [34]:
articles[0]['content']

'1.  **📘 Topic and Domain:** The paper focuses on temporal data augmentation for video generation, specifically within the domain of computer vision and deep learning.\n\n2.  **💡 Previous Research and New Ideas:** The paper builds on existing video generation models (U-Net, DiT, AR-based) and proposes FLUX FLOW, a novel temporal augmentation strategy that perturbs frame order during training.\n\n3.  **❓ Problem:** The paper aims to solve the problem of temporal inconsistency and limited temporal diversity in generated videos, such as flickering and unnatural motion.\n\n4.  **🛠️ Methods:** The authors used FLUX FLOW, which includes frame-level and block-level temporal perturbations, applied as a pre-processing step during the training of video generation models.\n\n5.  **📊 Results and Evaluation:** FLUX FLOW significantly improved temporal coherence and diversity across various video generation models, as evaluated on UCF-101 and VBench benchmarks using metrics like FVD, IS, and user st

In [35]:
articles[1]['content']

'Here\'s a concise analysis of the paper based on your requested format:\n\n1.  **📘 Topic and Domain:** The paper focuses on fact-checking of long-form text, specifically optimizing the decomposition stage within the "Decompose-Then-Verify" paradigm in the domain of Natural Language Processing.\n\n2.  **💡 Previous Research and New Ideas:** The paper builds upon existing "Decompose-Then-Verify" fact-checking methods that use handcrafted prompts, and proposes a novel reinforcement learning framework (dynamic decomposition) to learn a decomposition policy tailored to the verifier, introducing the concept of "atomicity" to quantify information density.\n\n3.  **❓ Problem:** The paper aims to solve the misalignment between decomposers and verifiers in existing fact-checking systems, where static decomposition policies don\'t generate subclaims with optimal atomicity for downstream verification.\n\n4.  **🛠️ Methods:** The authors used a reinforcement learning (RL) framework, specifically Pro

In [36]:
print(generate_paper_html(articles))


        <div class="paper-card">
            <h2>Paper: 5</h2>
            <p><strong>Temporal Regularization Makes Your Video Generator Stronger</strong></p>
            <p><strong>Published: </strong>2025-03-19</p>
            <p><strong>Link: </strong><a href="http://arxiv.org/pdf/2503.15417" target="_blank">http://arxiv.org/pdf/2503.15417</a></p>
            <div><div class="category-chunk">1.  <strong>📘 Topic and Domain:</strong> The paper focuses on temporal data augmentation for video generation, specifically within the domain of computer vision and deep learning.</div><div class="category-chunk">2.  <strong>💡 Previous Research and New Ideas:</strong> The paper builds on existing video generation models (U-Net, DiT, AR-based) and proposes FLUX FLOW, a novel temporal augmentation strategy that perturbs frame order during training.</div><div class="category-chunk">3.  <strong>❓ Problem:</strong> The paper aims to solve the problem of temporal inconsistency and limited temporal di

In [None]:
target = """
<div class="paper-card">
    <h2>Paper: 1</h2>
    <p><strong>Temporal Regularization Makes Your Video Generator Stronger</strong></p>
    <p><strong>Published: </strong>2025-03-19</p>
    <p><strong>Link: </strong><a href="http://arxiv.org/pdf/2503.15417" target="_blank">http://arxiv.org/pdf/2503.15417</a></p>
    <div>
        <div class="category-chunk">1.  <strong>📘 Topic and Domain:</strong> The paper focuses on improving the temporal quality of video generation, specifically addressing temporal coherence and diversity, within the domain of computer vision and deep learning.</div>
        <div class="category-chunk">2.  <strong>💡 Previous Research and New Ideas:</strong> The paper builds on existing video generation models (U-Net, DiT, AR-based) and temporal refinement techniques (architecture-centric modeling, physics-informed regularization, training dynamics optimization), and proposes FLUX FLOW, a novel data-level temporal augmentation strategy.</div>
        <div class="category-chunk">3.  <strong>❓ Problem:</strong> The paper aims to solve the problem of temporal artifacts (flickering, discontinuous motion) and limited temporal diversity in videos generated by current video generation models.</div>
        <div class="category-chunk">4.  <strong>🛠️ Methods:</strong> The authors used FLUX FLOW, which involves frame-level and block-level temporal perturbations (random shuffling of frames or blocks of frames) during the training of video generation models.</div>
        <div class="category-chunk">5.  <strong>📊 Results and Evaluation:</strong> FLUX FLOW significantly improved temporal coherence and diversity across various video generation models, as evaluated on UCF-101 and VBench benchmarks using metrics like FVD, IS, and various VBench temporal and frame-wise quality scores, and was further supported by a user study.</div>
    </div>
</div>
"""

In [1]:
import json
summaries = []
with open('summaries.jsonl', 'r') as file:
    for line in file:
        summaries.append(json.loads(line.strip()))

In [2]:
from dotenv import load_dotenv
import os

load_dotenv()
GENAI_GATEWAY_API_KEY = os.getenv("GENAI_GATEWAY_API_KEY")
os.environ['OPENAI_API_KEY'] = GENAI_GATEWAY_API_KEY

In [3]:
import os
from geotab_genai.genai_gateway_client import GenaiGatewayClient


claude35_sonnet_v2 = GenaiGatewayClient(
    api_key=os.getenv("GENAI_GATEWAY_API_KEY"),
    env="staging",
    jurisdiction="us",
    temperature=0.8,
    provider='anthropics',
    chat_model='claude-3-5-sonnet-v2',
    max_tokens=8192,
    safety_filtering='off'
)


claude37_sonnet = GenaiGatewayClient(
    api_key=os.getenv("GENAI_GATEWAY_API_KEY"),
    env="staging",
    jurisdiction="us",
    temperature=0.8,
    provider='anthropics',
    chat_model='claude-3-7-sonnet',
    max_tokens=8192,
    safety_filtering='off'
)

gemini_20_pro = GenaiGatewayClient(
    api_key=os.getenv("GENAI_GATEWAY_API_KEY"),
    env="staging",
    jurisdiction="us",
    temperature=0.8,
    provider='vertex-ai',
    chat_model='gemini-2.0-pro',
    max_tokens=8192,
    safety_filtering='off'
)


model_map = {
    'claude35': claude35_sonnet_v2,
    # 'claude35_haiku': claude35_haiku,
    'claude37': claude37_sonnet,
    '2.0 pro': gemini_20_pro,
}


def model_response(prompt, model_name, max_tokens=4096):
    model = model_map[model_name]
    version = None
    if model_name == '2.0 flash':
        version = '001'
    response = model.create_message(
        messages=[{"role": "user", "content": prompt}],
        max_tokens=max_tokens,
        provider=model._provider,
        model=model._chat_model,
        version=version,

    )['message']['content']
    return response



In [6]:
with open('test_article.txt', 'r') as f:
    test_article = f.read()

In [None]:
system_prompt = """
You are a research assistant. You job is to help me to create a flow chart of the paper content. 
Since it is about the workflow of the paper, your focus is the method applied in the paper.

Your should contain your answer in a SVG format as following format:
<format>
you should have your output with this specific <svg> tag.

<svg width="100%" viewBox="0 0 1000 800">
Here are the content you can create freely, use all shapes, text format or styles as you like.
Try to be creative, and make it look good and colorful.
</svg>

</format>

Here is the content of the paper:
<content>
{article_content}
</content>

Now please give me the SVG format of the flow chart, you should only give me the SVG format directly, do not output backticks for formatting, no other text.
"""

prompt = system_prompt.format(article_content=test_article)

In [19]:
response = model_response(prompt, 'claude35', max_tokens=4096)

In [20]:
print(response)

<svg width="100%" viewBox="0 0 1000 800">
    <!-- Background -->
    <rect x="0" y="0" width="1000" height="800" fill="#f8f9fa"/>
    
    <!-- Title -->
    <text x="500" y="50" text-anchor="middle" font-size="24" font-weight="bold" fill="#2c3e50">DeepSeek LLM Workflow</text>
    
    <!-- Main Flow -->
    <g transform="translate(0,0)">
        <!-- Pre-training Phase -->
        <rect x="100" y="100" width="200" height="80" rx="10" fill="#3498db" opacity="0.8"/>
        <text x="200" y="145" text-anchor="middle" fill="white" font-size="16">Pre-training Data</text>
        <text x="200" y="165" text-anchor="middle" fill="white" font-size="12">2 Trillion Tokens</text>
        
        <!-- Architecture -->
        <rect x="400" y="100" width="200" height="80" rx="10" fill="#2ecc71" opacity="0.8"/>
        <text x="500" y="145" text-anchor="middle" fill="white" font-size="16">Model Architecture</text>
        <text x="500" y="165" text-anchor="middle" fill="white" font-size="12">LLaMA

: 

: 