# 我想做一个小程序来帮我阅读论文
# 我的个人情况
# 1.我是一个物理专业的博士生
# 2.我有阅读论文的习惯
# 我的个人需求
# 1.程序可以接入我们学校的VPN（类似于easy connnect）从而具备期刊论文的访问权限
# 2.程序可以访问Deepseek，我个人提供账号和密码，从而完成和论文的沟通, 问答
# 3.最好能装载到chatbox上面,或者有类似的可视化对话平台
# 4.我个人对Python比较熟练,所以希望程序的功能能用python实现但是如果c++是必要的,我也可以,我懂一点c++
# 5.如果论文有开源的代码请注明代码链接
# 程序应该具有的功能(流程式)
# 1.输入自己学校的VPN账号密码
# 2.输入自己的DeepSeek账号密码
# 3.选择需要浏览的期刊,我一般就喜欢浏览这些期刊: PANS,Nature,Science,PRL,JCP,JFM,PRF,这些期刊我的学校都有访问权限，以我写的这些简写进行输入
# 4.选择模式?初步尝试这三种模式:a.新文推荐(列出来以n天为周期的最新文章，列出文章的名称，作者，摘要，是否细读，下载？),这里细读是将整篇文章给deepseek,返回deepseek的回复,下载是下载到具体的本地位置
# b.关键词搜索(这个功能期刊主页就有，我觉得我们借用就好，列出文章的名称，作者，摘要，是否细读，下载？)
# c.好文推荐: 列出来被编辑建议,或者高被引(这个我可以设置：比如时间范围和引用范围，就算高被引文章),列出文章的名称，作者，摘要，是否细读，下载？
# 5.我觉得细读功能可以直接打开于deep seek的聊天框?我没有想好

#具体的想法就这些,我的本意是涉及一个工具来帮助我省力读论文的工具,所以我希望这个程序尽量快,实现上最好能简便.当然如果我还有什么是我没有想到,或者想的不够充分的,也请及时和我沟通。


In [None]:
import os
import httpx
import subprocess
from playwright.sync_api import sync_playwright
import gradio as gr

In [41]:
# 配置信息（建议后续改为环境变量）
VPN_SERVER = "vpn.zju.edu.cn"
PRL_URL = "https://www.sciencedirect.com/journal/journal-of-computational-physics"
DEEPSEEK_API = "https://api.deepseek.com/v1/chat/completions"

In [42]:
class VPNConnector:
    def connect(self, username, password):
        """使用openconnect连接VPN（修复sudo密码问题）"""
        cmd = f"sudo -S openconnect {VPN_SERVER} -u {username} --passwd-on-stdin"
        try:
            process = subprocess.Popen(
                cmd.split(),
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True
            )
            # 同时发送sudo密码和VPN密码
            output, error = process.communicate(f"{password}\n{password}\n")
            if process.returncode != 0:
                raise gr.Error(f"VPN连接失败: {error}")
            return True
        except Exception as e:
            raise gr.Error(f"VPN错误: {str(e)}")

In [43]:
class PRLCrawler:
    def get_recent_papers(self):
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            page = browser.new_page()
            
            try:
                page.goto(PRL_URL, wait_until="networkidle", timeout=60000)
                
                # 等待内容加载
                page.wait_for_selector("article")
                
                articles = page.query_selector_all("article")
                results = []
                
                for article in articles[:5]:
                    title_elem = article.query_selector("h3.title")
                    title = title_elem.inner_text() if title_elem else "无标题"
                    
                    authors_elem = article.query_selector("div.author-group")
                    authors = authors_elem.inner_text() if authors_elem else "无作者信息"
                    
                    abstract_elem = article.query_selector("div.abstract")
                    abstract = abstract_elem.inner_text() if abstract_elem else "无摘要"
                    
                    pdf_link_elem = article.query_selector("a.pdf-download")
                    pdf_link = pdf_link_elem.get_attribute("href") if pdf_link_elem else ""
                    
                    results.append({
                        "title": title.strip(),
                        "authors": authors.replace("\n", " ").strip(),
                        "abstract": abstract.strip(),
                        "pdf": pdf_link
                    })
                return results
            except Exception as e:
                print("爬取错误:", str(e))
                return []
            finally:
                browser.close()

In [44]:
class DeepSeekHelper:
    def analyze_paper(self, api_key, text):
        headers = {"Authorization": f"Bearer {api_key}"}
        response = httpx.post(
            DEEPSEEK_API,
            json={
                "model": "deepseek-chat",
                "messages": [{
                    "role": "user",
                    "content": f"请用中文总结这篇物理论文的核心贡献和技术细节：\n{text[:3000]}"  # 限长处理
                }]
            },
            headers=headers
        )
        return response.json()["choices"][0]["message"]["content"]


In [45]:
# Gradio界面
with gr.Blocks() as demo:
    gr.Markdown("## 论文速读DEMO (PRL)")
    
    with gr.Row():
        vpn_user = gr.Textbox(label="VPN账号")
        vpn_pass = gr.Textbox(label="VPN密码", type="password")
        ds_key = gr.Textbox(label="DeepSeek API Key")
    
    fetch_btn = gr.Button("获取最新论文")
    
    paper_store = gr.State([])  # 存储论文数据
    
    with gr.Row():
        paper_selector = gr.Dropdown([], label="选择论文", allow_custom_value=False) 
        analyze_btn = gr.Button("AI解读")
    
    output = gr.Markdown()

    def fetch_papers(vpn_user, vpn_pass):
        # 连接VPN
        vpn = VPNConnector()
        if not vpn.connect(vpn_user, vpn_pass):
            return [], [], "VPN连接失败"
        
        # 获取论文列表
        crawler = PRLCrawler()
        papers = crawler.get_recent_papers()
        
        # 格式化下拉选项
        options = [f"{p['title']} - {p['authors']}" for p in papers]
        return options, papers, "获取到{}篇论文".format(len(papers))

    def analyze_paper(selected, papers, ds_key):
        helper = DeepSeekHelper()
        selected_paper = next(p for p in papers if f"{p['title']} - {p['authors']}" == selected)
        return helper.analyze_paper(ds_key, selected_paper["abstract"])

    fetch_btn.click(
        fn=fetch_papers,
        inputs=[vpn_user, vpn_pass],
        outputs=[paper_selector, paper_store, output]
    )
    
    analyze_btn.click(
        fn=analyze_paper,
        inputs=[paper_selector, paper_store, ds_key],
        outputs=output
    )

In [46]:
if __name__ == "__main__":
    demo.launch()

* Running on local URL:  http://127.0.0.1:7862

To create a public link, set `share=True` in `launch()`.


Traceback (most recent call last):
  File "/home/wangz/anaconda3/lib/python3.12/site-packages/anyio/_backends/_asyncio.py", line 851, in run
    result = context.run(func, *args)
             ^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/wangz/anaconda3/lib/python3.12/site-packages/gradio/utils.py", line 890, in wrapper
    response = f(*args, **kwargs)
               ^^^^^^^^^^^^^^^^^^
  File "/tmp/ipykernel_3188/3782263840.py", line 36, in analyze_paper
    selected_paper = next(p for p in papers if f"{p['title']} - {p['authors']}" == selected)
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
StopIteration

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/home/wangz/anaconda3/lib/python3.12/site-packages/gradio/queueing.py", line 625, in process_events
    response = await route_utils.call_process_api(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/wangz/anaconda3/l