In [5]:
!pip install pyzotero openai

Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
Collecting openai
  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/9c/5b/4be258ff072ed8ee15f6bfd8d5a1a4618aa4704b127c0c5959212ad177d6/openai-2.3.0-py3-none-any.whl (999 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m999.8/999.8 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting distro<2,>=1.7.0 (from openai)
  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl (20 kB)
Collecting jiter<1,>=0.10.0 (from openai)
  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/de/91/25e38fbbfc17111d7b70b24290a41d611cc2a27fa6cd0ed84ddae38ec3e6/jiter-0.11.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (350 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m350.3/350.3 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting pydantic<3,>=1

In [1]:
import json
import os

from pyzotero import zotero

# --- 1. Connect to Zotero ---
# The API key is read from the environment so the secret never lives in the notebook.
# NOTE: the key that used to be hardcoded here has been exposed and should be revoked.
library_id = '13400884'
library_type = 'user'
api_key = os.environ.get('ZOTERO_API_KEY')
if not api_key:
    raise ValueError("Please set the environment variable ZOTERO_API_KEY")

zot = zotero.Zotero(library_id, library_type, api_key)

# --- 2. Select the collection ---
collection_key = 'XPY9MWBP'
# A single collection_items() call returns only one page of results;
# everything() keeps following the pagination links until the collection is exhausted.
items = zot.everything(zot.collection_items(collection_key))

data_list = []
seen_keys = set()  # keys of the entries that have already been handled

for item in items:
    d = item['data']

    # An attachment carries no bibliographic metadata itself: resolve its parent entry first
    if d['itemType'] == 'attachment' and 'parentItem' in d:
        parent_key = d['parentItem']
        if parent_key in seen_keys:  # parent already handled, skip
            continue
        try:
            parent_item = zot.item(parent_key)
            d = parent_item['data']
            # Only marked as seen on success, so the parent can still be picked up
            # later if it also appears as a regular entry in `items`.
            seen_keys.add(parent_key)
        except Exception as e:
            print(f"⚠️ 无法获取 parentItem {parent_key}: {e}")
            continue
    else:
        key = d['key']
        if key in seen_keys:  # already handled, skip
            continue
        seen_keys.add(key)

    # Keep only the main bibliographic item types
    if d['itemType'] in ['journalArticle', 'preprint', 'book', 'conferencePaper']:
        title = d.get('title', '').strip()
        abstract = d.get('abstractNote', '').strip()
        if title or abstract:
            data_list.append({
                'title': title,
                'abstract': abstract
            })

# --- Save as JSONL (one {"title", "abstract"} object per line) ---
with open('zotero_collection.jsonl', 'w', encoding='utf-8') as f:
    for entry in data_list:
        f.write(json.dumps(entry, ensure_ascii=False) + '\n')

print(f'✅ 已保存 {len(data_list)} 篇文献摘要到 zotero_collection.jsonl')

✅ 已保存 13 篇文献摘要到 zotero_collection.jsonl


In [11]:
import json
from openai import OpenAI
import re

import os
# --- DeepSeek API key ---
# The key must come from the environment (e.g. export DEEPSEEK_API_KEY before
# starting Jupyter); it is never written into the notebook.
# NOTE: the key that used to be hardcoded here has been exposed and should be revoked.
api_key = os.environ.get("DEEPSEEK_API_KEY")
if not api_key:
    raise ValueError("Please set the environment variable DEEPSEEK_API_KEY")

# DeepSeek is reached through the OpenAI client by overriding the base URL
client = OpenAI(api_key=api_key, base_url="https://api.deepseek.com")

# --- Input / output JSONL files ---
input_file = "zotero_collection.jsonl"
output_file = "train.jsonl"

def clean_gpt_output(gpt_output: str) -> str:
    """
    Reduce a raw model reply to the JSON array it contains.

    Markdown code-fence markers (``` or ```json) are removed while the fenced
    payload is kept, then the text from the first '[' to the last ']' is
    extracted. Removing the whole fenced block would discard the JSON itself
    whenever the model wraps its answer in a fence.

    Args:
        gpt_output: Raw text returned by the model.

    Returns:
        The bracketed array substring when one is found; otherwise the
        stripped text with the fence markers removed.
    """
    text = gpt_output.strip()
    # Drop only the fence markers; an optional language tag counts as part of
    # the marker only when it is followed by a newline.
    text = re.sub(r"```(?:[\w+-]*[ \t]*\r?\n)?", "", text)
    # Keep only the outermost [...] span
    match = re.search(r"\[.*\]", text, flags=re.DOTALL)
    if match:
        return match.group(0)
    return text.strip()

# --- Process every record ---
# Reads one {"title", "abstract"} object per line from input_file, asks the model
# for question/answer pairs, and writes one training record per pair to output_file.
with open(input_file, "r", encoding="utf-8") as f_in, open(output_file, "w", encoding="utf-8") as f_out:
    for line in f_in:
        if not line.strip():
            # Tolerate blank lines instead of crashing in json.loads
            continue

        record = json.loads(line)
        title = (record.get("title") or "").strip()
        abstract = (record.get("abstract") or "").strip()

        if not title or not abstract:
            print(f"Skipping empty record: {record}")
            continue

        # --- Prompt ---
        prompt = f"""
        You are an expert research assistant specialized in computer vision and deep learning.
        Based on the following paper title and abstract, generate 10 technical and research-oriented question-answer pairs in English.
        - Do NOT ask trivial questions (e.g., "What is the name of the model?").
        - Each question should probe the methodology, contributions, experiments, or theoretical insights.
        - Cover different aspects: architecture, modules, attention mechanisms, efficiency, ablation studies, limitations.
        - Return ONLY a JSON array of objects with exactly two fields: "question" and "answer".

        Example output:
        [
        {{"question": "How does the proposed hybrid architecture combine Transformer and Mamba layers?", "answer": "It cross-cycles blocks from both layers to leverage contextual learning and computational efficiency."}},
        {{"question": "What is the function of the IRSS module?", "answer": "It traverses four scan paths to process long image sequences efficiently."}}
        ]

        Title: {title}
        Abstract: {abstract}
        """

        # --- Call the DeepSeek chat model ---
        # A failed request only skips this paper, so one network/API error does not
        # abort the run and lose the remaining records.
        try:
            response = client.chat.completions.create(
                model="deepseek-chat",
                messages=[
                    {"role": "system", "content": "You are a helpful scientific assistant."},
                    {"role": "user", "content": prompt},
                ],
                stream=False
            )
        except Exception as e:
            print(f"Warning: API request failed for paper '{title}'. Skipping.\nError: {e}")
            continue

        # The content can be None; treat that as an empty reply so the JSON check below rejects it
        gpt_output = response.choices[0].message.content or ""
        gpt_output = clean_gpt_output(gpt_output)
        print(gpt_output)

        # --- Parse the JSON array ---
        try:
            qa_pairs = json.loads(gpt_output)
            if not isinstance(qa_pairs, list):
                raise ValueError("Parsed JSON is not a list")
        except (json.JSONDecodeError, ValueError) as e:
            print(f"Warning: GPT output is not valid JSON for paper '{title}'. Skipping.\nError: {e}\nOutput: {gpt_output}")
            continue

        # --- Emit one training record per question/answer pair ---
        for pair in qa_pairs:
            # Entries that are not dicts with both "question" and "answer" are silently dropped
            if isinstance(pair, dict) and "question" in pair and "answer" in pair:
                output_record = {
                    # Generic instruction shared by every record.
                    # NOTE(review): the prompt above targets computer vision / deep learning,
                    # but this instruction says "economics" - confirm which one is intended.
                    "instruction": "you are an expert in economics",
                    "input": pair["question"],
                    "output": pair["answer"]
                }
                f_out.write(json.dumps(output_record, ensure_ascii=False) + "\n")

        print(f"Processed: {title}")


[
  {"question": "How does the cross-cycling of Transformer and Mamba blocks in MatIR overcome the individual limitations of each architecture?", "answer": "The cross-cycling mechanism alternates between Transformer blocks for superior contextual feature learning and Mamba blocks for efficient long-range dependency modeling, creating a synergistic architecture that compensates for Mamba's contextual learning deficiencies while maintaining computational efficiency."},
  {"question": "What is the functional principle behind the Image Inpainting State Space (IRSS) module in handling long image sequences?", "answer": "The IRSS module implements four distinct scan paths (likely horizontal, vertical, and diagonal directions) to systematically traverse image data, enabling efficient state space modeling of long-range dependencies while maintaining linear computational complexity relative to sequence length."},
  {"question": "How does the hybrid attention mechanism in the Transformer module b

In [12]:
# Clone the LLaMA-Factory repository; later cells invoke files under a LLaMA-Factory checkout.
! git clone https://github.com/hiyouga/LLaMA-Factory.git

正克隆到 'LLaMA-Factory'...
fatal: 无法访问 'https://github.com/hiyouga/LLaMA-Factory.git/'：GnuTLS recv error (-110): The TLS connection was non-properly terminated.


In [17]:
import os
# Point the HF_ENDPOINT environment variable at the hf-mirror.com mirror.
# NOTE(review): presumably this is meant to make the `huggingface-cli download` cell
# use the mirror; if so, this cell has to run before that one - confirm.
os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"

In [21]:
# Download the deepseek-ai/deepseek-llm-7b-chat files into ./models.
# NOTE(review): the serving cells further down load
# /home/byh/gpt/models/DeepSeek-R1-Distill-Qwen-7B, a different model - confirm which one is intended.
! huggingface-cli download deepseek-ai/deepseek-llm-7b-chat --local-dir ./models

Fetching 9 files:   0%|                                   | 0/9 [00:00<?, ?it/s]Downloading 'pytorch_model-00002-of-00002.bin' to 'models/.cache/huggingface/download/HnkwBfZ0kY-ttHuN02vuxl1p6V0=.e6cc31dd99c92be73064a38a661fae821dd5f437bac16202c014420c25a5cffd.incomplete'
Downloading 'pytorch_model-00001-of-00002.bin' to 'models/.cache/huggingface/download/fPHULxv55kAe7RSfHmmL42LIc1I=.7c8e56ddd37b2c2df2ac23cfcf57b7924de43e531f66238725f58215dc9f03d3.incomplete'
Downloading 'README.md' to 'models/.cache/huggingface/download/Xn7B-BWUGOee2Y6hCZtEhtFu4BE=.9a238c25fdd40b5a4f174038873bfda3de863c2b.incomplete'
Downloading 'generation_config.json' to 'models/.cache/huggingface/download/3EVKVggOldJcKSsGjSdoUCN1AyQ=.85212c6f6424d4215a3d0b66b2b486e5cb177b53.incomplete'

README.md: 3.23kB [00:00, 21.0MB/s]
Download complete. Moving file to models/README.md

generation_config.json: 181B [00:00, 2.26MB/s]
Download complete. Moving file to models/generation_config.json
Downloading 'tokenizer.json' to '

In [20]:
!pip install huggingface_hub -i https://pypi.tuna.tsinghua.edu.cn/simple

Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
Collecting huggingface_hub
  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/31/a0/651f93d154cb72323358bf2bbae3e642bdb5d2f1bfc874d096f7cb159fa0/huggingface_hub-0.35.3-py3-none-any.whl (564 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m564.3/564.3 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting filelock (from huggingface_hub)
  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/42/14/42b2651a2f46b022ccd948bca9f2d5af0fd8929c4eec235b8d6d844fbe67/filelock-3.19.1-py3-none-any.whl (15 kB)
Collecting fsspec>=2023.5.0 (from huggingface_hub)
  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/47/71/70db47e4f6ce3e5c37a607355f80da8860a33226be640226ac52cb05ef2e/fsspec-2025.9.0-py3-none-any.whl (199 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m199.3/199.3 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m00:01[0m
Collecting pyyaml>=5.1 (from huggingfac

In [3]:
# Launch LLaMA-Factory training with the deep_seek_r1.yaml config from examples/train_lora.
# The YAML itself is not part of this notebook, so the training settings are not visible here.
# NOTE(review): the absolute /home/byh/... path only works on the author's machine.
!llamafactory-cli train /home/byh/gpt/LLaMA-Factory/examples/train_lora/deep_seek_r1.yaml

[INFO|2025-10-14 21:45:27] llamafactory.hparams.parser:423 >> Process rank: 0, world size: 1, device: cuda:0, distributed training: False, compute dtype: torch.bfloat16
[INFO|tokenization_utils_base.py:2066] 2025-10-14 21:45:27,648 >> loading file tokenizer.model
[INFO|tokenization_utils_base.py:2066] 2025-10-14 21:45:27,648 >> loading file tokenizer.json
[INFO|tokenization_utils_base.py:2066] 2025-10-14 21:45:27,648 >> loading file added_tokens.json
[INFO|tokenization_utils_base.py:2066] 2025-10-14 21:45:27,648 >> loading file special_tokens_map.json
[INFO|tokenization_utils_base.py:2066] 2025-10-14 21:45:27,648 >> loading file tokenizer_config.json
[INFO|tokenization_utils_base.py:2066] 2025-10-14 21:45:27,648 >> loading file chat_template.jinja
[INFO|tokenization_utils_base.py:2337] 2025-10-14 21:45:27,782 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
[INFO|configuration_utils.py:763] 2025-10-14 21:45:27,785 

In [None]:
from peft import PeftModel
# Wrap the base model with the LoRA adapter stored in ./output/lora_weights.
# NOTE(review): `model` is not defined in any visible cell of this notebook, so this
# cell fails on a fresh kernel unless the base model is loaded first - confirm where it comes from.
model = PeftModel.from_pretrained(model, "./output/lora_weights")
model = model.merge_and_unload()  # merge the adapter into the original model


In [5]:
!python /home/byh/gpt/LLaMA-Factory/src/api.py \
    --model_name_or_path /home/byh/gpt/models/DeepSeek-R1-Distill-Qwen-7B \
    --checkpoint_dir /home/byh/gpt/models/sft/DeepSeek-R1-Distill-Qwen-7B

usage: api.py [-h] [--model_name_or_path MODEL_NAME_OR_PATH]
              [--adapter_name_or_path ADAPTER_NAME_OR_PATH]
              [--adapter_folder ADAPTER_FOLDER] [--cache_dir CACHE_DIR]
              [--use_fast_tokenizer [USE_FAST_TOKENIZER]]
              [--no_use_fast_tokenizer] [--resize_vocab [RESIZE_VOCAB]]
              [--split_special_tokens [SPLIT_SPECIAL_TOKENS]]
              [--add_tokens ADD_TOKENS]
              [--add_special_tokens ADD_SPECIAL_TOKENS]
              [--new_special_tokens_config NEW_SPECIAL_TOKENS_CONFIG]
              [--init_special_tokens {noise_init,desc_init,desc_init_w_noise}]
              [--model_revision MODEL_REVISION]
              [--low_cpu_mem_usage [LOW_CPU_MEM_USAGE]]
              [--no_low_cpu_mem_usage]
              [--rope_scaling {linear,dynamic,yarn,llama3}]
              [--flash_attn {auto,disabled,sdpa,fa2}]
              [--shift_attn [SHIFT_ATTN]] [--mixture_of_depths {convert,load}]
              [--use_unsloth [USE_

In [6]:
# Run the vLLM OpenAI-compatible API server for the local DeepSeek-R1-Distill-Qwen-7B
# weights, exposed under the model name "qwen3" on port 8000 with --block-size 16.
# NOTE(review): --host 0.0.0.0 binds to every network interface - confirm that is intended.
!python3 -m vllm.entrypoints.openai.api_server --model /home/byh/gpt/models/DeepSeek-R1-Distill-Qwen-7B --host 0.0.0.0 --port 8000 --served-model-name qwen3 --block-size 16

INFO 10-19 11:39:23 [__init__.py:216] Automatically detected platform cuda.
[1;36m(APIServer pid=2489522)[0;0m INFO 10-19 11:39:24 [api_server.py:1839] vLLM API server version 0.11.0
[1;36m(APIServer pid=2489522)[0;0m INFO 10-19 11:39:24 [utils.py:233] non-default args: {'host': '0.0.0.0', 'model': '/home/byh/gpt/models/DeepSeek-R1-Distill-Qwen-7B', 'served_model_name': ['qwen3'], 'block_size': 16}
[1;36m(APIServer pid=2489522)[0;0m INFO 10-19 11:39:24 [model.py:547] Resolved architecture: LlamaForCausalLM
[1;36m(APIServer pid=2489522)[0;0m `torch_dtype` is deprecated! Use `dtype` instead!
[1;36m(APIServer pid=2489522)[0;0m INFO 10-19 11:39:24 [model.py:1510] Using max model len 4096
[1;36m(APIServer pid=2489522)[0;0m INFO 10-19 11:39:26 [scheduler.py:205] Chunked prefill is enabled with max_num_batched_tokens=2048.
INFO 10-19 11:39:27 [__init__.py:216] Automatically detected platform cuda.
[1;36m(EngineCore_DP0 pid=2489672)[0;0m INFO 10-19 11:39:29 [core.py:644] Waiting 

In [5]:
import json
from openai import OpenAI
import re

import os
# --- DeepSeek API key ---
# The key must come from the environment; it is never written into the notebook.
# NOTE: the key that used to be hardcoded here has been exposed and should be revoked.
api_key = os.environ.get("DEEPSEEK_API_KEY")
if not api_key:
    raise ValueError("Please set the environment variable DEEPSEEK_API_KEY")

# DeepSeek is reached through the OpenAI client by overriding the base URL
client = OpenAI(api_key=api_key, base_url="https://api.deepseek.com")

# --- Ask DeepSeek how to fix the vLLM out-of-memory error ---
response = client.chat.completions.create(
    model="deepseek-chat",
    messages=[
        # The question is the user's turn, so it is sent with the "user" role
        # instead of being passed as a system prompt with no user message.
        {"role": "user", "content": "我使用vllm显存超出了!python3 -m vllm.entrypoints.openai.api_server --model /home/byh/gpt/models/DeepSeek-R1-Distill-Qwen-7B --host 0.0.0.0 --port 8000 --served-model-name qwen3 "}
    ],
    stream=False
)

# Leave the reply as the cell's last expression so the notebook displays it
gpt_output = response.choices[0].message.content
gpt_output


'您遇到的是显存不足的问题。有几种方法可以解决：\n\n## 方法1：调整推理参数（推荐）\n\n```bash\npython3 -m vllm.entrypoints.openai.api_server \\\n    --model /home/byh/gpt/models/DeepSeek-R1-Distill-Qwen-7B \\\n    --host 0.0.0.0 \\\n    --port 8000 \\\n    --served-model-name qwen3 \\\n    --max-model-len 4096 \\\n    --gpu-memory-utilization 0.8 \\\n    --swap-space 4\n```\n\n关键参数说明：\n- `--max-model-len 4096`：限制最大序列长度\n- `--gpu-memory-utilization 0.8`：限制GPU内存使用率\n- `--swap-space 4`：设置交换空间（GB）\n\n## 方法2：使用量化（如果显存严重不足）\n\n```bash\npython3 -m vllm.entrypoints.openai.api_server \\\n    --model /home/byh/gpt/models/DeepSeek-R1-Distill-Qwen-7B \\\n    --host 0.0.0.0 \\\n    --port 8000 \\\n    --served-model-name qwen3 \\\n    --quantization awq \\\n    --max-model-len 2048\n```\n\n## 方法3：检查并优化系统\n\n1. **检查可用显存**：\n```bash\nnvidia-smi\n```\n\n2. **如果显存确实很小**，考虑使用更小的模型或CPU推理：\n```bash\npython3 -m vllm.entrypoints.openai.api_server \\\n    --model /home/byh/gpt/models/DeepSeek-R1-Distill-Qwen-7B \\\n    --host 0.0.0.0 \\\n    --

In [None]:
import numpy as np

In [2]:
# Run the vLLM OpenAI-compatible API server for the local DeepSeek-R1-Distill-Qwen-7B
# weights, exposed under the model name "qwen3" on port 8000, this time with --max-num-seqs 64.
# NOTE(review): --host 0.0.0.0 binds to every network interface - confirm that is intended.
!python3 -m vllm.entrypoints.openai.api_server --model /home/byh/gpt/models/DeepSeek-R1-Distill-Qwen-7B --host 0.0.0.0 --port 8000 --served-model-name qwen3  --max-num-seqs 64

INFO 10-19 13:22:56 [__init__.py:216] Automatically detected platform cuda.
[1;36m(APIServer pid=2679432)[0;0m INFO 10-19 13:22:57 [api_server.py:1839] vLLM API server version 0.11.0
[1;36m(APIServer pid=2679432)[0;0m INFO 10-19 13:22:57 [utils.py:233] non-default args: {'host': '0.0.0.0', 'model': '/home/byh/gpt/models/DeepSeek-R1-Distill-Qwen-7B', 'served_model_name': ['qwen3'], 'max_num_seqs': 64}
[1;36m(APIServer pid=2679432)[0;0m INFO 10-19 13:22:57 [model.py:547] Resolved architecture: LlamaForCausalLM
[1;36m(APIServer pid=2679432)[0;0m `torch_dtype` is deprecated! Use `dtype` instead!
[1;36m(APIServer pid=2679432)[0;0m INFO 10-19 13:22:57 [model.py:1510] Using max model len 4096
[1;36m(APIServer pid=2679432)[0;0m INFO 10-19 13:22:58 [scheduler.py:205] Chunked prefill is enabled with max_num_batched_tokens=2048.
INFO 10-19 13:23:00 [__init__.py:216] Automatically detected platform cuda.
[1;36m(EngineCore_DP0 pid=2679547)[0;0m INFO 10-19 13:23:01 [core.py:644] Waitin