## Read API Key
/root/autodl-tmp/llm-env/appsetting.txt

In [None]:
def read_api_key_from_file(path: str = "/root/autodl-tmp/llm-env/appsetting.txt") -> str:
    """
    从指定文件读取API密钥，返回字符串（自动去除首尾空白）。
    """
    with open(path, "r") as f:
        key = f.read().strip()
    return key

# 用法示例
api_key = read_api_key_from_file()
print(f"API Key: {api_key}")

### Chunk To Txt

In [None]:
from typing import List

def split_into_chunks(doc : str) -> List[str] :
    result = []
    with open(doc,'r') as file :
        content = file.read()
    # Split the content into chunks based on the separator
    chunks = content.split('\n\n_______________________________________________________________________________\n\n')
    # Remove any leading or trailing whitespace from each chunk
    for i in range(len(chunks)) :
        item = chunks[i].strip()
        if(itme.cotains('The Full Property Name Is :')) : 
            item = item.replace('The Full Property Name Is :','')
            result.append(itme.strip())
    return result

chunks = split_into_chunks('full_api.txt')

print(len(chunks))

for i , chunk in enumerate(chunks) :
    if i<= 50 :
        print(f'[{i}] {chunk} \n')


### 代码分段

In [14]:
# 对code进行编码处理
import re
from typing import List


# code_embeddings_model = SentenceTransformer( "Qwen/Qwen3-Embedding-0.6B")

def get_code_content(path  : str) -> List[str]:
    with open(path, 'r') as file:
        content = file.read()
    return re.split(r"=== CODE_BLOCK_\d{4} ===", content)


code_chunks = get_code_content('extra_data/all_codes.txt')

print(f"Total code chunks: {len(code_chunks)}")

Total code chunks: 88


In [6]:
import torch

print(torch.__version__)

print(torch.cuda.is_available())
    

2.7.0+cu128
True


### Chunk To SqLite

In [None]:
from sqlite3 import connect

db_path  = '/root/autodl-tmp/revitdocs/Output/revit_api_collection/revit_api.db'
conn = connect(db_path)
cursor = conn.cursor()

cursor.execute("SELECT title FROM api_info")
rows = cursor.fetchall()

chunks = [row[0] for row in rows]

conn.close()

print(f"Total titles: {len(chunks)}")

for i, title in enumerate(chunks):
    print(f"[{i}] {title}")

### Embedding Fileds

In [5]:
from typing import List
import os
os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'
os.environ['PYTORCH_CUDA_ALLOC_CONF']='expandable_segments:True'
from sentence_transformers import SentenceTransformer

import torch
torch.cuda.empty_cache()
torch.cuda.set_per_process_memory_fraction(0.8)

embedding_model = SentenceTransformer(
     "Qwen/Qwen3-Embedding-0.6B"
 )

def embed_chunk(chunk : str) -> List[float] :
    embedding = embedding_model.encode_query(chunk , convert_to_tensor=True)
    # embedding = embedding[:768] # 一维向量截取 [: ,:768]二维向量截取
    embedding = embedding.cpu()
    # embedding = F.normalize(embedding, p=2, dim=1)  # Normalize the embedding to unit length
      # Ensure the model is used for query encoding
    return embedding.tolist()

test_embedding = embed_chunk("The capital of China is Beijing.")
print(len(test_embedding))
print(test_embedding)

1024
[-0.04706183448433876, -0.03642774373292923, -0.00014000797818880528, -0.014083481393754482, -0.04292481392621994, -0.02553073689341545, 0.03137432038784027, 0.053047120571136475, -0.0626080259680748, 0.06606028228998184, 0.0659131184220314, 0.029106969013810158, 0.07445437461137772, -0.001165645895525813, -0.024886492639780045, 0.02081368863582611, -0.03135136142373085, 0.0905204489827156, -0.009008750319480896, -0.020332610234618187, 0.05675996094942093, 0.018259713426232338, 0.07115014642477036, -0.050004683434963226, 0.03989148885011673, 0.03393232822418213, -0.08764395862817764, 0.011991586536169052, 0.0005942885763943195, 0.009335730224847794, 0.08951549977064133, 0.014075205661356449, -0.010129385627806187, 0.03049849160015583, 0.04567449167370796, -0.003890200750902295, 0.07049321383237839, -0.005185698624700308, 0.015178905799984932, -0.006498791743069887, -0.01790591888129711, 0.005174529738724232, -0.05250843986868858, 0.02945772185921669, 0.004505584482103586, 0.021848

#### 错误信息
OutOfMemoryError: CUDA out of memory. Tried to allocate 1.45 GiB. GPU 0 has a total capacity of 31.37 GiB of which 713.81 MiB is free. Including non-PyTorch memory, this process has 30.66 GiB memory in use. Of the allocated memory 30.18 GiB is allocated by PyTorch, and 514.00 KiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

cuda版本与实际版本不一致
`Nvidia-smi` 本机为： 12.8 
conda list : 显示为 12.6
将conda版本对齐到12.8

### 1. 安全方式运行时间两小时

In [None]:
import torch
import gc
from tqdm import tqdm

def process_embeddings_safely(chunks, batch_size=8):
    torch.cuda.empty_cache()
    torch.cuda.set_per_process_memory_fraction(0.7)  # 更保守的内存分配
    
    embeddings = []
    chunks_fake = []
    chunks_failed = []
    total_processed = 0
    
    # 初始批次大小，会根据内存情况动态调整
    current_batch_size = batch_size
    
    with tqdm(total=len(chunks)) as pbar:
        while total_processed < len(chunks):
            # 动态调整批次大小
            try:
                # 获取当前GPU内存使用情况
                mem_used = torch.cuda.memory_allocated() / (1024 ** 3)
                mem_free = torch.cuda.memory_reserved() / (1024 ** 3) - mem_used
                
                # 根据可用内存动态调整批次大小
                if mem_free < 1.0:  # 可用内存小于1GB时减小批次
                    current_batch_size = max(1, current_batch_size // 2)
                elif mem_free > 4.0 and current_batch_size < batch_size * 2:  # 可用内存充足时增大批次
                    current_batch_size = min(batch_size * 2, current_batch_size * 2)
            except:
                pass  # 如果获取内存失败则使用默认批次
            
            # 计算当前批次的起止索引
            end_idx = min(total_processed + current_batch_size, len(chunks))
            batch = chunks[total_processed:end_idx]
            
            try:
                # 清理内存
                torch.cuda.empty_cache()
                gc.collect()
                
                # 生成嵌入
                batch_embeddings = []
                for chunk in batch:
                    embedding = embed_chunk(chunk)
                    batch_embeddings.append(embedding)
                    # 及时转移到CPU并释放GPU内存
                    if isinstance(embedding, torch.Tensor):
                        embedding = embedding.cpu()
                
                # 添加到结果列表
                chunks_fake.extend(batch)
                embeddings.extend(batch_embeddings)
                total_processed = end_idx
                pbar.update(len(batch))
                
            except RuntimeError as e:
                if 'out of memory' in str(e).lower():
                    # 内存不足错误，减小批次大小并重试
                    current_batch_size = max(1, current_batch_size // 2)
                    print(f"OOM错误，调整批次大小为: {current_batch_size}")
                    if current_batch_size == 1:
                        # 单个处理仍失败，记录并跳过
                        chunks_failed.extend(batch)
                        total_processed = end_idx
                        pbar.update(len(batch))
                else:
                    # 其他错误
                    print(f"处理批次时出错: {e}")
                    chunks_failed.extend(batch)
                    total_processed = end_idx
                    pbar.update(len(batch))
            except Exception as e:
                print(f"处理批次时出错: {e}")
                chunks_failed.extend(batch)
                total_processed = end_idx
                pbar.update(len(batch))
            
            # 最终清理
            torch.cuda.empty_cache()
            gc.collect()
    
    print(f"总嵌入数量: {len(embeddings)}")
    print(f"失败数量: {len(chunks_failed)}")
    if embeddings:
        print(f"第一个嵌入形状: {embeddings[0].shape if hasattr(embeddings[0], 'shape') else '未知'}")
    return embeddings, chunks_fake, chunks_failed


process_embeddings_safely(chunks, batch_size=8)

### 2. 加速函数 2 

In [None]:
import torch
import gc
import time
from tqdm import tqdm

def optimized_embedding_processing(chunks,  initial_batch_size=20, max_retries=3):
    # 内存优化配置
    torch.cuda.empty_cache()
    torch.cuda.set_per_process_memory_fraction(0.85)  # 提高内存利用率
    
    # 性能监控变量
    total_start_time = time.time()
    batch_times = []
    
    # 结果存储
    embeddings = []
    chunks_fake = []
    chunks_failed = []
    total_processed = 0
    
    # 智能批次控制
    current_batch_size = initial_batch_size
    best_batch_size = initial_batch_size  # 记录最佳批次大小
    consecutive_success = 0
    
    # 预热阶段：快速找到最佳批次大小
    warmup_batch_sizes = [initial_batch_size, initial_batch_size*2, initial_batch_size//2]
    warmup_results = []
    
    print("=== 启动批次大小预热 ===")
    for test_size in warmup_batch_sizes:
        if test_size < 1: continue
        try:
            start_time = time.time()
            batch = chunks[:test_size]
            batch_embeddings = [embed_chunk(chunk) for chunk in batch]
            duration = time.time() - start_time
            warmup_results.append((test_size, duration, True))
            print(f"预热批次 {test_size}: 成功, 耗时 {duration:.2f}s")
            del batch_embeddings
            torch.cuda.empty_cache()
        except Exception as e:
            print(f"预热批次 {test_size}: 失败 - {str(e)[:50]}")
            warmup_results.append((test_size, 0, False))
    
    # 选择最佳预热批次大小
    if warmup_results:
        # 过滤成功的批次并按效率排序 (元素/秒)
        valid_results = [(s, s/d) for s, d, success in warmup_results if success and d > 0]
        if valid_results:
            best_warmup = max(valid_results, key=lambda x: x[1])
            best_batch_size = best_warmup[0]
            current_batch_size = best_batch_size
            print(f"预热完成，最佳初始批次大小: {best_batch_size}")
    
    # 主处理循环
    with tqdm(total=len(chunks), desc="处理进度") as pbar:
        while total_processed < len(chunks):
            batch_start = time.time()
            batch_success = False
            retry_count = 0
            
            while retry_count < max_retries and not batch_success:
                try:
                    # 计算批次范围
                    end_idx = min(total_processed + current_batch_size, len(chunks))
                    actual_batch_size = end_idx - total_processed
                    if actual_batch_size <= 0:
                        break
                    
                    # 只在内存使用超过阈值时清理
                    mem_used = torch.cuda.memory_allocated() / (1024**3)
                    if mem_used > 0.7 * torch.cuda.get_device_properties(0).total_memory / (1024**3):
                        torch.cuda.empty_cache()
                        gc.collect()
                    
                    # 处理批次
                    batch = chunks[total_processed:end_idx]
                    batch_embeddings = []
                    for chunk in batch:
                        # 使用混合精度加速计算（如果支持）
                        with torch.amp.autocast('cuda',enabled=True):
                            embedding = embed_chunk(chunk)
                        batch_embeddings.append(embedding.cpu() if isinstance(embedding, torch.Tensor) else embedding)
                    
                    # 更新结果
                    chunks_fake.extend(batch)
                    embeddings.extend(batch_embeddings)
                    total_processed = end_idx
                    pbar.update(actual_batch_size)
                    
                    # 记录成功批次信息
                    batch_time = time.time() - batch_start
                    batch_times.append(batch_time)
                    consecutive_success += 1
                    batch_success = True
                    
                    # 动态增大批次大小（连续成功3次）
                    if consecutive_success >= 3 and current_batch_size < best_batch_size * 2:
                        new_batch_size = int(current_batch_size * 1.2)
                        # 检查新批次大小是否超过安全阈值
                        if new_batch_size <= len(chunks) - total_processed:
                            current_batch_size = new_batch_size
                            consecutive_success = 0  # 重置计数器
                            print(f"动态增大批次大小至: {current_batch_size}")
                    
                except RuntimeError as e:
                    if 'out of memory' in str(e).lower():
                        retry_count += 1
                        # 指数退避策略减小批次大小
                        new_batch_size = max(1, int(current_batch_size * 0.7))
                        if new_batch_size < current_batch_size:
                            print(f"内存不足，批次大小从 {current_batch_size} 调整为 {new_batch_size} (重试 {retry_count}/{max_retries})")
                            current_batch_size = new_batch_size
                            torch.cuda.empty_cache()
                            gc.collect()
                        else:
                            # 无法再减小批次，记录失败
                            chunks_failed.extend(batch)
                            total_processed = end_idx
                            pbar.update(actual_batch_size)
                            break
                    else:
                        # 其他错误
                        print(f"处理错误: {str(e)}")
                        chunks_failed.extend(batch)
                        total_processed = end_idx
                        pbar.update(actual_batch_size)
                        break
                except Exception as e:
                    print(f"处理错误: {str(e)}")
                    chunks_failed.extend(batch)
                    total_processed = end_idx
                    pbar.update(actual_batch_size)
                    break
    
    # 性能统计
    total_time = time.time() - total_start_time
    avg_batch_time = sum(batch_times)/len(batch_times) if batch_times else 0
    
    print("\n=== 处理统计 ===")
    print(f"总处理时间: {total_time:.2f}s ({total_time/60:.1f}分钟)")
    print(f"平均批次处理时间: {avg_batch_time:.2f}s")
    print(f"成功处理: {len(embeddings)} 条")
    print(f"失败处理: {len(chunks_failed)} 条")
    print(f"最佳批次大小: {best_batch_size}")
    
    return embeddings, chunks_fake, chunks_failed

embeddings_apis , chunks_fake_apis ,chunks_failed_apis =  optimized_embedding_processing(chunks, initial_batch_size=20)

 第一次运行结果
 
  0%|          | 0/445 [00:00<?, ?it/s]
 31%|███       | 138/445 [12:15<28:00,  5.47s/it]
Error at batch 6850 : CUDA out of memory. Tried to allocate 2.36 GiB. GPU 0 has a total capacity of 31.37 GiB of which 5.76 GiB is free. Including non-PyTorch memory, this process has 25.60 GiB memory in use. 25.09 GiB allowed; Of the allocated memory 22.61 GiB is allocated by PyTorch, and 2.40 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
 73%|███████▎  | 324/445 [28:32<10:36,  5.26s/it]
Error at batch 16150 : CUDA out of memory. Tried to allocate 2.22 GiB. GPU 0 has a total capacity of 31.37 GiB of which 7.03 GiB is free. Including non-PyTorch memory, this process has 24.33 GiB memory in use. 25.09 GiB allowed; Of the allocated memory 21.50 GiB is allocated by PyTorch, and 2.24 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
 73%|███████▎  | 325/445 [28:35<09:33,  4.78s/it]
Error at batch 16200 : CUDA out of memory. Tried to allocate 11.39 GiB. GPU 0 has a total capacity of 31.37 GiB of which 15.39 GiB is free. Including non-PyTorch memory, this process has 15.97 GiB memory in use. 25.09 GiB allowed; Of the allocated memory 15.19 GiB is allocated by PyTorch, and 194.79 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
100%|██████████| 445/445 [38:58<00:00,  5.25s/it]
Total embeddings: 22060

In [None]:
print(len(embeddings_apis))

[print(f'[{i}] {chunk} \n') for i, chunk in enumerate(chunks_fake_apis[:50])]


### 与chromadb进行匹配

In [12]:
import chromadb

chromadb_batch_size = 5000 # max size 5461
chromadb_client = chromadb.PersistentClient("./chromadb0815_api_1.db")
chromadb_collection = chromadb_client.get_or_create_collection(name="revit_api_1")

def save_embeddings(chunks : List[str] , embeddings : List[List[float]]) -> None :
    ids = [str(i) for i in range(len(embeddings))]
    for j in tqdm(range(0, len(chunks), chromadb_batch_size)):
        chunks_sub = chunks[j : j + chromadb_batch_size]
        id_sub = ids[j : j + chromadb_batch_size]
        embeddings_sub = embeddings[j : j + chromadb_batch_size]
        chromadb_collection.add(documents=chunks_sub , embeddings=embeddings_sub , ids=id_sub)
        

save_embeddings(chunks=chunks_fake_apis , embeddings=embeddings_apis)

100%|██████████| 5/5 [00:34<00:00,  6.89s/it]


### Chromadb With Code


In [None]:
embeddings_codes , chunks_fake_codes ,chunks_failed_codes =  optimized_embedding_processing(code_chunks, initial_batch_size=20)

In [18]:
import chromadb

chromadb_batch_size = 5000 # max size 5461
chromadb_client = chromadb.PersistentClient("./chromadb0818_code_1.db")
chromadb_collection = chromadb_client.get_or_create_collection(name="revit_api_1")

def save_embeddings(chunks : List[str] , embeddings : List[List[float]]) -> None :
    ids = [str(i) for i in range(len(embeddings))]
    for j in tqdm(range(0, len(chunks), chromadb_batch_size)):
        chunks_sub = chunks[j : j + chromadb_batch_size]
        id_sub = ids[j : j + chromadb_batch_size]
        embeddings_sub = embeddings[j : j + chromadb_batch_size]
        chromadb_collection.add(documents=chunks_sub , embeddings=embeddings_sub , ids=id_sub)
        

save_embeddings(chunks= code_chunks, embeddings=embeddings_codes)

100%|██████████| 1/1 [00:00<00:00,  2.59it/s]


### LLM Chat To Imporve User Ask

In [None]:
from openai import OpenAI

def query_retrieve(query: str):
    client = OpenAI(api_key=api_key , base_url="https://api.deepseek.com")

    prompt_sys = f"""
    you are a professional bim engineer, you are good at Revit API, you can answer any question about Revit API.
    also you have a good skill in c# and algorithm in graph 2d, you can write code in c# to solve the problem.
    and can translate the user question to english if the user question is not in english.

    you think chain need to by thi step and check it :
    1. Understand the user question and translate it to English if necessary.
    2. Retrieve relevant information from the Revit API database using the provided query.
    3. Generate the keyword will help databse to find the best api reference.
    4. need output just one line answer to the user question

    example:
    User Question:  结构柱着色的命令是什么?
    Step 1: Translate to English: "What is the command for coloring structural columns in Revit?" 
    Step 2: May Be User Need Api Keyword: "structural columns, coloring Override Element Graphics"
    Step 3: Generate keywords: "structural columns, coloring Override Element Graphics , View Filter"
    Step 4: Output the answer in one line.


    output format:

    Keywords: structural columns, coloring Override Element Graphics , View Filter , OverrideGraphicSettings ,  SetElementOverrides

    Remember:
    - Only provide the keywords in the output.
    - Do not include any additional text or explanations.
    - Ensure the keywords are relevant to the user's question and can help in retrieving the best API references.
    - The keywords should be concise and directly related to Revit API functionalities.
    - Avoid using generic terms

    """

    response = client.chat.completions.create(
        model="deepseek-chat",
        messages=[
            {"role": "system", "content": f"{prompt_sys}"},  
            
            {"role": "user", "content": f"{query}"}
        ],
        stream = False
    )

    print(f"query: {query}")
    print("Response Query from DeepSeek:")
    print(response.choices[0].message.content)

    return response.choices[0].message.content


if __name__ == "__main__":
    query = "创建结构柱"
    retireve_chunk = query_retrieve(query)
    print(retireve_chunk)

### Test With LLM

In [None]:
# 查找数据

def retireve(query : str , top_k : int) -> List[str] :
    query_embedding = embed_chunk(query)
    results = chromadb_collection.query(query_embeddings=[query_embedding] , n_results=top_k)
    return results['documents'][0]


query = "创建管道"
retireve_query = query_retrieve(query)
retireve_chunk = retireve(retireve_query,10)
for i in range(len(retireve_chunk)) : 
    print(retireve_chunk[i])

API分词结束后,对sdk进行整理,将文件名称,文件夹内部的*.rtf文件作embedding,与api做score,获得相近的项目,将两者一起传递给LLM


### 文档分段

In [11]:
# 对code进行编码处理
import re
from typing import List


# code_embeddings_model = SentenceTransformer( "Qwen/Qwen3-Embedding-0.6B")

def get_docs_content(path  : str) -> List[str]:
    with open(path, 'r') as file:
        content = file.read()
    return re.split(r"=== DOC_BLOCK_\d{4} ===", content)


doc_chunks = get_docs_content('extra_data/all_docs.txt')

print(f"Total code chunks: {len(doc_chunks)}")

Total code chunks: 88


#### Combine 文档字符

In [24]:
combined_conteent = []

for i in range(len(code_chunks)):
    line_conetent = f"{code_chunks[i]} \n {doc_chunks[i]}"
    combined_conteent.append(line_conetent)
    
    
print(f"Total combined content chunks: {len(combined_conteent)}")


Total combined content chunks: 88


In [None]:
import torch
from tqdm import tqdm
torch.cuda.empty_cache()
torch.cuda.set_per_process_memory_fraction(0.8)

def embed_type_chunks(chunks: List[str], batch_size: int) -> List[List[float]]:
    print(f"Total chunks to embed: {len(chunks)}")
    embeddings = []
    chunks_fake = []
    chunks_failed = []
    
    # 修复1：正确计算总批次
    total_batches = (len(chunks) + batch_size - 1) // batch_size
    
    for batch_idx in tqdm(range(total_batches)):
        start = batch_idx * batch_size
        end = min(start + batch_size, len(chunks))  # 修复2：避免索引超限
        print(f"Processing batch {batch_idx}: chunks {start} to {end-1}")
        
        torch.cuda.empty_cache()
        try:
            batch = chunks[start:end]  # 修复3：正确批次切片
            for chunk_idx, chunk in enumerate(batch):
                try:
                    embedding = embed_chunk(chunk)
                    embeddings.append(embedding)
                    chunks_fake.append(chunk)
                    # 修复4：添加详细进度日志
                    print(f"  Processed chunk {start+chunk_idx}/{len(chunks)}")
                except Exception as e:
                    # 修复5：精确记录单个失败chunk
                    failed_idx = start + chunk_idx
                    print(f"  Failed chunk {failed_idx}: {str(e)[:50]}")
                    chunks_failed.append((failed_idx, chunk))
        except Exception as e:
            print(f"Batch {batch_idx} failed entirely: {e}")
            chunks_failed.extend([(start+i, chunk) for i, chunk in enumerate(batch)])
        
        torch.cuda.empty_cache()
    
    return embeddings, chunks_fake, [chunk for idx, chunk in chunks_failed]

embeddings_code, code_chunks_fake, code_chunks_failed = embed_type_chunks(combined_conteent, batch_size=20)
 # 修复6：添加完整性校验
print(f"\n=== Embedding Summary ===")
print(f"Successfully embedded: {len(embeddings_code)}")
print(f"Failed chunks: {len(code_chunks_failed)}")
print(f"Integrity check: {len(embeddings_code) + len(code_chunks_failed) == len(combined_conteent)}")

### ~使用mergekit合并向量~ 不支持QWen3 放弃

In [None]:
import torch
import yaml
from typing import List

OUTPUT_PATH = "./merged"
CONFIG_PATH = "union_merge_config.yaml"
LORA_MERGE_CACHE = "/tmp"

from mergekit.config import MergeConfiguration
from mergekit.merge import MergeOptions, run_merge

def merge_code_doc_embeddings(code_embeddings: List[List[float]], doc_embeddings: List[List[float]]) -> List[List[float]]:
    """
    Merge code and document embeddings into a single model.
    """
    with open(CONFIG_PATH, "r", encoding="utf-8") as fp:
        merge_config = MergeConfiguration.model_validate(yaml.safe_load(fp))
    run_merge(
    merge_config,
    out_path=OUTPUT_PATH,
    options=MergeOptions(
        lora_merge_cache=LORA_MERGE_CACHE,
        cuda=torch.cuda.is_available()),
    )
    return merged_model


merged_embeddings = merge_code_doc_embeddings(embeddings_code, embeddings_doc)
print(f"Total merged embeddings: {len(merged_embeddings)}")

### Code Embedding

In [22]:
import chromadb

chromadb_client = chromadb.PersistentClient("./chromadb_code_docs_0810_code_1.db")
chromadb_collection = chromadb_client.get_or_create_collection(name="revit_api_1")

def save_embeddings(chunks : List[str] , embeddings : List[List[float]]) -> None :
    ids = [str(i) for i in range(len(embeddings))]
    chromadb_collection.add(documents=chunks , embeddings=embeddings , ids=ids)
        
        
save_embeddings(chunks=code_chunks , embeddings=embeddings_codes)

In [None]:
def retireve(query : str , top_k : int) -> List[str] :
    query_embedding = embed_chunk(query)
    results = chromadb_collection.query(query_embeddings=[query_embedding] , n_results=top_k)
    return results['documents'][0]


query = "Revit 修改墙体宽度?"
retireve_code_docs_chunk = retireve(query,5)
for i in range(len(retireve_code_docs_chunk)) : 
    print(retireve_code_docs_chunk[i])

## 本地加载chromadb.db

In [None]:
import chromadb
from typing import List
from chromadb.config import Settings

def initialize_chromadb(path : str , name : str) -> chromadb.Collection:
    """
    初始化ChromaDB客户端和集合。
    """
    chromadb_1 = chromadb.PersistentClient(path )
    chromadb_collection = chromadb_1.get_collection(name=name)
    return chromadb_collection

def retireve_collection_name(query : str , top_k : int , collection : chromadb.Collection) -> List[str] :
    query_embedding = embed_chunk(query)
    results = collection.query(query_embeddings =[query_embedding] , n_results=top_k)
    return results['documents'][0]

chromadb_api_collection = initialize_chromadb("./chromadb0815_api_1.db", "revit_api_1")
chromadb_code_docs_collection = initialize_chromadb("./chromadb0818_code_1.db", "revit_api_1")

query = "创建结构柱?"
query_retrieve_re = query_retrieve(query)
retireve_code_docs_chunk = retireve_collection_name(query,5, collection=chromadb_code_docs_collection)
retireve_api_chunk = retireve_collection_name(query_retrieve_re,30, collection=chromadb_api_collection)
print(f"Total API chunks retrieved: {len(retireve_api_chunk)}")
for i in range(len(retireve_api_chunk)) :
    print(f"Code/Doc [{i}]: {retireve_api_chunk[i]}")
    print("-" * 80)

## Rerank

In [None]:
from sentence_transformers import CrossEncoder
from transformers import AutoTokenizer
from typing import List
import gc
import torch
import torch.backends.cudnn as cudnn
import time  # 缺少time模块

# 全局变量控制批次大小和设备
BATCH_SIZE = 1  # 保持最小批次大小
MAX_LENGTH = 128  # 进一步减少序列长度
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
cudnn.benchmark = False
cudnn.deterministic = True

# 模型初始化移到函数外部，避免重复加载
model = CrossEncoder("Qwen/Qwen3-Reranker-0.6B")
model.to(DEVICE)
model.eval()
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-Reranker-0.6B", padding_side='left')
if model.config.pad_token_id is None:
    print(f"Is Token null")
    model.config.pad_token_id = tokenizer.pad_token_id

model.model.gradient_checkpointing_enable()
model.half()

def rerank(query: str, candidates: List[str], top_k: int = 5) -> List[str]:
    """
    使用CrossEncoder对候选答案进行重排序，优化内存使用以避免OOM错误。
    """
    torch.cuda.empty_cache()
    gc.collect()
    # print_gpu_memory_usage()

    inputs = [[query, candidate] for candidate in candidates]
    scores = []

    try:
        # 分批次处理以减少内存占用
        for i in range(0, len(inputs), BATCH_SIZE):
            batch = inputs[i:i+BATCH_SIZE]

            # 使用混合精度和no_grad减少内存使用
            with torch.no_grad(), torch.amp.autocast("cuda"):  # 修正amp.autocast为torch.amp.autocast
                batch_scores = model.predict(
                    batch,
                    batch_size=BATCH_SIZE,
                    show_progress_bar=False,
                    convert_to_tensor=False
                )
            scores.extend(batch_scores)

            del batch, batch_scores
            torch.cuda.empty_cache()
            gc.collect()

            if i % BATCH_SIZE == 0:
                print(f"Processed {i}/{len(inputs)} candidates, cleaning memory")
                # print_gpu_memory_usage()
                time.sleep(0.1)

    except RuntimeError as e:
        if 'out of memory' in str(e):
            print("OOM error encountered, reducing batch size to 1 and retrying...")
            return rerank_with_batch_size_1(query, candidates, top_k)
        else:
            raise e
    finally:
        torch.cuda.empty_cache()
        gc.collect()

    ranked_candidates = sorted(zip(candidates, scores), key=lambda x: x[1], reverse=True)
    return [candidate for candidate, score in ranked_candidates[:top_k]]

def rerank_with_batch_size_1(query: str, candidates: List[str], top_k: int = 5) -> List[str]:
    """使用批次大小1进行重排序,作为OOM时的后备方案"""
    global BATCH_SIZE
    original_batch_size = BATCH_SIZE
    BATCH_SIZE = 1

    try:
        return rerank(query, candidates, top_k)
    finally:
        BATCH_SIZE = original_batch_size

#print("Reranking candidates...")
#print(f"rerank_api_chunks: {len(retireve_api_chunk)} candidates")
#print(f"rerank_code_docs_chunks: {len(retireve_code_docs_chunk)} candidates")
rerank_api_chunks = rerank(query, retireve_api_chunk, top_k=15)
rerank_code_docs_chunks = rerank(query, retireve_code_docs_chunk, top_k=5)
"""ArithmeticError: division by zero"""
# 添加错误处理，确保两个列表长度相同
"""
min_length = min(len(rerank_api_chunks), len(rerank_code_docs_chunks))
for i in range(min_length):
    print(f"API Chunk {i+1}: {rerank_api_chunks[i]}")
    print(f"Code Doc Chunk {i+1}: {rerank_code_docs_chunks[i]}")

# 处理剩余项
if len(rerank_api_chunks) > min_length:
    for i in range(min_length, len(rerank_api_chunks)):
        print(f"API Chunk {i+1}: {rerank_api_chunks[i]}")
elif len(rerank_code_docs_chunks) > min_length:
    for i in range(min_length, len(rerank_code_docs_chunks)):
        print(f"Code Doc Chunk {i+1}: {rerank_code_docs_chunks[i]}")
"""

## LLM

### Get Detail From Sqlite

In [50]:
from sqlite3 import connect


def search_title_in_db(titles: List[str]) -> List[str]:
    detail_results = []
    db_path  = '/root/autodl-tmp/revitdocs/Output/revit_api_collection/revit_api.db'
    conn = connect(db_path)
    cursor = conn.cursor()
    for title in titles:
        cursor.execute("SELECT title, content FROM api_info WHERE title = ?", (title,))
        rows = cursor.fetchall()
        
        detail_results.extend(rows)
    conn.close()
    return detail_results


if __name__ == "__main__":
    results =  search_title_in_db(retireve_api_chunk)
    for result in results :
        print(result)

('FamilyInstanceCreationData(XYZ, FamilySymbol, Level, StructuralType) Constructor : Initializes a new instance of the FamilyInstanceCreationData class\n', 'Full Name: public FamilyInstanceCreationData ( XYZ location , FamilySymbol symbol , Level level , StructuralType structuralType )\n\nParameters:\n[location : XYZ]  - The physical location where the instance is to be placed.\n[symbol : FamilySymbol]  - A FamilySymbol object that represents the type of the instance that is to be inserted.\n[level : Level]  - A Level object that is used as the base level for the object.\n[structuralType : StructuralType]  - If structural then specify the type of the component.\n\n')
('FamilyInstanceCreationData(XYZ, FamilySymbol, Element, Level, StructuralType) Constructor : Initializes a new instance of the FamilyInstanceCreationData class\n', 'Full Name: public FamilyInstanceCreationData ( XYZ location , FamilySymbol symbol , Element host , Level level , StructuralType structuralType )\n\nParameters

### Check With API And Code

In [None]:
from openai import OpenAI

query_llm = "如何创建一个结构柱？请给出完整的C#代码示例。"
retrieve_query =  query_retrieve(query_llm)


client = OpenAI(api_key=api_key , base_url="https://api.deepseek.com")

prompt_sys = f"""
you are a professional bim engineer, you are good at Revit API, you can answer any question about Revit API.
also you have a good skill in c# and algorithm in graph 2d, you can write code in c# to solve the problem.

you need to base on this four reference to answer the question:
1. Completeness: It includes the entire process from start to submission
2. Professionalism: Correctly handle the characteristics of Revit structural elements
3. Robustness: It includes error handling and boundary condition checking
4. Scalability: The code structure makes it easy to add more functions
5. Best practice: Follow the Revit API development specifications


this is the reference of Revit API :
{"api reference".join(rerank_api_chunks)}
{"code and doc reference".join(rerank_code_docs_chunks)}

you will combine the revit api and c# code to generation a true add-in plugins and check the parameters and logic.

Give User a professional answer to the question, if you can not find the answer in the reference, please say "I don't know" or "I can't find the answer in the reference".
Also Give Code to solve the problem, if you can not generate the code in the reference, please say "I don't know" or "I can't find the code in the reference".

Remember : 1. Must Be True to the reference, 2. do not generate code that is not in the reference and RevitAPI. 3.  Konw What User Want , 4. Give User A Complete Code Solution

"""

response = client.chat.completions.create(
    model="deepseek-chat",
    messages=[
        {"role": "system", "content": f"{prompt_sys}"},  
        
        {"role": "user", "content": f"{retrieve_query}"}
    ],
    stream = False
)

print(f"query: {retrieve_query}")
print("Response from DeepSeek:")
print(response.choices[0].message.content)

query: 如何创建一个结构柱？请给出完整的C#代码示例。
Response from DeepSeek:
Keywords: structural column, create, C#, Revit API, FamilyInstance, NewFamilyInstance  

Here's the complete C# code to create a structural column in Revit:  

```csharp
Document doc = commandData.Application.ActiveUIDocument.Document;
Level level = doc.ActiveView.GenLevel; // Get current level
XYZ location = new XYZ(0, 0, 0); // Set insertion point
FamilySymbol columnType = new FilteredElementCollector(doc)
    .OfClass(typeof(FamilySymbol))
    .OfCategory(BuiltInCategory.OST_StructuralColumns)
    .FirstOrDefault() as FamilySymbol;
if (columnType != null && !columnType.IsActive) columnType.Activate();
using (Transaction t = new Transaction(doc, "Create Structural Column"))
{
    t.Start();
    FamilyInstance column = doc.Create.NewFamilyInstance(
        location, columnType, level, StructuralType.Column);
    t.Commit();
}
```
query: Keywords: structural column, create, C#, Revit API, FamilyInstance, NewFamilyInstance  

Here's

## FullCode

In [None]:
from sqlite3 import connect
from openai import OpenAI
import re
import torch

DATABASE_PATH = '/root/autodl-tmp/revitdocs/Output/revit_api_collection/revit_api.db'
EXTRACT_CODE_PATH = 'extra_data/all_codes.txt'
QUERY = "创建结构柱"

chromadb_api_collection = initialize_chromadb("./chromadb0815_api_1.db", "revit_api_1")
chromadb_code_docs_collection = initialize_chromadb("./chromadb_code_docs_0730_1.db", "revit_default_code_doc_1")

# Retrieve Query
query_llm = query_retrieve(QUERY)
query_apis = retireve_collection_name(query_llm,30, collection=chromadb_api_collection)
query_code_docs = retireve_collection_name(query_llm,5, collection=chromadb_code_docs_collection)

# ReRank
rerank_api_chunks = rerank(query_llm, query_apis, top_k=15)
rerank_code_docs_chunks = rerank(query_llm, query_code_docs, top_k=3)

# Create Response
client = OpenAI(api_key=api_key , base_url="https://api.deepseek.com")

prompt_sys = f"""
you are a professional bim engineer, you are good at Revit API, you can answer any question about Revit API.
also you have a good skill in c# and algorithm in graph 2d, you can write code in c# to solve the problem.

you need to base on this four reference to answer the question:
1. Completeness: It includes the entire process from start to submission
2. Professionalism: Correctly handle the characteristics of Revit structural elements
3. Robustness: It includes error handling and boundary condition checking
4. Scalability: The code structure makes it easy to add more functions
5. Best practice: Follow the Revit API development specifications


this is the reference of Revit API :
{"api reference".join(rerank_api_chunks)}
{"code and doc reference".join(rerank_code_docs_chunks)}

you will combine the revit api and c# code to generation a true add-in plugins and check the parameters and logic.

Give User a professional answer to the question, if you can not find the answer in the reference, please say "I don't know" or "I can't find the answer in the reference".
Also Give Code to solve the problem, if you can not generate the code in the reference, please say "I don't know" or "I can't find the code in the reference".

Remember : 1. Must Be True to the reference, 2. do not generate code that is not in the reference and RevitAPI. 3.  Konw What User Want , 4. Give User A Complete Code Solution

"""

response = client.chat.completions.create(
    model="deepseek-chat",
    messages=[
        {"role": "system", "content": f"{prompt_sys}"},  
        
        {"role": "user", "content": f"{query_llm}"}
    ],
    stream = False
)

print(f"query: {query_llm}")
print("Response from DeepSeek:")
print(response.choices[0].message.content)

