In [1]:
import os

# Credentials are read from the environment so that real keys never end up in
# the saved notebook. Export PINECONE_API_KEY / OPENAI_API_KEY before starting
# the kernel.
PINECONE_KEY = os.environ.get("PINECONE_API_KEY", "")
OPENAI_KEY = os.environ.get("OPENAI_API_KEY", "")

# Name of the Pinecone index used by the cells below.
index_name = "api-doc"

# Re-export the key only when one is actually present: writing an empty string
# into the environment would clobber a correctly configured OPENAI_API_KEY.
if OPENAI_KEY:
    os.environ['OPENAI_API_KEY'] = OPENAI_KEY

In [5]:
from pinecone import Pinecone, ServerlessSpec
from langchain_openai import OpenAIEmbeddings


# Initialise the Pinecone client with the key from the config cell.
pc = Pinecone(api_key=PINECONE_KEY)

# One-time setup: create the index (skip this if it already exists).
# The dimension has to match the output size of the embedding model.
# NOTE(review): with the current Pinecone client the existence check probably
# needs `pc.list_indexes().names()` -- confirm before enabling this snippet.
# if index_name not in pc.list_indexes().names():
#     pc.create_index(
#         name=index_name,
#         dimension=3072,   # replace with your model's dimension
#         metric="cosine",  # replace with your model's metric
#         spec=ServerlessSpec(
#             cloud="aws",
#             region="us-east-1",
#         ),
#     )

# Connect to the Pinecone index.
index = pc.Index(index_name)

# Create the OpenAI embedding model (LangChain wrapper).
# NOTE: the name `embeddings` is rebound to a plain list of vectors in the
# embedding/upsert cell further down.
embeddings = OpenAIEmbeddings(model="text-embedding-3-large", openai_api_key=OPENAI_KEY)


  from tqdm.autonotebook import tqdm


In [4]:
import os 
# Show what is in the current working directory (the relative "rag/..." path
# used by the next cell is resolved against it).
os.listdir(os.curdir)

['rag', 'Untitled.ipynb', '.aws', '.ipynb_checkpoints']

In [7]:
import re

# Read the REST API reference. The encoding is pinned to UTF-8 so that decoding
# does not depend on the platform's default locale encoding.
with open("rag/docs/_rest-api.md", 'r', encoding="utf-8") as file:
    data = file.read()

# Split on level-2 markdown headings. Element 0 is whatever precedes the first
# "## " heading; every later element starts with the heading text itself.
sections = re.split(r'\n## ', data)

In [8]:
import json

def extract_api_information(api_text):
    """Extract name, description, response and parameters from one API section.

    NOTE: a second ``extract_api_information`` is defined further down in this
    cell and rebinds the name, so this version is not the one the parsing loop
    ends up calling.

    Args:
        api_text: Text of a single section, starting with its heading line.

    Returns:
        dict with the keys ``name``, ``description``, ``response`` and
        ``parameters``. ``description`` is ``""`` when no ``**Limit`` marker is
        present; ``response`` and ``parameters`` are the string ``"None"`` when
        the corresponding part cannot be found.
    """
    api_info = {}

    # The heading is the first line. partition() returns the whole text when
    # there is no newline, whereas slicing with find()'s -1 drops a character.
    api_info["name"] = api_text.partition("\n")[0]

    # Description: from the "**Limit" marker up to the "> **Response**" marker
    # (or to the end of the section when there is no response marker).
    description_start = api_text.find("**Limit")
    response_marker = api_text.find("> **Response**")
    if description_start == -1:
        description = ""
    else:
        description_end = response_marker if response_marker != -1 else len(api_text)
        description = api_text[description_start:description_end].strip()
    api_info["description"] = description

    # Response: the first ```js fenced block after the response marker, fences
    # included. Each find() result is checked so that -1 is never used as a
    # slice bound.
    response = "None"
    if response_marker != -1:
        fence_start = api_text.find("```js", response_marker)
        if fence_start != -1:
            fence_end = api_text.find("```", fence_start + 3)
            if fence_end == -1:
                # Unterminated fence: keep everything up to the end.
                response = api_text[fence_start:].strip()
            else:
                response = api_text[fence_start:fence_end + 3].strip()
    api_info["response"] = response

    # Parameters: everything from the "**Parameters**" marker to the end.
    parameters_start = api_text.find("**Parameters**")
    if parameters_start != -1:
        parameters = api_text[parameters_start:].strip()
    else:
        parameters = "None"
    api_info["parameters"] = parameters

    return api_info

def extract_api_information(api_text):
    """Parse one "## "-delimited section of the API reference into a dict.

    Args:
        api_text: Text of a single section, starting with its heading line.

    Returns:
        dict with the keys ``name``, ``description``, ``endpoint``,
        ``parameters`` and ``response`` (in that order). Parts that cannot be
        found are reported as ``"No description"``, ``"No endpoint"``,
        ``"No parameters"`` and ``"No response"`` respectively.
    """
    # The heading is the first line. partition() returns the whole text when
    # there is no newline, whereas slicing with find()'s -1 drops a character.
    name = api_text.partition("\n")[0]

    # Description: the text that follows a closing backtick plus a blank line,
    # up to the next bold ("**") marker. Written as a raw string so the regex
    # escapes (\S, \*) are not treated as invalid string escapes.
    desc_match = re.search(r"`\n\n([\S\n\t\v ]*?)\*\*", api_text)
    if desc_match:
        # Drop blockquote markers and join the lines into a single string.
        description = desc_match.group(1).replace(">", "").replace("\n", "").strip()
    else:
        description = "No description"

    # Endpoint: an inline code span of the form `METHOD /path`.
    endpoint_match = re.search(r'`\s*(GET|POST|PUT|DELETE)\s+([^\s]+)\s*`', api_text)
    endpoint = f"{endpoint_match.group(1)} {endpoint_match.group(2)}" if endpoint_match else "No endpoint"

    # Split on the bold markers. Because the pattern has a capturing group, the
    # result alternates: text, marker, text, marker, text, ...
    parts = re.split(r'\*\*(Parameters|Response|Permission)\*\*', api_text)

    parameters = "No parameters"
    response = "No response"

    # Walk the (marker, following text) pairs. "Permission" is only used as a
    # delimiter; its text is not extracted.
    for marker, body in zip(parts[1::2], parts[2::2]):
        if marker == "Parameters":
            parameters = body.strip()
        elif marker == "Response":
            response_match = re.search(r'```js(.*?)```', body, re.DOTALL)
            if response_match:
                response = response_match.group(1).strip()
                # Collapse whitespace runs and remove the space left before commas.
                response = re.sub(r'\s+', ' ', response).replace(' ,', ',')

    return {
        "name": name,
        "description": description,
        "endpoint": endpoint,
        "parameters": parameters,
        "response": response
    }

# Parse every section, then drop the first result: it comes from the text that
# precedes the first "## " heading.
parsed = [extract_api_information(section) for section in sections][1:]

In [9]:
# One record per parsed section: the section name is the id, and the whole
# parsed dict serialised as JSON is the text.
docs = [{'id': entry['name'], 'text': json.dumps(entry)} for entry in parsed]

In [17]:
from openai import OpenAI
client = OpenAI()

# Parallel lists of ids and texts, in the same order as `docs`.
ids = [entry["id"] for entry in docs]
texts = [entry["text"] for entry in docs]

# Ask OpenAI for one embedding per text in a single request.
response = client.embeddings.create(model="text-embedding-3-large", input=texts)

# Pull the raw vectors out of the response objects.
embeddings = [item.embedding for item in response.data]

# Pair every id with its vector: a list of (id, vector) tuples.
pinecone_data = list(zip(ids, embeddings))

# Store the vectors in the Pinecone index.
index.upsert(vectors=pinecone_data)


{'upserted_count': 79}

In [2]:
# NOTE(review): this repeats the import/client setup of the embedding cell
# above -- presumably so the query cells can be run without re-running the
# upsert; confirm, and consider keeping a single setup cell.
from openai import OpenAI
# Built with no arguments; presumably picks up the OPENAI_API_KEY exported by
# the first cell -- confirm.
client = OpenAI()

In [12]:


def query_pinecone(text, top_k=5):
    """Return the texts of the stored docs that best match ``text``.

    Embeds ``text`` with the module-level OpenAI ``client``, queries the
    module-level Pinecone ``index`` and maps the returned ids back to the
    entries of the module-level ``docs`` list.

    Args:
        text: The query string.
        top_k: Number of matches requested from the index (default 5).

    Returns:
        list of the ``'text'`` values of the matching docs, in the order the
        index returned them. Matches whose id is not present in ``docs`` are
        skipped instead of raising ``IndexError``.
    """
    query_embedding = client.embeddings.create(
        input=[text],
        model="text-embedding-3-large"
    ).data[0].embedding

    query_result = index.query(vector=query_embedding, top_k=top_k)

    # Build the id -> text lookup once instead of rescanning `docs` for every
    # match. setdefault keeps the first entry for an id, like a first-match scan.
    text_by_id = {}
    for doc in docs:
        text_by_id.setdefault(doc['id'], doc['text'])

    docs_results = []
    for match in query_result.get('matches') or []:
        match_id = match['id']
        # The index can hold ids that are not in the in-memory `docs`.
        if match_id in text_by_id:
            docs_results.append(text_by_id[match_id])
    return docs_results
def enhance_response(query, docs):
  """Answer ``query`` with the chat model, grounded in the retrieved docs.

  Args:
      query: The user's question; sent as the user message.
      docs: Iterable of documentation strings; each one is appended to the
          system prompt on its own line.

  Returns:
      The content of the first choice returned by the chat completion.
  """
  prompt = """
  You are an API support assistant. Please generate a concise, accurate, and customer-friendly response based on the following documentation to help them understand how to call the API.

  These are the related api documentations: 
  """

  # Append every retrieved document, one per line.
  prompt += "".join(f"{r}\n" for r in docs)

  response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
      {"role": "system", "content": prompt},
      # Use the `query` argument; reading the notebook-global `query_text`
      # here made the function ignore its own parameter.
      {"role": "user", "content": query},
    ]
  )

  return response.choices[0].message.content

In [13]:

query_text = "Can you show me how to use the position info endpoint? I need a python example code"

# Retrieve the closest doc entries first, then let the chat model answer the
# question using them.
retrieved_docs = query_pinecone(query_text)
answer = enhance_response(query_text, retrieved_docs)
print(answer)


Sure! Below is an example of how you can use the "Get One Position Info" endpoint in Python. This example assumes you have the `requests` library installed. If you don't have it yet, you can install it using `pip install requests`.

```python
import requests

def get_position_info(symbol):
    # Define the endpoint URL, replace ':symbol' with the actual symbol
    url = f"https://api.yourservice.com/v1/position/{symbol}"

    # Make a GET request to the API endpoint
    response = requests.get(url)

    # Check if the request was successful
    if response.status_code == 200:
        # Parse the response as JSON
        data = response.json()
        if data["success"]:
            return data
        else:
            print("Failed to retrieve position info.")
            return None
    else:
        print(f"Error: {response.status_code}")
        return None

# Replace 'PERP_BTC_USDT' with the symbol you want to check
symbol = 'PERP_BTC_USDT'
position_info = get_position_info(symbol

In [34]:
import openai
# Display the installed OpenAI SDK version that this notebook was run with.
openai.__version__

'1.45.0'