In [1]:
import os

In [None]:
# Show the kernel's working directory; the relative '../data/...' paths used
# further down are resolved against it.
os.getcwd()

In [3]:
# Route HTTPS traffic through a local proxy, but only when the environment has
# not already configured one: setdefault leaves an existing HTTPS_PROXY intact
# instead of overwriting it with this machine-specific address.
os.environ.setdefault("HTTPS_PROXY", "http://127.0.0.1:7078")
# Display the proxy that is actually in effect.
os.environ["HTTPS_PROXY"]

'http://127.0.0.1:7078'

In [None]:
# Confirm the API key is configured without echoing the secret itself into the
# saved notebook output.
"OPENAI_API_KEY" in os.environ

In [22]:
import json
import pandas as pd
import numpy as np
import openai
# Take the API key from the environment rather than hard-coding it; a missing
# variable raises KeyError on this line.
openai.api_key = os.environ["OPENAI_API_KEY"]

In [11]:
# Model identifiers: EMBEDDING_MODEL is the default for get_embedding, and
# COMPLETIONS_MODEL is the model that ask sends its prompt to.
EMBEDDING_MODEL = 'text-embedding-ada-002'
COMPLETIONS_MODEL = 'text-davinci-003'

In [13]:
def get_embedding(text: str, model: str=EMBEDDING_MODEL) -> list[float]:
    """Request the embedding vector for ``text`` from the OpenAI Embedding endpoint.

    Args:
        text: The string to embed.
        model: Name of the embedding model; defaults to ``EMBEDDING_MODEL``.

    Returns:
        The ``"embedding"`` field of the first entry in the response's
        ``"data"`` list.
    """
    response = openai.Embedding.create(input=text, model=model)
    first_item = response["data"][0]
    return first_item["embedding"]

def vector_similarity(x: list[float], y: list[float]) -> float:
    """
    Score two vectors by their dot product.

    OpenAI embeddings are normalized to length 1, so for them the dot product
    is the same value as the cosine similarity.
    """
    lhs = np.array(x)
    rhs = np.array(y)
    return lhs.dot(rhs)

def calc_embeddings(df, out_path='../data/embedded.csv'):
    """Embed every entry of ``df['Questions']`` and save the frame as CSV.

    Args:
        df: Frame with a ``Questions`` column. It is modified in place: an
            ``embedding`` column is added (or overwritten).
        out_path: Destination CSV path. Defaults to the previously hard-coded
            ``'../data/embedded.csv'`` so existing calls behave the same.

    Returns:
        ``df.head()``, a preview of the first rows of the updated frame.
    """
    # One get_embedding call per row of the Questions column.
    df['embedding'] = df['Questions'].apply(get_embedding)
    # index=False keeps the row index out of the saved file.
    df.to_csv(out_path, index=False)
    return df.head()

def ask(prompt: str, q: str, a: str):
    """Answer ``prompt`` with the completion model, primed by one example Q/A pair.

    Args:
        prompt: The new question to be answered.
        q: Example question placed ahead of ``prompt``.
        a: Example answer paired with ``q``.

    Returns:
        The ``'text'`` of the first choice in the completion response.
    """
    # The prompt opens with a newline, every following line carries four
    # leading spaces, and the separator line is four spaces as well; all of
    # that whitespace is part of what gets sent.
    few_shot_prompt = (
        "\n"
        f"    Q: {q}\n"
        f"    A: {a}\n"
        "    \n"
        f"    Q: {prompt}\n"
        "    A: "
    )
    completion = openai.Completion.create(
        model=COMPLETIONS_MODEL,
        prompt=few_shot_prompt,
        max_tokens=1000,
        temperature=0,
    )
    first_choice = completion['choices'][0]
    return first_choice['text']

In [23]:
# Load the pre-computed Q&A table. The 'embedding' column is stored as text in
# the CSV, so each cell is decoded with json.loads before use.
df = pd.read_csv('../data/md_QA_embedded.csv')
df['embedding'] = df['embedding'].apply(json.loads)
df.head()

Unnamed: 0,ID,Questions,Answers,Attachment,RegBy,RegTime,Submit,SubmitBy,SubmitTime,embedding
0,1,上海市居住证业务-持证人居住地址发生变化怎么办？,持证人居住地住址等信息发生变化的，请在发生变化的30日内到现居住地街道（乡镇）社区事务受理服...,,1,2021-09-01 12:11:17.830,,,,"[0.017005516216158867, 0.013673987239599228, 0..."
1,2,上海市居住证业务-如何查询居住证办理信息？,居住证办理过程的状态信息，如受理时间、签发时间、有效期限、受理状态、审核状态、审核未通过原因...,,1,2021-09-01 12:11:17.830,,,,"[0.021357620134949684, 0.02255536988377571, -0..."
2,3,上海市居住证业务-办证时未提供手机号码或提供的号码有错误的，如何变更？,如需变更手机号码，请及时前往居住地街道（乡镇）社区事务受理服务中心办理信息变更手续。,,1,2021-09-01 12:11:17.830,,,,"[0.011813518591225147, 0.015791472047567368, -..."
3,4,上海市居住证业务-居住证有效期届满前30日内持证人可否自行办理签注手续？,可以。持证人自行办理签注手续成功后，系统不再自动签注。,,1,2021-09-01 12:11:17.830,,,,"[0.01432208251208067, 0.016711320728063583, 0...."
4,5,上海市居住证业务-收到居住证签注未通过或不符合自动签注条件的提醒短信，持证人应如何办理签注？,持证人可持证前往居住地街道（乡镇）社区事务受理服务中心咨询办理。,,1,2021-09-01 12:11:17.830,,,,"[0.0040360321290791035, 0.011765829287469387, ..."


In [26]:
# Sample user question to answer. Rough translation: "I already have a
# residence permit and my home address has changed; what should I do?"
q = "我已经办过居住证了，现在我家地址变了，该做些什么？"

In [27]:
# Embed the user question so it can be scored against the stored question
# embeddings.
q_embedding = get_embedding(q)
# NOTE(review): this echoes the whole vector into the cell output; a slice such
# as q_embedding[:5] would keep the saved notebook shorter.
q_embedding

[0.007468193769454956,
 -0.0052923704497516155,
 -0.00546835595741868,
 -0.01302294246852398,
 -0.02426682971417904,
 0.01093671191483736,
 -0.03102468140423298,
 -0.014961984939873219,
 -0.006905039306730032,
 -0.027927331626415253,
 0.02915603294968605,
 -0.012389393523335457,
 0.007026629522442818,
 0.008568904362618923,
 -0.005826727021485567,
 0.017355389893054962,
 0.0037244975101202726,
 -0.012664571404457092,
 0.020580729469656944,
 -0.012389393523335457,
 -0.038345687091350555,
 0.012594177387654781,
 0.006159499753266573,
 -0.010085580870509148,
 -0.018507298082113266,
 -0.010392756201326847,
 0.015793917700648308,
 -0.007807366084307432,
 -0.0075193895027041435,
 0.00029937567887827754,
 0.015857912600040436,
 -0.0066298614256083965,
 -0.031767021864652634,
 -0.021911820396780968,
 -0.010891915298998356,
 -0.007282608654350042,
 -0.005797929130494595,
 -0.03143424913287163,
 0.003007755847647786,
 -0.008319324813783169,
 -0.00039376801578328013,
 -0.0004359646118246019,
 0.0

In [28]:
# Score each stored question embedding against the query embedding; the query
# vector is forwarded to vector_similarity as its second positional argument.
df['similarity'] = df['embedding'].apply(vector_similarity, args=(q_embedding,))
df.head()

Unnamed: 0,ID,Questions,Answers,Attachment,RegBy,RegTime,Submit,SubmitBy,SubmitTime,embedding,similarity
0,1,上海市居住证业务-持证人居住地址发生变化怎么办？,持证人居住地住址等信息发生变化的，请在发生变化的30日内到现居住地街道（乡镇）社区事务受理服...,,1,2021-09-01 12:11:17.830,,,,"[0.017005516216158867, 0.013673987239599228, 0...",0.905537
1,2,上海市居住证业务-如何查询居住证办理信息？,居住证办理过程的状态信息，如受理时间、签发时间、有效期限、受理状态、审核状态、审核未通过原因...,,1,2021-09-01 12:11:17.830,,,,"[0.021357620134949684, 0.02255536988377571, -0...",0.861465
2,3,上海市居住证业务-办证时未提供手机号码或提供的号码有错误的，如何变更？,如需变更手机号码，请及时前往居住地街道（乡镇）社区事务受理服务中心办理信息变更手续。,,1,2021-09-01 12:11:17.830,,,,"[0.011813518591225147, 0.015791472047567368, -...",0.862277
3,4,上海市居住证业务-居住证有效期届满前30日内持证人可否自行办理签注手续？,可以。持证人自行办理签注手续成功后，系统不再自动签注。,,1,2021-09-01 12:11:17.830,,,,"[0.01432208251208067, 0.016711320728063583, 0....",0.845336
4,5,上海市居住证业务-收到居住证签注未通过或不符合自动签注条件的提醒短信，持证人应如何办理签注？,持证人可持证前往居住地街道（乡镇）社区事务受理服务中心咨询办理。,,1,2021-09-01 12:11:17.830,,,,"[0.0040360321290791035, 0.011765829287469387, ...",0.841276


In [29]:
# Rank the FAQ rows from most to least similar to the query.
sorted_df = df.sort_values('similarity', ascending=False)
sorted_df.head()

Unnamed: 0,ID,Questions,Answers,Attachment,RegBy,RegTime,Submit,SubmitBy,SubmitTime,embedding,similarity
0,1,上海市居住证业务-持证人居住地址发生变化怎么办？,持证人居住地住址等信息发生变化的，请在发生变化的30日内到现居住地街道（乡镇）社区事务受理服...,,1,2021-09-01 12:11:17.830,,,,"[0.017005516216158867, 0.013673987239599228, 0...",0.905537
7,8,上海市居住证业务-怎么办理临时居住证？,您好！《上海市临时居住证》须本人前往现居住证地的居住证受理点办理，须提供《来沪人员居住登记表...,,1,2021-09-01 12:11:17.830,,,,"[0.025090347975492477, 0.01781642436981201, -0...",0.873321
2,3,上海市居住证业务-办证时未提供手机号码或提供的号码有错误的，如何变更？,如需变更手机号码，请及时前往居住地街道（乡镇）社区事务受理服务中心办理信息变更手续。,,1,2021-09-01 12:11:17.830,,,,"[0.011813518591225147, 0.015791472047567368, -...",0.862277
1,2,上海市居住证业务-如何查询居住证办理信息？,居住证办理过程的状态信息，如受理时间、签发时间、有效期限、受理状态、审核状态、审核未通过原因...,,1,2021-09-01 12:11:17.830,,,,"[0.021357620134949684, 0.02255536988377571, -0...",0.861465
5,6,上海市居住证业务-收到居住证签注已通过的提醒短信，持证人是否需要办理卡面有效期限更新手续？,持证人如需更新卡面有效期限，可持证到就近街道（乡镇）社区事务受理服务中心办理。,,1,2021-09-01 12:11:17.830,,,,"[-0.0037496446166187525, 0.01798757165670395, ...",0.855415


In [30]:
# Take the top-ranked row and unpack its question and answer text.
best_q, best_a = sorted_df.iloc[0][['Questions', 'Answers']]
best_q, best_a

('上海市居住证业务-持证人居住地址发生变化怎么办？',
 '持证人居住地住址等信息发生变化的，请在发生变化的30日内到现居住地街道（乡镇）社区事务受理服务中心办理信息变更手续，以避免系统无法自动签注。')

In [31]:
# Ask the completion model the user question, passing the best-matching FAQ
# pair as the example question/answer for the prompt.
a = ask(q, best_q, best_a)
# Display the user question, the generated answer, and the FAQ pair together.
q, a, best_q, best_a

('我已经办过居住证了，现在我家地址变了，该做些什么？',
 ' 持证人居住地住址等信息发生变化的，请在发生变化的30日内到现居住地街道（乡镇）社区事务受理服务中心办理信息变更手续，以避免系统无法自动签注。',
 '上海市居住证业务-持证人居住地址发生变化怎么办？',
 '持证人居住地住址等信息发生变化的，请在发生变化的30日内到现居住地街道（乡镇）社区事务受理服务中心办理信息变更手续，以避免系统无法自动签注。')