In [4]:
import os
# Both settings are required: indexing os.environ (instead of .get) raises
# KeyError immediately when a variable is missing.
proxy, api_key = os.environ["HTTPS_PROXY"], os.environ["OPENAI_API_KEY"]

# Show the proxy value, but only acknowledge the key so the secret itself is
# never written into the notebook output.
print(f"HTTPS_PROXY: {proxy}", "OPENAI_API_KEY: configured", sep="\n")

HTTPS_PROXY: http://127.0.0.1:7078
OPENAI_API_KEY: configured


In [5]:
import json
import pandas as pd
import numpy as np
import openai
# Hand the API key to the openai module; indexing os.environ raises KeyError
# if OPENAI_API_KEY is not set.
openai.api_key = os.environ["OPENAI_API_KEY"]

In [6]:
# Model name passed to openai.Completion.create when generating an answer.
COMPLETIONS_MODEL = "text-davinci-003"
# Model name passed to openai.Embedding.create when embedding text.
EMBEDDING_MODEL = "text-embedding-ada-002"

In [7]:
def get_embedding(text: str, model: str=EMBEDDING_MODEL) -> list[float]:
    """
    Request an embedding for ``text`` from ``openai.Embedding.create``.

    ``model`` names the embedding model to use. A single input is sent, and the
    ``"embedding"`` field of the first entry under ``"data"`` in the response
    is returned.
    """
    response = openai.Embedding.create(input=text, model=model)
    first_entry = response["data"][0]
    return first_entry["embedding"]

def vector_similarity(x: list[float], y: list[float]) -> float:
    """
    Score how similar two vectors are by taking their dot product.

    OpenAI embeddings are normalized to length 1, so for such vectors the dot
    product is equal to the cosine similarity.
    """
    lhs = np.array(x)
    rhs = np.array(y)
    return np.dot(lhs, rhs)

def calc_embeddings(df, output_path='../data/embedded.csv'):
    """
    Embed every entry of the ``Questions`` column and save the frame as CSV.

    Args:
        df: DataFrame with a ``Questions`` column. It is modified in place: an
            ``embedding`` column is added (or overwritten) holding the value
            ``get_embedding`` returns for each question.
        output_path: Where the CSV is written. Defaults to the location that
            used to be hard-coded, so existing calls behave as before.

    Returns:
        ``df.head()``, i.e. the first rows of the updated frame.
    """
    # get_embedding is invoked once per row; its only required argument is the
    # question text, so it can be handed to apply() without a wrapper.
    df['embedding'] = df['Questions'].apply(get_embedding)
    # index=False keeps the row index out of the written file.
    df.to_csv(output_path, index=False)
    return df.head()

def ask(prompt: str, q: str, a: str):
    """
    Answer ``prompt`` with the completion model, primed by one Q/A example.

    ``q`` and ``a`` are rendered as a worked example ahead of ``prompt``, and
    the text ends on an open ``A:`` for the model to continue. Returns the
    ``text`` of the first choice in the completion response.
    """
    # The layout of this literal (leading newline, indentation, trailing
    # space) is sent to the model exactly as written.
    few_shot_prompt = f"""
    Q: {q}
    A: {a}
    
    Q: {prompt}
    A: """
    completion = openai.Completion.create(
        model=COMPLETIONS_MODEL,
        prompt=few_shot_prompt,
        max_tokens=1000,
        temperature=0,
    )
    first_choice = completion['choices'][0]
    return first_choice['text']

In [8]:
# Load the question/answer table together with its stored embeddings.
# NOTE(review): the displayed frame has an 'Unnamed: 0' column, which suggests
# the CSV was written with its index; index_col=0 would absorb it - confirm.
df = pd.read_csv('../data/md_QA_embedded.csv')
# Each embedding is stored as JSON text in the CSV; decode it back into a list.
df['embedding'] = df['embedding'].apply(json.loads)
df.head()

Unnamed: 0,ID,Questions,Answers,Attachment,RegBy,RegTime,Submit,SubmitBy,SubmitTime,embedding
0,1,上海市居住证业务-持证人居住地址发生变化怎么办？,持证人居住地住址等信息发生变化的，请在发生变化的30日内到现居住地街道（乡镇）社区事务受理服...,,1,2021-09-01 12:11:17.830,,,,"[0.017005516216158867, 0.013673987239599228, 0..."
1,2,上海市居住证业务-如何查询居住证办理信息？,居住证办理过程的状态信息，如受理时间、签发时间、有效期限、受理状态、审核状态、审核未通过原因...,,1,2021-09-01 12:11:17.830,,,,"[0.021357620134949684, 0.02255536988377571, -0..."
2,3,上海市居住证业务-办证时未提供手机号码或提供的号码有错误的，如何变更？,如需变更手机号码，请及时前往居住地街道（乡镇）社区事务受理服务中心办理信息变更手续。,,1,2021-09-01 12:11:17.830,,,,"[0.011813518591225147, 0.015791472047567368, -..."
3,4,上海市居住证业务-居住证有效期届满前30日内持证人可否自行办理签注手续？,可以。持证人自行办理签注手续成功后，系统不再自动签注。,,1,2021-09-01 12:11:17.830,,,,"[0.01432208251208067, 0.016711320728063583, 0...."
4,5,上海市居住证业务-收到居住证签注未通过或不符合自动签注条件的提醒短信，持证人应如何办理签注？,持证人可持证前往居住地街道（乡镇）社区事务受理服务中心咨询办理。,,1,2021-09-01 12:11:17.830,,,,"[0.0040360321290791035, 0.011765829287469387, ..."


In [9]:
# The user's question, roughly: "My residence permit is about to expire - how do I renew it?"
q = "我的居住证快到期了，怎么续办？"

In [10]:
# Embed the question via get_embedding (using its default model).
q_embedding = get_embedding(q)
# NOTE(review): echoing the whole vector produces a very long cell output;
# something like q_embedding[:5] may be enough for a sanity check.
q_embedding

[-0.0006816151435486972,
 0.008490454405546188,
 -0.006813656073063612,
 -0.02005503885447979,
 -0.030528374016284943,
 0.009701475501060486,
 -0.03702263906598091,
 -0.0004453995206858963,
 -0.019363027065992355,
 -0.017859231680631638,
 0.03925837203860283,
 0.011637778021395206,
 0.0019013694254681468,
 0.005269937217235565,
 -0.011225231923162937,
 0.0008142784936353564,
 0.014479019679129124,
 -0.010320293717086315,
 0.03861958906054497,
 -0.022916242480278015,
 -0.013853547163307667,
 -0.0018930518999695778,
 -0.014891564846038818,
 0.001994524849578738,
 0.006820309907197952,
 0.01264252606779337,
 0.004215284250676632,
 0.0032288345973938704,
 -0.00015168951358646154,
 -0.003593138884752989,
 0.023009397089481354,
 0.0004649455368053168,
 -0.025231819599866867,
 -0.017885847017169,
 -0.0005651708343066275,
 -0.014665329828858376,
 -0.0019496105378493667,
 -0.0017433378379791975,
 0.012828837148845196,
 -0.014612098224461079,
 0.0017782710492610931,
 0.013560772873461246,
 0.005

In [11]:
# Score every stored question embedding against the query embedding.
similarity_scores = df['embedding'].apply(
    lambda candidate: vector_similarity(candidate, q_embedding)
)
df['similarity'] = similarity_scores
df.head()

Unnamed: 0,ID,Questions,Answers,Attachment,RegBy,RegTime,Submit,SubmitBy,SubmitTime,embedding,similarity
0,1,上海市居住证业务-持证人居住地址发生变化怎么办？,持证人居住地住址等信息发生变化的，请在发生变化的30日内到现居住地街道（乡镇）社区事务受理服...,,1,2021-09-01 12:11:17.830,,,,"[0.017005516216158867, 0.013673987239599228, 0...",0.867427
1,2,上海市居住证业务-如何查询居住证办理信息？,居住证办理过程的状态信息，如受理时间、签发时间、有效期限、受理状态、审核状态、审核未通过原因...,,1,2021-09-01 12:11:17.830,,,,"[0.021357620134949684, 0.02255536988377571, -0...",0.868395
2,3,上海市居住证业务-办证时未提供手机号码或提供的号码有错误的，如何变更？,如需变更手机号码，请及时前往居住地街道（乡镇）社区事务受理服务中心办理信息变更手续。,,1,2021-09-01 12:11:17.830,,,,"[0.011813518591225147, 0.015791472047567368, -...",0.850976
3,4,上海市居住证业务-居住证有效期届满前30日内持证人可否自行办理签注手续？,可以。持证人自行办理签注手续成功后，系统不再自动签注。,,1,2021-09-01 12:11:17.830,,,,"[0.01432208251208067, 0.016711320728063583, 0....",0.881849
4,5,上海市居住证业务-收到居住证签注未通过或不符合自动签注条件的提醒短信，持证人应如何办理签注？,持证人可持证前往居住地街道（乡镇）社区事务受理服务中心咨询办理。,,1,2021-09-01 12:11:17.830,,,,"[0.0040360321290791035, 0.011765829287469387, ...",0.862152


In [12]:
# Rank the rows so the highest similarity score comes first.
sorted_df = df.sort_values('similarity', ascending=False)
sorted_df.head()

Unnamed: 0,ID,Questions,Answers,Attachment,RegBy,RegTime,Submit,SubmitBy,SubmitTime,embedding,similarity
7,8,上海市居住证业务-怎么办理临时居住证？,您好！《上海市临时居住证》须本人前往现居住证地的居住证受理点办理，须提供《来沪人员居住登记表...,,1,2021-09-01 12:11:17.830,,,,"[0.025090347975492477, 0.01781642436981201, -0...",0.888678
39,40,积分到期忘记续办了，已经过期半年了，还能续办吗？,根据目前政策可以续办，从系统中修改，提交完成后告知HR审批，审批完成后会告知需提供的纸质材料...,,1,2021-09-01 12:11:17.830,,,,"[0.012294166721403599, -0.0032590138725936413,...",0.884429
5,6,上海市居住证业务-收到居住证签注已通过的提醒短信，持证人是否需要办理卡面有效期限更新手续？,持证人如需更新卡面有效期限，可持证到就近街道（乡镇）社区事务受理服务中心办理。,,1,2021-09-01 12:11:17.830,,,,"[-0.0037496446166187525, 0.01798757165670395, ...",0.882023
3,4,上海市居住证业务-居住证有效期届满前30日内持证人可否自行办理签注手续？,可以。持证人自行办理签注手续成功后，系统不再自动签注。,,1,2021-09-01 12:11:17.830,,,,"[0.01432208251208067, 0.016711320728063583, 0....",0.881849
1,2,上海市居住证业务-如何查询居住证办理信息？,居住证办理过程的状态信息，如受理时间、签发时间、有效期限、受理状态、审核状态、审核未通过原因...,,1,2021-09-01 12:11:17.830,,,,"[0.021357620134949684, 0.02255536988377571, -0...",0.868395


In [13]:
# Take the top-ranked row and pull out its question and answer text.
best_q, best_a = sorted_df.iloc[0][['Questions', 'Answers']]
best_q, best_a

('上海市居住证业务-怎么办理临时居住证？',
 '您好！《上海市临时居住证》须本人前往现居住证地的居住证受理点办理，须提供《来沪人员居住登记表》、居民身份证、在本市的住所证明等相关材料办理。')

In [14]:
# Ask the completion model the user's question, using the best-matching
# stored question/answer pair as the worked example in the prompt.
a = ask(q, best_q, best_a)
# Show the question, the generated answer, and the example pair side by side.
q, a, best_q, best_a

('我的居住证快到期了，怎么续办？',
 '《上海市居住证》有效期为一年，您可以在有效期届满前一个月内，前往现居住证地的居住证受理点办理续办手续，须提供本人身份证、原居住证、在本市的住所证明等相关材料办理。',
 '上海市居住证业务-怎么办理临时居住证？',
 '您好！《上海市临时居住证》须本人前往现居住证地的居住证受理点办理，须提供《来沪人员居住登记表》、居民身份证、在本市的住所证明等相关材料办理。')