In [1]:
import qdrant_client as qc
import qdrant_client.http.models as qmodels
import uuid
import csv
import openai
import os
import gradio as gr

# Take the OpenAI key from the environment instead of writing a literal into the
# notebook: export OPENAI_API_KEY before starting the kernel. Assigning a
# placeholder to os.environ here would clobber a key that is already configured.
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Qdrant client shared by every helper below.
client = qc.QdrantClient(url="localhost")
# Similarity function used by every collection created through create_index().
METRIC = qmodels.Distance.DOT
# Vector size used by every collection created through create_index(); it has to
# equal the length of the vectors returned by embed_text() for MODEL
# (1536 for text-embedding-ada-002 according to OpenAI's documentation).
DIMENSION = 1536

# Embedding model requested by embed_text().
MODEL = "text-embedding-ada-002"

def embed_text(text):
    """Return the embedding of ``text`` computed by the ``MODEL`` embedding model.

    Sends ``text`` to ``openai.Embedding.create`` and returns the ``'embedding'``
    entry of the first element of the response's ``'data'`` list.
    """
    api_result = openai.Embedding.create(model=MODEL, input=text)
    first_item = api_result['data'][0]
    return first_item['embedding']

def create_index(collection_name):
    """Set up the Qdrant collection ``collection_name`` via ``recreate_collection``.

    The collection stores vectors of length ``DIMENSION`` compared with ``METRIC``.
    NOTE(review): ``recreate_collection`` presumably drops an existing collection
    of the same name first - confirm against the installed qdrant-client version.
    """
    vector_params = qmodels.VectorParams(size=DIMENSION, distance=METRIC)
    client.recreate_collection(
        collection_name=collection_name,
        vectors_config=vector_params,
    )


def delete_index(collection_name):
    """Ask the shared Qdrant ``client`` to delete the collection ``collection_name``."""
    client.delete_collection(
        collection_name=collection_name,
    )
    
def create_vector(content):
    """Build the ``(id, vector, payload)`` triple for one document.

    Args:
        content: Text of the document; it is embedded and also stored verbatim
            in the payload under the ``"text"`` key.

    Returns:
        A tuple ``(point_id, vector, payload)`` where ``point_id`` is a UUID
        string, ``vector`` is the result of ``embed_text(content)`` and
        ``payload`` is ``{"text": content}``.
    """
    vector = embed_text(content)
    # Use the canonical UUID string as the point id. Qdrant accepts point ids
    # that are either unsigned 64-bit integers or UUIDs; slicing the decimal
    # form of the UUID instead would throw away low-order bits and can produce
    # a string that is too short to be a valid UUID.
    point_id = str(uuid.uuid1())
    payload = {"text": content}
    return point_id, vector, payload

def add_doc_to_index(collection_name, content):
    """Embed ``content`` and upsert it into ``collection_name`` as a single point.

    The id, vector and payload come from ``create_vector(content)`` and are sent
    to Qdrant as a one-element ``Batch``.
    """
    point_id, embedding, metadata = create_vector(content)

    single_point_batch = qmodels.Batch(
        ids=[point_id],
        vectors=[embedding],
        payloads=[metadata],
    )
    client.upsert(
        collection_name=collection_name,
        points=single_point_batch,
    )
    
def query_index(collection_name, query, top_k=30):
    """Search ``collection_name`` for the stored texts closest to ``query``.

    ``query`` is embedded with ``embed_text`` and passed to ``client.search``
    with ``limit=top_k`` and payloads enabled.

    Returns:
        A list of ``(text, score)`` tuples, one per hit, in the order the
        search returned them. ``text`` is the hit's ``payload["text"]``.
    """
    query_embedding = embed_text(query)

    hits = client.search(
        collection_name=collection_name,
        query_vector=query_embedding,
        limit=top_k,
        with_payload=True,
    )

    scored_texts = []
    for hit in hits:
        scored_texts.append((hit.payload["text"], hit.score))
    return scored_texts

In [4]:
from datetime import datetime

i = 0  # not referenced in this cell
queries = []
# Only rows whose rating column holds one of these values are indexed.
rating_option = ['1','','-1']
# Only records created strictly after this moment are indexed. Parsed once here
# rather than on every row of the CSV.
created_after = datetime.strptime('2023-07-04 14:00:00', '%Y-%m-%d %H:%M:%S')


# Start from an empty 'chat_records' collection.
delete_index('chat_records')
create_index('chat_records')
# newline='' leaves line-ending handling to the csv module, as its documentation
# requires, so newlines inside quoted fields are parsed correctly.
with open('Chat Records.csv', newline='') as f:
    reader = csv.reader(f, delimiter=',')
    next(reader, None)  # skip the header row; an empty file is not an error
    for row in reader:
        # Columns read below: 0 = prompt, 2 = rating, 5 = creation timestamp.
        if len(row) < 6:
            continue  # blank or truncated line
        prompt = row[0]
        rating = row[2]
        # Only the first 19 characters ('YYYY-MM-DD HH:MM:SS') are parsed.
        created_at = datetime.strptime(row[5][:19], '%Y-%m-%d %H:%M:%S')
        if created_at > created_after and rating in rating_option:
            queries.append(prompt)

# De-duplicate so each distinct prompt is embedded and stored once.
queries = set(queries)
for q in queries:
    add_doc_to_index('chat_records', q)

In [5]:
# Greeting shown when the page loads and again after "Restart".
INTRO_TEXT = """Hi, I will find top 30 user chat records that are most relevant to your sentence or word ^^"""

with gr.Blocks() as demo:
    with gr.Row():
        md0 = gr.Markdown(INTRO_TEXT)

    with gr.Row(visible=True) as r1:
        sentence = gr.Textbox(label="Enter your sentence or word", placeholder="is it dangerous to hike under a thunder storm?")

    with gr.Row(visible=True) as r2:
        button = gr.Button("Continue")

    with gr.Row(visible=True) as r3:
        button2 = gr.Button("Restart")

    def submit(user_text):
        """Search 'chat_records' for ``user_text`` and show the hits in ``md0``.

        Returns a dict of component updates: the markdown body (one
        ``### score: text`` line per hit) plus updates hiding the input row
        and the "Continue" row. Blank input is not sent to the embedding
        API; a hint is shown instead and the form stays visible.
        """
        if not user_text or not user_text.strip():
            # The embeddings endpoint rejects empty input, so short-circuit here.
            return {md0: "Please enter a sentence or word first."}

        responses = query_index('chat_records', user_text)
        listing = "".join(
            "### " + str(score) + ": " + str(text) + "\n"
            for text, score in responses
        )
        return {
            md0: listing,
            r1: gr.update(visible=False),
            r2: gr.update(visible=False),
        }

    def restart():
        """Restore the greeting, clear the textbox and show the input rows again."""
        return {
            md0: INTRO_TEXT,
            sentence: '',
            r1: gr.update(visible=True),
            r2: gr.update(visible=True),
        }

    # Both handlers return dicts keyed by component and only include the
    # components they actually change (r3 is listed but never updated).
    button.click(fn=submit, inputs=[sentence], outputs=[md0, r1, r2, r3])

    button2.click(fn=restart, inputs=None, outputs=[md0, r1, r2, r3, sentence])

# NOTE(review): share=True publishes the app on a public gradio.live URL, which
# exposes the indexed chat records to anyone with the link - confirm this is intended.
demo.launch(share=True, debug=False)

Running on local URL:  http://127.0.0.1:7864
Running on public URL: https://59f66ef82396ebf3d5.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




In [36]:
# Index every sub-category (second column) into the collection named by the
# first column of categories.csv.
# NOTE(review): nothing in this cell creates those collections - presumably they
# already exist in Qdrant; confirm before running on a fresh instance.
with open('categories.csv') as categories_file:
    category_rows = csv.reader(categories_file, delimiter=',')
    next(category_rows)  # skip the header row
    for record in category_rows:
        collection, subcategory = record[0], record[1]
        add_doc_to_index(collection, subcategory)

In [32]:
# Spot check: the five closest entries in 'weather_info_or_prediction' for a
# sample question, printed as (text, score) tuples.
hurricane_matches = query_index(
    'weather_info_or_prediction',
    'Are there any hurricanes forming now',
    top_k=5,
)
print(hurricane_matches)

[('tornado', 0.7964211), ('wind', 0.79486805), ('snow', 0.79297185), ('general weather', 0.78826755), ('thunderstorm', 0.7850449)]


In [38]:
# A prompt is given the nearest sub-category only when the match score is above
# this value; otherwise it is labelled 'other'.
SUBCAT_SCORE_THRESHOLD = 0.82

# newline='' on both files leaves line-ending handling to the csv module, as its
# documentation requires for reader and writer file objects.
with open('category_analysis_2.csv', newline='') as f:
    reader = csv.reader(f, delimiter='|')
    next(reader, None)  # skip the header row; an empty file is not an error
    # Append mode: rows from earlier runs are kept, so re-running adds duplicates.
    with open('category_analysis_3.csv', 'a', newline='') as f1:
        writer = csv.writer(f1, delimiter='|')
        for row in reader:
            if len(row) < 2:
                continue  # blank or truncated line
            prompt = row[0]
            # The category doubles as the collection name: spaces become
            # underscores and 'explanation_on' is rewritten to 'explanation_of'.
            cat = row[1].replace(' ', '_').replace('explanation_on', 'explanation_of')
            matches = query_index(cat, prompt, top_k=1)
            # An empty collection returns no hits; treat that like a weak match.
            if matches and matches[0][1] > SUBCAT_SCORE_THRESHOLD:
                subcat = matches[0][0]
            else:
                subcat = 'other'
            writer.writerow([prompt, cat, subcat])
            