<a href="https://colab.research.google.com/github/hlin-0420/Llama-Chatbot-Notebook/blob/main/Llama_Chatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Library Import

In [1]:
import cohere
import numpy as np
import pandas as pd
from tqdm import tqdm

## Data Loading

In [2]:
training_data_filename = "processed_content.txt"

In [3]:
def load_content(filename="processed_content.txt"):
    try:
        with open(filename, "r", encoding="utf-8") as file:
            content = file.read()
        print("Content loaded successfully!")
        return content
    except FileNotFoundError:
        print(f"Error: The file '{filename}' was not found.")
    except Exception as e:
        print(f"An error occurred: {e}")

In [4]:
content = load_content(training_data_filename)

Content loaded successfully!


## API Key

In [5]:
api_key_file = "api_key.txt"

api_key = load_content(api_key_file)

Content loaded successfully!


In [6]:
co = cohere.Client(api_key)

## Content Processing

In [7]:
content = content.split(".")

content = [c.strip('/n') for c in content]

## Embedding

In [10]:
response = co.embed(texts = content, input_type="search_document", ).embeddings

embeds = np.array(response)

print(embeds.shape)

(3897, 4096)


In [11]:
# Save embeddings to a file
np.save("embeddings.npy", embeds)

## Building Search Index

In [13]:
import faiss

In [14]:
dim = embeds.shape[1]

In [15]:
index = faiss.IndexFlatL2(dim)

In [16]:
print(index.is_trained)

True


In [17]:
index.add(np.float32(embeds))

## Search

In [20]:
def search(query, number_of_results = 3):
    query_embeds = co.embed(texts = [query], input_type = "search_query").embeddings[0]

    distances, similar_item_ids = index.search(np.float32([query_embeds]), number_of_results)

    texts_np = np.array(content)

    results = pd.DataFrame(data = {'texts': texts_np[similar_item_ids[0]], 'distance': distances[0]})

    print(f"Query: {query}\nResults:")
    return results

In [21]:
query = "How many tadpole definitions am I allowed to create?"
results = search(query)
results

Query: How many tadpole definitions am I allowed to create?
Results:


Unnamed: 0,texts,distance
0,\n\nEnter a name for this tadpole definition i...,5138.734375
1,"\n\nFor a new tadpole definition, select an un...",6227.813965
2,TheTadpole Definitionswill ope,6267.456055
