In [2]:
from glob import glob
import os
import re

# Suggest a project name for every sample CSV: take the part of the file name
# before its first ".", then replace each non-word character with "_".
csvs = glob('../sample-data/*.csv')
non_word_char = re.compile(r"\W")

print("Possible project names:")
for csv_path in csvs:
    name_before_first_dot = os.path.basename(csv_path).split(".")[0]
    print(non_word_char.sub("_", name_before_first_dot))


Possible project names:
youtube_missed_political_ads
Tweets_by__NYCFireWire___Sheet1
wapo_opinion_headlines
search_warrants_for_ner_from_rss
constellation_10q
search_warrant_training_data___rss_document_types_all_courts


In [4]:
from llama_index.vector_stores.postgres import PGVectorStore
from llama_index.vector_stores.duckdb import DuckDBVectorStore
from llama_index.vector_stores.chroma import ChromaVectorStore
import chromadb

from llama_index.core import VectorStoreIndex, StorageContext
import re
# Name of the project to query; used as the Chroma collection name, the DuckDB
# file stem, or the Postgres table name depending on the backend chosen below.
project_name = 'constellation_10q'

# Backend that holds the embeddings: "chroma", "duckdb" or "postgres".
VECTOR_STORE = "chroma"

# Embedding model and the vector width each OpenAI model produces. They are
# defined in this cell because the postgres branch needs the width up front;
# previously it referenced names that did not exist yet on a fresh kernel.
# NOTE(review): keep MODEL_NAME in sync with the model the project was indexed with.
MODEL_NAME = "text-embedding-3-small"
model_dims = {
    "text-embedding-3-small": 1536,
    "text-embedding-3-large": 3072,
    "text-embedding-ada-002": 1536,
}

if VECTOR_STORE == 'chroma':
    chroma_client = chromadb.PersistentClient(path="./chroma")
    chroma_collection = chroma_client.get_or_create_collection(project_name)
    vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
elif VECTOR_STORE == "duckdb":
    vector_store = DuckDBVectorStore(f"{project_name}2.duckdb", persist_dir="../duckdb/")
elif VECTOR_STORE == "postgres":
    vector_store = PGVectorStore.from_params(
        database="meaningfully",
        host="localhost",
        # password=url.password,
        port=5432,
        user="jeremybmerrill",
        table_name=project_name,
        embed_dim=model_dims[MODEL_NAME],  # openai embedding dimension
        hnsw_kwargs={
            "hnsw_m": 16,
            "hnsw_ef_construction": 64,
            "hnsw_ef_search": 40,
            "hnsw_dist_method": "vector_cosine_ops",
        },
    )
else:
    # Fail here rather than with a NameError on `vector_store` in a later cell.
    raise ValueError(
        f"Unknown VECTOR_STORE {VECTOR_STORE!r}; expected 'chroma', 'duckdb' or 'postgres'"
    )

In [5]:
import os
from dotenv import load_dotenv
# Read variables from a local .env file into the process environment
# (presumably the OpenAI API key -- confirm against your .env).
load_dotenv()

# from llama_index.core import Settings
from llama_index.embeddings.openai import OpenAIEmbedding

MODEL_NAME = "text-embedding-3-small"
# Settings.embed_model = OpenAIEmbedding(model=MODEL_NAME)


# Attach to the existing vector store (nothing is re-indexed here). The embedding
# model is passed explicitly so that the index and anything derived from it use
# the same model as the retriever, instead of whatever the library default is.
index = VectorStoreIndex.from_vector_store(
    vector_store=vector_store,
    embed_model=OpenAIEmbedding(model=MODEL_NAME),
)

In [6]:
from llama_index.core.vector_stores.types import (
    MetadataFilter,
    MetadataFilters,
    FilterOperator,
)

# Metadata filters are built but deliberately left empty; the commented-out
# entries show the kind of filter that can be enabled. The retriever below
# does not apply them while its `filters` argument stays commented out.
filters = MetadataFilters(
    filters=[
        #MetadataFilter(key="date", value="February 15", operator=FilterOperator.TEXT_MATCH),
        # MetadataFilter(key="author", value="sven@timescale.com"),
    ],
    # condition="or",
)

# Embed the query with the configured model and fetch the 10 closest nodes.
query_embed_model = OpenAIEmbedding(model=MODEL_NAME)
retriever_options = dict(
    similarity_top_k=10,
    #filters=filters,
    embed_model=query_embed_model,
)
retriever = index.as_retriever(**retriever_options)
result_nodes = retriever.retrieve("we fired an executive")

In [7]:
# Metadata fields to show next to each result's text and score; fields that a
# given project does not have are skipped when the results table is built.
METADATA_COLUMNS_TO_DISPLAY = [
    "date",
    "acct",
    "url",
    "addr",
    "notes",
]

In [8]:
import pandas as pd
# just for lookin': flatten the retrieved nodes into one row per hit and show
# them as a table of text, selected metadata columns and similarity score.
result_nodes_list = []
for node_with_score in result_nodes:
    node = node_with_score.node
    # Reserve the first column for the chunk text, then fill it in after spreading
    # the metadata, so a metadata field that is itself called "text" cannot
    # silently replace the retrieved chunk.
    result_node_dict = {"text": None, **node.metadata}
    result_node_dict["text"] = node.text.replace("\n", ' ')
    # TODO add shingles
    result_node_dict["score"] = node_with_score.score
    result_nodes_list.append(result_node_dict)
result_nodes_df = pd.DataFrame(result_nodes_list)

display_columns = (
    ["text"]
    + [c for c in METADATA_COLUMNS_TO_DISPLAY if c in result_nodes_df.columns]
    + ["score"]
)
with pd.option_context('display.max_colwidth', 500):
    # reindex instead of [] selection: identical when there are results, but an
    # empty result set renders as an empty table rather than raising KeyError.
    display(result_nodes_df.reindex(columns=display_columns))

Unnamed: 0,text,score
0,"elected Mark Weinberger as a director, effective January 16, 2024, and the Board of Directors appointed him as a member of the Audit Committee.",0.306379
1,There are no modifications to Mr. Okray’s compensation arrangements in connection with his departure. He will not receive severance and will forfeit all equity that has not vested as of his termination date.,0.293827
2,"Mr. Okray expressed full support for the Registrant's financial practices and policies, emphasizing that the mutual decision is not the result of any disagreements he has with the Registrant concerning financial reporting. There are no modifications to Mr. Okray’s compensation arrangements in connection with his departure.",0.293604
3,"Mr. Okray expressed full support for the Registrant's financial practices and policies, emphasizing that the mutual decision is not the result of any disagreements he has with the Registrant concerning financial reporting. There are no modifications to Mr. Okray’s compensation arrangements in connection with his departure. He will not receive severance and will forfeit all equity that has not vested as of his termination date.",0.293432
4,"On January 11, 2024 , Eaton Corporation (the “Company”) and Thomas B. Okray, Executive Vice President and Chief Financial Officer mutually agreed that effective February 2, 2024, Mr. Okray’s employment with the Company will end. Mr. Okray expressed full support for the Registrant's financial practices and policies, emphasizing that the mutual decision is not the result of any disagreements he has with the Registrant concerning financial reporting. There are no modifica...",0.292352
5,"On January 11, 2024 , Eaton Corporation (the “Company”) and Thomas B. Okray, Executive Vice President and Chief Financial Officer mutually agreed that effective February 2, 2024, Mr. Okray’s employment with the Company will end.",0.289938
6,"Prior to this role, he served as Senior Managing Director, East Region, and Managing Director of the Northeast, Mid-Atlantic and Southeast Regions, respectively. Mr. Roth has also held various other leadership positions at the Company, including Senior Vice President and Senior Market Officer for the Mid-Atlantic and Northeast portfolio, and Vice President and Regional Officer.",0.280205
7,"This action includes headcount reductions across our Selling, General and Administrative (“SG&A”) cost base and capacity realignment (the “2024 Restructuring Plan”).",0.279227
8,"This action includes headcount reductions across our Selling, General and Administrative (“SG&A”) cost base and capacity realignment (the “2024 Restructuring Plan”).",0.279227
9,"Mr. Roth, age 48, has been the Company’s Executive Vice President, National Operations and East Region President since January of 2023. Prior to this role, he served as Senior Managing Director, East Region, and Managing Director of the Northeast, Mid-Atlantic and Southeast Regions, respectively. Mr. Roth has also held various other leadership positions at the Company, including Senior Vice President and Senior Market Officer for the Mid-Atlantic and Northeast portfolio, and Vice President a...",0.278368


In [9]:
# Mimicking a request for more results: widen the existing retriever to 30 hits
# and run a longer query through it.
# TODO: Figure out how to not re-embed the query
expanded_top_k = 30
retriever.similarity_top_k = expanded_top_k
result_nodes = retriever.retrieve("we fired an executive and he isn't getting paid")

In [10]:
import pandas as pd
# just for lookin': flatten the retrieved nodes into one row per hit and show
# them as a table of text, selected metadata columns and similarity score.
result_nodes_list = []
for node_with_score in result_nodes:
    node = node_with_score.node
    # Reserve the first column for the chunk text, then fill it in after spreading
    # the metadata, so a metadata field that is itself called "text" cannot
    # silently replace the retrieved chunk.
    result_node_dict = {"text": None, **node.metadata}
    result_node_dict["text"] = node.text.replace("\n", ' ')
    # TODO add shingles
    result_node_dict["score"] = node_with_score.score
    result_nodes_list.append(result_node_dict)
result_nodes_df = pd.DataFrame(result_nodes_list)

display_columns = (
    ["text"]
    + [c for c in METADATA_COLUMNS_TO_DISPLAY if c in result_nodes_df.columns]
    + ["score"]
)
with pd.option_context('display.max_colwidth', 500):
    # reindex instead of [] selection: identical when there are results, but an
    # empty result set renders as an empty table rather than raising KeyError.
    display(result_nodes_df.reindex(columns=display_columns))

Unnamed: 0,text,score
0,He will not receive severance and will forfeit all equity that has not vested as of his termination date.,0.33646
1,There are no modifications to Mr. Okray’s compensation arrangements in connection with his departure. He will not receive severance and will forfeit all equity that has not vested as of his termination date.,0.30311
2,"Mr. Okray expressed full support for the Registrant's financial practices and policies, emphasizing that the mutual decision is not the result of any disagreements he has with the Registrant concerning financial reporting. There are no modifications to Mr. Okray’s compensation arrangements in connection with his departure. He will not receive severance and will forfeit all equity that has not vested as of his termination date.",0.30229
3,"If the Employment Agreement is terminated in certain circumstances, such as by the Company without Cause, by the Company following a Change in Control, or by Dr. McManus for Good Reason (each such capitalized term as defined in the Employment Agreement), the Company will be required to pay severance to Dr. McManus in an amount equal to one year of his then-current base salary, a prorated portion of annual cash incentive compensation, and health insurance coverage for one year.",0.300237
4,"The Employment Agreement may be terminated at any time by either party upon written notice. If the Employment Agreement is terminated in certain circumstances, such as by the Company without Cause, by the Company following a Change in Control, or by Dr. McManus for Good Reason (each such capitalized term as defined in the Employment Agreement), the Company will be required to pay severance to Dr. McManus in an amount equal to one year of his then-current base salary, a prorated portion of an...",0.2987
5,Any severance paid to Dr. McManus will be paid in exchange for Dr. McManus’s release of claims against the Company.,0.296919
6,"Mr. Okray expressed full support for the Registrant's financial practices and policies, emphasizing that the mutual decision is not the result of any disagreements he has with the Registrant concerning financial reporting. There are no modifications to Mr. Okray’s compensation arrangements in connection with his departure.",0.295519
7,"On January 11, 2024 , Eaton Corporation (the “Company”) and Thomas B. Okray, Executive Vice President and Chief Financial Officer mutually agreed that effective February 2, 2024, Mr. Okray’s employment with the Company will end. Mr. Okray expressed full support for the Registrant's financial practices and policies, emphasizing that the mutual decision is not the result of any disagreements he has with the Registrant concerning financial reporting. There are no modifica...",0.290595
8,There are no modifications to Mr. Okray’s compensation arrangements in connection with his departure.,0.287581
9,"The Employment Agreement provides for an annual base salary of \$550,000, which will be annualized for the portion of the year actually served. For future fiscal years, Dr. McManus’s compensation will be subject to annual review by the Compensation Committee of the Company’s Board of Directors.",0.282062
