In [None]:
! pip install --upgrade google-cloud-storage --quiet
! pip install langchain_community --quiet
! pip install langchain_huggingface --quiet

In [2]:
'''
import os
from langchain.vectorstores.chroma import Chroma
from langchain.prompts import PromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain

BUCKET_NAME = "bucket_arxiv_researcher"
GCS_PERSIST_PATH = "chroma_db/"
LOCAL_PERSIST_PATH = "./local_chromadb/"
'''
import os
from google.cloud import storage
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings

BUCKET_NAME = "arxiv-researcher-bucket"
GCS_PERSIST_PATH = "chroma_db/"
LOCAL_PERSIST_PATH = "./local_chromadb/"

# Initialize GCS client
storage_client = storage.Client()

def download_directory_from_gcs(gcs_directory, local_directory, bucket_name):
    """Download all files from a GCS directory to a local directory."""
    bucket = storage_client.bucket(bucket_name)
    blobs = bucket.list_blobs(prefix=gcs_directory)

    for blob in blobs:
        if not blob.name.endswith("/"):  # Avoid directory blobs
            relative_path = os.path.relpath(blob.name, gcs_directory)
            local_file_path = os.path.join(local_directory, relative_path)
            os.makedirs(os.path.dirname(local_file_path), exist_ok=True)
            blob.download_to_filename(local_file_path)
            print(f"Downloaded {blob.name} to {local_file_path}")

# Download Chroma persisted data from GCS to local directory
download_directory_from_gcs(GCS_PERSIST_PATH, LOCAL_PERSIST_PATH, BUCKET_NAME)

Downloaded chroma_db/4d8e52f2-6027-41ef-b06b-77f96cb894fa/data_level0.bin to ./local_chromadb/4d8e52f2-6027-41ef-b06b-77f96cb894fa/data_level0.bin
Downloaded chroma_db/4d8e52f2-6027-41ef-b06b-77f96cb894fa/header.bin to ./local_chromadb/4d8e52f2-6027-41ef-b06b-77f96cb894fa/header.bin
Downloaded chroma_db/4d8e52f2-6027-41ef-b06b-77f96cb894fa/index_metadata.pickle to ./local_chromadb/4d8e52f2-6027-41ef-b06b-77f96cb894fa/index_metadata.pickle
Downloaded chroma_db/4d8e52f2-6027-41ef-b06b-77f96cb894fa/length.bin to ./local_chromadb/4d8e52f2-6027-41ef-b06b-77f96cb894fa/length.bin
Downloaded chroma_db/4d8e52f2-6027-41ef-b06b-77f96cb894fa/link_lists.bin to ./local_chromadb/4d8e52f2-6027-41ef-b06b-77f96cb894fa/link_lists.bin
Downloaded chroma_db/chroma.sqlite3 to ./local_chromadb/chroma.sqlite3


In [3]:
from langchain_huggingface import HuggingFaceEmbeddings

EMBEDDING_MODEL = 'sentence-transformers/all-mpnet-base-v2'
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)

# Load the stored vector database
db = Chroma(persist_directory=LOCAL_PERSIST_PATH, embedding_function=embeddings)

# Now use db for retrieval
retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 3})

  from .autonotebook import tqdm as notebook_tqdm
  db = Chroma(persist_directory=LOCAL_PERSIST_PATH, embedding_function=embeddings)


In [5]:
query = "Neural networks for image recognition"
retrieved_docs = retriever.get_relevant_documents(query)

i = 1
for doc in retrieved_docs:
    print(i)
    print(doc.page_content)
    print(doc.metadata)
    print("\n")
    i += 1

  retrieved_docs = retriever.get_relevant_documents(query)


1
comparing robustness of pairwise and multiclass neuralnetwork systems
  for face recognition   noise corruptions and variations in face images can seriously hurt the
performance of face recognition systems to make such systems robust
multiclass neuralnetwork classifiers capable of learning from noisy data have
been suggested however on large face data sets such systems cannot provide the
robustness at a high level in this paper we explore a pairwise neuralnetwork
system as an alternative approach to improving the robustness of face
recognition in our experiments this approach is shown to outperform the
multiclass neuralnetwork system in terms of the predictive accuracy on the
face images corrupted by noise

{'id': '0704.3515', 'year': '2007'}


2
the parameterless selforganizing map algorithm   the parameterless selforganizing map plsom is a new neural network
algorithm based on the selforganizing map som it eliminates the need for a
learning rate and annealing schemes for learning r