In [1]:
from llama_index.core import SimpleDirectoryReader, StorageContext, load_index_from_storage
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import VectorStoreIndex

from qdrant_client import QdrantClient
from llama_index.vector_stores.qdrant import QdrantVectorStore
from index import extract_paper_metadata

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Reset step: drop the "llama2_bm42" collection (if it exists) so the notebook
# can rebuild it from scratch on every run.
client = QdrantClient(path="./contents/qdrant_storage")
try:
    if client.collection_exists("llama2_bm42"):
        client.delete_collection("llama2_bm42")
finally:
    # Local-mode Qdrant takes an exclusive lock on the storage folder for as
    # long as a client is open. Release it here so that later cells can open
    # their own QdrantClient on the same path without a RuntimeError.
    client.close()

In [5]:
# Dense embedding model handle (this cell itself does not use it).
embed_model = OpenAIEmbedding(model="text-embedding-3-small")

# Read everything under ./contents/files/; `extract_paper_metadata` (project
# helper imported from `index`) is called per file to supply its metadata.
reader = SimpleDirectoryReader(
    input_dir="./contents/files/",
    file_metadata=extract_paper_metadata,
)

docs = reader.load_data()
# The corpus consists of research papers, so label the count accordingly
# (the previous label referred to Techcrunch articles).
print(f"Count of loaded paper documents: {len(docs)}")
print(docs[0])

# Open the on-disk (local-mode) Qdrant storage and wrap the target collection.
# Passing `fastembed_sparse_model` makes the store compute BM42 sparse vectors
# alongside the dense embeddings.
client = QdrantClient(path="./contents/qdrant_storage")
vector_store = QdrantVectorStore(
    collection_name="llama2_bm42",
    client=client,
    fastembed_sparse_model="Qdrant/bm42-all-minilm-l6-v2-attentions",
)

Count of Techcrunch articles: 129
Doc ID: dde26d00-fc1f-4e7e-b251-8a6a50b56a95
Text: Published as a conference paper at ICLR 2021 AN IMAGE IS WORTH
16X16 W ORDS : TRANSFORMERS FOR IMAGE RECOGNITION AT SCALE Alexey
Dosovitskiy∗,†, Lucas Beyer∗, Alexander Kolesnikov∗, Dirk
Weissenborn∗, Xiaohua Zhai∗, Thomas Unterthiner, Mostafa Dehghani,
Matthias Minderer, Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, Neil
Houlsby∗,† ∗equal tech...


Fetching 6 files: 100%|██████████| 6/6 [00:01<00:00,  5.51it/s]


In [6]:
# Build the index from the loaded documents. The storage context points at the
# Qdrant-backed `vector_store`, so the indexed nodes are written there.
embed_model = OpenAIEmbedding(model="text-embedding-3-small")
storage_context = StorageContext.from_defaults(vector_store=vector_store)

index = VectorStoreIndex.from_documents(
    documents=docs,
    storage_context=storage_context,
    embed_model=embed_model,  # dense embedding model
)

  self._client.create_payload_index(


## Testing

In [2]:
# Re-open the persisted collection and build a retriever on top of it.
# NOTE: local-mode Qdrant storage can only be held by one client at a time, so
# run this section from a fresh kernel (or after closing any client opened by
# the indexing cells).
client = QdrantClient(path="./contents/qdrant_storage")
vector_store = QdrantVectorStore(
    collection_name="llama2_bm42",
    client=client,
    fastembed_sparse_model="Qdrant/bm42-all-minilm-l6-v2-attentions",
)

# The data already lives in the vector store, so attach to it directly instead
# of re-ingesting documents.
index = VectorStoreIndex.from_vector_store(
    vector_store=vector_store,
    embed_model=OpenAIEmbedding(model="text-embedding-3-small"),
)

# Query in hybrid mode so the BM42 sparse vectors are actually used; the
# default query mode only searches the dense embeddings.
retriever = index.as_retriever(
    similarity_top_k=5,
    sparse_top_k=5,
    vector_store_query_mode="hybrid",
)

In [3]:
# Run a single smoke-test question through the retriever.
query_text = "What is Audio spectrogram transformer?"
nodes = retriever.retrieve(query_text)

In [4]:
# Dump the text of each retrieved node, followed by a run of blank lines as a
# visual separator between hits.
for node in nodes:
    print(node.get_content(), "\n\n", sep="\n")

2. Audio Spectrogram Transformer
2.1. Model Architecture
Figure 1 illustrates the proposed Audio Spectrogram Trans-
former (AST) architecture. First, the input audio waveform of t
seconds is converted into a sequence of 128-dimensional log
Mel ﬁlterbank (fbank) features computed with a 25ms Ham-
ming window every 10ms. This results in a128 ×100t spectro-
gram as input to the AST. We then split the spectrogram into a
sequence of N 16×16 patches with an overlap of 6 in both time
and frequency dimension, where N = 12⌈(100t −16)/10⌉is
the number of patches and the effective input sequence length
for the Transformer. We ﬂatten each16×16 patch to a 1D patch
embedding of size 768 using a linear projection layer. We re-
fer to this linear projection layer as the patch embedding layer.
Since the Transformer architecture does not capture the input
order information and the patch sequence is also not in tem-
poral order, we add a trainable positional embedding (also of
size 768) to each patch emb