# A gentle (5-minute) how-to for Seismic.

## This Jupyter notebook is a gentle overview of how to use Seismic.

## For questions, feel free to open a GitHub issue.

In [None]:
import json
import numpy as np

from seismic import SeismicIndex

In [None]:
# If you need to install the evaluation dependencies, uncomment the line below:
#!pip install ir_datasets ir_measures

import ir_datasets
import ir_measures

from ir_measures import nDCG

# Building the index on the document collection

In [None]:
# add the path to your collection below!
# (left empty on purpose: this is a placeholder you must fill in before running)
json_input_file = ""

# Build the index from the collection file, using the default build settings
# (no other arguments are passed).
# NOTE(review): the variable name suggests a JSON / JSON Lines input file —
# confirm the expected format against the Seismic documentation.
index = SeismicIndex.build(json_input_file)

In [None]:
# Print a few basic statistics about the index built above.
print("Number of documents: ", index.len)
# Average per document, computed as index.nnz divided by index.len.
print("Avg number of non-zero components: ", index.nnz / index.len)
print("Dimensionality of the vectors: ", index.dim)

# presumably reports the space usage of the index in bytes — TODO confirm
index.print_space_usage_byte()

# Load queries

In [None]:
# add the path to your query file below!
queries_path = ""

# The query file is read as JSON Lines: one JSON object per line. The code
# below uses the 'id' and 'vector' keys of each object, where 'vector' maps
# each component (token) to its value.
# The file is decoded explicitly as UTF-8 (the standard JSON encoding) instead
# of relying on the platform default, and blank lines (e.g. a trailing empty
# line at the end of the file) are skipped rather than crashing json.loads.
with open(queries_path, encoding="utf-8") as f:
    queries = [json.loads(line) for line in f if line.strip()]

# Ids and component names are stored as fixed-width NumPy unicode strings.
# NOTE: NumPy silently truncates any string longer than MAX_TOKEN_LEN
# characters when converting to this dtype.
# NOTE(review): confirm which string widths Seismic accepts before changing it.
MAX_TOKEN_LEN = 30
string_type = f'U{MAX_TOKEN_LEN}'

queries_ids = np.ascontiguousarray(np.array([q['id'] for q in queries], dtype=string_type))

# One array of component names and one parallel float32 array of values per
# query; both lists follow the same order as `queries` / `queries_ids`.
query_components = [
    np.array(list(q['vector'].keys()), dtype=string_type) for q in queries
]
query_values = [
    np.array(list(q['vector'].values()), dtype=np.float32) for q in queries
]

# Perform the search on the index

In [None]:
# Run all queries against the index in a single batched call. The evaluation
# cell of this notebook unpacks every entry of each per-query result as a
# (query_id, score, doc_id) triple.
results = index.batch_search(
    queries_ids=queries_ids,
    query_components=query_components,
    query_values=query_values,
    k=10,  # presumably the number of results returned per query — TODO confirm
    query_cut=10,  # NOTE(review): presumably limits how many query components are used — confirm
    heap_factor=0.7,  # NOTE(review): search-time tuning knob; see the Seismic docs for its exact meaning
    # sorted=True,
    # n_knn=0,
)

# Evaluation

In [None]:
# add your ir_dataset dataset string id below, e.g., "beir/quora/test"
ir_dataset_string = ""

metric_name = "nDCG@10" # on BEIR/quora
ir_measure = ir_measures.parse_measure(metric_name)

# Flatten the per-query result lists into one run of scored documents.
# Each hit is unpacked as (query_id, score, doc_id); ScoredDoc takes its
# fields in the order (query_id, doc_id, score), so they are passed by name.
ir_results = [
    ir_measures.ScoredDoc(query_id=qid, doc_id=did, score=sim)
    for hits in results
    for (qid, sim, did) in hits
]

# Relevance judgements for the chosen dataset, then the aggregate metric.
qrels = ir_datasets.load(ir_dataset_string).qrels
ir_measures.calc_aggregate([ir_measure], qrels, ir_results)