# Index Compression Comparison

Eko Julianto Salim - 1906350925

---
## Setup

In [1]:
from pathlib import Path

from bsbi import BSBIIndex
from compression import VBEPostings, StandardPostings, BICPostings

In [2]:
# Disable tqdm progress bars to reduce clutter in the notebook output.
from tqdm import tqdm
from functools import partialmethod

# Monkeypatch the tqdm class itself: its constructor is replaced by a
# partialmethod that pre-binds disable=True, so tqdm objects created after this
# cell runs default to being disabled. The patch stays in effect for the rest
# of the kernel session.
tqdm.__init__ = partialmethod(tqdm.__init__, disable=True)

## Benchmark
### Index Creation Speed

In [5]:
def create_index(postings_encoding, data_dir="collection", output_dir="index"):
    """Build the index once with the given postings encoding.

    Constructs a ``BSBIIndex`` and calls its ``index()`` method. Nothing is
    returned, so the function can be used directly as a ``%timeit`` target
    without producing cell output.

    Args:
        postings_encoding: Postings codec forwarded to ``BSBIIndex`` (this
            notebook passes ``StandardPostings``, ``VBEPostings`` or
            ``BICPostings``).
        data_dir: Directory handed to ``BSBIIndex`` as ``data_dir``. Defaults
            to ``"collection"``, the value previously hard-coded here.
        output_dir: Directory handed to ``BSBIIndex`` as ``output_dir``.
            Defaults to ``"index"``, the value previously hard-coded here.
    """
    indexer = BSBIIndex(
        data_dir=data_dir,
        output_dir=output_dir,
        postings_encoding=postings_encoding,
    )
    indexer.index()

In [10]:
# Time index creation with StandardPostings: 30 runs (-r) of 10 loops each (-n), shown with 5 digits of precision (-p).
%timeit -r 30 -n 10 -p 5 create_index(StandardPostings)

317.02 ms ± 59.514 ms per loop (mean ± std. dev. of 30 runs, 10 loops each)


In [11]:
# Time index creation with VBEPostings: 30 runs (-r) of 10 loops each (-n), shown with 5 digits of precision (-p).
%timeit -r 30 -n 10 -p 5 create_index(VBEPostings)

310.38 ms ± 772.69 µs per loop (mean ± std. dev. of 30 runs, 10 loops each)


In [12]:
# Time index creation with BICPostings: 30 runs (-r) of 10 loops each (-n), shown with 5 digits of precision (-p).
%timeit -r 30 -n 10 -p 5 create_index(BICPostings)

621.69 ms ± 1.4299 ms per loop (mean ± std. dev. of 30 runs, 10 loops each)


### Index Size

In [13]:
INDEX_FOLDER_PATH = Path("./index")
def get_index_size_statistics(index_dir=None):
    """Print the on-disk size, in KiB, of the files in an index folder.

    Three lines are printed, in this order:
      * the size of ``main_index.index``,
      * the size of ``main_index.dict``,
      * the combined size of every file matching ``intermediate_*.index``
        (``0.0`` when no file matches).

    Args:
        index_dir: Folder to inspect, as a ``str`` or ``Path``. When omitted,
            ``INDEX_FOLDER_PATH`` is used, which matches the previous
            hard-coded behaviour.

    Raises:
        FileNotFoundError: If ``main_index.index`` or ``main_index.dict`` is
            missing from the folder.
    """
    folder = INDEX_FOLDER_PATH if index_dir is None else Path(index_dir)

    main_index_bytes = (folder / "main_index.index").stat().st_size
    print(f"Index size: {main_index_bytes / 1024} KiB")

    metadata_bytes = (folder / "main_index.dict").stat().st_size
    print(f"Metadata size: {metadata_bytes / 1024} KiB")

    # A generator avoids building a throwaway list just to sum it.
    intermediate_bytes = sum(
        path.stat().st_size for path in folder.glob("intermediate_*.index")
    )
    print(f"Intermediate index size: {intermediate_bytes / 1024} KiB")

In [14]:
# Rebuild the index with the StandardPostings encoding, then report file sizes.
create_index(StandardPostings)
print("StandardPostings", "=" * 20, sep="\n")
get_index_size_statistics()

StandardPostings
Index size: 362.1640625 KiB
Metadata size: 75.0439453125 KiB
Intermediate index size: 362.1640625 KiB


In [15]:
# Rebuild the index with the VBEPostings encoding, then report file sizes.
create_index(VBEPostings)
print("VBEPostings", "=" * 20, sep="\n")
get_index_size_statistics()

VBEPostings
Index size: 51.2978515625 KiB
Metadata size: 66.0390625 KiB
Intermediate index size: 60.40625 KiB


In [16]:
# Rebuild the index with the BICPostings encoding, then report file sizes.
create_index(BICPostings)
print("BICPostings", "=" * 20, sep="\n")
get_index_size_statistics()

BICPostings
Index size: 37.5078125 KiB
Metadata size: 66.013671875 KiB
Intermediate index size: 56.099609375 KiB
