In [4]:
import numpy as np
import rtree
from rtree import index

In [None]:
# - dataset: numpy array
def DatasetGenerator(dataset):
    """Stream the rows of ``dataset`` as point entries for R-tree loading.

    For every row index this yields ``(row_index, (x, y, x, y), row)``, where
    ``x`` and ``y`` are the row's first two columns. Repeating them as both
    the lower and upper corner gives a zero-area box, i.e. a point.

    - dataset: numpy array, indexed as ``dataset[i, j]``; only columns 0 and 1
      are used for the coordinates, the whole row is passed along as payload.
    """
    for row_id in range(len(dataset)):
        x = dataset[row_id, 0]
        y = dataset[row_id, 1]
        point_box = (x, y, x, y)
        yield row_id, point_box, dataset[row_id]

# Generate Rtree MBRs using Bulk Loading (STR version)
# Hilbert R-tree: An improved R-tree using fractals. VLDB 1994
# STR: A Simple and Efficient Algorithm for R-Tree Packing. ICDE 1997
def GenerateRtreePartitionUsingBulkLoading(dataset, maximum_capacity = 10000, fill_factor = 0.5):
    """Bulk-load ``dataset`` into an R-tree and return the MBR of every leaf.

    Parameters:
    - dataset: numpy array; it is handed to ``DatasetGenerator``, which turns
      each row into one entry of the index.
    - maximum_capacity: value assigned to the index's ``leaf_capacity``
      property (the maximum capacity of a leaf). Original author's note:
      it cannot be less than 100 -- not verified here.
    - fill_factor: value assigned to the index's ``fill_factor`` property.
      Defaults to 0.5, the value that used to be hard-coded.

    Returns:
    A numpy array with one row per leaf, laid out as
    ``L1, U1, L2, U2, ..., Ln, Un, Count`` where ``Count`` is the number of
    entry ids stored in that leaf. If the index reports no leaves, the
    result is an empty array.
    """
    props = index.Property()
    props.leaf_capacity = maximum_capacity
    props.fill_factor = fill_factor
    props.overwrite = True

    # Passing the generator to the constructor builds the index from the stream.
    idx = index.Index(DatasetGenerator(dataset), properties = props)

    MBRs = []
    for _, child_ids, bounds in idx.leaves():
        # Leaf bounds list every lower coordinate first and then every upper
        # coordinate; regroup them into per-dimension (lower, upper) pairs.
        dims = len(bounds) // 2
        row = []
        for lower, upper in zip(bounds[:dims], bounds[dims:]):
            row.extend((lower, upper))
        row.append(len(child_ids))
        MBRs.append(row)

    return np.array(MBRs)

In [None]:
# # debug
# p = index.Property()
# p.leaf_capacity = 10000
# p.fill_factor = 0.5
# p.overwrite = True

# # idx = index.Index(properties = p)
# idx = index.Index(DatasetGenerator(dataset),properties = p)

# leaves = idx.leaves()
# print(len(leaves)) # 171400
# print(len(leaves[0][1])) # 70

In [None]:
# reform the partition representation of RTree's MBR into kdnodes
# MBR[i, ...]: each row: L1, U1, L2, U2, ..., Ln, Un, Count
# kdnodes[i][0/1][k][0/1]: i partitions, 0/1 boundary/count  k dimension  0/1 min/max
#
def PartitionFormalization(RTreePartition):
    """Convert R-tree MBR rows into the kdnodes partition representation.

    - RTreePartition: 2-D numpy array (indexed as ``RTreePartition[i, j]``),
      one row per partition laid out as ``L1, U1, L2, U2, ..., Ln, Un, Count``.

    Returns a list with one ``[domains, count]`` entry per row, where
    ``domains[k]`` is ``[Lk, Uk]`` and ``count`` is the row's last column.
    An empty input yields an empty list.
    """
    # Guard first: reading row 0 of an empty array would raise IndexError.
    if len(RTreePartition) == 0:
        return []

    # Each row holds 2 * dims bounds plus one trailing count column, so
    # integer division by two drops the odd count column.
    dims = len(RTreePartition[0]) // 2

    return [
        [
            [[RTreePartition[i, 2*k], RTreePartition[i, 2*k + 1]] for k in range(dims)],
            RTreePartition[i, -1],
        ]
        for i in range(len(RTreePartition))
    ]

# rtree_partitions = genfromtxt('C:/Users/Cloud/iCloudDrive/HUAWEI_LKD/Dataset/Legacy/data/temp/RTreePartitions_M5K_Dim12.csv', delimiter=',')
# rtree_kdnodes = PartitionFormalization(rtree_partitions)

In [5]:
# Test Rtree Bulk Loading

# Two hand-written 2-D partitions in kdnodes form: [[[min, max] per dimension], count].
# Both span x in [0, 10]; the first covers y in [0, 4], the second y in [4, 10].
tiny_kdnodes = [ [[[0,10],[0,4]],13], [[[0,10],[4,10]],14] ]

def DatasetGenerator(kdnodes):
    """Stream kdnodes partitions as box entries for R-tree loading.

    ``kdnodes[i]`` is ``[domains, payload]`` with ``domains[k] = [min, max]``.
    For every partition this yields ``(i, box, payload)`` where ``box`` lists
    all minimums first and then all maximums (not min/max interleaved per
    dimension).

    NOTE(review): this reuses the name of the numpy-based DatasetGenerator
    defined earlier in the notebook; once this cell has run, code that looks
    up DatasetGenerator (e.g. GenerateRtreePartitionUsingBulkLoading) gets
    this version instead.
    """
    for node_id in range(len(kdnodes)):
        node = kdnodes[node_id]
        domains = node[0]
        mins = tuple(bounds[0] for bounds in domains)
        maxs = tuple(bounds[1] for bounds in domains)
        yield node_id, mins + maxs, node[1]

# Index properties for the small bulk-loading test.
p = index.Property()
p.leaf_capacity = 100 # maximum capacity of a leaf; author's note: cannot be less than 100
p.fill_factor = 0.5
p.overwrite = True

# Build the index directly from the generator over the two test partitions.
idx = index.Index(DatasetGenerator(tiny_kdnodes), properties = p)

In [9]:
# Show the leaf nodes, then query with one box lying inside each test partition.
print(idx.leaves())
print(list(idx.intersection((1.0, 1.0, 2.0, 2.0)))) # y in [1, 2]: inside the first partition only
print(list(idx.intersection((1.0, 5.0, 2.0, 7.0)))) # y in [5, 7]: inside the second partition only

[(0, [0, 1], [0.0, 0.0, 10.0, 10.0])]
[0]
[1]


In [10]:
import copy
# Deep-copy the index so the copy can be modified while idx is kept for comparison.
copy_index = copy.deepcopy(idx)

In [11]:
# Repeat the second-partition query on the original index after the copy was taken.
print(list(idx.intersection((1.0, 5.0, 2.0, 7.0))))

[1]


In [13]:
# Remove entry 1 from the copy only; (0,4,10,10) is the box entry 1 was loaded with
# (second item of tiny_kdnodes: minimums 0,4 followed by maximums 10,10).
copy_index.delete(1, (0,4,10,10))

In [15]:
# Run the same query on both indexes to compare the original with the modified copy
# (recorded output: the original still returns entry 1, the copy returns nothing).
print(list(idx.intersection((1.0, 5.0, 2.0, 7.0))))
print(list(copy_index.intersection((1.0, 5.0, 2.0, 7.0))))

[1]
[]
