In [1]:
import import_ipynb
import numpy as np
from rtree import index
import math
import time
import copy
import sys
from collections import defaultdict

In [30]:
class MyHist:
    '''
    My histogram class, for ease of maintenance
    '''
    
    def __init__(self, dataset, buckets, domains, is_query_hist, hist_type, dim_probs=[], query_threshold_up = 2, query_threshold_low = 2):
        ''' Create numpy format histogram for queryset or dataset.
        @param dataset        numpy format, could be dataset (point) or queryset (hyper rectangles) depends on is_query_hist
        @param buckets[..]    the number of buckets in each dimension
        @param hist_type      the type of hist, 0: normal (1*d-dimensional histgramdd) 1: independent (d*1-dimensional histogram)
        @param dim_probs[..]  the probability of queries in each dimension
        '''
        self.buckets = buckets
        self.domains = [tuple(domains[i]) for i in range(len(domains))]
        self.dims = len(buckets)
        self.is_query_hist = is_query_hist
        self.hist_type = hist_type
        self.query_threshold_up = query_threshold_up
        self.query_threshold_low = query_threshold_low 
        
        self.dim_steps = [(domains[k][1] - domains[k][0]) / buckets[k] for k in range(self.dims)]
        self.records_amount = len(dataset)
        self.dim_probs = dim_probs
        self.query_threshold_up_each_dims = []
        self.query_threshold_low_each_dims = []
        if len(dim_probs) != 0:
            for k in range(self.dims):
                self.query_threshold_up_each_dims.append(self.records_amount * (1-dim_probs[k]) + query_threshold_up)
                self.query_threshold_low_each_dims.append(self.records_amount * (1-dim_probs[k]) + query_threshold_low)
        
        if is_query_hist == False:
            if hist_type == 0:
                self.hist, edges = np.histogramdd(dataset, bins = tuple(buckets), range = self.domains)
            elif hist_type == 1:
                self.hist = []
                for d in range(self.dims):
                    self.hist.append(np.histogram(dataset[:,d], bins = buckets[d], range = self.domains[d]))
        else:
            queryset = dataset # dataset is actually queryset
            if hist_type == 0:
                self.hist = np.full(tuple(buckets), 0) # used to store count, cannot use full for set() or it will be 'reference'
                self.qid_hist = np.ndarray(tuple(buckets),dtype=object) # an extra hist to store qid will be created
                for index, value in np.ndenumerate(self.qid_hist):
                    self.qid_hist[index] = set()
                     
            elif hist_type == 1:
                self.hist = []
                self.qid_hist = []
                for d in range(self.dims):
                    self.hist.append(np.array([0]*buckets[d]))
                    self.qid_hist.append(np.array([set() for i in range(buckets[d])])) # this is different, note [set()] * n is reference!
                
            # insert query into thses histograms
            for i in range(len(queryset)):
                self.InserQueryIntoHist(i, queryset[i])
            
    
    def create_accu_hist(self, hist=None):
        ''' Create accumulative histogram (for dataset).
        @param hist    if this argument is given, apply accu on this histogram, should be numpy format
        '''
        if self.hist_type == 1:
            print("Fatal Error: ","cannot apply accu hist on independent histogram!", file=sys.stderr)
            return None
        
        if hist != None:
            self.accu_hist = copy.deepcopy(hist)
        else:
            self.accu_hist = copy.deepcopy(self.hist)
            
        for k in reversed(range(len(self.accu_hist.shape))): # for each dimension
            for i in range(1,self.accu_hist.shape[k]):
                # https://stackoverflow.com/questions/42817508/get-the-i-th-slice-of-the-k-th-dimension-in-a-numpy-array
                self.accu_hist[(slice(None),) * k + (i,)] += self.accu_hist[(slice(None),) * k + (i-1,)] 
                    
        return self.accu_hist
    
    
    def query_accu_hist(self, query):
        ''' Query accu histogram.
        @param query    
        @param domain    
        '''
        if self.hist_type == 1:
            print("Fatal Error: ","cannot apply accu query on independent histogram!", file=sys.stderr)
            return None

        # notice there should be -1 here for min
        query_index_min = [int((query[k][0] - self.domains[k][0]) / self.dim_steps[k])-1 for k in range(self.dims)] 
        query_index_max = [int((query[k][1] - self.domains[k][0]) / self.dim_steps[k]) for k in range(self.dims)]
        query_index_min = [min(query_index_min[k], self.accu_hist.shape[k]-1) for k in range(self.dims)]
        query_index_max = [min(query_index_max[k], self.accu_hist.shape[k]-1) for k in range(self.dims)]
        
        approximate_result = 0
        for value in range(0, 2**self.dims):

            ignore_flag = False # to skip those 0 accumulation point

            # used to denotes the upper and lower end
            query_expr = bin(value).split('b')[1]
            query_expr = '0'*(self.dims - len(query_expr)) + query_expr # add 0s at the front to make the dimensions consistent
            low_count = query_expr.count('0')

            # find the corresponding sign
            sign = 1
            if low_count % 2 != 0:
                sign = -1

            # find corresponding index of the prefix sum array
            indexes = ()
            for k in range(self.dims):
                if query_expr[self.dims-k-1] == '0':
                    if query_index_min[k] < 0:
                        ignore_flag = True
                        break # this accumulation is 0
                    indexes += (slice(query_index_min[k], query_index_min[k]+1, 1),) # the 1 here is stride
                else:
                    indexes += (slice(query_index_max[k], query_index_max[k]+1, 1),)

            # calculate the result for this one
            if not ignore_flag:
                approximate_result += sign * self.accu_hist[indexes]

        return float(approximate_result)
    
    
    def InserQueryIntoHist(self, qid, query, return_new_region = False):
        ''' Insert a single query to histogram. The histogram could be type 0 or type 1, used to maintain the count.
        @param qid          the query id, should be unique
        @param query        the query itself
        @param threshold    the threshold for query to be regarded as distributive queries
        '''
        if self.is_query_hist == False:
            print("Fatal Error: ","cannot apply query insert on non-queryset histogram!", file=sys.stderr)
            return None
        
        query_index_min = [int((query[k][0] - self.domains[k][0]) / self.dim_steps[k]) for k in range(self.dims)]
        # because we use range selection in the following, we need to add 1 to include the original index
        query_index_max = [int((query[k][1] - self.domains[k][0]) / self.dim_steps[k]) + 1 for k in range(self.dims)]
        query_index_min = [min(query_index_min[k], self.buckets[k]-1) for k in range(self.dims) ]
        # note that because we only use the max side for range (exclusive), we could allow it greater than the maximum index
        query_index_max = [min(query_index_max[k], self.buckets[k]) for k in range(self.dims) ]
        
        # https://stackoverflow.com/questions/35215161/most-efficient-way-to-map-function-over-numpy-array
        set_add = lambda t: t.add(qid)
        vfunc = np.vectorize(set_add) 
        
        new_distribution_regions = None
        
        if self.hist_type == 0:
            
            # find the overlap indexes
            indexes = ()
            for k in range(len(query_index_min)):
                indexes += (slice(query_index_min[k], query_index_max[k], 1),) # the 1 here is stride
            
            self.hist[indexes] += 1 # the query hist for count 
            vfunc(self.qid_hist[indexes]) # add the qid in query hist for qids
  
            if return_new_region:
                # whether a region just become distribution query region
                new_distribution_regions = np.argwhere(self.hist == self.query_threshold_up)
                new_distribution_regions = [tuple(new_distribution_regions[i]) for i in range(len(new_distribution_regions))]
            return new_distribution_regions 
        
        elif self.hist_type == 1:
            
            for k in range(self.dims):
                #print(k, query_index_min[k], query_index_max[k])
                if query_index_max[k] == query_index_min[k]:
                    query_index_max[k] += 1
                self.hist[k][query_index_min[k]:query_index_max[k]] += 1
                try:
                    vfunc(self.qid_hist[k][(slice(query_index_min[k], query_index_max[k], 1),)])
                except:
                    print('query_index_min[k]:',query_index_min[k],'query_index_max[k]:',query_index_max[k])
                    print('(slice(query_index_min[k], query_index_max[k], 1),):',(slice(query_index_min[k], query_index_max[k], 1),))
            
            if return_new_region:
                new_distribution_regions = [np.argwhere(self.hist[k] == self.query_threshold_up) for k in range(self.dims)]
                new_distribution_regions = [[tuple(new_distribution_regions[k][i]) for i in range(len(new_distribution_regions[k]))] \
                                            for k in range(self.dims)]
            return new_distribution_regions 
        
    def RemoveQueryFromHist(self, qid, query, return_new_region = False):
        pass
        
    def filter_distribution_query(self):
        '''
        @return distribution query ids (note only ids are returned)
        '''
        if self.is_query_hist == False:
            print("Fatal Error: ","cannot apply query classification on non-queryset histogram!", file=sys.stderr)
            return None
        
        if self.hist_type == 0:
            qid_greater_threshold = defaultdict(int) # [qid] -> Count (appear in bins greater than threshold) dict in this dimension
            qid_all = defaultdict(int) # [qid] -> Count (appear in bins) dictionary in this dimension
            to_check_regions = np.argwhere(self.hist > 0)
            
            for suffix in to_check_regions:
                suffix = tuple(suffix)
                qids = self.qid_hist[suffix]
                if self.hist[suffix] >= self.query_threshold_up:
                    for qid in qids:
                        qid_greater_threshold[qid] += 1
                        qid_all[qid] += 1
                else:
                    for qid in qids:
                        qid_all[qid] += 1
            
            filtered_qids = []
            for qid in qid_greater_threshold:
                if qid_greater_threshold[qid] / qid_all[qid] >= 0.5**(1.0/float(self.dims)):
                    filtered_qids.append(qid)
            return filtered_qids
        
        elif self.hist_type == 1:
            # first filter distribution query in each seperate dimension
            distributive_query_qids_each_dims = [set() for k in range(self.dims)]
            for k in range(self.dims):
                qid_greater_threshold = defaultdict(int) # [qid] -> Count (appear in bins greater than threshold) dict in this dimension
                qid_all = defaultdict(int) # [qid] -> Count (appear in bins) dictionary in this dimension
                for i in range(self.buckets[k]):
                    qids = self.qid_hist[k][i]
                    #if len(qids) >= self.query_threshold_up: 
                    if len(qids) >= self.query_threshold_up_each_dims[k]:
                        for qid in qids:
                            qid_greater_threshold[qid] += 1
                            qid_all[qid] += 1
                    elif len(qids) > 0:
                        for qid in qids:
                            qid_all[qid] += 1
                            
                for qid in qid_greater_threshold:
                    if qid_greater_threshold[qid] / qid_all[qid] >= 0.5**(1.0/float(self.dims)):
                        distributive_query_qids_each_dims[k].add(qid)
            
            filtered_qids = set.intersection(*distributive_query_qids_each_dims) # expand the arguments with *
            return list(filtered_qids)

In [3]:
# # - - - Test Case 1 - - -
# # Target: basic functionality
# # Result: Pass
# dataset = [[0,0],[0,2],[2,0],[2,0],[2,2],[2,2],[2,2]]
# queryset = [[[0,1],[0,1]]]
# buckets = [2, 2]
# domains = [[0, 2], [0, 2]]
# is_query_hist = True 
# hist_type = 1
# query_hist = MyHist(queryset, buckets, domains, is_query_hist, hist_type)
# print(query_hist.hist) # should be [array([1, 0]), array([1, 0])]
# print(query_hist.qid_hist) # [array([{0}, set()], dtype=object), array([{0}, set()], dtype=object)]

# # - - - Test Case 2 - - -
# # Target: perform identical to existing methods when hist_type = 0
# # Result: Pass
# # ... generate queryset here
# queryset = training_set
# buckets = [300, 300, 300]
# is_query_hist = True 
# hist_type = 0
# query_hist = MyHist(queryset, buckets, domains, is_query_hist, hist_type)
# disqids = query_hist.filter_distribution_query()
# disqids.sort()
# print(disqids) # should contain the distribution queries in the training set

# # - - - Test Case 3 - - -
# # Target: 4 dimensional distribution query classification using hist_type 1
# # Result: 
# # ... generate queryset here
# queryset = training_set
# buckets = [300, 300, 300, 300]
# is_query_hist = True 
# hist_type = 1
# query_hist = MyHist(queryset, buckets, domains, is_query_hist, hist_type)
# disqids = query_hist.filter_distribution_query()
# disqids.sort()
# print(disqids) # should contain the distribution queries in the training set

In [4]:
class PartitionAlgorithm:
    '''
    all methods in this class are static and perform recursive calls to themselves
    '''
    @staticmethod
    def skip_gain_for_split(original_node, child_node1, child_node2, queryset, idx):
        # original node
        lower0 = [domain[0] for domain in original_node[0]]
        upper0 = [domain[1] for domain in original_node[0]]
#         for i in range(len(lower0)):
#             if lower0[i] > upper0[i]:
#                 print("found illegal! lower: ", lower0," upper: ",upper0)
        border0 = tuple(lower0 + upper0)
        overlapped_query_id0 = list(idx.intersection(border0))
        original_overlap = original_node[1] * len(overlapped_query_id0)

        # child node 1
        lower1 = [domain[0] for domain in child_node1[0]]
        upper1 = [domain[1] for domain in child_node1[0]]
        border1 = tuple(lower1 + upper1)
        overlapped_query_id1 = list(idx.intersection(border1))
        child_node1_overlap = child_node1[1] * len(overlapped_query_id1)

        # child node 2
        lower2 = [domain[0] for domain in child_node2[0]]
        upper2 = [domain[1] for domain in child_node2[0]]
        border2 = tuple(lower2 + upper2)
        overlapped_query_id2 = list(idx.intersection(border2))
        child_node2_overlap = child_node2[1] * len(overlapped_query_id2)

        skip_gain = original_overlap - child_node1_overlap - child_node2_overlap
        return skip_gain

    @staticmethod
    def perform_split(i, kdnodes, max_skip_dim, max_skip_value, temp_dataset, dataset_dict, kdnode_dict, accu_count):
        sub_domains1 = np.copy(kdnodes[i][0])
        if kdnodes[i][0][max_skip_dim][0] < kdnodes[i][0][max_skip_dim][1]:
            sub_domains1[max_skip_dim][1] = max_skip_value - min(0.000001, 0.01 * (kdnodes[i][0][max_skip_dim][1] - \
                                                                                   kdnodes[i][0][max_skip_dim][0]))
        else:
            sub_domains1[max_skip_dim][1] = max_skip_value
        sub_domains2 = np.copy(kdnodes[i][0])
        #sub_domains2[max_skip_dim][0] = max_skip_value + 0.00001
        sub_domains2[max_skip_dim][0] = max_skip_value

        sub_dataset1 = temp_dataset[temp_dataset[:,max_skip_dim] < max_skip_value]
        sub_dataset2 = temp_dataset[temp_dataset[:,max_skip_dim] >= max_skip_value]
        sub_dataset1_size = len(sub_dataset1)
        sub_dataset2_size = len(sub_dataset2)

        parent_node_id = kdnodes[i][-4]
        sub_kdnodes_1 = [sub_domains1, sub_dataset1_size, accu_count + 1, parent_node_id, -1, -1]
        sub_kdnodes_2 = [sub_domains2, sub_dataset2_size, accu_count + 2, parent_node_id, -1, -1]

        kdnode_dict[parent_node_id][-2] = accu_count + 1
        kdnode_dict[parent_node_id][-1] = accu_count + 2
        #kdnode_dict.update({accu_count + 1: copy.deepcopy(sub_kdnodes_1)}) 
        #kdnode_dict.update({accu_count + 2: copy.deepcopy(sub_kdnodes_2)}) 
        kdnode_dict.update({accu_count + 1: sub_kdnodes_1}) 
        kdnode_dict.update({accu_count + 2: sub_kdnodes_2}) 

        kdnodes[i] = sub_kdnodes_1
        dataset_dict.update({i:sub_dataset1}) # one put in the original place
        kdnodes.append(sub_kdnodes_2)
        dataset_dict.update({len(kdnodes)-1:sub_dataset2}) # the other put in the end


    # this idx is index for query!!!
    @staticmethod
    def try_split(kdnode, candidate_cut, temp_dataset, queryset, idx, min_block_size):
        split_dimension = candidate_cut[0]
        split_value = candidate_cut[1]

        #sub_dataset1 = temp_dataset[temp_dataset[:,split_dimension] <= split_value]  # process time: 0.019
        #sub_dataset2 = temp_dataset[temp_dataset[:,split_dimension] > split_value]
        #sub_dataset1 = temp_dataset  # used to test whether the above operation is too slow, and it is too slow!
        #sub_dataset2 = temp_dataset

        temp_sub_domains1 = np.copy(kdnode[0])
        temp_sub_domains1[split_dimension][1] = split_value
        temp_sub_domains2 = np.copy(kdnode[0])
        temp_sub_domains2[split_dimension][0] = split_value
        
        if temp_sub_domains1[split_dimension][1] < temp_sub_domains1[split_dimension][0] or \
        temp_sub_domains2[split_dimension][0] > temp_sub_domains2[split_dimension][1]:
            return False, 0
        #print("try split dimension: ",split_dimension, " original domain: ", kdnode[0])
        # check if the subnodes greater than threshold
        #sub_dataset1_size = len(sub_dataset1)
        sub_dataset1_size = np.count_nonzero(temp_dataset[:,split_dimension] < split_value) # process time: 0.007
        sub_dataset2_size = kdnode[1] - sub_dataset1_size

        if sub_dataset1_size < min_block_size or sub_dataset2_size < min_block_size:
            return False, 0

        temp_sub_kdnodes_1 = [temp_sub_domains1, sub_dataset1_size]
        temp_sub_kdnodes_2 = [temp_sub_domains2, sub_dataset2_size]

        if temp_sub_domains1[split_dimension][1] < temp_sub_domains1[split_dimension][0] or \
        temp_sub_domains2[split_dimension][0] > temp_sub_domains2[split_dimension][1]:
            return False, 0

        skip_gain = PartitionAlgorithm.skip_gain_for_split(kdnode, temp_sub_kdnodes_1, temp_sub_kdnodes_2, queryset, idx)
        return True, skip_gain

        #count_skip = skip_for_split(queryset, temp_sub_kdnodes_1, idx) + skip_for_split(queryset, temp_sub_kdnodes_2, idx)
        #return True, count_skip


    # this idx is index for query!!!
    @staticmethod
    def try_split_approximate(kdnode, candidate_cut, queryset, idx, min_block_size, accu_hist, domain):
        split_dimension = candidate_cut[0]
        split_value = candidate_cut[1]

        temp_sub_domains1 = np.copy(kdnode[0])
        temp_sub_domains1[split_dimension][1] = split_value - 0.00001
        temp_sub_domains2 = np.copy(kdnode[0])
        temp_sub_domains2[split_dimension][0] = split_value + 0.00001 # this is used to avoid being recognized as intersection

        if temp_sub_domains1[split_dimension][1] < temp_sub_domains1[split_dimension][0] or \
        temp_sub_domains2[split_dimension][0] > temp_sub_domains2[split_dimension][1]:
            return False, 0

        # check if the subnodes greater than threshold
        sub_dataset1_size = accu_hist.query_accu_hist(temp_sub_domains1) # QueryAccuHist(accu_hist, temp_sub_domains1, domain)
        #sub_dataset2_size = QueryAccuHist(accu_hist, temp_sub_domains2, domain)
        sub_dataset2_size = kdnode[1] - sub_dataset1_size

        if sub_dataset1_size < min_block_size or sub_dataset2_size < min_block_size:
            return False, 0

        temp_sub_kdnodes_1 = [temp_sub_domains1, sub_dataset1_size]
        temp_sub_kdnodes_2 = [temp_sub_domains2, sub_dataset2_size]

        # this is incorrect
        #count_skip = skip_for_split(queryset, temp_sub_kdnodes_1, idx) + skip_for_split(queryset, temp_sub_kdnodes_2, idx)

        skip_gain = PartitionAlgorithm.skip_gain_for_split(kdnode, temp_sub_kdnodes_1, temp_sub_kdnodes_2, queryset, idx)

        return True, skip_gain
    
    @staticmethod
    def query_domain_ratio(query, domain):
        space_query = 1
        for i in range(len(query)):
            space_query *= (query[i][1] - query[i][0])

        space_domain = 1
        for i in range(len(domain)):
            space_domain *= (domain[i][1] - domain[i][0])

        return space_query / space_domain

    # this works for one dimension only !!! An implementation of query bounding.
    @staticmethod
    def getoverlap(al, au, bl, bu):
        return max(0, min(au,bu)-max(al,bl))

    # merges overlapping one-dimensional intervals; called by AKD for the projected queries of each dimension.
    @staticmethod
    def bounding_union(query_collection):

        # should keep it ordered first by the lower interval !!!!!!
        query_collection = query_collection[query_collection[:,0].argsort()]

        remaining_query = query_collection
        bounded_intervals = []

        while len(remaining_query) != 0:

            initial_interval = [remaining_query[0][0], remaining_query[0][1]]
            temp_interval = []

            for i in range(len(remaining_query)-1):

                overlap = PartitionAlgorithm.getoverlap(initial_interval[0],initial_interval[1],
                                                        remaining_query[i+1][0], remaining_query[i+1][1])
                # there is no overlap
                if overlap == 0:
                    temp_interval.append([remaining_query[i+1][0], remaining_query[i+1][1]])
                else: # update interval border
                    initial_interval[0] = min(initial_interval[0], remaining_query[i+1][0])
                    initial_interval[1] = max(initial_interval[1], remaining_query[i+1][1])

            bounded_intervals.append(initial_interval)
            remaining_query = temp_interval

        return bounded_intervals
    
    
    @staticmethod
    def AKD(dataset, queryset, data_threshold, accu_count_list, kdnode_dict, root_node, current_dim = 0, level = 0, show_step = False):
        '''
        Recursively split a node near the median of one dimension, preferring a split value that does
        not fall inside the bounded query intervals of that dimension.
        Parameters:
        @dataset: numpy array of the records of root_node; it is only read (sliced), never modified
        @queryset: numpy array of the queries in this domain, indexed as queryset[query, dimension, 0/1]
        @data_threshold: a node with at most this many records is returned as it is; a split that would
                         leave fewer records than this on one side is not performed
        @accu_count_list: one-element list holding the number of node ids handed out so far, updated in place
        @kdnode_dict: [node id] -> node, the new child nodes are added to it
        @root_node: the node to split; for subsequent calls, this is actually the child node itself
        @current_dim: the split dimension used by the caller, the round robin fallback continues after it
        @level: incremented and passed on to the recursive calls, not used otherwise
        @show_step: passed on to the recursive calls, not used otherwise
        Return (kdnodes, is_valid):
        @kdnodes: list of nodes, each [sub_domains, len(sub_dataset), id, parent_id, left_child_id, right_child_id]
        @is_valid: if any child node is not valid (and contains queries), the parent node will return itself
                    as a whole for further processing
                    not valid here means, further processing will not help produce better partition (not enough space)
        '''

        current_size = len(dataset)
        if current_size <= data_threshold:
            return [root_node], True # here we assume the children nodes are -1 and -1

        domains = root_node[0] # for ease of use in the following
        total_size = len(dataset)

        if len(queryset) == 0:  # no query in this domain, nothing to optimize for
            return [root_node], True

        # a single remaining query covering more than 25% of this node's volume: stop here and report
        # the node as not valid (candidate values for the ratio: 0.5, 0.25)
        if len(queryset) == 1 and PartitionAlgorithm.query_domain_ratio(queryset[0], domains) > 0.25:
            return [root_node], False

        split_distance_each_dim = []
        split_position_each_dim = [] # sorted index value in array (never filled or read below)
        split_value_each_dim = []

        # for each dimension, we calculate the distance (in #records) from the median to its first non-crossing split
        for D in range(len(dataset[0])):

            # median, with fast median algorithm
            median = np.median(dataset[:,D])
            median_low = domains[D][0]
            median_up = domains[D][1]    

            # split position (split_position is assigned but not read afterwards)
            split_distance = 0
            split_position = int(total_size / 2)

            query_in_this_dim = queryset[:,D]
            # bound the projected queries in this dimension
            query_bound = PartitionAlgorithm.bounding_union(query_in_this_dim)

            # check if the split position intersects some query bounds in this dim
            for i in range(len(query_bound)):

                # if the median lies strictly inside a query bound
                # (the original author notes it can only intersect one bounded query)
                if median > query_bound[i][0] and median < query_bound[i][1]:

                    # check if the two ends already exceed the domain, if yes, split from the middle
                    if query_bound[i][0] <= domains[D][0] and query_bound[i][1] >= domains[D][1]:
                        split_distance = int(total_size / 2)
                        break;
                    # if not exceeds, determine which side is closer to the median
                    else:
                        # for the left side
                        if query_bound[i][0] > domains[D][0]:
                            median_low = query_bound[i][0]         
                        # for the right side
                        if query_bound[i][1] < domains[D][1]:
                            median_up = query_bound[i][1]

                    # if not exceeds then choose the one that is closest from the median (in terms of #records!)
                    number_of_records_from_low_to_median = np.count_nonzero((dataset[:,D]>=median_low) & (dataset[:,D] < median)) # 0.018
                    number_of_records_from_up_to_median = np.count_nonzero((dataset[:,D]<=median_up) & (dataset[:,D] > median))
                    #number_of_records_from_low_to_median = len(dataset[(dataset[:,D]>=median_low) & (dataset[:,D] < median)]) # 0.025
                    #number_of_records_from_up_to_median = len(dataset[(dataset[:,D]<=median_up) & (dataset[:,D] > median)])

                    # move the split value to the closer end of the query bound
                    if number_of_records_from_low_to_median <= number_of_records_from_up_to_median:
                        median = median_low
                        split_distance = number_of_records_from_low_to_median
                    else:
                        median = median_up
                        split_distance = number_of_records_from_up_to_median

            # for each dimension, record its result
            split_distance_each_dim.append(split_distance)
            split_value_each_dim.append(median)

        # after calculating the distance from median to its first non-crossing split
        split_distance_each_dim = np.asarray(split_distance_each_dim)
        split_dimension = 0
        split_value = 0

        # degradation mechanism (if no valid split position, then using round robin)
        # if the distance is at most 10 records in every dimension, split round robin to enhance robustness
        if max(split_distance_each_dim) <= 10:
            split_dimension = current_dim + 1
            if split_dimension >= len(domains):
                split_dimension %= len(domains)
            split_value = np.median(dataset[:,split_dimension])
        elif min(split_distance_each_dim) >= int((total_size / 2)-10): # if there is no valid split position
            return [root_node], False
        else:
            split_dimension = np.argmin(split_distance_each_dim)  # get the split dimension
            split_value = split_value_each_dim[split_dimension]

        # split the dataset according to the split position
        sub_dataset1 = dataset[dataset[:,split_dimension] < split_value]
        sub_dataset2 = dataset[dataset[:,split_dimension] >= split_value]

        if len(sub_dataset1) < data_threshold or len(sub_dataset2) < data_threshold:
            return [root_node], True

        # change the domains
        sub_domains1 = np.copy(domains)
        sub_domains1[split_dimension][1] = split_value
        sub_domains2 = np.copy(domains)
        sub_domains2[split_dimension][0] = split_value

        # a query spanning the split value is kept on both sides
        sub_query1 = queryset[queryset[:,split_dimension,0] < split_value]
        sub_query2 = queryset[queryset[:,split_dimension,1] > split_value]

        # root_node[-4] is the id of root_node (third entry of the 6-entry node), used as parent id of the children
        sub_kdnode_1 = [sub_domains1, len(sub_dataset1), accu_count_list[0] + 1, root_node[-4], -1, -1]
        sub_kdnode_2 = [sub_domains2, len(sub_dataset2), accu_count_list[0] + 2, root_node[-4], -1, -1]

        # link the children in root_node (sub_kdnode[-4] is the child's own id) and register them
        root_node[-2] = sub_kdnode_1[-4]
        root_node[-1] = sub_kdnode_2[-4]

        kdnode_dict.update({sub_kdnode_1[-4]: sub_kdnode_1})
        kdnode_dict.update({sub_kdnode_2[-4]: sub_kdnode_2})

        accu_count_list[0] += 2

        # used to see the current depth
        level += 1

        # recursion
        kdnodes = []

        # dataset, queryset, data_threshold, accu_count_list, kdnode_dict, root_node, level = 0, show_step

        kdnodes_1, is_valid_1 = PartitionAlgorithm.AKD(sub_dataset1, sub_query1, data_threshold, accu_count_list, 
                                                       kdnode_dict, sub_kdnode_1, split_dimension, level, show_step)
        kdnodes_2, is_valid_2 = PartitionAlgorithm.AKD(sub_dataset2, sub_query2, data_threshold, accu_count_list, 
                                                       kdnode_dict, sub_kdnode_2, split_dimension, level, show_step)

        # if one of the sub partition is not valid and contain queries, give up this split and return the node itself
        # NOTE(review): the child ids written to root_node and the entries added to kdnode_dict above are not
        # reverted on this path -- confirm that callers expect this
        if (not is_valid_1 and len(sub_query1) > 0) or (not is_valid_2 and len(sub_query2) > 0):
            return [root_node], True

        kdnodes = kdnodes_1 + kdnodes_2
        return kdnodes, True
    
    @staticmethod
    def LKD(candidate_cut_pos, idx, use_hist, accu_hist, dataset, queryset, domains, min_block_size, accu_count_list, kdnode_dict, 
            root_node, show_step = False):
    
        '''
        @dataset: local dataset
        '''

        kdnodes = [root_node]

        dataset_dict = {} # the total dataset size should maintain the same as original
        dataset_dict.update({0:dataset})
        can_split = True
        accu_count = accu_count_list[0] # newly add attribute, the total kdnodes generated, including the intermediate

        while can_split:

            can_split = False

            for i in range(len(kdnodes)):
                #print('= = = current try split kdnode:', kdnodes[i][0])

                if kdnodes[i][1] <= 2 * min_block_size: # including those marked as -1, i.e., deleted
                    continue

                # try to split
                benefits = []
                max_skip = 0
                max_skip_dim = 0
                max_skip_value = 0
                temp_dataset = np.copy(dataset_dict[i])

                for j in range(len(candidate_cut_pos)):

                    # the skip here is skip gain instead of the original definition
                    is_split, skip = False, 0
                    if use_hist == True:
                        is_split, skip = PartitionAlgorithm.try_split_approximate(kdnodes[i], candidate_cut_pos[j], queryset, idx, 
                                                                                  min_block_size, accu_hist, domains)
                    else:
                        is_split, skip = PartitionAlgorithm.try_split(kdnodes[i], candidate_cut_pos[j], temp_dataset, queryset, idx, 
                                                                      min_block_size)

                    if show_step:
                        print("candidate cut position:", j, "try split dim:", candidate_cut_pos[j][0], "try split value:", candidate_cut_pos[j][1], "skip gain:", skip)
                    benefits.append(skip)

                    if skip > max_skip:
                        max_skip = skip
                        max_skip_dim = candidate_cut_pos[j][0]
                        max_skip_value = candidate_cut_pos[j][1]

                if max(benefits) <= 0:
                    #can_split = False  # the skip after split should be greater than before split
                    #break

                    # this node cannot be split, keep search the remaining nodes
                    continue
                else:
                    # perform split, at this place, we keep the exact size
                    PartitionAlgorithm.perform_split(i, kdnodes, max_skip_dim, max_skip_value, temp_dataset, dataset_dict, 
                                                     kdnode_dict, accu_count)
                    can_split = True
                    accu_count += 2

                    if show_step:
                        print("current skip gain:", max_skip, "skip dim:", max_skip_dim, "skip pos:", max_skip_value)
                        visualize_kdnodes_and_query_2(kdnodes, queryset, [], domains)

        accu_count_list[0] = accu_count
        return kdnodes

    @staticmethod
    def KDT(dataset, current_dim, data_threshold, root_node, kdnode_dict, accu_count_list):
        
        current_size = len(dataset)
        if current_size <= data_threshold:
            return [root_node] # here we assume the children nodes are -1 and -1

        # try partition this node into 2
        median = np.median(dataset[:,current_dim])

        sub_domains1 = np.copy(root_node[0])
        sub_domains1[current_dim][1] = median
        sub_domains2 = np.copy(root_node[0])
        sub_domains2[current_dim][0] = median

        sub_dataset1 = dataset[dataset[:,current_dim] < median]
        sub_dataset2 = dataset[dataset[:,current_dim] >= median]

        if len(sub_dataset1) < data_threshold or len(sub_dataset2) < data_threshold:
            return [root_node]

        sub_kdnode_1 = [sub_domains1, len(sub_dataset1), accu_count_list[0] + 1, root_node[-4], -1, -1]
        sub_kdnode_2 = [sub_domains2, len(sub_dataset2), accu_count_list[0] + 2, root_node[-4], -1, -1]

        root_node[-2] = sub_kdnode_1[-4]
        root_node[-1] = sub_kdnode_2[-4]

        kdnode_dict.update({sub_kdnode_1[-4]: sub_kdnode_1})
        kdnode_dict.update({sub_kdnode_2[-4]: sub_kdnode_2})

        accu_count_list[0] += 2

        current_dim += 1
        if current_dim >= len(root_node[0]):
            current_dim %= len(root_node[0])

        kdnodes = []
        kdnodes += PartitionAlgorithm.KDT(sub_dataset1, current_dim, data_threshold, sub_kdnode_1, kdnode_dict, accu_count_list)
        kdnodes += PartitionAlgorithm.KDT(sub_dataset2, current_dim, data_threshold, sub_kdnode_2, kdnode_dict, accu_count_list)

        return kdnodes
    

In [139]:
class NORA:
    '''
    Builds a kd-node partition layout for a dataset from a training query workload.

    initialization() chains three partitioners of PartitionAlgorithm: AKD on the bounded
    "distribution" queries selected by the query histogram, then LKD, then KDT.
    A kd-node is a list [domains, size, node id, parent id, left child id, right child id].
    '''

    def __init__(self, dataset, queryset, domains, data_threshold, dim_probs,
                 use_data_hist, data_hist_buckets, data_hist_type,
                 use_query_hist, query_hist_buckets, query_hist_type,
                 query_threshold_up, query_threshold_low):
        ''' Store the configuration, build the optional histograms and index the queries.
        @param dataset              numpy array of points, one row per record
        @param queryset             queries, each a sequence of [lower, upper] per dimension
        @param domains[..]          [lower, upper] of every dimension
        @param data_threshold       minimum partition size; nodes are split only above 2x this
        @param dim_probs[..]        the probability of queries in each dimension
        @param use_data_hist        build a MyHist over the dataset (used by LKD) when True
        @param data_hist_buckets    the number of buckets in each dimension of the data hist
        @param data_hist_type       hist type passed to MyHist for the data hist
        @param use_query_hist       build a MyHist over the queries; initialization() needs it
        @param query_hist_buckets   the number of buckets in each dimension of the query hist
        @param query_hist_type      hist type passed to MyHist for the query hist
        @param query_threshold_up   upper threshold passed to the query MyHist
        @param query_threshold_low  lower threshold passed to the query MyHist
        '''
        self.dataset = dataset
        self.queryset = queryset
        self.domains = domains
        self.dims = len(domains)
        self.data_threshold = data_threshold
        self.dim_probs = dim_probs # if not given, should be calculated by itself

        self.use_data_hist = use_data_hist
        self.data_hist_buckets = data_hist_buckets
        self.data_hist_type = data_hist_type
        if use_data_hist:
            self.data_hist = MyHist(dataset, data_hist_buckets, domains, False, data_hist_type, dim_probs)
        else:
            self.data_hist = None

        self.use_query_hist = use_query_hist # should always be True
        self.query_hist_buckets = query_hist_buckets
        self.query_hist_type = query_hist_type
        self.query_threshold_up = query_threshold_up
        self.query_threshold_low = query_threshold_low
        if use_query_hist:
            self.query_hist = MyHist(queryset, query_hist_buckets, domains, True, query_hist_type, dim_probs,
                                     query_threshold_up, query_threshold_low)
        else:
            self.query_hist = None

        self.kdnode_counter = [0] # one-element list so the partitioners can update it in place
        self.kdnode_dict = {}
        self.candidate_cut_pos = self.generate_candidate_cut_pos()
        self.kdnodes = []

        p = index.Property()
        p.dimension = self.dims # the dimensions!
        p.leaf_capacity = 1000 # cannot be less than 100, indicate the maximum capacity
        p.fill_factor = 0.5
        p.overwrite = True

        self.query_idx = index.Index(properties = p) # Rtree index for queries
        for i in range(len(queryset)):
            self.query_idx.insert(i, self.query_2_border(queryset[i]))

        self.kdnode_idx = index.Index(properties = p) # Rtree index for kdnodes

    @staticmethod
    def kdnode_2_border(kdnode):
        '''Return the bounding box of a kd-node (taken from its domains, kdnode[0]).
        @return tuple (lower_0, .., lower_d-1, upper_0, .., upper_d-1)
        '''
        return NORA.query_2_border(kdnode[0])

    @staticmethod
    def query_2_border(query):
        '''Flatten per-dimension [lower, upper] pairs into an Rtree bounding box.
        @return tuple (lower_0, .., lower_d-1, upper_0, .., upper_d-1), i.e. all lower bounds
                followed by all upper bounds (the order rtree uses with its default interleaved=True)
        '''
        lower = [domain[0] for domain in query]
        upper = [domain[1] for domain in query]
        return tuple(lower + upper)

    def generate_candidate_cut_pos(self):
        '''Collect every query boundary as a candidate cut position.
        @return list of (dimension, value) pairs, two per query and dimension (lower, then upper)
        '''
        candidate_cut_pos = []
        for query in self.queryset:
            for dim in range(len(query)):
                candidate_cut_pos.append((dim, query[dim][0]))
                candidate_cut_pos.append((dim, query[dim][1]))
        return candidate_cut_pos

    def retrieve_sub_dataset(self, domain):
        '''Return the rows of self.dataset lying inside domain.
        @param domain    [lower, upper] per dimension; lower is inclusive, upper is exclusive
        @return          numpy array with the matching rows
        '''
        # NOTE(review): because the upper bound is exclusive, rows sitting exactly on a
        # dimension's upper bound are never returned -- confirm this is intended for the
        # outermost domain.
        inside = np.ones(len(self.dataset), dtype=bool)
        for dim, (lower, upper) in enumerate(domain):
            # filter the instance's own dataset, not a same-named notebook global
            column = self.dataset[:, dim]
            inside &= (column >= lower) & (column < upper)
        return self.dataset[inside]

    def _partition_large_nodes(self, root_nodes, partition):
        '''Apply partition(node, node_dataset) to every node holding more than
        2 * data_threshold records and pass the remaining nodes through unchanged.
        @return the resulting kd-nodes, in input order
        '''
        processed_kdnodes = []
        for node in root_nodes:
            if node[1] > 2 * self.data_threshold:
                node_dataset = self.retrieve_sub_dataset(node[0]) # the domain of this node
                processed_kdnodes += partition(node, node_dataset)
            else:
                processed_kdnodes.append(node)
        return processed_kdnodes

    def apply_AKD(self, bounded_filtered_queries, root_nodes):
        '''apply AKD on a collection of nodes
        @param: bounded_filtered_queries    bounded queries
        @param: root_nodes                  a collection of kdnodes
        @return the kd-nodes after AKD; nodes that are too small are returned unchanged
        '''
        def partition(node, node_dataset):
            # AKD also returns a validity flag, which is not used here
            akd_kdnodes, _ = PartitionAlgorithm.AKD(node_dataset, bounded_filtered_queries, self.data_threshold,
                                                    self.kdnode_counter, self.kdnode_dict, node)
            return akd_kdnodes

        return self._partition_large_nodes(root_nodes, partition)

    def apply_LKD(self, root_nodes):
        '''apply LKD on a collection of nodes
        @param: root_nodes                  a collection of kdnodes
        @return the kd-nodes after LKD; nodes that are too small are returned unchanged
        '''
        candidate_cut_pos = self.generate_candidate_cut_pos()
        # LKD is told to use the data histogram only when one was built
        use_hist = self.data_hist is not None
        if use_hist:
            self.data_hist.create_accu_hist()

        def partition(node, node_dataset):
            return PartitionAlgorithm.LKD(candidate_cut_pos, self.query_idx, use_hist, self.data_hist, node_dataset,
                                          self.queryset, self.domains, self.data_threshold, self.kdnode_counter,
                                          self.kdnode_dict, node)

        return self._partition_large_nodes(root_nodes, partition)

    def apply_KDT(self, root_nodes):
        '''apply KDT on a collection of nodes
        @param: root_nodes                  a collection of kdnodes
        @return the kd-nodes after KDT; nodes that are too small are returned unchanged
        '''
        def partition(node, node_dataset):
            return PartitionAlgorithm.KDT(node_dataset, 0, self.data_threshold, node,
                                          self.kdnode_dict, self.kdnode_counter)

        return self._partition_large_nodes(root_nodes, partition)

    @staticmethod
    def is_overlap(query1, query2):
        '''Return True when the two boxes intersect in every dimension (touching counts).'''
        for dim in range(len(query1)):
            if query1[dim][0] > query2[dim][1] or query1[dim][1] < query2[dim][0]:
                return False
        return True

    @staticmethod
    def union_query(query1, query2):
        '''Return the smallest box covering both queries, as a list of [lower, upper].'''
        return [[min(query1[dim][0], query2[dim][0]), max(query1[dim][1], query2[dim][1])]
                for dim in range(len(query1))]

    @staticmethod
    def query_bounding(distribution_query):
        '''
        for the distribution queries, we union them if there exist an overlap
        @return list of pairwise non-overlapping bounding queries
        '''
        bounded_query = []
        for query in distribution_query:
            # merging enlarges the query, so rescan until it overlaps nothing that is kept
            merged_any = True
            while merged_any:
                merged_any = False
                for i, bq in enumerate(bounded_query):
                    if NORA.is_overlap(query, bq):
                        query = NORA.union_query(query, bq)
                        del bounded_query[i] # safe: the scan restarts right after
                        merged_any = True
                        break
            bounded_query.append(query)
        return bounded_query

    def initialization(self):
        '''Build the partition layout: AKD -> LKD -> KDT, starting from a single root node.
        Stores the intermediate results (akd_kdnodes, lkd_kdnodes, kdt_kdnodes) on the
        instance and inserts the final nodes into the kd-node Rtree index.
        @return the final list of kd-nodes
        @raise ValueError when the instance was created without a query histogram
        '''
        if self.query_hist is None:
            raise ValueError("NORA.initialization() requires the query histogram (use_query_hist=True)")

        self.filtered_qids = self.query_hist.filter_distribution_query()
        filtered_queries = np.array(self.queryset)[self.filtered_qids]
        bounded_filtered_queries = self.query_bounding(filtered_queries)
        self.bounded_filtered_queries = np.array(bounded_filtered_queries)

        root_node = [self.domains, len(self.dataset), 0, -1, -1, -1]
        self.kdnode_dict.update({0: root_node})
        self.akd_kdnodes = self.apply_AKD(self.bounded_filtered_queries, [root_node])
        self.lkd_kdnodes = self.apply_LKD(self.akd_kdnodes)
        self.kdt_kdnodes = self.apply_KDT(self.lkd_kdnodes)

        # insert the kdnodes to index, keyed by node id
        for kdnode in self.kdt_kdnodes:
            self.kdnode_idx.insert(kdnode[-4], NORA.kdnode_2_border(kdnode), kdnode)

        self.kdnodes = self.kdt_kdnodes
        return self.kdt_kdnodes

    def run_competitor_QDT(self):
        '''which is the LKD in this class, run alone on the whole dataset
        @return the kd-nodes produced by LKD; a private counter and node dict are used
        '''
        candidate_cut_pos = self.generate_candidate_cut_pos()
        root_node = [self.domains, len(self.dataset), 0, -1, -1, -1]
        use_hist = self.data_hist is not None
        # NOTE(review): unlike apply_LKD, create_accu_hist() is not called here -- confirm
        # LKD does not need it when this runs before initialization().
        competitor_qdt_kdnodes = PartitionAlgorithm.LKD(candidate_cut_pos, self.query_idx, use_hist, self.data_hist, self.dataset,
                                                        self.queryset, self.domains, self.data_threshold, [0], {0:root_node}, root_node)
        return competitor_qdt_kdnodes

    def run_competitor_KDT(self):
        '''Run KDT alone on the whole dataset, with a private counter and node dict.
        @return the kd-nodes produced by KDT
        '''
        root_node = [self.domains, len(self.dataset), 0, -1, -1, -1]
        competitor_kdt_kdnodes = PartitionAlgorithm.KDT(self.dataset, 0, self.data_threshold, root_node, {}, [0])
        return competitor_kdt_kdnodes

In [6]:
# # - - - Test Case 1 - - -
# # Target: 3 dimensional NORA using hist_type 0
# # Result: PASS
# # ... generate dataset and queryset here

# queryset = training_set
# data_threshold = block_size
# dim_probs = dimension_probability
# use_data_hist = True
# data_hist_buckets = [300, 300, 300]
# data_hist_type = 0
# use_query_hist = True
# query_hist_buckets = [300, 300, 300]
# query_hist_type = 0
# query_threshold_up = 2
# query_threshold_low = 2

# Nora = NORA(dataset, queryset, domains, data_threshold, dim_probs, use_data_hist, data_hist_buckets, data_hist_type,
#            use_query_hist, query_hist_buckets, query_hist_type, query_threshold_up, query_threshold_low)
# nora_kdnodes = Nora.initialization()

In [176]:
# Scale factor of the lineitem sample used in this notebook; it selects the input files below.
scale_factor = 100
prob_threshold = 1 / scale_factor
total_dims = 16 # the dimensions of lineitem table
domain_dims = 8 # the number of dimensions for which min/max domains are maintained
chunk_size = 100000 # 0.1M 

# base_table_path = 'C:/Users/Cloud/iCloudDrive/HUAWEI_LKD/9a84f6cd-727f-4f10-ae95-10a0214e10a4-tpc-h-tool/2.18.0_rc2/dbgen/lineitem_'
# table_path = base_table_path + str(scale_factor) + '.tbl'

# Input files are named lineitem_<scale_factor>_<prob_threshold>.csv (sampled rows) and
# lineitem_<scale_factor>_<prob_threshold>_domains.csv (per-dimension domain).
base_save_path = 'C:/Users/Cloud/iCloudDrive/HUAWEI_LKD/Dataset/Robust/dataset/lineitem_'
save_path_data = base_save_path + str(scale_factor) + '_' + str(prob_threshold) + '.csv'
save_path_domain = base_save_path + str(scale_factor) + '_' + str(prob_threshold) + '_domains.csv'

# By default the sample always holds about 6M rows (i.e., the scale-factor-1 size), so for a
# higher scale factor the block size has to be divided by that factor.
block_size = 1000000 // scale_factor # in original file, 1M rows take approximately 128MB

# = = = Data Loading = = =
dataset = np.genfromtxt(save_path_data, delimiter=',') # the sampled subset
domains = np.genfromtxt(save_path_domain, delimiter=',') # the domain of that scale

# Configuration
used_dimensions = [1,2,3,4] # 0-based indices of the four dimensions kept for the experiments

# Keep only the selected columns of the data and the matching rows of the domain table.
dataset = dataset[:,used_dimensions]
domains = domains[used_dimensions]

In [177]:
# = = = Query Loading = = =
from QueryGeneration import *

# = = = Query Generation = = =
# All values below are passed to generate_query_with_random; the per-dimension ranges are
# 10% of each dimension's domain width.

total_queries = 100
random_percentage = 0.2
maximum_random_range = [(domains[i,1] - domains[i,0])*0.1 for i in range(len(domains))]
cluster_amount = 5
maximum_range_dis = [(domains[i,1] - domains[i,0])*0.1 for i in range(len(domains))]
sigmas_percentage = [0.2, 0.2, 0.2, 0.2]
dimension_probability = [0.8, 0.8, 0.8, 0.8] # [0.5, 0.5, 0.5, 0.5] # [1.0, 1.0, 1.0, 1.0] # [0.8, 0.8, 0.8, 0.8]  # [1.0, 0.8, 0.5, 0.6]
random_shift = False
return_seperate = True

# COMMENT THIS OUT IF NOT GENERATING QUERIES
# (the commented call below is the variant without dimension_probability)
# mixed_queris = generate_query_with_random(total_queries, random_percentage, domains, maximum_random_range, 
#                                           cluster_amount, maximum_range_dis, sigmas_percentage, random_shift, return_seperate)
mixed_queris = generate_query_with_random(total_queries, random_percentage, domains, maximum_random_range, 
                                          cluster_amount, maximum_range_dis, sigmas_percentage, random_shift, 
                                          return_seperate, dimension_probability)

#plot_queries_2d_distribution_and_random(mixed_queris[1], mixed_queris[0], domains)
# Element 0 of the result is used as the random queries, element 1 as the distribution queries.
random_query = mixed_queris[0]
distribution_query = mixed_queris[1]

# = = = Query Saving / Loading = = =

# Configuration
query_base_path = 'C:/Users/Cloud/iCloudDrive/NORA_experiments/queryset/prob1/'
sub_directory = 'default' # 'default'  alpha'  'num_dims'  'prob_dims'  'num_X' (not used below; the path is built from variation_dict)

variation_dict = {0: 'default', 1: 'alpha', 2: 'num_dims', 3: 'prob_dims', 4: 'num_X'}

variation_index = 0 # use to specify the key in variation_dict
variation_value = 0 # would better to use %, i.e., *100

# Files are named <variation>/<variation>_<value>_distribution.csv and ..._random.csv.
# query_base_path already ends with '/', so the resulting paths contain a doubled '/'.
distribution_path = query_base_path + '/' +  variation_dict[variation_index] + '/' + variation_dict[variation_index] \
                    + '_' + str(variation_value) + '_distribution.csv'
random_path = query_base_path + '/' +  variation_dict[variation_index] + '/' + variation_dict[variation_index] \
                    + '_' + str(variation_value) + '_random.csv'

# distribution_path = query_base_path + 'alpha_' + str(int(random_percentage*100)) + '_dims_' + \
#     str(len(used_dimensions)) + '_distribution.csv'
# random_path = query_base_path + 'alpha_' + str(int(random_percentage*100))  + '_dims_' + \
#     str(len(used_dimensions)) + '_random.csv'

save_query(random_query, random_path) # mixed_queris[0]
save_query(distribution_query, distribution_path) # mixed_queris[1]
    
# Train/test split: the first half of the expected number of random queries and of
# distribution queries forms the training set, everything after those cut points the testing set.
random_segmentation = int(total_queries * random_percentage / 2)
distribution_segmentation = int(total_queries * (1 - random_percentage) / 2)
training_set = np.concatenate((random_query[0:random_segmentation], distribution_query[0:distribution_segmentation]), axis=0)
testing_set = np.concatenate((random_query[random_segmentation:], distribution_query[distribution_segmentation:]), axis = 0)

In [132]:
# - - - Test Case 3 - - -
# Target: 4 dimensional distribution query classification using hist_type 1
# Result: 
# ... generate queryset here
# Build a query histogram (is_query_hist=True, hist_type=1) over the training queries and
# list the ids of the queries it classifies as distribution queries.
queryset = training_set
buckets = [500, 500, 500, 500]
is_query_hist = True 
hist_type = 1
dim_probs = dimension_probability
query_hist = MyHist(queryset, buckets, domains, is_query_hist, hist_type, dim_probs)
disqids = query_hist.filter_distribution_query()
disqids.sort()
print(disqids) # should contain the ids of the distribution queries in the training set

[19]


In [126]:
# Number of query ids classified as distribution queries.
# NOTE(review): this cell's execution count (126) is lower than that of the cell that builds
# disqids (132), so the stored output may come from a different run.
len(disqids)

38

In [95]:
# Only the last expression of a cell is displayed, so the output belongs to
# query_threshold_up_each_dims; MyHist fills that list only when dim_probs is non-empty.
query_hist.buckets
query_hist.query_threshold_up_each_dims

[]

In [24]:
# show_queries_3d(mixed_queris[1], mixed_queris[0])

In [25]:
# show_queries_3d(mixed_queris[1], mixed_queris[0], 1, 2, 3)

In [26]:
# # - - - Test Case 5 - - -
# # Target: 4 dimensional distribution query classification
# queryset = training_set
# buckets = [300, 300, 300, 300]
# is_query_hist = True 
# hist_type = 1
# query_hist = MyHist(queryset, buckets, domains, is_query_hist, hist_type, dimension_probability)
# disqids = query_hist.filter_distribution_query()
# disqids.sort()
# print(disqids) # should contains the distribution queries in training set
# print(len(disqids))
# print(np.count_nonzero(np.array(disqids)[:] < 100))

In [127]:
# - - - Test Case 2 - - -
# Target: 4 dimensional NORA using hist_type 1
# Result:
# ... generate dataset and queryset here

# NORA is trained on the training queries only; block_size is the minimum partition size.
queryset = training_set
data_threshold = block_size
dim_probs = dimension_probability
use_data_hist = False # no data histogram is built, so LKD is called with use_hist=False
data_hist_buckets = [500, 500, 500, 500]
data_hist_type = 0
use_query_hist = True # required by NORA.initialization()
query_hist_buckets = [500, 500, 500, 500]
query_hist_type = 1
query_threshold_up = 1
query_threshold_low = 1

Nora = NORA(dataset, queryset, domains, data_threshold, dim_probs, use_data_hist, data_hist_buckets, data_hist_type,
           use_query_hist, query_hist_buckets, query_hist_type, query_threshold_up, query_threshold_low)
# Runs AKD -> LKD -> KDT and returns the final kd-nodes.
nora_kdnodes = Nora.initialization()

In [28]:
# from Utils import *
# visualize_kdnodes_and_query_3(nora_kdnodes, [], [], domains)
# visualize_kdnodes_and_query_3(Nora.akd_kdnodes, [], [], domains)

In [110]:
from Query import *

In [128]:
# Evaluate the NORA layout on the held-out testing queries; Query prints one line per query
# (cost and intersected partitions) and its return value is used as the cost in the ratio cell.
nora_cost = Query(testing_set, nora_kdnodes, random_percentage)

Query 0  Cost:  35917  Intersected Partitions: [105, 185]
Query 1  Cost:  26290  Intersected Partitions: [153, 155]
Query 2  Cost:  30942  Intersected Partitions: [133, 134]
Query 3  Cost:  70391  Intersected Partitions: [192, 222, 224]
Query 4  Cost:  138050  Intersected Partitions: [170, 172, 174, 176, 202, 206]
Query 5  Cost:  67531  Intersected Partitions: [170, 172, 202]
Query 6  Cost:  66032  Intersected Partitions: [62, 63, 64, 65]
Query 7  Cost:  256188  Intersected Partitions: [61, 76, 91, 93]
Query 8  Cost:  128105  Intersected Partitions: [59, 91]
Query 9  Cost:  170618  Intersected Partitions: [75, 76, 77, 78]
Query 10  Cost:  13217  Intersected Partitions: [299]
Query 11  Cost:  12250  Intersected Partitions: [89]
Query 12  Cost:  12250  Intersected Partitions: [89]
Query 13  Cost:  10664  Intersected Partitions: [231]
Query 14  Cost:  13217  Intersected Partitions: [299]
Query 15  Cost:  12250  Intersected Partitions: [89]
Query 16  Cost:  12250  Intersected Partitions: [

In [31]:
# Number of kd-nodes after each stage of NORA.initialization(): AKD, then LKD, then KDT.
print(len(Nora.akd_kdnodes))
print(len(Nora.lkd_kdnodes))
print(len(Nora.kdt_kdnodes))

14
186
303


In [113]:
# Comparision Group
competitor_qdt_kdnodes = Nora.run_competitor_QDT()
competitor_kdt_kdnodes = Nora.run_competitor_KDT()

In [114]:
# Number of kd-nodes produced by each competitor.
print(len(competitor_qdt_kdnodes))
print(len(competitor_kdt_kdnodes))

49
224


In [115]:
# visualize_kdnodes_and_query_3(competitor_qdt_kdnodes, [], [], domains)
# visualize_kdnodes_and_query_3(competitor_kdt_kdnodes, [], [], domains)

In [122]:
# Evaluate both competitor layouts on the same testing queries used for NORA.
qdtree_cost = Query(testing_set, competitor_qdt_kdnodes, random_percentage)
print("\n = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = \n")
kdtree_cost = Query(testing_set, competitor_kdt_kdnodes, random_percentage)

Query 0  Cost:  1014900  Intersected Partitions: [11]
Query 1  Cost:  39780  Intersected Partitions: [39, 45]
Query 2  Cost:  1645113  Intersected Partitions: [4, 28]
Query 3  Cost:  1014900  Intersected Partitions: [11]
Query 4  Cost:  177470  Intersected Partitions: [7]
Query 5  Cost:  13409  Intersected Partitions: [41]
Query 6  Cost:  1527577  Intersected Partitions: [4]
Query 7  Cost:  1678164  Intersected Partitions: [8, 11]
Query 8  Cost:  1713638  Intersected Partitions: [4, 6, 29]
Query 9  Cost:  1527577  Intersected Partitions: [4]
Query 10  Cost:  346517  Intersected Partitions: [5, 9]
Query 11  Cost:  123072  Intersected Partitions: [9]
Query 12  Cost:  123072  Intersected Partitions: [9]
Query 13  Cost:  123072  Intersected Partitions: [9]
Query 14  Cost:  123072  Intersected Partitions: [9]
Query 15  Cost:  1014900  Intersected Partitions: [11]
Query 16  Cost:  123072  Intersected Partitions: [9]
Query 17  Cost:  1014900  Intersected Partitions: [11]
Query 18  Cost:  1014

In [23]:
# Competitor cost divided by NORA cost: a ratio above 1 means the competitor is more
# expensive than NORA on the testing queries, a ratio below 1 means it is cheaper.
cost_ratio_qdt = qdtree_cost / nora_cost
cost_ratio_kdt = kdtree_cost / nora_cost
print("cost_ratio_qdt: ", cost_ratio_qdt)
print("cost_ratio_kdt: ", cost_ratio_kdt)

cost_ratio_qdt:  2.4005328087446394
cost_ratio_kdt:  0.8522337520888736


In [71]:
def run_experiments(distribution_query = None, random_query = None):
    '''Run one NORA vs. QDT vs. KDT comparison and print the costs.

    When both query sets are omitted or empty, a fresh workload is generated with
    generate_query_with_random. Half of the expected number of random and distribution
    queries is used for training, the rest for testing.

    The function reads the notebook globals total_queries, random_percentage, domains,
    maximum_random_range, cluster_amount, maximum_range_dis, sigmas_percentage,
    random_shift, return_seperate, dimension_probability, block_size and dataset.

    @param distribution_query   distribution queries to reuse (default: generate new ones)
    @param random_query         random queries to reuse (default: generate new ones)
    @return (cost_ratio_qdt, cost_ratio_kdt, random_query, distribution_query), where each
            ratio is the competitor cost divided by the NORA cost
    '''
    # None instead of a shared mutable [] default; an explicit [] still behaves as before
    if distribution_query is None:
        distribution_query = []
    if random_query is None:
        random_query = []

    if len(distribution_query) == 0 and len(random_query) == 0:

        mixed_queris = generate_query_with_random(total_queries, random_percentage, domains, maximum_random_range, 
                                              cluster_amount, maximum_range_dis, sigmas_percentage, random_shift, 
                                              return_seperate, dimension_probability)
        random_query = mixed_queris[0]
        distribution_query = mixed_queris[1]

    # = = = Train / test split = = =
    random_segmentation = int(total_queries * random_percentage / 2)
    distribution_segmentation = int(total_queries * (1 - random_percentage) / 2)
    training_set = np.concatenate((random_query[0:random_segmentation], distribution_query[0:distribution_segmentation]), axis=0)
    testing_set = np.concatenate((random_query[random_segmentation:], distribution_query[distribution_segmentation:]), axis = 0)

    queryset = training_set
    data_threshold = block_size
    dim_probs = dimension_probability
    # one bucket count per dimension, so the histograms follow the number of dimensions in use
    buckets_per_dim = 500
    use_data_hist = False
    data_hist_buckets = [buckets_per_dim] * len(domains)
    data_hist_type = 0
    use_query_hist = True
    query_hist_buckets = [buckets_per_dim] * len(domains)
    query_hist_type = 1
    query_threshold_up = 1
    query_threshold_low = 1

    Nora = NORA(dataset, queryset, domains, data_threshold, dim_probs, use_data_hist, data_hist_buckets, data_hist_type,
           use_query_hist, query_hist_buckets, query_hist_type, query_threshold_up, query_threshold_low)

    nora_kdnodes = Nora.initialization()
    competitor_qdt_kdnodes = Nora.run_competitor_QDT()
    competitor_kdt_kdnodes = Nora.run_competitor_KDT()

    nora_cost = Query(testing_set, nora_kdnodes, random_percentage)
    print("\n = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = \n")
    qdtree_cost = Query(testing_set, competitor_qdt_kdnodes, random_percentage)
    print("\n = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = \n")
    kdtree_cost = Query(testing_set, competitor_kdt_kdnodes, random_percentage)
    print("\n = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = \n")

    cost_ratio_qdt = qdtree_cost / nora_cost
    cost_ratio_kdt = kdtree_cost / nora_cost
    print("cost nora: ", nora_cost)
    print("cost qdt: ", qdtree_cost)
    print("cost kdt: ", kdtree_cost)
    print("cost_ratio_qdt: ", cost_ratio_qdt)
    print("cost_ratio_kdt: ", cost_ratio_kdt)
    print("\n = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = \n")

    return cost_ratio_qdt, cost_ratio_kdt, random_query, distribution_query

In [66]:
from os import listdir
from os.path import isfile, join
from QueryGeneration import *

def load_exper_query(variation_index = 0, variation_value = 0, query_base_path = 'C:/Users/Cloud/iCloudDrive/NORA_experiments/queryset/prob1/'):
    '''Load every saved workload of one experiment variation.

    Looks in <query_base_path>/<variation name>/ for files named
    <variation name>_<variation_value>_distribution[...] and pairs each of them with the
    file of the same name in which 'distribution' is replaced by 'random'.

    @param variation_index    key into the variation table (0: default, 1: alpha, 2: num_dims,
                              3: prob_dims, 4: num_X)
    @param variation_value    value of the variation, compared as str(variation_value)
    @param query_base_path    directory that contains one sub-directory per variation
    @return list of [distribution_query, random_query] pairs, in sorted file-name order
    '''
    variation_dict = {0: 'default', 1: 'alpha', 2: 'num_dims', 3: 'prob_dims', 4: 'num_X'} # sub_directory
    variation_name = variation_dict[variation_index]
    directory = join(query_base_path, variation_name)

    # Match on the literal prefix rather than on '_'-separated parts: variation names such
    # as 'num_dims' contain '_' themselves and can never equal a single part.
    prefix = variation_name + '_' + str(variation_value) + '_'
    marker = 'distribution'

    workloads = []
    for f in sorted(listdir(directory)):
        if not f.startswith(prefix) or not isfile(join(directory, f)):
            continue

        suffix = f[len(prefix):]
        tail = suffix[len(marker):] # extra info and extension that follow 'distribution'
        if not suffix.startswith(marker) or (tail and tail[0] not in '_.'):
            continue # a random-query file or an unrelated file

        distribution_query_path = join(directory, f)
        random_query_path = join(directory, prefix + 'random' + tail)
        if not isfile(random_query_path):
            continue # no matching random-query file, so this is not a complete workload

        try:
            distribution_query = load_query(distribution_query_path)
            random_query = load_query(random_query_path)
        except (OSError, ValueError) as err:
            # stay best-effort for unreadable files, but do not hide the reason
            print('skip unreadable workload', f, ':', err)
            continue

        workloads.append([distribution_query, random_query])

    return workloads

In [67]:
# load_exper_query(1,2)

In [60]:
def save_exper_query(random_query, distribution_query, variation_index = 0, variation_value = 0, extra_info = '',
                     query_base_path = 'C:/Users/Cloud/iCloudDrive/NORA_experiments/queryset/prob1/'):
    ''' Save one (random, distribution) query-set pair of an experiment setting as two csv files.

    The files are written to <query_base_path>/<variation name>/ as
        <variation name>_<variation value>_random_<extra_info>.csv
        <variation name>_<variation value>_distribution_<extra_info>.csv
    The '_' before extra_info is always written, also when extra_info is empty.

    @param random_query        query set handed to save_query for the '_random' file
    @param distribution_query  query set handed to save_query for the '_distribution' file
    @param variation_index     key into the variation table (0: default, 1: alpha, 2: num_dims, 3: prob_dims, 4: num_X)
    @param variation_value     value of the varied parameter, written into the file name through str()
    @param extra_info          free text appended to the file name
    @param query_base_path     root directory that holds one sub-directory per variation name
    @raise KeyError            if variation_index is not in the variation table
    '''
    import os # local import: this cell must not depend on imports made by another cell

    variation_dict = {0: 'default', 1: 'alpha', 2: 'num_dims', 3: 'prob_dims', 4: 'num_X'} # sub_directory
    variation_name = variation_dict[variation_index]

    #if extra_info != '':
    extra_info = '_' + extra_info # easy for split

    # os.path.join copes with a base path that already ends in a separator (no 'prob1//...')
    target_directory = os.path.join(query_base_path, variation_name)
    file_stem = variation_name + '_' + str(variation_value)

    random_path = os.path.join(target_directory, file_stem + '_random' + extra_info + '.csv')
    distribution_path = os.path.join(target_directory, file_stem + '_distribution' + extra_info + '.csv')

    save_query(random_query, random_path) # mixed_queris[0]
    save_query(distribution_query, distribution_path) # mixed_queris[1]

In [140]:
variation_index = 3
variation_value = 50

# Search for the best query set: run the experiment repeatedly and keep the
# query sets whose qd-tree / kd-tree cost ratios pass one of the rules below.
# baseline_1 tracks the best qd-tree ratio kept so far, baseline_2 the best kd-tree ratio.
baseline_1 = 1.1
baseline_2 = 1.1
for i in range(50):
    print('process run',i,'...')
    cost_ratio_qdt, cost_ratio_kdt, random_query, distribution_query = run_experiments()

    beats_qdt = cost_ratio_qdt > baseline_1
    beats_kdt = cost_ratio_kdt > baseline_2

    if beats_qdt and beats_kdt:
        # better on both ratios: raise both baselines
        baseline_1, baseline_2 = cost_ratio_qdt, cost_ratio_kdt
        keep = True
    elif beats_qdt and cost_ratio_kdt >= 2:
        # new best qd-tree ratio, kd-tree ratio at least 2
        baseline_1 = cost_ratio_qdt
        keep = True
    elif cost_ratio_qdt >= 2 and beats_kdt:
        # new best kd-tree ratio, qd-tree ratio at least 2
        baseline_2 = cost_ratio_kdt
        keep = True
    else:
        # no baseline is touched; the run is still kept when both ratios reach 1.1
        # (original note: "too less" - presumably too few sets were kept otherwise)
        keep = cost_ratio_qdt >= 1.1 and cost_ratio_kdt >= 1.1

    if keep:
        # both ratios go into the file name
        save_exper_query(random_query, distribution_query, variation_index, variation_value,
                         str(cost_ratio_qdt)+'_'+str(cost_ratio_kdt))

process run 0 ...
Query 0  Cost:  197815  Intersected Partitions: [132, 173, 185, 202, 205, 304, 329, 340, 354, 373, 374, 394, 395]
Query 1  Cost:  487219  Intersected Partitions: [236, 242, 243, 245, 268, 270, 275, 276, 277, 278, 304, 305, 325, 328, 329, 330, 331, 339, 340, 347, 348, 355, 356, 373, 374, 376, 382, 383, 389, 392, 394, 395, 397, 402]
Query 2  Cost:  45618  Intersected Partitions: [123, 141, 181, 208]
Query 3  Cost:  1973570  Intersected Partitions: [110, 113, 114, 115, 116, 117, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 139, 140, 141, 143, 145, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 173, 175, 177, 178, 180, 181, 182, 185, 186, 187, 188, 189, 190, 191, 192, 194, 195, 196, 197, 198, 199, 200, 202, 206, 209, 210, 211, 212, 213, 214, 215, 226, 227, 229, 230, 245, 263, 264, 266, 271, 273, 281, 283, 286, 288, 292, 294, 297, 299, 301, 304, 307, 3

Query 0  Cost:  138582  Intersected Partitions: [242, 247, 255, 259, 260, 262, 266, 268, 271]
Query 1  Cost:  1828473  Intersected Partitions: [1, 4, 6, 17, 18, 26, 38, 39, 40, 44, 45, 51, 52, 93, 94, 95, 96, 97, 98, 99, 106, 107, 120, 121, 126, 127, 134, 137, 138, 145, 146, 153, 154, 155, 156, 161, 162, 171, 173, 174, 177, 178, 179, 184, 186, 188, 189, 190, 191, 192, 196, 197, 203, 217, 218, 220, 221, 222, 227, 228, 230, 231, 235, 237, 238, 239, 241, 243, 245, 246, 250, 254, 255, 256, 260, 263, 265, 266, 267, 270, 276, 277, 278, 279, 280, 282, 284, 286, 288, 289, 291, 293, 296, 298, 299, 301, 303, 304, 305, 306, 307, 308, 315, 316, 319, 321, 322, 323, 327, 328, 329, 331, 332, 334, 335, 337, 344, 345, 349, 353, 354, 362, 366, 367, 375, 377, 383, 384, 388, 391, 392, 393, 396]
Query 2  Cost:  658839  Intersected Partitions: [13, 20, 26, 33, 34, 35, 41, 50, 51, 105, 119, 149, 150, 161, 169, 170, 184, 187, 188, 189, 190, 191, 192, 197, 200, 202, 203, 214, 235, 236, 241, 244, 246, 251, 263,

Query 49  Cost:  616148  Intersected Partitions: [170, 171, 174, 175, 188, 189, 191, 193, 194, 195, 197, 199, 212, 213, 215, 217, 218, 219, 221, 223]
average partitions each query overlap(average):  46.22
average records each query retrieve(average):  1244609.12
total records that all the queries retrieve:  62230456
average distribution query cost: 1165616.3
average random query cost: 1560580.4

 = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 

cost nora:  907174.62
cost qdt:  1065370.5
cost kdt:  1244609.12
cost_ratio_qdt:  1.1743830531766861
cost_ratio_kdt:  1.3719620154276364

 = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 

process run 2 ...
Query 0  Cost:  2693052  Intersected Partitions: [4, 18, 23, 24, 33, 34, 35, 36, 42, 44, 45, 48, 51, 54, 56, 61, 62, 63, 66, 70, 71, 72, 73, 79, 83, 88, 91, 92, 93, 94, 95, 101, 103, 104, 105, 106, 114, 115, 116, 117, 118, 119, 121, 124, 125, 127, 129, 132, 133, 134, 135, 136, 1

Query 7  Cost:  555405  Intersected Partitions: [57, 59, 71, 74, 75, 83, 86, 87, 169, 171, 183, 186, 187, 195, 198, 199]
Query 8  Cost:  136734  Intersected Partitions: [12, 13, 24, 25, 124, 125, 136, 137]
Query 9  Cost:  2786511  Intersected Partitions: [0, 1, 2, 3, 4, 5, 6, 7, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115, 116, 117, 118, 119, 168, 169, 170, 171, 172, 173, 174, 175]
Query 10  Cost:  1500327  Intersected Partitions: [112, 113, 116, 117, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 168, 169, 172, 173, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211]
Query 11  Cost:  1232746  Intersected Partitions: [112, 113, 116, 117, 120, 121, 123, 125, 126, 127, 129, 131, 144, 145, 147, 149, 150, 151, 153, 155, 168, 169, 172, 173, 176, 177, 179, 181, 182, 183, 185, 187, 200, 201, 203, 205, 206, 207, 209, 211]
Query 12  Cost:  1232746  

Query 25  Cost:  240721  Intersected Partitions: [2, 10, 18, 26, 34, 42, 50, 58, 73, 81, 112, 125, 149, 200, 205, 212, 251]
Query 26  Cost:  145667  Intersected Partitions: [2, 6, 18, 22, 50, 54, 85, 125, 127, 205, 206]
Query 27  Cost:  171257  Intersected Partitions: [34, 221, 252, 257, 261]
Query 28  Cost:  678479  Intersected Partitions: [20, 23, 43, 46, 47, 52, 54, 55, 59, 69, 74, 83, 85, 86, 93, 94, 101, 107, 109, 114, 119, 120, 127, 141, 154, 158, 165, 171, 173, 175, 184, 185, 190, 192, 193, 194, 196, 197, 206, 234, 243, 258]
Query 29  Cost:  55644  Intersected Partitions: [34, 73, 125, 200]
Query 30  Cost:  1230316  Intersected Partitions: [12, 19, 22, 32, 33, 34, 38, 43, 44, 51, 59, 64, 72, 75, 79, 81, 82, 85, 90, 97, 101, 104, 106, 107, 111, 112, 113, 116, 118, 137, 138, 144, 150, 159, 163, 168, 169, 176, 178, 184, 185, 187, 188, 189, 192, 193, 203, 217, 218, 225, 226, 230, 235, 242, 252, 256, 261]
Query 31  Cost:  153586  Intersected Partitions: [46, 54, 85, 93, 101, 127, 158

Query 0  Cost:  96135  Intersected Partitions: [12, 27, 179, 197, 224, 250, 274, 325]
Query 1  Cost:  331508  Intersected Partitions: [35, 84, 86, 118, 124, 127, 143, 163, 164, 166, 167, 210, 215, 240, 245, 264, 271, 272, 282, 297, 318, 323, 337, 340]
Query 2  Cost:  1405750  Intersected Partitions: [9, 12, 17, 19, 24, 31, 35, 38, 39, 41, 46, 57, 59, 64, 66, 77, 80, 81, 83, 85, 86, 91, 95, 97, 102, 105, 118, 123, 129, 133, 135, 140, 143, 155, 157, 159, 162, 168, 171, 172, 176, 177, 186, 192, 194, 195, 198, 199, 210, 214, 216, 223, 225, 227, 229, 230, 231, 257, 264, 266, 269, 273, 274, 279, 280, 281, 282, 283, 290, 299, 301, 306, 309, 311, 312, 316, 317, 318, 319, 320, 323, 324, 326, 327, 342, 353, 356, 360, 361, 362, 363, 367, 368, 369, 370, 371, 372, 374, 375, 376, 377, 378, 379, 383, 387, 388]
Query 3  Cost:  628344  Intersected Partitions: [48, 55, 75, 83, 88, 107, 114, 116, 123, 125, 127, 131, 148, 150, 159, 164, 167, 179, 184, 193, 208, 213, 214, 218, 238, 242, 245, 251, 261, 263,

Query 41  Cost:  6001309  Intersected Partitions: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211,

Query 44  Cost:  820083  Intersected Partitions: [9, 11, 15, 25, 27, 29, 31, 37, 41, 43, 47, 57, 59, 61, 63, 72, 78, 83, 86, 88, 98, 107, 117, 119, 121, 123, 129, 136, 138, 140, 145, 156, 158, 162, 188, 207, 215, 219, 224, 227, 243, 252, 255, 269, 284, 286, 306]
Query 45  Cost:  51568  Intersected Partitions: [22, 114, 208]
Query 46  Cost:  155980  Intersected Partitions: [11, 12, 14, 15, 31, 43, 78, 91, 170, 216, 237]
Query 47  Cost:  94544  Intersected Partitions: [14, 78, 81, 83, 106, 107, 133]
Query 48  Cost:  1035479  Intersected Partitions: [3, 7, 20, 22, 27, 28, 39, 46, 47, 52, 55, 60, 62, 63, 67, 70, 84, 88, 96, 99, 106, 107, 111, 112, 114, 119, 122, 123, 134, 138, 139, 149, 158, 159, 161, 162, 165, 175, 186, 189, 190, 196, 200, 201, 205, 208, 225, 231, 240, 242, 251, 260, 266, 270, 273, 276, 277, 280, 289, 300, 308, 323]
Query 49  Cost:  47030  Intersected Partitions: [42, 64, 66, 150]
average partitions each query overlap(average):  57.44
average records each query retrieve(a

Query 0  Cost:  1701426  Intersected Partitions: [25, 27, 29, 30, 31, 32, 33, 34, 40, 42, 43, 54, 59, 60, 99, 100, 104, 110, 112, 113, 119, 120, 125, 127, 130, 131, 136, 137, 140, 141, 142, 145, 149, 157, 158, 160, 163, 164, 171, 172, 179, 183, 188, 189, 196, 197, 198, 202, 203, 207, 208, 212, 213, 217, 218, 219, 220, 221, 222, 223, 226, 229, 236, 237, 240, 241, 245, 246, 248, 249, 255, 256, 260, 263, 264, 279, 282, 290, 291, 294, 295, 296, 299, 301, 303, 316, 318, 323, 324, 325, 329, 333, 334, 336, 337, 338, 341, 344, 350]
Query 1  Cost:  104969  Intersected Partitions: [104, 131, 212, 290]
Query 2  Cost:  128927  Intersected Partitions: [31, 33, 142, 144, 223, 325, 344]
Query 3  Cost:  5670223  Intersected Partitions: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,

Query 0  Cost:  157916  Intersected Partitions: [218, 245, 249, 252, 303, 313, 321]
Query 1  Cost:  556663  Intersected Partitions: [222, 224, 226, 227, 228, 230, 231, 232, 266, 268, 270, 272, 273, 274, 276, 281, 283, 285, 287, 288, 289, 293, 295, 297, 314, 316, 318]
Query 2  Cost:  874573  Intersected Partitions: [0, 1, 2, 3, 9, 16, 17, 18, 24, 30, 35, 39, 52, 53, 58, 63, 73, 74, 79, 80, 83, 86, 91, 95, 96, 97, 98, 99, 101, 103, 106, 107, 112, 160, 162, 166, 168, 179, 180, 215, 217, 221, 242, 244, 246, 248, 251, 252, 300, 302]
Query 3  Cost:  597943  Intersected Partitions: [8, 14, 19, 21, 33, 34, 44, 59, 66, 67, 72, 84, 88, 95, 101, 102, 136, 137, 138, 139, 156, 158, 159, 178, 203, 213, 233, 234, 235, 236, 269, 270, 271, 272, 275, 276, 305]
Query 4  Cost:  13346  Intersected Partitions: [204]
Query 5  Cost:  129099  Intersected Partitions: [7, 58, 83, 97, 99, 131, 132, 204, 206]
Query 6  Cost:  2192276  Intersected Partitions: [0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 16, 17, 18, 1

Query 0  Cost:  32652  Intersected Partitions: [320, 394]
Query 1  Cost:  337069  Intersected Partitions: [4, 12, 14, 35, 46, 54, 105, 106, 112, 126, 145, 152, 153, 158, 164, 171, 175, 185, 191, 228, 236, 240, 246, 251]
Query 2  Cost:  712095  Intersected Partitions: [4, 11, 12, 14, 16, 20, 35, 38, 46, 53, 54, 64, 105, 106, 114, 116, 126, 143, 145, 152, 153, 158, 159, 164, 166, 175, 185, 186, 191, 195, 207, 225, 227, 228, 229, 230, 231, 236, 240, 245, 246, 251, 277, 278, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289]
Query 3  Cost:  5670223  Intersected Partitions: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106,

Query 47  Cost:  52689  Intersected Partitions: [4, 32, 33, 88]
Query 48  Cost:  594693  Intersected Partitions: [0, 3, 4, 12, 16, 19, 20, 32, 33, 35, 36, 43, 47, 48, 51, 52, 63, 65, 66, 77, 101, 102, 130, 136, 137, 146, 155, 164, 166, 174, 197, 205, 213, 234, 235, 238, 239, 259]
Query 49  Cost:  495600  Intersected Partitions: [0, 2, 6, 8, 16, 18, 22, 24, 32, 34, 37, 64, 69, 75, 79, 115, 121, 122, 128, 131, 154, 157, 184, 189, 194, 204, 225, 227, 228, 233, 236, 248, 249, 285, 286, 297]
average partitions each query overlap(average):  48.66
average records each query retrieve(average):  847271.22
total records that all the queries retrieve:  42363561
average distribution query cost: 909409.65
average random query cost: 598717.5

 = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 

Query 0  Cost:  243769  Intersected Partitions: [169, 173, 183, 187, 206, 209]
Query 1  Cost:  553903  Intersected Partitions: [0, 2, 9, 12, 13, 21, 24, 25, 112, 114, 121, 124, 1

Query 25  Cost:  5670223  Intersected Partitions: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211,

Query 0  Cost:  1578187  Intersected Partitions: [278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 347, 348, 349, 350, 351, 352, 355, 357, 359, 361, 363, 364, 365, 366, 367, 369, 370, 373, 374, 378, 379, 380, 381, 382, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 396, 397, 398, 399, 400, 403, 404, 405, 406, 407, 408, 412, 413, 414, 415, 416]
Query 1  Cost:  1496531  Intersected Partitions: [278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 290, 291, 293, 294, 295, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 308, 309, 310, 311, 314, 315, 317, 319, 320, 321, 322, 323, 324, 327, 328, 329, 331, 332, 333, 334, 335, 336, 338, 339, 340, 341, 342, 343, 345, 346, 347, 348, 349, 350, 351, 352, 353, 

Query 37  Cost:  979977  Intersected Partitions: [4, 5, 6, 7, 33, 37, 39, 43, 45, 48, 51, 54, 60, 61, 62, 63, 89, 93, 95, 99, 101, 104, 107, 110]
Query 38  Cost:  1232746  Intersected Partitions: [112, 113, 116, 117, 120, 121, 123, 125, 126, 127, 129, 131, 144, 145, 147, 149, 150, 151, 153, 155, 168, 169, 172, 173, 176, 177, 179, 181, 182, 183, 185, 187, 200, 201, 203, 205, 206, 207, 209, 211]
Query 39  Cost:  489417  Intersected Partitions: [4, 5, 33, 37, 39, 43, 60, 61, 89, 93, 95, 99]
Query 40  Cost:  121894  Intersected Partitions: [116, 145, 149]
Query 41  Cost:  1232746  Intersected Partitions: [112, 113, 116, 117, 120, 121, 123, 125, 126, 127, 129, 131, 144, 145, 147, 149, 150, 151, 153, 155, 168, 169, 172, 173, 176, 177, 179, 181, 182, 183, 185, 187, 200, 201, 203, 205, 206, 207, 209, 211]
Query 42  Cost:  349151  Intersected Partitions: [61, 63, 173, 175]
Query 43  Cost:  348640  Intersected Partitions: [5, 7, 117, 119]
Query 44  Cost:  348812  Intersected Partitions: [118, 11

Query 18  Cost:  1353708  Intersected Partitions: [15, 23, 29, 36, 37, 42, 44, 46, 47, 52, 53, 55, 58, 60, 61, 68, 71, 89, 92, 93, 94, 99, 101, 104, 106, 108, 109, 113, 114, 116, 119, 121, 122, 124, 135, 141, 149, 154, 156, 160, 161, 163, 165, 167, 168, 176, 182, 187, 191, 192, 196, 198, 202, 204, 208, 209, 210, 211, 213, 214, 215, 227, 228, 229, 238, 240, 242, 254, 262, 263, 274, 280, 283, 286, 287, 289, 290, 318, 329, 335]
Query 19  Cost:  475738  Intersected Partitions: [12, 18, 22, 26, 30, 50, 54, 72, 76, 85, 87, 89, 91, 106, 115, 117, 119, 121, 123, 143, 151, 162, 165, 177, 188, 190, 191, 199, 202, 211, 213, 214, 256, 261, 275, 289, 327]
Query 20  Cost:  206750  Intersected Partitions: [4, 5, 7, 12, 14, 30, 36, 39, 42, 76, 89, 104, 128]
Query 21  Cost:  6001309  Intersected Partitions: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,

Query 0  Cost:  386163  Intersected Partitions: [19, 26, 39, 47, 65, 73, 74, 81, 96, 97, 98, 99, 115, 128, 136, 137, 140, 149, 179, 209, 210, 211, 212, 294, 309, 310, 378]
Query 1  Cost:  136549  Intersected Partitions: [243, 288, 355, 356, 364, 365, 384, 387, 388]
Query 2  Cost:  48401  Intersected Partitions: [79, 202, 209, 228]
Query 3  Cost:  2730501  Intersected Partitions: [0, 3, 5, 7, 8, 10, 11, 13, 15, 18, 20, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 36, 38, 40, 45, 46, 48, 49, 50, 51, 53, 55, 57, 58, 61, 62, 63, 64, 66, 68, 70, 72, 75, 76, 77, 78, 80, 82, 83, 84, 85, 90, 92, 93, 95, 100, 103, 104, 106, 107, 109, 110, 111, 114, 116, 125, 126, 127, 129, 134, 135, 138, 139, 141, 145, 147, 148, 150, 153, 154, 156, 157, 159, 160, 161, 162, 166, 167, 168, 169, 174, 175, 176, 177, 180, 182, 184, 185, 186, 187, 188, 189, 190, 194, 195, 203, 205, 206, 207, 217, 218, 223, 225, 226, 227, 229, 230, 231, 232, 234, 235, 236, 237, 238, 240, 246, 247, 248, 253, 254, 256, 257, 259, 262, 263, 26

Query 6  Cost:  2786511  Intersected Partitions: [0, 1, 2, 3, 4, 5, 6, 7, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115, 116, 117, 118, 119, 168, 169, 170, 171, 172, 173, 174, 175]
Query 7  Cost:  6001309  Intersected Partitions: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174,

Query 0  Cost:  28025  Intersected Partitions: [345, 364]
Query 1  Cost:  214611  Intersected Partitions: [276, 278, 283, 284, 286, 296, 299, 306, 313, 320, 321, 323, 330, 331, 334, 335]
Query 2  Cost:  1636123  Intersected Partitions: [273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 379, 380, 381, 386, 387, 388, 389, 390, 391, 393, 394, 395, 396, 397, 398, 399, 400, 403, 404, 405]
Query 3  Cost:  65705  Intersected Partitions: [33, 102, 224, 373]
Query 4  Cost:  375944  Intersected Partitions: [0, 1, 6, 14, 16, 18, 23, 26, 34, 43, 44, 88, 11

Query 0  Cost:  163381  Intersected Partitions: [215, 217, 223, 226, 233, 242, 245, 251, 256, 262, 278]
Query 1  Cost:  421441  Intersected Partitions: [50, 51, 53, 55, 57, 59, 60, 61, 62, 66, 67, 68, 69, 70, 71, 72, 73, 74, 77, 79, 81, 82, 83, 85, 86, 87, 89, 92, 93, 94]
Query 2  Cost:  1873492  Intersected Partitions: [207, 208, 210, 211, 212, 215, 217, 219, 220, 221, 223, 224, 226, 228, 229, 230, 233, 234, 235, 239, 241, 242, 243, 245, 247, 248, 251, 253, 254, 255, 256, 257, 258, 259, 262, 263, 265, 266, 268, 270, 271, 272, 275, 276, 277, 278, 279, 280, 282, 283, 286, 287, 288, 290, 291, 292, 295, 296, 297, 298, 299, 302, 304, 306, 307, 308, 309, 311, 313, 314, 315, 318, 320, 322, 323, 325, 326, 327, 328, 329, 330, 332, 333, 336, 337, 338, 339, 341, 343, 344, 345, 346, 347, 350, 351, 353, 354, 356, 357, 358, 360, 361, 362, 363, 365, 366, 367, 370, 371, 372, 374, 375, 378, 380, 381, 382, 383, 384, 386, 387, 388, 390, 391, 392, 393, 396, 397, 398, 399, 400, 401, 402]
Query 3  Cost:  4

Query 0  Cost:  578459  Intersected Partitions: [211, 214, 217, 218, 219, 221, 222, 223, 225, 230, 232, 233, 234, 235, 237, 238, 243, 244, 247, 248, 250, 251, 252, 253, 254, 311, 312, 315, 316, 319, 320, 324, 325, 328, 334, 338, 343, 349, 350, 353, 354, 356]
Query 1  Cost:  5670223  Intersected Partitions: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 1

Query 0  Cost:  96700  Intersected Partitions: [196, 197, 198, 199, 206, 311, 323, 338]
Query 1  Cost:  114546  Intersected Partitions: [12, 13, 17, 28, 49, 51, 61, 86]
Query 2  Cost:  1599330  Intersected Partitions: [26, 37, 38, 39, 46, 47, 48, 55, 57, 63, 65, 66, 68, 70, 71, 72, 73, 74, 75, 79, 82, 83, 84, 85, 86, 89, 90, 91, 92, 93, 95, 96, 97, 99, 100, 102, 103, 104, 109, 113, 114, 120, 123, 125, 128, 129, 138, 141, 142, 148, 149, 150, 151, 152, 153, 156, 174, 175, 176, 181, 182, 183, 184, 196, 197, 198, 199, 200, 201, 205, 206, 207, 208, 233, 234, 235, 236, 238, 242, 246, 250, 273, 274, 275, 276, 288, 289, 290, 291, 296, 297, 298, 299, 311, 312, 322, 323, 324, 325, 327, 328, 329, 330, 332, 338, 340, 341, 344, 346, 347, 348, 349]
Query 3  Cost:  30971  Intersected Partitions: [163, 170]
Query 4  Cost:  204541  Intersected Partitions: [3, 6, 14, 15, 18, 37, 40, 42, 53, 67, 78, 83, 84, 99]
Query 5  Cost:  490812  Intersected Partitions: [19, 47, 48, 57, 58, 69, 73, 75, 79, 89, 90, 9

Query 4  Cost:  135657  Intersected Partitions: [60, 88, 90, 91]
Query 5  Cost:  964839  Intersected Partitions: [0, 2, 4, 6, 8, 9, 20, 21, 32, 33, 44, 45, 112, 114, 116, 118, 120, 121, 132, 133, 144, 145, 156, 157]
Query 6  Cost:  6001309  Intersected Partitions: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168,

Query 0  Cost:  1034995  Intersected Partitions: [13, 18, 30, 31, 36, 45, 47, 52, 83, 89, 94, 95, 101, 110, 141, 142, 146, 150, 154, 180, 194, 195, 212, 213, 217, 225, 227, 249, 251, 255, 256, 257, 258, 261, 278, 281, 282, 297, 298, 301, 303, 304, 305, 309, 317, 318, 323, 324, 332, 334, 339, 340, 341, 342, 343, 345, 346, 362, 363, 366, 369, 370, 372, 373, 374, 384, 385, 387, 388, 389, 395, 396, 397, 398, 403, 404]
Query 1  Cost:  26285  Intersected Partitions: [16, 136]
Query 2  Cost:  29106  Intersected Partitions: [5, 7]
Query 3  Cost:  94171  Intersected Partitions: [3, 7, 12, 39, 43, 114, 198]
Query 4  Cost:  128346  Intersected Partitions: [52, 227, 258, 282, 318, 323, 374, 385, 389]
Query 5  Cost:  695045  Intersected Partitions: [0, 4, 8, 9, 18, 22, 26, 30, 31, 36, 40, 44, 51, 58, 62, 63, 71, 77, 79, 85, 90, 109, 116, 118, 129, 134, 140, 146, 149, 161, 172, 211, 212, 213, 217, 220, 222, 224, 230, 240, 255, 256, 260, 317, 318, 337, 340, 342]
Query 6  Cost:  203371  Intersected Pa

Query 0  Cost:  1844923  Intersected Partitions: [0, 8, 11, 12, 13, 18, 20, 23, 24, 27, 28, 29, 30, 31, 32, 34, 47, 50, 51, 54, 57, 59, 60, 61, 62, 64, 65, 67, 68, 69, 70, 71, 73, 74, 75, 77, 78, 79, 83, 84, 88, 89, 94, 96, 97, 98, 99, 100, 101, 104, 105, 120, 121, 130, 134, 135, 138, 141, 145, 146, 148, 149, 156, 157, 174, 179, 180, 181, 182, 183, 184, 185, 194, 195, 204, 206, 207, 209, 210, 211, 212, 216, 221, 222, 224, 225, 227, 228, 229, 230, 231, 236, 239, 241, 243, 244, 245, 248, 249, 250, 252, 253, 254, 256, 257, 258, 259, 260, 263, 267, 273, 274, 276, 277, 278, 283, 288, 292, 297, 298, 315, 317, 319, 320, 322, 327, 329, 332, 340, 343, 349, 355, 359, 364, 366, 368, 379]
Query 1  Cost:  626705  Intersected Partitions: [1, 7, 26, 37, 64, 65, 66, 68, 69, 70, 79, 80, 95, 106, 114, 115, 136, 140, 161, 175, 188, 189, 200, 210, 215, 226, 239, 240, 256, 257, 258, 266, 270, 271, 287, 291, 301, 318, 321, 337, 338, 354, 362, 372, 382, 390, 397, 412]
Query 2  Cost:  1800789  Intersected Par

Query 11  Cost:  141567  Intersected Partitions: [36, 37, 48, 49, 148, 149, 160, 161]
Query 12  Cost:  268710  Intersected Partitions: [120, 121, 126, 127, 144, 145, 150, 151, 176, 177, 182, 183, 200, 201, 206, 207]
Query 13  Cost:  141567  Intersected Partitions: [36, 37, 48, 49, 148, 149, 160, 161]
Query 14  Cost:  1232739  Intersected Partitions: [112, 113, 116, 117, 120, 121, 122, 124, 126, 127, 128, 130, 144, 145, 146, 148, 150, 151, 152, 154, 168, 169, 172, 173, 176, 177, 178, 180, 182, 183, 184, 186, 200, 201, 202, 204, 206, 207, 208, 210]
Query 15  Cost:  6001309  Intersected Partitions: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 1

Query 0  Cost:  274727  Intersected Partitions: [15, 36, 131, 208, 211, 251, 258, 259, 285, 301, 305, 319, 322, 357, 362, 392, 410, 411]
Query 1  Cost:  5670223  Intersected Partitions: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184

Query 44  Cost:  2465233  Intersected Partitions: [2, 3, 6, 7, 20, 21, 23, 25, 26, 27, 29, 31, 44, 45, 47, 49, 50, 51, 53, 55, 58, 59, 62, 63, 76, 77, 79, 81, 82, 83, 85, 87, 100, 101, 103, 105, 106, 107, 109, 111, 112, 113, 116, 117, 120, 121, 122, 124, 126, 127, 128, 130, 144, 145, 146, 148, 150, 151, 152, 154, 168, 169, 172, 173, 176, 177, 178, 180, 182, 183, 184, 186, 200, 201, 202, 204, 206, 207, 208, 210]
Query 45  Cost:  347950  Intersected Partitions: [56, 58, 168, 170]
Query 46  Cost:  6001309  Intersected Partitions: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 1

Query 17  Cost:  521683  Intersected Partitions: [7, 13, 15, 17, 23, 29, 31, 33, 39, 41, 45, 47, 55, 57, 61, 63, 71, 80, 86, 88, 92, 94, 107, 111, 115, 120, 122, 144, 154, 191, 197, 199, 208, 246]
Query 18  Cost:  889931  Intersected Partitions: [19, 34, 37, 38, 39, 45, 56, 72, 82, 89, 90, 93, 97, 100, 102, 107, 115, 119, 121, 141, 146, 149, 151, 161, 164, 169, 172, 173, 174, 178, 183, 190, 221, 225, 234, 249, 257]
Query 19  Cost:  835684  Intersected Partitions: [2, 3, 6, 7, 14, 15, 18, 19, 30, 31, 34, 35, 38, 39, 50, 51, 63, 66, 70, 71, 77, 78, 81, 93, 97, 101, 102, 124, 125, 128, 129, 143, 145, 156, 159, 160, 168, 175, 178, 195, 200, 206, 213, 215, 228, 237, 240, 247]
Query 20  Cost:  610431  Intersected Partitions: [2, 3, 6, 7, 14, 15, 18, 19, 30, 31, 34, 35, 38, 39, 50, 51, 63, 70, 71, 77, 78, 81, 93, 101, 102, 124, 125, 129, 143, 145, 159, 160, 168, 175, 195, 200, 213]
Query 21  Cost:  2493449  Intersected Partitions: [9, 20, 21, 24, 28, 29, 36, 41, 44, 45, 53, 56, 57, 60, 61, 68

Query 24  Cost:  2142489  Intersected Partitions: [10, 11, 12, 13, 16, 17, 18, 19, 22, 23, 24, 25, 28, 29, 30, 31, 34, 35, 36, 37, 40, 41, 42, 43, 46, 47, 48, 49, 52, 53, 54, 55, 66, 67, 68, 69, 72, 73, 74, 75, 78, 79, 80, 81, 84, 85, 86, 87, 90, 91, 92, 93, 96, 97, 98, 99, 102, 103, 104, 105, 108, 109, 110, 111, 122, 123, 124, 125, 128, 129, 130, 131, 134, 135, 136, 137, 140, 141, 142, 143, 146, 147, 148, 149, 152, 153, 154, 155, 158, 159, 160, 161, 164, 165, 166, 167, 178, 179, 180, 181, 184, 185, 186, 187, 190, 191, 192, 193, 196, 197, 198, 199, 202, 203, 204, 205, 208, 209, 210, 211, 214, 215, 216, 217, 220, 221, 222, 223]
Query 25  Cost:  154084  Intersected Partitions: [118, 156, 157, 158, 160]
Query 26  Cost:  1120621  Intersected Partitions: [4, 5, 6, 7, 33, 36, 37, 39, 42, 43, 45, 48, 49, 51, 54, 55, 116, 117, 118, 119, 145, 148, 149, 151, 154, 155, 157, 160, 161, 163, 166, 167]
Query 27  Cost:  17470  Intersected Partitions: [133]
Query 28  Cost:  1072309  Intersected Partiti

Query 41  Cost:  556252  Intersected Partitions: [56, 58, 65, 68, 69, 77, 80, 81, 168, 170, 177, 180, 181, 189, 192, 193]
Query 42  Cost:  1081159  Intersected Partitions: [12, 13, 18, 19, 24, 25, 30, 31, 34, 35, 40, 41, 46, 47, 52, 53, 68, 69, 74, 75, 80, 81, 86, 87, 90, 91, 96, 97, 102, 103, 108, 109, 124, 125, 130, 131, 136, 137, 142, 143, 146, 147, 152, 153, 158, 159, 164, 165, 180, 181, 186, 187, 192, 193, 198, 199, 202, 203, 208, 209, 214, 215, 220, 221]
Query 43  Cost:  2465233  Intersected Partitions: [2, 3, 6, 7, 20, 21, 23, 25, 26, 27, 29, 31, 44, 45, 47, 49, 50, 51, 53, 55, 58, 59, 62, 63, 76, 77, 79, 81, 82, 83, 85, 87, 100, 101, 103, 105, 106, 107, 109, 111, 112, 113, 116, 117, 120, 121, 122, 124, 126, 127, 128, 130, 144, 145, 146, 148, 150, 151, 152, 154, 168, 169, 172, 173, 176, 177, 178, 180, 182, 183, 184, 186, 200, 201, 202, 204, 206, 207, 208, 210]
Query 44  Cost:  562294  Intersected Partitions: [36, 37, 42, 43, 48, 49, 54, 55, 92, 93, 98, 99, 104, 105, 110, 111, 14

Query 0  Cost:  306741  Intersected Partitions: [107, 133, 185, 237, 274, 275, 276, 277, 278, 297, 302, 303, 330, 338, 339, 349, 350, 351, 352, 353, 354, 363]
Query 1  Cost:  11410  Intersected Partitions: [338]
Query 2  Cost:  585700  Intersected Partitions: [24, 25, 30, 31, 80, 81, 84, 85, 90, 162, 163, 166, 167, 170, 224, 225, 230, 295, 335]
Query 3  Cost:  11778  Intersected Partitions: [222]
Query 4  Cost:  2537289  Intersected Partitions: [0, 1, 2, 6, 7, 11, 13, 16, 18, 23, 24, 25, 29, 30, 31, 37, 38, 40, 41, 44, 48, 49, 50, 54, 55, 61, 63, 64, 68, 69, 72, 80, 81, 84, 85, 89, 90, 94, 95, 98, 101, 103, 108, 110, 113, 115, 119, 120, 121, 122, 126, 130, 132, 134, 138, 139, 142, 143, 147, 148, 151, 152, 155, 162, 163, 166, 167, 170, 174, 176, 177, 180, 182, 186, 188, 191, 195, 197, 198, 199, 202, 206, 210, 212, 213, 215, 217, 219, 220, 223, 224, 225, 230, 232, 233, 236, 237, 240, 242, 243, 246, 250, 251, 252, 254, 255, 259, 260, 263, 265, 267, 270, 275, 276, 277, 281, 282, 284, 288, 

Query 0  Cost:  614058  Intersected Partitions: [100, 105, 107, 111, 114, 116, 119, 121, 123, 126, 129, 134, 135, 137, 141, 151, 152, 162, 171, 175, 177, 182, 186, 200, 206, 207, 209, 210, 217, 221, 227, 229, 233, 237, 241, 243, 250, 257, 263, 291, 296, 334]
Query 1  Cost:  5670223  Intersected Partitions: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 1

Query 0  Cost:  5670223  Intersected Partitions: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 

Query 0  Cost:  65803  Intersected Partitions: [321, 331, 332, 339, 340]
Query 1  Cost:  744627  Intersected Partitions: [202, 203, 204, 205, 206, 207, 208, 209, 211, 212, 214, 215, 216, 217, 218, 220, 221, 224, 225, 227, 228, 230, 231, 232, 234, 235, 236, 237, 238, 239, 240, 241, 245, 247, 248, 249, 250, 251, 253, 257, 262, 263, 264, 265, 269, 270, 283, 287, 289, 290, 292, 298, 299]
Query 2  Cost:  5670223  Intersected Partitions: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134

Query 0  Cost:  442896  Intersected Partitions: [19, 20, 32, 42, 43, 72, 81, 82, 101, 116, 124, 143, 156, 157, 161, 163, 165, 183, 194, 195, 215, 216, 241, 248, 260, 262, 283, 284, 292, 303, 310, 320, 327, 366]
Query 1  Cost:  55135  Intersected Partitions: [40, 54, 72, 135]
Query 2  Cost:  438998  Intersected Partitions: [17, 18, 23, 30, 34, 40, 41, 70, 75, 79, 80, 99, 100, 107, 114, 118, 122, 145, 146, 149, 152, 160, 186, 190, 206, 226, 231, 236, 252, 275, 296]
Query 3  Cost:  5670223  Intersected Partitions: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 11

Query 22  Cost:  530348  Intersected Partitions: [34, 35, 40, 41, 46, 47, 52, 53, 90, 91, 96, 97, 102, 103, 108, 109, 146, 147, 152, 153, 158, 159, 164, 165, 202, 203, 208, 209, 214, 215, 220, 221]
Query 23  Cost:  119274  Intersected Partitions: [61, 94, 96]
Query 24  Cost:  249511  Intersected Partitions: [8, 14, 20, 26, 64, 70, 76, 82, 120, 126, 132, 138, 176, 182, 188, 194]
Query 25  Cost:  3000656  Intersected Partitions: [1, 3, 5, 7, 14, 15, 16, 17, 18, 19, 26, 27, 28, 29, 30, 31, 38, 39, 40, 41, 42, 43, 50, 51, 52, 53, 54, 55, 56, 58, 60, 62, 64, 65, 66, 67, 68, 69, 76, 77, 78, 79, 80, 81, 88, 89, 90, 91, 92, 93, 100, 101, 102, 103, 104, 105, 113, 115, 117, 119, 126, 127, 128, 129, 130, 131, 138, 139, 140, 141, 142, 143, 150, 151, 152, 153, 154, 155, 162, 163, 164, 165, 166, 167, 168, 170, 172, 174, 176, 177, 178, 179, 180, 181, 188, 189, 190, 191, 192, 193, 200, 201, 202, 203, 204, 205, 212, 213, 214, 215, 216, 217]
Query 26  Cost:  2241614  Intersected Partitions: [4, 5, 6, 7,

Query 11  Cost:  222169  Intersected Partitions: [0, 8, 18, 19, 33, 49, 50, 63, 80, 91, 119, 145, 200, 237, 263]
Query 12  Cost:  614229  Intersected Partitions: [12, 28, 34, 38, 42, 43, 44, 52, 59, 60, 74, 76, 85, 86, 94, 95, 106, 113, 114, 139, 147, 160, 171, 202, 213, 219, 225, 231, 248, 249, 257, 264, 272, 280, 285]
Query 13  Cost:  177021  Intersected Partitions: [12, 28, 44, 52, 59, 60, 74, 94, 106, 113, 114, 139]
Query 14  Cost:  980435  Intersected Partitions: [45, 100, 114, 116, 132, 142, 151, 156, 159, 169, 175, 177, 186, 211, 221, 224, 233, 238, 239, 243, 244, 247, 249, 253, 259, 260, 265, 271, 277, 282, 286, 288, 290]
Query 15  Cost:  1901000  Intersected Partitions: [14, 16, 20, 28, 30, 45, 47, 54, 55, 56, 61, 70, 89, 95, 98, 100, 102, 105, 108, 113, 114, 115, 116, 126, 132, 135, 136, 140, 142, 144, 146, 151, 155, 156, 159, 161, 165, 166, 169, 170, 171, 175, 176, 177, 179, 181, 183, 186, 189, 195, 199, 206, 210, 211, 215, 217, 220, 221, 224, 230, 232, 233, 238, 239, 242, 2

Query 0  Cost:  406193  Intersected Partitions: [4, 6, 9, 30, 55, 77, 95, 101, 148, 149, 150, 167, 180, 225, 234, 255, 264, 272, 301, 314, 315, 320, 321, 333, 334, 338, 339]
Query 1  Cost:  1017254  Intersected Partitions: [14, 16, 23, 32, 39, 41, 45, 59, 61, 66, 67, 69, 80, 81, 87, 90, 93, 114, 115, 117, 138, 143, 144, 147, 152, 162, 163, 171, 173, 176, 177, 179, 186, 187, 188, 189, 196, 197, 207, 208, 209, 210, 214, 215, 217, 222, 226, 229, 231, 252, 253, 254, 261, 262, 265, 268, 269, 270, 271, 279, 280, 281, 282, 287, 294, 295, 296, 297, 299, 300, 304, 307, 313, 317, 332, 337, 348, 350]
Query 2  Cost:  75341  Intersected Partitions: [239, 240, 265, 276, 277, 302]
Query 3  Cost:  59915  Intersected Partitions: [6, 50, 61, 142]
Query 4  Cost:  887527  Intersected Partitions: [10, 44, 56, 97, 98, 99, 100, 121, 124, 125, 126, 127, 134, 135, 136, 137, 155, 156, 157, 158, 160, 168, 184, 191, 238, 241, 244, 245, 259, 274, 275, 276, 277, 285, 289, 292, 302, 311, 312, 323, 326, 327, 328, 340

Query 0  Cost:  229428  Intersected Partitions: [264, 267, 270, 271, 272, 273, 276, 277, 278, 279, 283, 284, 286, 287, 288, 289, 290, 291]
Query 1  Cost:  73927  Intersected Partitions: [312, 316, 324, 334, 343, 345]
Query 2  Cost:  5670223  Intersected Partitions: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168

Query 43  Cost:  283747  Intersected Partitions: [33, 39, 45, 51, 89, 95, 101, 107, 145, 151, 157, 163, 201, 207, 213, 219]
Query 44  Cost:  2142489  Intersected Partitions: [10, 11, 12, 13, 16, 17, 18, 19, 22, 23, 24, 25, 28, 29, 30, 31, 34, 35, 36, 37, 40, 41, 42, 43, 46, 47, 48, 49, 52, 53, 54, 55, 66, 67, 68, 69, 72, 73, 74, 75, 78, 79, 80, 81, 84, 85, 86, 87, 90, 91, 92, 93, 96, 97, 98, 99, 102, 103, 104, 105, 108, 109, 110, 111, 122, 123, 124, 125, 128, 129, 130, 131, 134, 135, 136, 137, 140, 141, 142, 143, 146, 147, 148, 149, 152, 153, 154, 155, 158, 159, 160, 161, 164, 165, 166, 167, 178, 179, 180, 181, 184, 185, 186, 187, 190, 191, 192, 193, 196, 197, 198, 199, 202, 203, 204, 205, 208, 209, 210, 211, 214, 215, 216, 217, 220, 221, 222, 223]
Query 45  Cost:  16114  Intersected Partitions: [78]
Query 46  Cost:  1072183  Intersected Partitions: [0, 1, 2, 3, 8, 10, 11, 14, 16, 17, 20, 22, 23, 26, 28, 29, 112, 113, 114, 115, 120, 122, 123, 126, 128, 129, 132, 134, 135, 138, 140, 141

Query 43  Cost:  1500327  Intersected Partitions: [112, 113, 116, 117, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 168, 169, 172, 173, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211]
Query 44  Cost:  548531  Intersected Partitions: [61, 63, 94, 96, 97, 106, 108, 109, 173, 175, 206, 208, 209, 218, 220, 221]
Query 45  Cost:  35017  Intersected Partitions: [154, 155]
Query 46  Cost:  548531  Intersected Partitions: [61, 63, 94, 96, 97, 106, 108, 109, 173, 175, 206, 208, 209, 218, 220, 221]
Query 47  Cost:  139407  Intersected Partitions: [42, 43, 54, 55, 154, 155, 166, 167]
Query 48  Cost:  141859  Intersected Partitions: [148, 149, 154, 155, 204, 205, 210, 211]
Query 49  Cost:  1072309  Intersected Partitions: [8, 9, 14, 15, 20, 21, 26, 27, 32, 33, 38, 39, 44, 45, 50, 51, 64, 65, 70, 71, 76, 77, 82, 83, 88, 89, 94, 95, 100, 101, 106, 107, 120

Query 0  Cost:  119364  Intersected Partitions: [332, 344, 349, 353, 361, 366, 373, 383, 392]
Query 1  Cost:  155141  Intersected Partitions: [118, 123, 144, 163, 255, 256, 291, 314, 355, 368, 397]
Query 2  Cost:  512015  Intersected Partitions: [11, 12, 23, 24, 35, 45, 47, 59, 62, 63, 70, 73, 78, 79, 83, 84, 85, 86, 87, 88, 89, 96, 97, 217, 235, 264, 274, 293, 308, 309, 352, 353, 366, 367, 368, 381, 382]
Query 3  Cost:  1080024  Intersected Partitions: [4, 5, 7, 12, 15, 16, 29, 37, 40, 60, 67, 68, 73, 77, 79, 84, 86, 88, 91, 93, 97, 106, 112, 118, 123, 130, 131, 137, 144, 145, 153, 158, 159, 168, 169, 174, 175, 182, 183, 190, 191, 208, 209, 211, 218, 219, 231, 232, 235, 245, 255, 256, 258, 259, 260, 261, 272, 283, 284, 307, 309, 312, 314, 324, 332, 337, 344, 346, 349, 353, 355, 361, 366, 373, 380, 383, 392, 397, 401]
Query 4  Cost:  5670223  Intersected Partitions: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32

Query 0  Cost:  2680843  Intersected Partitions: [0, 1, 2, 4, 6, 7, 8, 9, 11, 14, 16, 18, 19, 20, 21, 23, 26, 28, 30, 32, 34, 38, 39, 40, 41, 42, 43, 48, 49, 51, 61, 62, 63, 65, 67, 70, 77, 78, 79, 81, 91, 93, 105, 106, 107, 108, 111, 112, 113, 114, 115, 120, 121, 126, 130, 132, 134, 138, 142, 144, 147, 149, 150, 151, 153, 154, 157, 159, 160, 161, 162, 165, 170, 171, 177, 178, 179, 180, 181, 187, 192, 197, 199, 200, 201, 202, 203, 209, 213, 214, 220, 221, 227, 228, 229, 230, 231, 232, 233, 234, 236, 238, 239, 240, 241, 242, 243, 244, 245, 247, 248, 250, 252, 253, 254, 255, 257, 260, 264, 266, 267, 272, 274, 275, 279, 282, 283, 284, 285, 286, 287, 288, 290, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 303, 305, 307, 308, 311, 314, 316, 318, 319, 321, 322, 324, 326, 330, 333, 337, 338, 339, 345]
Query 1  Cost:  80487  Intersected Partitions: [6, 21, 38, 39, 79, 113]
Query 2  Cost:  1504812  Intersected Partitions: [6, 13, 14, 15, 23, 24, 31, 32, 33, 38, 40, 45, 49, 50, 53, 60, 63, 6

Query 0  Cost:  142616  Intersected Partitions: [176, 177, 178, 179, 180, 183, 184, 187, 188, 191, 192]
Query 1  Cost:  148309  Intersected Partitions: [15, 30, 54, 65, 66, 100, 173, 178, 180, 234, 242, 247]
Query 2  Cost:  194257  Intersected Partitions: [2, 5, 18, 21, 35, 48, 57, 104]
Query 3  Cost:  788524  Intersected Partitions: [7, 9, 38, 39, 61, 62, 71, 72, 73, 77, 78, 79, 83, 85, 87, 96, 115, 118, 119, 183, 185, 189, 195, 203, 206, 215, 220, 221, 244, 245, 253, 257, 258, 282, 289, 290, 291, 299, 300, 309, 317, 328, 336, 337, 338, 344, 345, 347, 351, 355, 364, 367, 368, 373, 374, 377]
Query 4  Cost:  494049  Intersected Partitions: [10, 15, 16, 31, 32, 43, 65, 66, 90, 91, 173, 178, 196, 214, 219, 234, 247, 280, 281, 285, 298, 302, 318, 322, 328, 329, 336, 337, 338, 347, 351, 357, 364, 367, 368, 373, 374, 377]
Query 5  Cost:  565258  Intersected Partitions: [4, 5, 13, 14, 19, 20, 21, 22, 42, 59, 60, 89, 102, 105, 106, 107, 108, 116, 134, 135, 144, 162, 163, 165, 272, 279, 286, 29

Query 11  Cost:  741524  Intersected Partitions: [56, 58, 64, 65, 66, 67, 68, 69, 76, 77, 78, 79, 80, 81, 168, 170, 176, 177, 178, 179, 180, 181, 188, 189, 190, 191, 192, 193]
Query 12  Cost:  535901  Intersected Partitions: [66, 67, 68, 69, 78, 79, 80, 81, 90, 91, 92, 93, 102, 103, 104, 105, 178, 179, 180, 181, 190, 191, 192, 193, 202, 203, 204, 205, 214, 215, 216, 217]
Query 13  Cost:  1395573  Intersected Partitions: [4, 5, 6, 7, 60, 61, 62, 63, 116, 117, 118, 119, 172, 173, 174, 175]
Query 14  Cost:  140885  Intersected Partitions: [149, 155, 160, 166, 205, 211, 216, 222]
Query 15  Cost:  348811  Intersected Partitions: [116, 117, 172, 173]
Query 16  Cost:  1395573  Intersected Partitions: [4, 5, 6, 7, 60, 61, 62, 63, 116, 117, 118, 119, 172, 173, 174, 175]
Query 17  Cost:  260882  Intersected Partitions: [112, 116, 117]
Query 18  Cost:  1500329  Intersected Partitions: [2, 3, 6, 7, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 58, 

Query 44  Cost:  134038  Intersected Partitions: [133, 139, 156, 162, 189, 195, 212, 218]
Query 45  Cost:  562294  Intersected Partitions: [36, 37, 42, 43, 48, 49, 54, 55, 92, 93, 98, 99, 104, 105, 110, 111, 148, 149, 154, 155, 160, 161, 166, 167, 204, 205, 210, 211, 216, 217, 222, 223]
Query 46  Cost:  3214798  Intersected Partitions: [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 176, 177, 178, 179, 180, 18

Query 0  Cost:  1779168  Intersected Partitions: [2, 3, 4, 5, 6, 7, 8, 9, 16, 17, 20, 33, 39, 40, 47, 52, 53, 54, 55, 56, 58, 59, 60, 61, 62, 65, 66, 68, 69, 70, 72, 73, 108, 109, 110, 111, 117, 118, 119, 120, 123, 127, 129, 130, 133, 135, 136, 139, 141, 142, 145, 147, 148, 154, 169, 170, 171, 172, 175, 178, 179, 180, 191, 192, 193, 194, 197, 198, 199, 201, 204, 208, 210, 222, 223, 224, 225, 231, 239, 241, 243, 246, 248, 249, 250, 251, 259, 260, 261, 262, 296, 300, 301, 303, 304, 306, 307, 308, 309, 310, 311, 312, 315, 316, 317, 319, 320, 324, 327, 328, 329, 330, 332, 333, 334, 335, 337, 339, 340, 341, 342, 344]
Query 1  Cost:  83278  Intersected Partitions: [35, 40, 56, 58, 60]
Query 2  Cost:  84067  Intersected Partitions: [270, 272, 276, 280, 293, 294]
Query 3  Cost:  1450114  Intersected Partitions: [0, 1, 2, 3, 6, 7, 10, 11, 12, 13, 16, 18, 19, 21, 23, 25, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 39, 41, 42, 45, 46, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 74, 75, 78, 79, 81

Query 0  Cost:  1298206  Intersected Partitions: [0, 1, 2, 3, 4, 5, 6, 7, 9, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101]
Query 1  Cost:  238681  Intersected Partitions: [169, 170, 171, 172, 185, 186, 187, 194, 195, 199, 200, 201, 202, 204, 205]
Query 2  Cost:  300353  Intersected Partitions: [1, 3, 7, 8, 23, 104, 105, 119, 130, 150, 208, 220, 229, 232, 239, 250, 251, 255, 279, 285]
Query 3  Cost:  103026  Intersected Partitions: [54, 55, 57, 61, 62, 63, 64, 72]
Query 4  Cost:  252578  Intersected Partitions: [345, 346, 347, 348, 349, 351, 353, 354, 355, 358, 360, 361, 364, 368, 369, 373, 374]
Query 5  Cost:  533943  Intersected Partitions: [4, 13, 17, 58, 63, 69, 86, 89, 94, 98, 101, 124, 147,

Query 43  Cost:  15038  Intersected Partitions: [387]
Query 44  Cost:  5670223  Intersected Partitions: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 

Query 0  Cost:  32719  Intersected Partitions: [349, 403]
Query 1  Cost:  357067  Intersected Partitions: [6, 17, 25, 37, 59, 83, 84, 86, 100, 191, 192, 215, 222, 246, 266, 289, 302, 306, 316, 317, 329, 351, 355, 356, 373, 380, 400]
Query 2  Cost:  864379  Intersected Partitions: [5, 7, 11, 13, 24, 41, 43, 46, 52, 61, 62, 65, 68, 74, 85, 88, 101, 123, 124, 125, 126, 133, 137, 139, 142, 149, 159, 160, 163, 164, 171, 201, 212, 224, 231, 237, 238, 239, 240, 252, 253, 269, 278, 280, 284, 295, 297, 318, 320, 335, 339, 341, 343, 359, 364, 367, 372, 376, 377, 379, 385, 388, 389]
Query 3  Cost:  394107  Intersected Partitions: [198, 207, 208, 210, 214, 215, 221, 222, 224, 225, 226, 229, 230, 231, 235, 236, 238, 239, 240, 246, 248, 250, 253, 254, 256, 261, 262, 266, 270]
Query 4  Cost:  2805511  Intersected Partitions: [1, 6, 8, 10, 12, 17, 25, 31, 33, 37, 44, 45, 49, 50, 51, 59, 63, 64, 66, 67, 77, 78, 83, 84, 86, 87, 96, 97, 100, 102, 103, 104, 105, 106, 110, 111, 113, 114, 115, 116, 118, 119

Query 39  Cost:  121735  Intersected Partitions: [168, 177, 181]
Query 40  Cost:  6001309  Intersected Partitions: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198,

In [56]:
# Replay the saved query set: load the stored workloads and run the experiments
# once per workload.
# NOTE(review): the meaning of the argument 0 passed to load_exper_query is not
# visible from this cell -- confirm against its definition.
workloads = load_exper_query(0)

# Each workload unpacks into a (distribution_query, random_query) pair.
# NOTE(review): after the loop both names stay bound (to the last pair) in the
# notebook namespace; a later cell reads `random_query`, presumably this one --
# verify before reordering or renaming.
for distribution_query, random_query in workloads:
    run_experiments(distribution_query, random_query)

query_index_min[k]: -3 query_index_max[k]: 10
(slice(query_index_min[k], query_index_max[k], 1),): (slice(-3, 10, 1),)
Query 0  Cost:  68036  Intersected Partitions: [5, 6, 10, 52, 73]
Query 1  Cost:  550946  Intersected Partitions: [41, 43, 45, 47, 66, 67, 114, 115, 117, 123, 124, 128, 130, 132, 135, 136, 141, 142, 152, 154, 155, 159, 160, 162, 163, 231, 232, 233, 234, 235, 237, 239, 240, 270, 311, 313, 337, 344, 348, 374, 380, 391]
Query 2  Cost:  10612  Intersected Partitions: [66]
Query 3  Cost:  43039  Intersected Partitions: [395, 396, 399]
Query 4  Cost:  354783  Intersected Partitions: [255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 267, 268, 286, 290, 293, 297, 320, 322, 323, 325, 351, 397, 398, 399, 400, 401]
Query 5  Cost:  203655  Intersected Partitions: [101, 105, 109, 113, 119, 120, 121, 122, 140, 147, 150, 151, 157, 178, 181]
Query 6  Cost:  155373  Intersected Partitions: [70, 75, 334, 336, 343, 347, 348, 367, 368, 374, 380]
Query 7  Cost:  129775  Intersected Partit

Query 0  Cost:  39952  Intersected Partitions: [143, 201, 202]
Query 1  Cost:  56436  Intersected Partitions: [57, 142]
Query 2  Cost:  36908  Intersected Partitions: [148, 224, 246]
Query 3  Cost:  189415  Intersected Partitions: [50, 56, 163, 270]
Query 4  Cost:  17724  Intersected Partitions: [305]
Query 5  Cost:  97958  Intersected Partitions: [46, 128, 139, 235, 239, 268, 300, 322]
Query 6  Cost:  22293  Intersected Partitions: [252, 253]
Query 7  Cost:  10280  Intersected Partitions: [87]
Query 8  Cost:  86341  Intersected Partitions: [125, 126]
Query 9  Cost:  74904  Intersected Partitions: [120, 121, 197, 198, 233]
Query 10  Cost:  18113  Intersected Partitions: [47]
Query 11  Cost:  86933  Intersected Partitions: [128, 130, 223, 316, 318, 326]
Query 12  Cost:  17380  Intersected Partitions: [147]
Query 13  Cost:  946355  Intersected Partitions: [11, 12, 18, 19, 25, 26, 32, 33, 43, 44, 45, 46, 47, 81, 82, 86, 90, 96, 102, 108, 113, 114, 115, 116, 130, 134, 147, 166, 172, 178, 1

Query 0  Cost:  22774  Intersected Partitions: [121, 122]
Query 1  Cost:  32267  Intersected Partitions: [363, 365]
Query 2  Cost:  66160  Intersected Partitions: [255, 274, 306, 308]
Query 3  Cost:  31368  Intersected Partitions: [22, 26]
Query 4  Cost:  1784908  Intersected Partitions: [0, 2, 6, 7, 12, 14, 15, 18, 19, 34, 35, 38, 41, 47, 55, 57, 59, 60, 62, 64, 65, 67, 69, 71, 72, 75, 76, 79, 80, 83, 84, 85, 86, 87, 88, 89, 90, 99, 100, 101, 102, 103, 104, 105, 106, 133, 134, 183, 185, 187, 188, 189, 190, 191, 194, 195, 196, 197, 198, 199, 207, 210, 211, 212, 213, 217, 218, 219, 223, 224, 225, 226, 227, 229, 231, 233, 246, 247, 248, 249, 251, 253, 255, 268, 269, 274, 277, 279, 281, 283, 284, 285, 286, 287, 289, 291, 295, 296, 297, 298, 303, 304, 305, 306, 309, 310, 311, 315, 316, 319, 320, 327, 329, 330]
Query 5  Cost:  302992  Intersected Partitions: [257, 262, 263, 276, 333, 334, 339, 340, 341, 348, 350, 351, 353]
Query 6  Cost:  39647  Intersected Partitions: [21, 32, 37]
Query 7 

Query 0  Cost:  190778  Intersected Partitions: [12, 150]
Query 1  Cost:  59136  Intersected Partitions: [203, 230, 240]
Query 2  Cost:  26003  Intersected Partitions: [231]
Query 3  Cost:  19328  Intersected Partitions: [181]
Query 4  Cost:  250216  Intersected Partitions: [151, 153, 157, 159, 171, 174, 176, 178, 180, 190, 192]
Query 5  Cost:  23022  Intersected Partitions: [212, 218]
Query 6  Cost:  44640  Intersected Partitions: [85, 87, 136]
Query 7  Cost:  170354  Intersected Partitions: [1, 9]
Query 8  Cost:  26002  Intersected Partitions: [228]
Query 9  Cost:  78850  Intersected Partitions: [153, 156, 157]
Query 10  Cost:  16601  Intersected Partitions: [171]
Query 11  Cost:  32777  Intersected Partitions: [51, 167, 211]
Query 12  Cost:  14399  Intersected Partitions: [202]
Query 13  Cost:  61843  Intersected Partitions: [8, 25, 171, 199]
Query 14  Cost:  415103  Intersected Partitions: [3, 8, 15, 25, 171, 198, 199]
Query 15  Cost:  32777  Intersected Partitions: [51, 167, 211]


Query 0  Cost:  61605  Intersected Partitions: [7, 89, 94, 226, 235]
Query 1  Cost:  39843  Intersected Partitions: [17, 19]
Query 2  Cost:  55850  Intersected Partitions: [180, 211, 236]
Query 3  Cost:  23424  Intersected Partitions: [335, 336]
Query 4  Cost:  45160  Intersected Partitions: [158]
Query 5  Cost:  10640  Intersected Partitions: [94]
Query 6  Cost:  74745  Intersected Partitions: [129, 130, 131, 133, 139]
Query 7  Cost:  133929  Intersected Partitions: [145, 155, 157, 177]
Query 8  Cost:  13990  Intersected Partitions: [216]
Query 9  Cost:  99352  Intersected Partitions: [31, 38, 149, 151, 153, 155, 214]
Query 10  Cost:  144548  Intersected Partitions: [278, 279, 280, 281, 283, 284, 285, 286, 288, 289, 290, 291, 292]
Query 11  Cost:  15002  Intersected Partitions: [155]
Query 12  Cost:  12047  Intersected Partitions: [1]
Query 13  Cost:  15002  Intersected Partitions: [155]
Query 14  Cost:  12047  Intersected Partitions: [1]
Query 15  Cost:  14582  Intersected Partitions

Query 0  Cost:  30602  Intersected Partitions: [257, 259]
Query 1  Cost:  97616  Intersected Partitions: [102, 138, 139, 140, 141, 142]
Query 2  Cost:  35666  Intersected Partitions: [146, 153]
Query 3  Cost:  45018  Intersected Partitions: [124, 142, 150]
Query 4  Cost:  44570  Intersected Partitions: [226, 228, 249]
Query 5  Cost:  15804  Intersected Partitions: [152]
Query 6  Cost:  19728  Intersected Partitions: [125]
Query 7  Cost:  52236  Intersected Partitions: [237, 246, 256]
Query 8  Cost:  346226  Intersected Partitions: [14, 16, 20, 22, 128, 130, 134, 136, 137, 143, 144, 150, 151, 152, 156]
Query 9  Cost:  415007  Intersected Partitions: [2, 3, 17, 58, 107, 120, 121, 134, 149, 159, 164, 166]
Query 10  Cost:  18846  Intersected Partitions: [18]
Query 11  Cost:  71468  Intersected Partitions: [30, 31, 61, 219, 233]
Query 12  Cost:  17141  Intersected Partitions: [184]
Query 13  Cost:  223430  Intersected Partitions: [17, 21, 54, 64, 104, 164, 165, 174, 175, 182, 184]
Query 14 

Query 0  Cost:  44344  Intersected Partitions: [260, 355, 356]
Query 1  Cost:  14929  Intersected Partitions: [134]
Query 2  Cost:  350718  Intersected Partitions: [280, 281, 282, 283, 284, 285, 286, 316, 323, 324, 326, 327, 328, 329, 330, 344, 353, 354, 355, 356, 358]
Query 3  Cost:  10715  Intersected Partitions: [111]
Query 4  Cost:  17106  Intersected Partitions: [79]
Query 5  Cost:  15012  Intersected Partitions: [198]
Query 6  Cost:  36447  Intersected Partitions: [330, 333, 335]
Query 7  Cost:  14911  Intersected Partitions: [85]
Query 8  Cost:  15924  Intersected Partitions: [95]
Query 9  Cost:  19757  Intersected Partitions: [235]
Query 10  Cost:  23905  Intersected Partitions: [216, 238]
Query 11  Cost:  10694  Intersected Partitions: [102]
Query 12  Cost:  10843  Intersected Partitions: [171]
Query 13  Cost:  35636  Intersected Partitions: [304, 314]
Query 14  Cost:  168658  Intersected Partitions: [99, 102, 105, 109, 226, 227, 229, 235, 328, 330]
Query 15  Cost:  17539  Int

Query 0  Cost:  376417  Intersected Partitions: [42, 44, 46, 62, 65, 67, 108, 110, 111, 120, 122, 123, 221, 222, 254, 256, 257, 262, 263, 304, 306, 307, 316, 318, 319]
Query 1  Cost:  147101  Intersected Partitions: [173, 175, 176, 177, 178, 181, 182, 184, 187]
Query 2  Cost:  172591  Intersected Partitions: [101, 139, 164, 178, 184, 187, 189]
Query 3  Cost:  26377  Intersected Partitions: [281]
Query 4  Cost:  39925  Intersected Partitions: [6, 132, 133]
Query 5  Cost:  15725  Intersected Partitions: [64]
Query 6  Cost:  39754  Intersected Partitions: [342]
Query 7  Cost:  172701  Intersected Partitions: [105, 106, 123, 129, 144, 146, 148, 194, 195, 205, 206, 221]
Query 8  Cost:  37675  Intersected Partitions: [317, 323]
Query 9  Cost:  136962  Intersected Partitions: [298, 299, 301, 302, 318, 320, 324, 326, 333, 348]
Query 10  Cost:  12124  Intersected Partitions: [77]
Query 11  Cost:  12124  Intersected Partitions: [77]
Query 12  Cost:  55897  Intersected Partitions: [172, 175, 197,

Query 0  Cost:  94041  Intersected Partitions: [200, 202, 204, 206, 211, 213]
Query 1  Cost:  54435  Intersected Partitions: [143, 154, 159]
Query 2  Cost:  50546  Intersected Partitions: [262, 263, 264, 265, 266]
Query 3  Cost:  254635  Intersected Partitions: [101, 103, 105, 107, 108, 214, 215, 238, 239, 320, 321, 336, 337]
Query 4  Cost:  31979  Intersected Partitions: [313, 314]
Query 5  Cost:  11687  Intersected Partitions: [229]
Query 6  Cost:  23191  Intersected Partitions: [150, 152]
Query 7  Cost:  138171  Intersected Partitions: [74, 75, 86, 87, 93, 94, 208, 213, 312, 314]
Query 8  Cost:  13919  Intersected Partitions: [89]
Query 9  Cost:  30495  Intersected Partitions: [320, 321]
Query 10  Cost:  40071  Intersected Partitions: [27, 39, 42]
Query 11  Cost:  352983  Intersected Partitions: [4, 6, 7, 10, 16, 17, 20, 21, 22, 166, 168, 169, 171, 172, 173, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286]
Query 12  Cost:  13445  Intersected Partitions: [134]
Query 13  Cost:  11348

In [178]:
# test some simple, typical supervised classification algorithms
from sklearn import neural_network
from sklearn.datasets import load_svmlight_file
from sklearn.metrics import accuracy_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
import time

In [179]:
# Feature matrix: keep axis 0 (one row per sample) and collapse every remaining
# axis of training_set into a single feature axis.
X = training_set.reshape((training_set.shape[0], -1))
# Label vector: the first `random_segmentation` rows are class 0, all remaining
# rows are class 1.
y = np.array(
    [0] * random_segmentation
    + [1] * (X.shape[0] - random_segmentation)
)

In [180]:
# Multi-layer perceptron with three hidden layers of 64 units each and at most
# 500 training iterations.
# random_state is pinned so that weight initialisation (and therefore the
# fitted model and everything derived from it) is reproducible after a kernel
# restart instead of changing on every run.
nn = neural_network.MLPClassifier(
    hidden_layer_sizes=(64, 64, 64),
    max_iter=500,
    random_state=0,
)

# Time only the fit call. perf_counter() is the high-resolution clock intended
# for measuring elapsed durations; time.time() is wall-clock time, which can be
# coarse on some platforms and may jump if the system clock is adjusted.
start_training_time = time.perf_counter()
nn.fit(X, y)
end_training_time = time.perf_counter()

print("training time: ", end_training_time - start_training_time)

training time:  0.36500000953674316




In [181]:
# Test feature matrix: one row per sample, all remaining axes of testing_set
# collapsed into a single feature axis.
X_test = testing_set.reshape((testing_set.shape[0], -1))
# Test labels: the first (len(random_query) - random_segmentation) rows are
# class 0 and every remaining row is class 1.
# NOTE(review): presumably class 0 marks the random queries left out of the
# training split -- verify against how testing_set is assembled.
y_test = np.array(
    [0] * (len(random_query) - random_segmentation)
    + [1] * (X_test.shape[0] - len(random_query) + random_segmentation)
)

In [182]:
# Time only the prediction call. The interval is very short, so use
# perf_counter() (high-resolution clock meant for elapsed-time measurement)
# rather than time.time(), whose wall-clock resolution can be coarser than the
# duration being measured.
start_testing_time = time.perf_counter()
y_pred = nn.predict(X_test)
end_testing_time = time.perf_counter()

# Score the predictions against the expected labels using each metric's
# default settings.
accuracy = accuracy_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
F1 = f1_score(y_test, y_pred)

print("accuracy: ", accuracy)
print("recall: ", recall)
print("F1: ", F1)
print("testing time: ", end_testing_time - start_testing_time)

accuracy:  0.82
recall:  0.875
F1:  0.8860759493670887
testing time:  0.0009706020355224609


In [183]:
y_pred # NOTE: these predictions make no sense when dim_prob = 0.5

array([1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1])