In [7]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import morton
import math
from hilbertcurve.hilbertcurve import HilbertCurve

In [8]:
# Create 2D data
# A fixed seed keeps the sample identical across "Restart Kernel -> Run All",
# so the experiment results further down can be reproduced.
SEED = 42
rng = np.random.default_rng(SEED)
# 100 points with integer coordinates A and B, each drawn from 0..64
# (the upper bound 65 is exclusive).
df = pd.DataFrame(rng.integers(0, 65, size=(100, 2)), columns=list('AB'))
df.describe()

Unnamed: 0,A,B
count,100.0,100.0
mean,33.33,35.38
std,18.070599,18.810474
min,1.0,1.0
25%,17.75,20.0
50%,33.0,40.0
75%,48.25,51.25
max,64.0,64.0


In [9]:
# Morton algorithm
m = morton.Morton(bits=64, dimensions=2)
def set_value(data, key1, key2):
    """Pack two fields of ``data`` into one value using the shared ``m`` encoder.

    ``data`` is indexed with ``key1`` and ``key2`` (in the offset experiment it
    is one DataFrame row passed in by ``DataFrame.apply(..., axis=1)``). Both
    looked-up values are converted with ``int()`` before being handed to
    ``m.pack``, whose result is returned unchanged.
    """
    coords = (int(data[key1]), int(data[key2]))
    return m.pack(*coords)


In [10]:
# Module-level Hilbert curve instance shared by set_value_Hcurve.
# NOTE(review): the positional arguments (64, 2) are presumably the number of
# iterations and the number of dimensions, and n_procs=-1 presumably requests
# all available cores — confirm against the hilbertcurve package documentation.
hilbert_curve = HilbertCurve(64, 2, n_procs=-1)
def set_value_Hcurve(data,key1,key2,key3):
    """Store the Hilbert-curve distance of every row's point in a new column.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the coordinate columns. It is modified in place.
    key1, key2 : str
        Names of the two coordinate columns; together they form one point
        per row.
    key3 : str
        Name of the column that receives the distances (created or
        overwritten).

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object, now containing column ``key3``.
    """
    points = data[[key1, key2]].to_numpy()
    distances = hilbert_curve.distances_from_points(points)
    # The distances come back in row order, so attach them positionally by
    # reusing the frame's own index. Wrapping them in a fresh DataFrame would
    # give them a 0..n-1 index and pandas would align on labels, producing
    # NaN or misassigned values whenever ``data`` is not indexed 0..n-1.
    data[key3] = pd.Series(distances, index=data.index)
    return data

# Offset Experiment
- **H0:** There is no offset value that moves the CSP consistently.
- **HA:** There is at least one offset value that moves the CSP consistently.

In [11]:
def get_std_with_different_offset(data,key1,key2,algorithm,offset):
    """Measure how uniformly a constant coordinate offset shifts the curve index.

    Both coordinate columns are shifted by ``offset``, the curve index is
    computed for the original and for the shifted points, and the standard
    deviation of the per-row difference between the two indices is returned.
    A result of 0 means every row's index moved by exactly the same amount.

    Side effects: ``data`` is modified in place. The shifted coordinates are
    stored in ``<key>_offset_<offset>`` columns, the indices in
    ``<algorithm>_origin`` / ``<algorithm>_offset``, and their difference in
    ``offset_delta`` (overwritten on every call).

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the coordinate columns.
    key1, key2 : str
        Names of the two coordinate columns.
    algorithm : str
        ``'morton'`` (row-wise ``set_value``) or ``'hilbert'``
        (``set_value_Hcurve``).
    offset : number
        Value added to both coordinates.

    Returns
    -------
    float
        ``data['offset_delta'].std()`` using the pandas default settings.

    Raises
    ------
    ValueError
        If ``algorithm`` is neither ``'morton'`` nor ``'hilbert'``. The check
        runs before any column is added, so ``data`` is left untouched.
    """
    # Fail loudly and early: previously an unknown name only printed a hint
    # and returned None after the frame had already been modified.
    if algorithm not in ('morton', 'hilbert'):
        raise ValueError("Please use either morton or hilbert as algorithm parameter")

    key1_offset = key1 + "_offset_" + str(offset)
    key2_offset = key2 + "_offset_" + str(offset)
    data[key1_offset] = data[key1].add(offset)
    data[key2_offset] = data[key2].add(offset)

    if algorithm == 'morton':
        data['morton_origin'] = data.apply(set_value, args=(key1, key2), axis=1)
        data['morton_offset'] = data.apply(set_value, args=(key1_offset, key2_offset), axis=1)
        data['offset_delta'] = data['morton_offset'] - data['morton_origin']
    else:
        data = set_value_Hcurve(data, key1, key2, 'hilbert_origin')
        data = set_value_Hcurve(data, key1_offset, key2_offset, 'hilbert_offset')
        data['offset_delta'] = data['hilbert_offset'] - data['hilbert_origin']

    return data['offset_delta'].std()

## Morton offset

In [14]:
# Try offsets 2..199 in order and stop at the first one for which the Morton
# index of every row shifts by the same amount (zero spread of the deltas).
for i in range(2, 200):
    std = get_std_with_different_offset(df, 'A', 'B', 'morton', i)
    if std != 0:
        # Not uniform yet: report the spread and move on to the next offset.
        print(f'offset: {i}, std is: {std!s}')
        continue
    print(f'Found the offset that reject null hypothesis: {i}')
    break


Found the offset that reject null hypothesis: 128


## Hilbert offset

In [15]:
# Try offsets 2..499 in order and stop at the first one for which the Hilbert
# distance of every row shifts by the same amount (zero spread of the deltas).
for i in range(2, 500):
    std = get_std_with_different_offset(df, 'A', 'B', 'hilbert', i)
    if std != 0:
        # Not uniform yet: report the spread and move on to the next offset.
        print(f'offset: {i}, std is: {std!s}')
        continue
    print(f'Found the offset that reject null hypothesis: {i}')
    break


Found the offset that reject null hypothesis: 384


# Multiplier Experiment