In [7]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import morton
import math
from hilbertcurve.hilbertcurve import HilbertCurve

In [8]:
# Create 2D data
# Seeded generator so a fresh "Restart & Run All" reproduces the same sample
# (and therefore the same offsets reported further down).
SEED = 42
rng = np.random.default_rng(SEED)
# 100 points with integer coordinates drawn from [0, 64] (upper bound 65 is exclusive).
df = pd.DataFrame(rng.integers(0, 65, size=(100, 2)), columns=list('AB'))
df.describe()

Unnamed: 0,A,B
count,100.0,100.0
mean,33.33,35.38
std,18.070599,18.810474
min,1.0,1.0
25%,17.75,20.0
50%,33.0,40.0
75%,48.25,51.25
max,64.0,64.0


In [9]:
# Morton algorithm
# Shared encoder instance, built for 2 dimensions with bits=64.
m = morton.Morton(dimensions=2, bits=64)


def set_value(data, key1, key2):
    """Return the Morton code of two fields of ``data``.

    ``data[key1]`` and ``data[key2]`` are each converted with ``int()`` and
    handed to ``m.pack``. The function is used as a row-wise callback for
    ``DataFrame.apply(..., axis=1)``, with the two keys supplied via ``args``.
    """
    first_coord = int(data[key1])
    second_coord = int(data[key2])
    return m.pack(first_coord, second_coord)


In [10]:
# Shared Hilbert-curve encoder.
# NOTE(review): positional arguments are presumably (iterations, dimensions) and
# n_procs=-1 presumably requests all available CPUs -- confirm against the
# installed hilbertcurve version.
hilbert_curve = HilbertCurve(64, 2, n_procs=-1)


def set_value_Hcurve(data,key1,key2,key3):
    """Store the Hilbert distance of each row's (key1, key2) point in column ``key3``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the coordinate columns. It is modified in place.
    key1, key2 : str
        Names of the two coordinate columns.
    key3 : str
        Name of the column that receives the distances (created or overwritten).

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object, with ``key3`` filled in.
    """
    points = data[[key1,key2]].to_numpy()
    distances = hilbert_curve.distances_from_points(points)
    # Attach data's own index so the values land row-by-row in order. Assigning a
    # bare DataFrame/Series would be aligned on a fresh 0..n-1 index and produce
    # NaN (or misplaced values) whenever data is filtered, sorted or re-indexed.
    data[key3] = pd.Series(distances, index=data.index)
    return data

# Offset Experiment
- **H0:** There is no offset value that moves the CSP consistently (i.e. for which the standard deviation of the per-point shift is zero).
- **HA:** There is at least one offset value that moves the CSP consistently.

In [11]:
def get_std_with_different_offset(data,key1,key2,algorithm,offset):
    """Measure how uniformly an offset shifts the curve codes of a point set.

    Both coordinates of every row are increased by ``offset``; the original and
    the shifted points are encoded with the chosen curve, and the standard
    deviation of the per-row code difference is returned. A result of 0 means
    every point's code moved by exactly the same amount.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the coordinate columns. It is modified in place: the
        shifted coordinates are stored as ``<key>_offset_<offset>`` and the
        codes as ``<algorithm>_origin`` / ``<algorithm>_offset``, with their
        difference in ``offset_delta``.
    key1, key2 : str
        Names of the two coordinate columns.
    algorithm : str
        Either ``'morton'`` or ``'hilbert'``.
    offset : int
        Value added to both coordinates.

    Returns
    -------
    float
        Standard deviation of ``offset_delta``.

    Raises
    ------
    ValueError
        If ``algorithm`` is neither ``'morton'`` nor ``'hilbert'``.
    """
    # Validate before touching `data`, so a bad argument neither mutates the
    # frame nor silently yields None (which callers would compare against 0).
    if algorithm not in ('morton', 'hilbert'):
        raise ValueError(
            "Unknown algorithm {!r}: please use either 'morton' or 'hilbert' "
            "as algorithm parameter".format(algorithm)
        )

    key1_offset=key1+"_offset_"+str(offset)
    key2_offset=key2+"_offset_"+str(offset)
    data[key1_offset]=data[key1].add(offset)
    data[key2_offset]=data[key2].add(offset)

    if algorithm=='morton':
        # Row-wise encoding through the Morton helper.
        data['morton_origin']=data.apply(set_value,args=(key1,key2),axis=1)
        data['morton_offset']=data.apply(set_value,args=(key1_offset,key2_offset),axis=1)
        data['offset_delta']=data['morton_offset']-data['morton_origin']
    else:
        # The Hilbert helper writes the named column and returns the frame.
        data=set_value_Hcurve(data,key1,key2,'hilbert_origin')
        data=set_value_Hcurve(data,key1_offset,key2_offset,'hilbert_offset')
        data['offset_delta']=data['hilbert_offset']-data['hilbert_origin']

    return data['offset_delta'].std()

## Morton offset

In [29]:
# Sweep offsets 2..199 with the Morton encoding, recording the spread of the
# per-row code shift. The sweep ends at the first offset whose spread is zero.
result_data = []
for i in range(2, 200):
    std = get_std_with_different_offset(df, 'A', 'B', 'morton', i)
    result_data += [[i, std]]
    if std != 0:
        continue
    print('Found the offset that reject null hypothesis: ' + str(i))
    break
result_df = pd.DataFrame(result_data, columns=['offset', 'std'])
result_df

Found the offset that reject null hypothesis: 128


Unnamed: 0,offset,std
0,2,1076.310898
1,3,1489.954738
2,4,1714.275493
3,5,1839.907088
4,6,1937.337546
...,...,...
122,124,5071.185675
123,125,4019.215086
124,126,2700.702770
125,127,649.905669


## Hilbert offset

In [30]:
# Sweep offsets 2..499 with the Hilbert encoding, recording the spread of the
# per-row distance shift. The sweep ends at the first offset whose spread is zero.
result_data = []
for i in range(2, 500):
    std = get_std_with_different_offset(df, 'A', 'B', 'hilbert', i)
    result_data += [[i, std]]
    if std != 0:
        continue
    print('Found the offset that reject null hypothesis: ' + str(i))
    break
result_df = pd.DataFrame(result_data, columns=['offset', 'std'])
result_df

Found the offset that reject null hypothesis: 384


Unnamed: 0,offset,std
0,2,2418.888366
1,3,3425.919036
2,4,3676.331836
3,5,4004.119777
4,6,4250.963797
...,...,...
378,380,4098.814884
379,381,3488.276941
380,382,3292.330310
381,383,1138.667201


# Find the pattern
For different maximum values of the dataset, find the corresponding offset for both the Hilbert and the Morton curve.

In [31]:
def find_offset_and_max_value_relation(data_max_value,search_max_value):
    """List the offsets that shift a random point set uniformly, per algorithm.

    A 25-row frame with integer columns ``A`` and ``B`` drawn from
    ``[0, data_max_value)`` is generated. Every offset in
    ``1 .. search_max_value - 1`` is then tried with the Morton and the Hilbert
    encoding, and each (offset, algorithm) pair whose code-shift standard
    deviation is exactly 0 is recorded.

    Parameters
    ----------
    data_max_value : int
        Exclusive upper bound of the random coordinates.
    search_max_value : int
        Exclusive upper bound of the offsets to try.

    Returns
    -------
    pandas.DataFrame
        One row per hit, with columns ``input_max_value``, ``df_max_value``
        (largest coordinate actually drawn), ``offset`` and ``alogrithm``
        (column name kept as spelled in the original for compatibility).
    """
    test_df = pd.DataFrame(np.random.randint(0,data_max_value,size=(25, 2)), columns=list('AB'))
    # Scalar maximum over both columns; DataFrame.max() alone is a per-column
    # Series, which would end up stored as an object inside every result row.
    max_value = int(test_df.to_numpy().max())
    relation_result=[]
    for i in range(1,search_max_value):
        for algorithm in ('morton', 'hilbert'):
            # get_std_with_different_offset adds columns to the frame it is
            # given; passing a fresh copy keeps test_df at two columns instead
            # of growing by two per offset, which slows every later call.
            std = get_std_with_different_offset(test_df.copy(),'A','B',algorithm,i)
            if std==0:
                relation_result.append([data_max_value,max_value,i,algorithm])
    return pd.DataFrame(relation_result,columns=['input_max_value','df_max_value','offset','alogrithm'])


In [32]:
# Run the offset search for every data bound 1..99 and stack the hits row-wise.
# All per-bound frames share the same four columns, so they are concatenated
# along the rows (axis=0); axis=1 would paste them side by side under repeated
# column names. Collecting first and concatenating once also avoids re-copying
# the accumulated frame on every iteration.
# NOTE: this is 99 x 999 x 2 encodings of a 25-row frame and is slow to run.
relation_frames = [find_offset_and_max_value_relation(i, 1000) for i in range(1, 100)]
relation_df = pd.concat(relation_frames, ignore_index=True)
relation_df

KeyboardInterrupt: 

# Multiplier Experiment