Reference: https://geodacenter.github.io/workbook/6a_local_auto/lab6a.html#local-geary

# Univariate local geary

Interestingly, following [this equation](https://www.biomedware.com/files/documentation/spacestat/Statistics/Gearys_C/Geary_s_C_statistic.htm) which explicitly calls for standardization of input data. We also do NOT divide by 2.

$$ c_i = \sum_j w_{ij} (z_i - z_j)^2 $$ 

where: 

$z_i = x_i - \bar{x}$ and $z_j = x_j - \bar{x}$, and $w_{ij}$ are the elements of the row-standardized binary symmetric spatial weight matrix W. 

or, $$ c_i = (1/m^2) * \sum_j w_{ij} (x_i - x_j)^2 $$

where,

$$ m^2 = \sum_i (x_i−\bar{x})^2/n $$

## Load in example data

In [1]:
import libpysal as lp
import geopandas as gpd
from scipy import stats
import numpy as np
guerry = lp.examples.load_example('Guerry')
guerry_ds = gpd.read_file(guerry.get_path('Guerry.shp'))

In [2]:
wq = lp.weights.Queen.from_dataframe(guerry_ds)

In [3]:
wq[0]

{66: 1.0, 35: 1.0, 68: 1.0, 36: 1.0}

In [4]:
wq.transform = 'r'
wq[0]

{66: 0.25, 35: 0.25, 68: 0.25, 36: 0.25}

In [5]:
x = guerry_ds['Donatns']

print("x_i is", x[0])
print("x_j are", x[66], x[35], x[68], x[36])

x_i is 5098
x_j are 1983 4077 3710 3012


In [6]:
# Calculate score
zscore_x = (x - np.mean(x))/np.std(x)
zscore_x

0    -0.336188
1     0.450441
2     0.879023
3    -0.825375
4     0.049370
        ...   
80    1.512380
81    0.454785
82    1.467288
83   -0.555029
84   -0.506214
Name: Donatns, Length: 85, dtype: float64

# Build observed local geary values

In [7]:
adj_list = wq.to_adjlist(remove_symmetric=False)
adj_list.head()

Unnamed: 0,focal,neighbor,weight
0,0,66,0.25
1,0,35,0.25
2,0,68,0.25
3,0,36,0.25
4,1,48,0.166667


In [89]:
import pandas as pd
zseries = pd.Series(zscore_x, index=wq.id_order)
zseries[0:5]

0   -0.336188
1    0.450441
2    0.879023
3   -0.825375
4    0.049370
Name: Donatns, dtype: float64

In [9]:
# Define z_i
zi = zseries.loc[adj_list.focal].values
zi[0:5]

array([-0.33618783, -0.33618783, -0.33618783, -0.33618783,  0.45044136])

In [10]:
# Define zj
zj = zseries.loc[adj_list.neighbor].values
zj[0:5]

array([-0.98050808, -0.54737594, -0.62328783, -0.76766521, -0.5709562 ])

In [11]:
(zi-zj)[0:5]

array([0.64432025, 0.21118812, 0.2871    , 0.43147738, 1.02139756])

In [54]:
#(zi-zj)**2

Multiply by spatial weights

In [71]:
sum(list(wq.weights.values()), [])

[0.25,
 0.25,
 0.25,
 0.25,
 0.16666666666666666,
 0.16666666666666666,
 0.16666666666666666,
 0.16666666666666666,
 0.16666666666666666,
 0.16666666666666666,
 0.16666666666666666,
 0.16666666666666666,
 0.16666666666666666,
 0.16666666666666666,
 0.16666666666666666,
 0.16666666666666666,
 0.25,
 0.25,
 0.25,
 0.25,
 0.3333333333333333,
 0.3333333333333333,
 0.3333333333333333,
 0.14285714285714285,
 0.14285714285714285,
 0.14285714285714285,
 0.14285714285714285,
 0.14285714285714285,
 0.14285714285714285,
 0.14285714285714285,
 0.3333333333333333,
 0.3333333333333333,
 0.3333333333333333,
 0.3333333333333333,
 0.3333333333333333,
 0.3333333333333333,
 0.2,
 0.2,
 0.2,
 0.2,
 0.2,
 0.2,
 0.2,
 0.2,
 0.2,
 0.2,
 0.14285714285714285,
 0.14285714285714285,
 0.14285714285714285,
 0.14285714285714285,
 0.14285714285714285,
 0.14285714285714285,
 0.14285714285714285,
 0.3333333333333333,
 0.3333333333333333,
 0.3333333333333333,
 0.3333333333333333,
 0.3333333333333333,
 0.333333333333333

In [69]:
# sum(list(wq.weights.values()), []) * (zi-zj)**2

array([1.03787147e-01, 1.11501050e-02, 2.06066031e-02, 4.65431831e-02,
       1.73875498e-01, 4.46029515e-02, 9.23120791e-02, 8.24316606e-02,
       5.62651775e-02, 1.10526659e-01, 1.42169229e-02, 3.76156268e-01,
       4.10732150e-06, 1.93558239e-03, 4.03998774e-01, 1.78982952e-01,
       1.91294659e-01, 2.36509947e-02, 8.62708652e-04, 9.85757159e-05,
       1.18701948e-01, 2.55059545e-01, 2.43611071e-01, 4.83050957e-03,
       7.87731699e-04, 4.06844863e-04, 1.19408148e-03, 1.19796877e-04,
       8.05514201e-03, 2.30508994e-02, 8.46988022e-02, 8.92059030e-02,
       6.92770507e-02, 1.31434288e-02, 2.24980817e-02, 9.36161309e-01,
       1.07838582e-03, 1.40354877e-03, 9.76022807e-03, 3.81830297e-03,
       2.45842445e-02, 6.96194416e-03, 7.88605727e-03, 6.43214247e-03,
       7.49723084e-04, 7.02692918e-01, 4.75473805e-03, 1.10898230e-02,
       1.62392001e-04, 2.40344868e-02, 3.46212974e-04, 8.38114231e-03,
       1.43264902e-02, 7.68348646e-03, 2.59916438e-04, 1.62670468e-02,
      

In [56]:
test = sum(list(wq.weights.values()), []) * (zi-zj)**2

In [57]:
test 

array([1.03787147e-01, 1.11501050e-02, 2.06066031e-02, 4.65431831e-02,
       1.73875498e-01, 4.46029515e-02, 9.23120791e-02, 8.24316606e-02,
       5.62651775e-02, 1.10526659e-01, 1.42169229e-02, 3.76156268e-01,
       4.10732150e-06, 1.93558239e-03, 4.03998774e-01, 1.78982952e-01,
       1.91294659e-01, 2.36509947e-02, 8.62708652e-04, 9.85757159e-05,
       1.18701948e-01, 2.55059545e-01, 2.43611071e-01, 4.83050957e-03,
       7.87731699e-04, 4.06844863e-04, 1.19408148e-03, 1.19796877e-04,
       8.05514201e-03, 2.30508994e-02, 8.46988022e-02, 8.92059030e-02,
       6.92770507e-02, 1.31434288e-02, 2.24980817e-02, 9.36161309e-01,
       1.07838582e-03, 1.40354877e-03, 9.76022807e-03, 3.81830297e-03,
       2.45842445e-02, 6.96194416e-03, 7.88605727e-03, 6.43214247e-03,
       7.49723084e-04, 7.02692918e-01, 4.75473805e-03, 1.10898230e-02,
       1.62392001e-04, 2.40344868e-02, 3.46212974e-04, 8.38114231e-03,
       1.43264902e-02, 7.68348646e-03, 2.59916438e-04, 1.62670468e-02,
      

In [58]:
# Create a df that uses the adjacency list focal values and the BBs counts
temp = pd.DataFrame(adj_list.focal.values, test).reset_index()
temp

Unnamed: 0,index,0
0,0.103787,0
1,0.011150,0
2,0.020607,0
3,0.046543,0
4,0.173875,1
...,...,...
415,0.025788,84
416,0.326386,84
417,0.003818,84
418,0.001947,84


In [59]:
# Temporarily rename the columns
temp.columns = ['Eij', 'ID']
temp = temp.groupby(by='ID').sum()

In [60]:
temp.Eij.values[0:5]

array([0.18208704, 0.56001403, 0.97529461, 0.21590694, 0.61737256])

# Start building function

In [168]:
import numpy as np
import pandas as pd
import warnings
from scipy import sparse
from scipy import stats
from sklearn.base import BaseEstimator
import libpysal as lp
from esda.crand import (
    crand as _crand_plus,
    njit as _njit,
    _prepare_univariate
)

PERMUTATIONS = 999

class Local_Geary(BaseEstimator):
    """Local Geary - Univariate"""

    def __init__(self, connectivity=None, permutations=PERMUTATIONS, n_jobs=1, 
                 keep_simulations=True, seed=None):
        """
        connectivity     : scipy.sparse matrix object
                           the connectivity structure describing
                           the relationships between observed units.
                           Need not be row-standardized.
        permutations     : int
                           number of random permutations for calculation of pseudo
                           p_values
        n_jobs           : int
                           Number of cores to be used in the conditional randomisation. If -1,
                           all available cores are used.    
        keep_simulations : Boolean
                           (default=True)
                           If True, the entire matrix of replications under the null 
                           is stored in memory and accessible; otherwise, replications 
                           are not saved
        seed             : None/int
                           Seed to ensure reproducibility of conditional randomizations. 
                           Must be set here, and not outside of the function, since numba 
                           does not correctly interpret external seeds 
                           nor numpy.random.RandomState instances.  
                           
        Attributes
        ----------
        localG          : numpy array
                          array containing the observed univariate
                          Local Geary values.
        p_sim           : numpy array
                          array containing the simulated
                          p-values for each unit.
        """

        self.connectivity = connectivity
        self.permutations = permutations
        self.n_jobs = n_jobs
        self.keep_simulations = keep_simulations
        self.seed = seed

    def fit(self, x, n_jobs=1, permutations=permutations):
        """
        Arguments
        ---------
        x                : numpy.ndarray
                           array containing continuous data

        Returns
        -------
        the fitted estimator.

        Notes
        -----
        Technical details and derivations can be found in :cite:`Anselin1995`.

        Examples
        --------
        Guerry data replication GeoDa tutorial
        >>> import libpysal
        >>> import geopandas as gpd
        >>> guerry = lp.examples.load_example('Guerry')
        >>> guerry_ds = gpd.read_file(guerry.get_path('Guerry.shp'))
        >>> w = libpysal.weights.Queen.from_dataframe(guerry_ds)
        """
        x = np.asarray(x).flatten()

        w = self.connectivity
        w.transform = 'r'

        self.localG = self._statistic(x, w)

        if self.permutations:
            self.p_sim, self.rlocalG = _crand_plus(
                z=(x - np.mean(x))/np.std(x), 
                w=w, 
                observed=self.localG,
                permutations=permutations, 
                keep=True, 
                n_jobs=n_jobs,
                stat_func=_local_geary
            )
            
        del (self.keep_simulations, self.n_jobs, 
             self.permutations, self.seed, self.rlocalG,
             self.connectivity)

        return self

    @staticmethod
    def _statistic(x, w):
        # Caclulate z-scores for x
        zscore_x = (x - np.mean(x))/np.std(x)
        # Create focal (xi) and neighbor (zi) values
        adj_list = w.to_adjlist(remove_symmetric=False)
        zseries = pd.Series(zscore_x, index=wq.id_order)
        zi = zseries.loc[adj_list.focal].values
        zj = zseries.loc[adj_list.neighbor].values
        # Carry out local Geary calculation
        gs = sum(list(wq.weights.values()), []) * (zi-zj)**2
        # Reorganize data
        adj_list_gs = pd.DataFrame(adj_list.focal.values, gs).reset_index()
        adj_list_gs.columns = ['gs', 'ID']
        adj_list_gs = adj_list_gs.groupby(by='ID').sum()
        
        localG = adj_list_gs.gs.values
        
        return (localG)

# --------------------------------------------------------------
# Conditional Randomization Function Implementations
# --------------------------------------------------------------

# Note: does not using the scaling parameter

@_njit(fastmath=True)
def _local_geary(i, z, permuted_ids, weights_i, scaling):
    zi, zrand = _prepare_univariate(i, z, permuted_ids, weights_i)
    return (zi-zrand)**2 @ weights_i

In [169]:
functest = Local_Geary(connectivity=wq).fit(x)

In [170]:
functest.localG

array([1.82087039e-01, 5.60014026e-01, 9.75294606e-01, 2.15906938e-01,
       6.17372564e-01, 3.84450059e-02, 2.43181756e-01, 9.71802819e-01,
       4.06447101e-02, 7.24722785e-01, 6.30952854e-02, 2.42104497e-02,
       1.59496916e+01, 9.29326006e-01, 9.65188634e-01, 1.32383286e+00,
       3.31775497e-01, 2.99446505e+00, 9.43946814e-01, 2.99570159e+00,
       3.66702291e-01, 2.09592365e+00, 1.46515861e+00, 1.82118455e-01,
       3.10216680e+00, 5.43063937e-01, 5.74532559e+00, 4.79160197e-02,
       1.58993089e-01, 7.18327253e-01, 1.24297849e+00, 8.72629331e-02,
       7.52809650e-01, 4.56515485e-01, 3.86766562e-01, 1.17632604e-01,
       6.90884685e-01, 2.87206102e+00, 4.10455112e-01, 4.04349959e-01,
       1.14211758e-01, 9.59519953e-01, 3.51347976e-01, 7.30240974e-01,
       4.40370938e-01, 7.20360356e-02, 1.66241706e+00, 5.83258909e+00,
       2.30332507e-01, 4.38369688e-01, 8.41461470e-01, 1.52959486e+00,
       4.32157479e-02, 2.08325903e+00, 1.19722984e+00, 1.28169257e+00,
      

In [172]:
functest.p_sim

array([0.17, 0.05, 0.05, 0.14, 0.48, 0.01, 0.11, 0.45, 0.02, 0.22, 0.01,
       0.03, 0.2 , 0.46, 0.01, 0.03, 0.01, 0.18, 0.22, 0.1 , 0.01, 0.28,
       0.48, 0.09, 0.07, 0.34, 0.05, 0.03, 0.08, 0.25, 0.28, 0.01, 0.44,
       0.01, 0.14, 0.12, 0.24, 0.19, 0.27, 0.14, 0.07, 0.44, 0.17, 0.48,
       0.23, 0.01, 0.26, 0.03, 0.15, 0.2 , 0.23, 0.28, 0.03, 0.05, 0.5 ,
       0.29, 0.05, 0.26, 0.04, 0.02, 0.3 , 0.44, 0.41, 0.15, 0.44, 0.2 ,
       0.09, 0.08, 0.25, 0.15, 0.08, 0.2 , 0.02, 0.2 , 0.01, 0.13, 0.02,
       0.02, 0.03, 0.01, 0.02, 0.48, 0.01, 0.32, 0.25])

## Start working on inference (note: now implemented above)

### 'New' `_crand()` engine

In [23]:
from esda.crand import (
    crand as _crand_plus,
    njit as _njit,
    _prepare_univariate
)

In [125]:
@_njit(fastmath=True)
def _local_geary(i, z, permuted_ids, weights_i, scaling):
    zi, zrand = _prepare_univariate(i, z, permuted_ids, weights_i)
    return (zi-zrand)**2 @ weights_i

In [126]:
p_sim, rlocalG = _crand_plus(z=np.array(zscore_x), w=wq, observed=np.array(functest.localG), 
            permutations=999, keep=True, n_jobs=1, 
            stat_func=_local_geary)

print(p_sim)
print(rlocalG)

[0.187 0.004 0.061 0.097 0.269 0.387 0.164 0.086 0.02  0.397 0.13  0.028
 0.005 0.183 0.002 0.009 0.423 0.009 0.158 0.092 0.082 0.304 0.421 0.125
 0.24  0.224 0.008 0.422 0.423 0.464 0.435 0.469 0.228 0.002 0.034 0.17
 0.214 0.239 0.143 0.319 0.047 0.406 0.113 0.36  0.203 0.105 0.439 0.091
 0.11  0.175 0.194 0.484 0.087 0.01  0.171 0.377 0.018 0.117 0.135 0.019
 0.169 0.219 0.068 0.062 0.027 0.261 0.254 0.211 0.42  0.119 0.079 0.043
 0.005 0.142 0.001 0.076 0.268 0.334 0.012 0.003 0.053 0.036 0.001 0.397
 0.203]
[[1.15089263 5.1092341  0.32436992 ... 0.31646013 0.56422167 0.19564817]
 [0.90587301 2.54534839 0.41524965 ... 0.88118824 0.79952456 1.16455637]
 [1.47204902 2.21285296 0.83349888 ... 1.46890613 1.37675223 2.06251861]
 ...
 [3.96453866 3.82410249 2.57150961 ... 1.87942791 2.1494296  3.46309228]
 [3.15985923 5.14064783 1.80350159 ... 1.43142323 0.92044944 3.62316366]
 [3.67497967 1.2677366  1.75129115 ... 1.59674028 0.28077811 4.20963876]]


# Multivariate Local Geary

$$ c_i = \sum_{h=1}^m \sum_j w_{ij} (x_{hi} - x_{hj})^2 $$

Load in the sample data

In [26]:
import libpysal as lp
import geopandas as gpd
from scipy import stats
import pandas as pd
guerry = lp.examples.load_example('Guerry')
guerry_ds = gpd.read_file(guerry.get_path('Guerry.shp'))

In [27]:
wq = lp.weights.Queen.from_dataframe(guerry_ds)

In [265]:
x = guerry_ds['Donatns']
y = guerry_ds['Suicids']

In [29]:
variables = [x,y]
variables

[0      5098
 1      8901
 2     10973
 3      2733
 4      6962
       ...  
 80    14035
 81     8922
 82    13817
 83     4040
 84     4276
 Name: Donatns, Length: 85, dtype: int64,
 0      35039
 1      12831
 2     114121
 3      14238
 4      16171
        ...  
 80     67963
 81     21851
 82     33497
 83     33029
 84     12789
 Name: Suicids, Length: 85, dtype: int64]

Standardize each variable

In [30]:
from scipy import stats
zseries = [stats.zscore(i) for i in variables]

Build the adj lists

In [31]:
adj_list = wq.to_adjlist(remove_symmetric=False)

In [32]:
# The zseries
zseries = [pd.Series(i, index=wq.id_order) for i in zseries]

In [33]:
low_extreme = (permutations - larger) < larger
larger[low_extreme] = permutations - larger[low_extreme]
p_sim = (larger + 1.0) / (permutations + 1.0)
p_sim

array([0.14, 0.21, 0.41, 0.14, 0.32, 0.02, 0.12, 0.43, 0.02, 0.35, 0.01,
       0.04, 0.03, 0.34, 0.44, 0.47, 0.13, 0.13, 0.37, 0.17, 0.13, 0.27,
       0.39, 0.06, 0.24, 0.22, 0.08, 0.01, 0.05, 0.3 , 0.49, 0.07, 0.24,
       0.12, 0.15, 0.03, 0.32, 0.15, 0.09, 0.08, 0.04, 0.42, 0.13, 0.27,
       0.17, 0.02, 0.32, 0.07, 0.07, 0.16, 0.4 , 0.42, 0.02, 0.35, 0.4 ,
       0.42, 0.12, 0.11, 0.22, 0.03, 0.38, 0.42, 0.27, 0.09, 0.16, 0.31,
       0.13, 0.19, 0.27, 0.19, 0.07, 0.24, 0.02, 0.22, 0.28, 0.08, 0.03,
       0.04, 0.05, 0.03, 0.46, 0.4 , 0.39, 0.48, 0.2 ])

In [34]:
zseries

[0    -0.336188
 1     0.450441
 2     0.879023
 3    -0.825375
 4     0.049370
         ...   
 80    1.512380
 81    0.454785
 82    1.467288
 83   -0.555029
 84   -0.506214
 Length: 85, dtype: float64,
 0    -0.047195
 1    -0.756433
 2     2.478379
 3    -0.711499
 4    -0.649766
         ...   
 80    1.004270
 81   -0.468369
 82   -0.096441
 83   -0.111387
 84   -0.757774
 Length: 85, dtype: float64]

In [35]:
# The focal values
focal = [zseries[i].loc[adj_list.focal].values for
         i in range(len(variables))]
# The neighbor values
neighbor = [zseries[i].loc[adj_list.neighbor].values for
            i in range(len(variables))]

In [36]:
gs = sum(list(wq.weights.values()), []) * (np.array(focal) - np.array(neighbor))**2

In [37]:
temp = pd.DataFrame(gs).T

In [38]:
temp['ID'] = adj_list.focal.values

In [39]:
adj_list_gs = temp.groupby(by='ID').sum()
adj_list_gs.head()

Unnamed: 0_level_0,0,1
ID,Unnamed: 1_level_1,Unnamed: 2_level_1
0,0.728348,0.5022
1,3.360084,0.28263
2,5.851768,29.604873
3,0.863628,0.121489
4,1.852118,0.475642


In [40]:
k = len(variables)
k

2

In [41]:
adj_list_gs.sum(axis=1)/k

ID
0      0.615274
1      1.821357
2     17.728320
3      0.492558
4      1.163880
        ...    
80     6.629720
81     3.154587
82     3.872022
83     4.307689
84     1.080905
Length: 85, dtype: float64

In [175]:
import numpy as np
import pandas as pd
import warnings
from scipy import sparse
from scipy import stats
from sklearn.base import BaseEstimator
import libpysal as lp

PERMUTATIONS=999

class Local_Geary_MV(BaseEstimator):
    """Local Geary - Multivariate"""

    def __init__(self, connectivity=None, permutations=PERMUTATIONS, n_jobs=1, 
                 keep_simulations=True, seed=None):
        """
        connectivity     : scipy.sparse matrix object
                           the connectivity structure describing
                           the relationships between observed units.
                           Need not be row-standardized.
        permutations     : int
                           number of random permutations for calculation of pseudo
                           p_values
        n_jobs           : int
                           Number of cores to be used in the conditional randomisation. If -1,
                           all available cores are used.    
        keep_simulations : Boolean
                           (default=True)
                           If True, the entire matrix of replications under the null 
                           is stored in memory and accessible; otherwise, replications 
                           are not saved
        seed             : None/int
                           Seed to ensure reproducibility of conditional randomizations. 
                           Must be set here, and not outside of the function, since numba 
                           does not correctly interpret external seeds 
                           nor numpy.random.RandomState instances.  
                           
        Attributes
        ----------
        localG          : numpy array
                          array containing the observed multivariate
                          Local Geary values.
        p_sim           : numpy array
                          array containing the simulated
                          p-values for each unit.
        """

        self.connectivity = connectivity
        self.permutations = permutations
        self.n_jobs = n_jobs
        self.keep_simulations = keep_simulations
        self.seed = seed

    def fit(self, variables, n_jobs=1, permutations=permutations):
        """
        Arguments
        ---------
        variables        : numpy.ndarray
                           array containing continuous data

        Returns
        -------
        the fitted estimator.

        Notes
        -----
        Technical details and derivations can be found in :cite:`Anselin1995`.

        Examples
        --------
        Guerry data replication GeoDa tutorial
        >>> import libpysal
        >>> import geopandas as gpd
        >>> guerry = lp.examples.load_example('Guerry')
        >>> guerry_ds = gpd.read_file(guerry.get_path('Guerry.shp'))
        >>> w = libpysal.weights.Queen.from_dataframe(guerry_ds)
        """
        self.variables = np.array(variables, dtype='float')

        w = self.connectivity
        w.transform = 'r'

        self.localG = self._statistic(variables, w)

        if permutations:
            self.p_sim, self.rlocalG = _crand_plus(
                z=(x - np.mean(x))/np.std(x), 
                w=w, 
                observed=self.localG,
                permutations=permutations, 
                keep=True, 
                n_jobs=n_jobs,
                stat_func=_local_geary
            )

        return self

    @staticmethod
    def _statistic(variables, w):
        # Caclulate z-scores for input variables
        zseries = [stats.zscore(i) for i in variables]
        # Define denominator adjustment
        k = len(variables)
        # Create focal and neighbor values
        adj_list = w.to_adjlist(remove_symmetric=False)
        zseries = [pd.Series(i, index=wq.id_order) for i in zseries]
        focal = [zseries[i].loc[adj_list.focal].values for
                 i in range(len(variables))]
        neighbor = [zseries[i].loc[adj_list.neighbor].values for
                    i in range(len(variables))]
        # Carry out local Geary calculation
        gs = sum(list(wq.weights.values()), []) * \
        (np.array(focal) - np.array(neighbor))**2
        # Reorganize data
        temp = pd.DataFrame(gs).T
        temp['ID'] = adj_list.focal.values
        adj_list_gs = temp.groupby(by='ID').sum()
        localG = adj_list_gs.sum(axis=1)/k
        
        return (localG)

# --------------------------------------------------------------
# Conditional Randomization Function Implementations
# --------------------------------------------------------------

# Note: does not using the scaling parameter

@_njit(fastmath=True)
def _local_geary(i, z, permuted_ids, weights_i, scaling):
    zi, zrand = _prepare_univariate(i, z, permuted_ids, weights_i)
    return (zi-zrand)**2 @ weights_i    

In [176]:
functest = Local_Geary_MV(connectivity=wq).fit([x,y])
functest.localG

ID
0     0.153819
1     0.303560
2     2.954720
3     0.123140
4     0.387960
        ...   
80    1.657430
81    0.525764
82    0.645337
83    0.717948
84    0.216181
Length: 85, dtype: float64

## Working on inference

In [192]:
from esda.crand import (
    crand as _crand_plus,
    njit as _njit,
    _prepare_univariate
)

In [193]:
@_njit(fastmath=True)
def _local_geary(i, z, permuted_ids, weights_i, scaling):
    zi, zrand = _prepare_univariate(i, z, permuted_ids, weights_i)
    return (zi-zrand)**2 @ weights_i

In [287]:
p_sim, rlocalG = _crand_plus(z=np.array(x, dtype='float'), w=wq, observed=np.array(functest.localG), 
            permutations=999, keep=True, n_jobs=1, 
            stat_func=_local_geary)

print(p_sim)
print(rlocalG)

[0.001 0.001 0.001 0.001 0.001 0.001 0.001 0.001 0.001 0.001 0.001 0.001
 0.001 0.001 0.001 0.001 0.001 0.001 0.001 0.001 0.001 0.001 0.001 0.001
 0.001 0.001 0.001 0.001 0.001 0.001 0.001 0.001 0.001 0.001 0.001 0.001
 0.001 0.001 0.001 0.001 0.001 0.001 0.001 0.001 0.001 0.001 0.001 0.001
 0.001 0.001 0.001 0.001 0.001 0.001 0.001 0.001 0.001 0.001 0.001 0.001
 0.001 0.001 0.001 0.001 0.001 0.001 0.001 0.001 0.001 0.001 0.001 0.001
 0.001 0.001 0.001 0.001 0.001 0.001 0.001 0.001 0.001 0.001 0.001 0.001
 0.001]
[[25781080.5        38392599.5        25869371.         ...
  19836670.         20170723.75       26142676.        ]
 [59517338.49999999 20321718.66666667 21480082.83333333 ...
  25236883.5        26373085.66666666 28622690.33333333]
 [62182621.16666666 26992177.33333333 35549653.5        ...
  42533248.83333333 44516899.         44043895.66666666]
 ...
 [99150639.16666666 64239364.99999999 63864001.16666666 ...
  76826485.99999999 49562698.16666667 61235350.66666666]
 [  4029

In [472]:
result = zip(x,y)
result = list(result)
result = np.array(result)
result[0:5]

array([[  5098,  35039],
       [  8901,  12831],
       [ 10973, 114121],
       [  2733,  14238],
       [  6962,  16171]])

In [436]:
# Get length based on first variable
n = len(result)
joins = np.zeros((n, permutations))
n_1 = n - 1
prange = list(range(permutations))
k = wq.max_neighbors + 1
nn = n - 1
rids = np.array([np.random.permutation(nn)[0:k] for i in prange])
ids = np.arange(wq.n)
ido = wq.id_order
w = [wq.weights[ido[i]] for i in ids]
wc = [wq.cardinalities[ido[i]] for i in ids]


In [468]:
for i in range(wq.n):
    idsi = ids[ids != i]
    np.random.shuffle(idsi)
    tmp = result[idsi[rids[:, 0:wc[i]]]]
    # Get mean of neighbors?
    tmp2 = np.mean(tmp[i][:,1])
    #joins[i] = result[i] * (w[i] * tmp).sum(1)
    joins[i] = ((result[i,0] - tmp2)**2 * wq.weights[i][0]).sum(0)
rjoins = joins

In [469]:
rjoins

array([[4.12079850e+08, 4.12079850e+08, 4.12079850e+08, ...,
        4.12079850e+08, 4.12079850e+08, 4.12079850e+08],
       [2.67106848e+08, 2.67106848e+08, 2.67106848e+08, ...,
        2.67106848e+08, 2.67106848e+08, 2.67106848e+08],
       [2.74507921e+08, 2.74507921e+08, 2.74507921e+08, ...,
        2.74507921e+08, 2.74507921e+08, 2.74507921e+08],
       ...,
       [1.04265301e+08, 1.04265301e+08, 1.04265301e+08, ...,
        1.04265301e+08, 1.04265301e+08, 1.04265301e+08],
       [3.91383473e+07, 3.91383473e+07, 3.91383473e+07, ...,
        3.91383473e+07, 3.91383473e+07, 3.91383473e+07],
       [1.06382239e+08, 1.06382239e+08, 1.06382239e+08, ...,
        1.06382239e+08, 1.06382239e+08, 1.06382239e+08]])