# Nearest neighbors
[Index](0-index.ipynb)

Here we compute the matrix of distances between communities, $[d_{ab}]$.

## Imports and global variables

In [None]:
# Load the autoreload extension and use mode 2, so imported modules are
# reloaded before each cell runs (edits to local helper code are picked up).
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
from pathlib import Path
import sys
import numpy as np
import pandas as pd
import geopandas as gpd
import datetime

import matplotlib.pyplot as plt
import matplotlib.colors as mco
import matplotlib.gridspec as mgs
import matplotlib.ticker as ticker
import matplotlib.cm as cm
from matplotlib import animation
plt.rcParams['svg.fonttype'] = 'none'

from IPython.display import HTML
from IPython.display import Image

In [None]:
# Add the sibling ../code directory to the import path so that the local
# `functions` module can be imported.
sys.path.append('../code')
# NOTE(review): neither helper is called in the visible part of this notebook.
from functions import show_image, geo_dist

In [None]:
# Directory holding the analysis results; it has to exist already.
resdir = Path('../results/')
results_found = resdir.is_dir()
if not results_found:
    raise ValueError('No results directory!')

In [None]:
# HDF5 store shared by the analysis, and the compression settings used
# every time it is opened in this notebook.
resfile = resdir / 'safegraph_analysis.hdf5'
complevel = 7
complib = 'zlib'

# Report how many entries the store currently holds.
with pd.HDFStore(resfile, complevel=complevel, complib=complib) as store:
    n_entries = len(store.keys())
    print(f"File {resfile.stem} has {n_entries} entries.")

In [None]:
# strftime-style date pattern (year-month-day).
# NOTE(review): not referenced in the visible part of this notebook.
tfmt = '%Y-%m-%d'

In [None]:
# Output directory for this notebook's figures, created on demand
# (mkdir with exist_ok=True is a no-op when the directory is already there).
figdir = Path('../figures') / '4-distances'
figdir.mkdir(parents=True, exist_ok=True)

# Every figure is written once per extension listed here.
exts = ['.png', '.svg']

## Functions
We use the [formula](https://en.wikipedia.org/wiki/Geographical_distance) for the 'Spherical Earth projected to a plane':
\begin{equation}
d(M_1,M_2) = R \sqrt{\Delta \phi^2 + \left(\cos{\bar{\phi}} \, \Delta \lambda\right)^2},
\end{equation}
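where $\phi$ is the latitude and $\lambda$ is the longitude (both in radians), $\Delta \phi = \phi_1 - \phi_2$, $\Delta \lambda = \lambda_1 - \lambda_2$, and $\bar{\phi} = (\phi_1 + \phi_2)/2$ is the mean latitude. $R = 6{,}371.009 \, \mathrm{km}$ is the radius of the Earth.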

Here `X` is the longitude and `Y` is the latitude (see this [notebook](06-cross_compare_cssegi.ipynb)).

In [None]:
# Earth radius in meters (i.e. 6,371.009 km); distances computed with it are in meters.
R_earth = 6.371009e6

## Clusters

### Load clusters

In [None]:
# Read the clusters table back from the results store.
key = "/clustering/clusters"
with pd.HDFStore(resfile, complevel=complevel, complib=complib) as store:
    clusters = store.get(key)

# Number of clusters (one row per cluster).
N = clusters.shape[0]
print(f"N = {N}")
clusters

## Compute distance matrix using Spherical Earth projected to a plane formula

In [None]:
# Cluster coordinates in degrees: 'X' is the longitude and 'Y' the latitude.
XY = clusters.loc[:, ['X','Y']].astype('float64').to_numpy()

# Convert both coordinates to radians.
Long, Lat = XY.T * np.pi/180.

# Pairwise latitude differences and pairwise mean latitudes:
# with indexing='ij', entry [a, b] combines cluster a with cluster b.
LLat1, LLat2 = np.meshgrid(Lat, Lat, indexing='ij')
dLLat = LLat1 - LLat2
LLat_bar = 0.5*(LLat1 + LLat2)

# Pairwise longitude differences.
# NOTE(review): differences are not wrapped at the antimeridian (+/-180 deg);
# confirm that no pair of clusters straddles it.
LLong1, LLong2 = np.meshgrid(Long, Long, indexing='ij')
dLLong = LLong1 - LLong2

# 'Spherical Earth projected to a plane':
#     d = R * sqrt(dphi^2 + (cos(phi_bar) * dlambda)^2)
# The cosine rescales the longitude difference *before* squaring, so it must
# sit inside the square (previously only dlambda was squared).
D = R_earth * np.sqrt(dLLat**2 + (np.cos(LLat_bar)*dLLong)**2)

In [None]:
# Label rows and columns of the distance matrix with the cluster index.
labels = clusters.index
df_dist = pd.DataFrame(D, index=labels, columns=labels)

# Persist the labelled matrix in the results store.
key = "/clustering/distances"
with pd.HDFStore(resfile, complevel=complevel, complib=complib) as store:
    store.put(key, df_dist)

Compare with [equirectangular projection](https://en.wikipedia.org/wiki/Equirectangular_projection) distances

In [None]:
# (latitude, longitude) pairs in degrees, cast to float64 like the
# coordinates used for the main distance matrix.
P = clusters.loc[:, ['Y', 'X']].astype('float64').to_numpy()

# Gram matrix of the coordinate vectors: PP[i, j] = P[i] . P[j]
PP = np.einsum('ik,jk->ij',P,P)
U = np.ones((N,1))
Delta = PP.diagonal().reshape(-1,1)   # squared norms |P[i]|^2 as a column

# Squared Euclidean distance in degree space:
#     |P[i] - P[j]|^2 = |P[i]|^2 + |P[j]|^2 - 2 P[i].P[j]
Dequi = (U*Delta.T) + (Delta*U.T) - 2*PP

# Round-off in the expansion above can leave tiny negative values for
# (nearly) coincident points; clip them so the square root cannot produce NaN.
Dequi = np.maximum(Dequi, 0.)

# Degrees -> radians -> meters.
Dequi = R_earth * np.pi/180.* np.sqrt(Dequi)

In [None]:
# Relative discrepancy between the two distance matrices (default matrix
# norm of the difference, normalised by the norm of D).
diff_norm = np.linalg.norm(D-Dequi)
ref_norm = np.linalg.norm(D)
err = diff_norm/ref_norm
print("Equirectangular projection error = {:.1f} %".format(err*100))

## Some statistics on the actual separation

In [None]:
# Read the stored distance matrix back from the results store.
key = "/clustering/distances"
with pd.HDFStore(resfile, complevel=complevel, complib=complib) as store:
    df_dist = store.get(key)

df_dist

In [None]:
# Neighbor ranks 0..N-1. These are *positions* in each sorted row, so they are
# built with arange rather than taken from the index labels of df_dist, which
# only coincide with positions when the index happens to be 0..N-1.
Ns = np.arange(len(df_dist))
dists = df_dist.to_numpy()

# Sort every row in increasing order: column k then holds the distance to the
# k-th nearest neighbor, column 0 being the smallest entry of the row.
dists_sorted = np.sort(dists, axis=-1)

# median distance of 1st, 2nd, 3rd...etc neighbor (column 0 is skipped)
dist_med = np.median(dists_sorted[:, 1:], axis=0)

In [None]:
# parameters
# Figure parameters.
figsize = (6,4.5)
dpi = 300
ms=2
lw=1

fig, ax = plt.subplots(facecolor='w', figsize=figsize)

# Median distance (converted from m to km) against the neighbor rank.
ax.plot(Ns[1:], dist_med/1.0e3, '-', ms=ms, color='darkblue')

# Axes are kept linear; log scaling was tried and left disabled.
ax.set_xlim(Ns[1],None)
ax.set_xlabel("kth nearest neighbor", fontsize="medium")
ax.set_ylabel("median distance (km)", fontsize="medium")

# Drop the top/right frame, keep ticks and labels on the left and bottom.
for side in ('right', 'top'):
    ax.spines[side].set_visible(False)
ax.tick_params(axis='both', length=4,
               left=True, labelleft=True, bottom=True, labelbottom=True)
fig.tight_layout()

# Write the figure once per requested extension.
fname = 'median_distance_nearest_neighbors'
for ext in exts:
    filepath = figdir / f"{fname}{ext}"
    fig.savefig(filepath, bbox_inches='tight', pad_inches=0, dpi=dpi)
    print("Written file: {:s}".format(str(filepath)))

# Release the figure; it is displayed from the saved PNG afterwards.
fig.clf()
plt.close('all')

In [None]:
# Display the PNG version of the figure that was just written.
filepath = figdir / f"{fname}.png"
display_width = 4./3*360
Image(filename=filepath, width=display_width)