In [1]:
import yaml
import pandas as pd
import os
import sys
import pickle

from shapely.geometry import Polygon
import numpy as np
import matplotlib.pyplot as plt
from haversine import inverse_haversine, Direction, Unit

from utils import get_grid_index

import folium
import geohash

### Create Grids

In [2]:
# bounding box for NYC
han_min_lat = 40.7085
han_min_lon = -74.0214
han_max_lat = 40.8344
han_max_lon = -73.9146

cell_size = 500

grid = {}

south_west = (han_min_lat, han_min_lon)
north_west = inverse_haversine(south_west, cell_size, Direction.NORTH, unit=Unit.METERS)
south_east = inverse_haversine(south_west, cell_size, Direction.EAST, unit=Unit.METERS)
north_east = (north_west[0], south_east[1])
cell = [south_west, north_west, north_east, south_east, south_west]

print(cell)
row_count, col_count = 0, 0
while cell[0][0] < han_max_lat:
    row_start = cell
    row_count += 1
    while cell[0][1] < han_max_lon:
        col_count +=1
        grid[len(grid)] = Polygon([[x[1], x[0]] for x in cell])
        # north_west = last north_east
        north_west = cell[2]
        # south_west = last south_east
        south_west = cell[3]

        north_east = inverse_haversine(north_west, cell_size, Direction.EAST, unit=Unit.METERS)
        south_east = inverse_haversine(south_west, cell_size, Direction.EAST, unit=Unit.METERS)
        cell = [south_west, north_west, north_east, south_east, south_west]

    # south_west = last north_west
    south_west = row_start[1]
    # south_east = last north_east
    south_east = row_start[2]
    north_west = inverse_haversine(south_west, cell_size, Direction.NORTH, unit=Unit.METERS)
    north_east = inverse_haversine(south_east, cell_size, Direction.NORTH, unit=Unit.METERS)
    cell = [south_west, north_west, north_east, south_east, south_west]
print(row_count, col_count // row_count)

[(40.7085, -74.0214), (40.712996601818624, -74.0214), (40.712996601818624, -74.01546810178247), (40.708499848185404, -74.01546810178247), (40.7085, -74.0214)]
28 18


In [3]:
south_west = (han_min_lat, han_min_lon)
north_west = inverse_haversine(south_west, 8000, Direction.NORTH, unit=Unit.METERS)
south_east = inverse_haversine(south_west, 14000, Direction.EAST, unit=Unit.METERS)
north_east = (north_west[0], south_east[1])
print(north_east)

(40.78044562909796, -73.85530704756502)


In [4]:
print(len(grid))

508


### Compute Geohash for each grid

In [5]:
idx_to_geohash = {}

for i, g in grid.items():
    lon = g.centroid.x
    lat = g.centroid.y
    idx_to_geohash[i] = [geohash.encode(lon, lat, precision=2), geohash.encode(lon, lat, precision=5), geohash.encode(lon, lat, precision=6), 
                         geohash.encode(lon, lat, precision=7)]
idx_to_geohash

{0: ['hf', 'hfufy', 'hfufym', 'hfufymk'],
 1: ['hf', 'hfufy', 'hfufyq', 'hfufyqk'],
 2: ['hf', 'hfufy', 'hfufyr', 'hfufyrs'],
 3: ['hf', 'hfugn', 'hfugn2', 'hfugn2s'],
 4: ['hf', 'hfugn', 'hfugn3', 'hfugn3s'],
 5: ['hf', 'hfugn', 'hfugn6', 'hfugn6u'],
 6: ['hf', 'hfugn', 'hfugn7', 'hfugn7u'],
 7: ['hf', 'hfugn', 'hfugnk', 'hfugnku'],
 8: ['hf', 'hfugn', 'hfugnm', 'hfugnmu'],
 9: ['hf', 'hfugn', 'hfugnr', 'hfugnrh'],
 10: ['hf', 'hfugq', 'hfugq2', 'hfugq2h'],
 11: ['hf', 'hfugq', 'hfugq3', 'hfugq3h'],
 12: ['hf', 'hfugq', 'hfugq6', 'hfugq6k'],
 13: ['hf', 'hfugq', 'hfugq7', 'hfugq7k'],
 14: ['hf', 'hfugq', 'hfugqk', 'hfugqkk'],
 15: ['hf', 'hfugq', 'hfugqm', 'hfugqms'],
 16: ['hf', 'hfugq', 'hfugqq', 'hfugqqs'],
 17: ['hf', 'hfugq', 'hfugqr', 'hfugqrs'],
 18: ['hf', 'hfugw', 'hfugw2', 'hfugw2u'],
 19: ['hf', 'hfufy', 'hfufym', 'hfufymr'],
 20: ['hf', 'hfufy', 'hfufyq', 'hfufyqr'],
 21: ['hf', 'hfufy', 'hfufyr', 'hfufyrx'],
 22: ['hf', 'hfugn', 'hfugn2', 'hfugn2x'],
 23: ['hf', 'hfugn', 

In [6]:
#count unique
print(len(set(idx_to_geohash.values())))

TypeError: unhashable type: 'list'

### Swap rows to align with ST Image

In [7]:
idx_to_geohash_swap = {}
H, W = 28, 18
for i in range(H):
    for j in range(W):
        
        # bottom row of ST image
        first_row_index = i*W + j
        
        # top row of ST image
        second_row_index = H*W - (W*(i+1)) + j

        #print(first_row_index, second_row_index)

        # Swap values between rows
        idx_to_geohash_swap[first_row_index], idx_to_geohash_swap[second_row_index] = idx_to_geohash[second_row_index], idx_to_geohash[first_row_index]
idx_to_geohash_swap = dict(sorted(idx_to_geohash_swap.items()))
idx_to_geohash_swap

{0: ['hf', 'hfv53', 'hfv53h', 'hfv53h3'],
 1: ['hf', 'hfv53', 'hfv53j', 'hfv53j9'],
 2: ['hf', 'hfv53', 'hfv53n', 'hfv53n9'],
 3: ['hf', 'hfv53', 'hfv53p', 'hfv53p9'],
 4: ['hf', 'hfv4c', 'hfv4cj', 'hfv4cjm'],
 5: ['hf', 'hfv4c', 'hfv4cn', 'hfv4cnm'],
 6: ['hf', 'hfv4c', 'hfv4cp', 'hfv4cpt'],
 7: ['hf', 'hfv51', 'hfv510', 'hfv510t'],
 8: ['hf', 'hfv51', 'hfv511', 'hfv511t'],
 9: ['hf', 'hfv51', 'hfv514', 'hfv514v'],
 10: ['hf', 'hfv51', 'hfv515', 'hfv515v'],
 11: ['hf', 'hfv51', 'hfv51h', 'hfv51hv'],
 12: ['hf', 'hfv51', 'hfv51n', 'hfv51nj'],
 13: ['hf', 'hfv51', 'hfv51p', 'hfv51pj'],
 14: ['hf', 'hfv53', 'hfv530', 'hfv530j'],
 15: ['hf', 'hfv53', 'hfv531', 'hfv531m'],
 16: ['hf', 'hfv53', 'hfv534', 'hfv534m'],
 17: ['hf', 'hfv53', 'hfv535', 'hfv535m'],
 18: ['hf', 'hfv52', 'hfv52u', 'hfv52uq'],
 19: ['hf', 'hfv52', 'hfv52v', 'hfv52vw'],
 20: ['hf', 'hfv52', 'hfv52y', 'hfv52yw'],
 21: ['hf', 'hfv52', 'hfv52z', 'hfv52zw'],
 22: ['hf', 'hfv4c', 'hfv4cj', 'hfv4cj3'],
 23: ['hf', 'hfv4c', 

In [8]:
with open('/location/NYC_geohash.pkl', 'wb') as f:  # open a text file
    pickle.dump(idx_to_geohash_swap, f)  # serialize the list
f.close()

### Transform geohash string to number

In [None]:
geohash_to_number = [dict() for _ in range(4)]
number_to_geohash = [dict() for _ in range(4)]

sets = [set() for _ in range(4)]

for k, v in idx_to_geohash_swap.items():
    for i, val in enumerate(v):
        sets[i].add(val)

len_sets = []

for h, s in enumerate(sets):
    s = sorted(s)
    for i, val in enumerate(s):
        number_to_geohash[h][i] = val
        geohash_to_number[h][val] = i
    #idx += len(s)
    len_sets.append(len(s))
    
    
print(len_sets)

print(number_to_geohash[0], number_to_geohash[1], number_to_geohash[2], number_to_geohash[3])


### Compute index of geohas for each grid 

In [None]:
grid_geohash_to_number = {}

for k, v in idx_to_geohash_swap.items():
    grid_geohash_to_number[k] = []
    for i, val in enumerate(v):
        grid_geohash_to_number[k].append(geohash_to_number[i][val])
        
grid_geohash_to_number

In [None]:
with open('/location/NYC_geohash_to_number.pkl', 'wb') as f:  # open a text file
    pickle.dump(grid_geohash_to_number, f)  # serialize the list
f.close()