### Install Pygeohash library

This library provides functions for computing geohashes.


In [1]:
!pip install pygeohash

Collecting pygeohash
  Downloading https://files.pythonhosted.org/packages/2c/33/c912fa4476cedcd3ed9cd25c44c163583b92d319860438e6b632f7f42d0c/pygeohash-1.2.0.tar.gz
Building wheels for collected packages: pygeohash
  Building wheel for pygeohash (setup.py) ... done
  Created wheel for pygeohash: filename=pygeohash-1.2.0-py2.py3-none-any.whl size=6162 sha256=9fd135060a03f77afc4dffc41d7c935d609d95935e56ea6a56d73eb61b13f812
  Stored in directory: /root/.cache/pip/wheels/3f/5f/14/989d83a271207dda28232746d63e737a2dbd88ea7f7a9db807
Successfully built pygeohash
Installing collected packages: pygeohash
Successfully installed pygeohash-1.2.0


### Import pygeohash, networkx and Pandas libraries

Pygeohash - functions for converting latitude, longitude to geohash and related distance measurement utilities

Networkx - functions for creating, manipulating and querying open source network graphs 

Pandas - Python functions for table manipulation

In [2]:
import pygeohash as pgh
import networkx as nx
import pandas as pd

### Connect to datasets using Google drive or local files

In [3]:
# Runtime environment switches. They decide whether Google Drive is mounted
# and which path the input CSV is read from; set the one that applies to True.
using_Google_colab = True
using_Anaconda_on_Mac_or_Linux = False
using_Anaconda_on_windows = False

In [4]:
# Colab only: mount Google Drive at /content/drive so the input CSV is reachable.
if using_Google_colab:
    from google.colab import drive
    drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
# Choose the CSV location for the selected environment. The branches are
# mutually exclusive, and an explicit error replaces the NameError that would
# otherwise surface further down when no environment flag is set.
if using_Google_colab:
  state_location_path = '/content/drive/MyDrive/COVID_Project/input/state_lat_long.csv'
elif using_Anaconda_on_Mac_or_Linux:
  state_location_path = '../input/state_lat_long.csv'
elif using_Anaconda_on_windows:
  state_location_path = r'..\input\state_lat_long.csv'
else:
  raise ValueError('Set one of using_Google_colab, using_Anaconda_on_Mac_or_Linux '
                   'or using_Anaconda_on_windows to True')
state_location = pd.read_csv(state_location_path)
# Preview the first six rows (label-based slicing includes the end label).
state_location.loc[0:5,]

Unnamed: 0,state,latitude,longitude,name
0,AK,63.588753,-154.493062,Alaska
1,AL,32.318231,-86.902298,Alabama
2,AR,35.20105,-91.831833,Arkansas
3,AZ,34.048928,-111.093731,Arizona
4,CA,36.778261,-119.417932,California
5,CO,39.550051,-105.782067,Colorado


### Apply a function call to convert Lat, Long to Geohash

In [6]:
def lat_long_to_geohash(lat_long, precision=12):
  """Encode a (latitude, longitude) pair as a geohash string.

  Args:
    lat_long: Two-item sequence or pandas row holding the latitude first
      and the longitude second.
    precision: Number of characters in the returned geohash. Defaults to
      12, which matches the length of the codes produced before this
      parameter existed.

  Returns:
    The geohash string returned by ``pgh.encode``.

  Raises:
    ValueError: If ``lat_long`` does not contain exactly two items.
  """
  # Unpack by position rather than ``lat_long[0]``: integer keys on a
  # label-indexed pandas Series only work through a deprecated fallback.
  latitude, longitude = lat_long
  return pgh.encode(latitude, longitude, precision=precision)

In [7]:
# Encode each row's coordinates; axis=1 passes one row at a time to the helper.
coordinates = state_location[['latitude', 'longitude']]
state_location['geohash'] = coordinates.apply(lat_long_to_geohash, axis=1)
state_location.head(10)

Unnamed: 0,state,latitude,longitude,name,geohash
0,AK,63.588753,-154.493062,Alaska,be61srvx6dxq
1,AL,32.318231,-86.902298,Alabama,djdpy7remdyj
2,AR,35.20105,-91.831833,Arkansas,9yq8q24hgpq6
3,AZ,34.048928,-111.093731,Arizona,9w118n2j2knn
4,CA,36.778261,-119.417932,California,9qe14x68bsb0
5,CO,39.550051,-105.782067,Colorado,9xhbbzggepvg
6,CT,41.603221,-73.087749,Connecticut,drkh8vehk482
7,DC,38.905985,-77.033418,District of Columbia,dqcjr49gz322
8,DE,38.910832,-75.52767,Delaware,dqfm37d2j8z6
9,FL,27.664827,-81.515754,Florida,dhyj3h4p3383


### Truncate geohash to first two characters

In [8]:
# Keep only the two leading characters of every geohash.
state_location['geohash'] = state_location['geohash'].str[:2]
state_location.head(10)

Unnamed: 0,state,latitude,longitude,name,geohash
0,AK,63.588753,-154.493062,Alaska,be
1,AL,32.318231,-86.902298,Alabama,dj
2,AR,35.20105,-91.831833,Arkansas,9y
3,AZ,34.048928,-111.093731,Arizona,9w
4,CA,36.778261,-119.417932,California,9q
5,CO,39.550051,-105.782067,Colorado,9x
6,CT,41.603221,-73.087749,Connecticut,dr
7,DC,38.905985,-77.033418,District of Columbia,dq
8,DE,38.910832,-75.52767,Delaware,dq
9,FL,27.664827,-81.515754,Florida,dh


### Find neighbors by sorting the states by 2 character geohash codes attached to each state

### Initialize Graph and create state and geohash concepts as nodes

In [9]:
# Directed graph seeded with the two concept nodes that instances are linked from.
GRAPH_ID = nx.DiGraph()
GRAPH_ID.add_nodes_from(['state', 'geohash'])

### Create a node for each state

In [10]:
state_list = state_location.state.values
# Register every state, then link each one from the 'state' concept node.
GRAPH_ID.add_nodes_from(state_list)
GRAPH_ID.add_edges_from((('state', state) for state in state_list), label='instance')

### Create a list of unique geohash codes and create a node for each geohash

In [11]:
# Several states share a two-character geohash, so de-duplicate the codes.
# unique() keeps first-appearance order, so nodes are created in the same
# order as when the raw column was iterated.
geohash_list = state_location.geohash.unique()
for geohash in geohash_list:
  GRAPH_ID.add_node(geohash)
  # Link the code from the 'geohash' concept node.
  GRAPH_ID.add_edge('geohash', geohash, label='instance')

In [12]:
df_state_geohash = state_location[['state', 'geohash']]
# Connect each state with its geohash in both directions.
for state_code, geohash_code in df_state_geohash.itertuples(index=False, name=None):
    # state -> geohash: no 'distance' attribute is stored on this edge.
    GRAPH_ID.add_edge(state_code, geohash_code, label='located_at')
    # geohash -> state: stored with an explicit distance of 0.0.
    GRAPH_ID.add_edge(geohash_code, state_code, label='locates', distance=0.0)

### Find geohash associated with California and Nevada


In [13]:
# Outgoing neighbors of a state node: the geohash it is located at.
list(GRAPH_ID.successors('CA'))

['9q']

In [14]:
# Outgoing neighbors of a state node: the geohash it is located at.
list(GRAPH_ID.successors('NV'))

['9q']

### Find states located within geohash '9q'

In [15]:
# Outgoing neighbors of a geohash node at this point: the states it locates.
list(GRAPH_ID.successors('9q'))

['CA', 'NV']

### Find the states located within each geohash

In [16]:
# Walk the codes hanging off the 'geohash' concept node and list each one's states.
for geohash in GRAPH_ID.successors('geohash'):
  member_states = list(GRAPH_ID.successors(geohash))
  print("Geohash: ", geohash, "States: ", member_states)

Geohash:  be States:  ['AK']
Geohash:  dj States:  ['AL', 'GA', 'MS']
Geohash:  9y States:  ['AR', 'KS', 'MO', 'OK']
Geohash:  9w States:  ['AZ', 'NM', 'UT']
Geohash:  9q States:  ['CA', 'NV']
Geohash:  9x States:  ['CO', 'WY']
Geohash:  dr States:  ['CT', 'MA', 'NH', 'NJ', 'NY', 'PA', 'RI', 'VT']
Geohash:  dq States:  ['DC', 'DE', 'MD', 'VA']
Geohash:  dh States:  ['FL']
Geohash:  8e States:  ['HI']
Geohash:  9z States:  ['IA', 'NE', 'SD']
Geohash:  9r States:  ['ID', 'OR']
Geohash:  dp States:  ['IL', 'IN', 'MI', 'OH', 'WI']
Geohash:  dn States:  ['KY', 'NC', 'SC', 'TN', 'WV']
Geohash:  9v States:  ['LA', 'TX']
Geohash:  f2 States:  ['ME']
Geohash:  cb States:  ['MN', 'ND']
Geohash:  c8 States:  ['MT']
Geohash:  de States:  ['PR']
Geohash:  c2 States:  ['WA']


### Task 7 Activity 1 - Find number of state and geohash nodes in a graph

In [17]:
# Number of codes linked from the 'geohash' concept node.
len(GRAPH_ID['geohash'])

20

In [18]:
# Number of states linked from the 'state' concept node.
len(GRAPH_ID['state'])

52

### Connect neighboring geohash codes if the distance is less than 1,000 km

In [19]:
# The haversine helper reports metres, so the 1,000 km limit is 1,000,000 m.
NEAR_DISTANCE_LIMIT_M = 1000 * 1000
# Drop repeated codes (first-appearance order kept) so that every ordered
# pair is measured once instead of once per state sharing the code.
distinct_geohashes = list(dict.fromkeys(geohash_list))
for geohash_1 in distinct_geohashes:
  for geohash_2 in distinct_geohashes:
    if geohash_1 != geohash_2:
      distance = pgh.geohash_haversine_distance(geohash_1, geohash_2)
      if distance < NEAR_DISTANCE_LIMIT_M:
        # No 'distance' attribute is stored on 'near' edges.
        GRAPH_ID.add_edge(geohash_1, geohash_2, label='near')

### Find path length from NY to all nodes (states and geohashes)

In [20]:
# Shortest-path cost from NY to every reachable node, weighted by the
# 'distance' edge attribute. Only geohash -> state edges set it (to 0.0);
# networkx treats an edge without the attribute as weight 1.
neighbor_path_length = nx.single_source_dijkstra_path_length(GRAPH_ID, 'NY', weight='distance')
neighbor_path_length

{'9q': 6,
 '9r': 5,
 '9v': 5,
 '9w': 5,
 '9x': 4,
 '9y': 4,
 '9z': 3,
 'AL': 4.0,
 'AR': 4.0,
 'AZ': 5.0,
 'CA': 6.0,
 'CO': 4.0,
 'CT': 1.0,
 'DC': 2.0,
 'DE': 2.0,
 'FL': 5.0,
 'GA': 4.0,
 'IA': 3.0,
 'ID': 5.0,
 'IL': 2.0,
 'IN': 2.0,
 'KS': 4.0,
 'KY': 3.0,
 'LA': 5.0,
 'MA': 1.0,
 'MD': 2.0,
 'ME': 2.0,
 'MI': 2.0,
 'MN': 4.0,
 'MO': 4.0,
 'MS': 4.0,
 'MT': 5.0,
 'NC': 3.0,
 'ND': 4.0,
 'NE': 3.0,
 'NH': 1.0,
 'NJ': 1.0,
 'NM': 5.0,
 'NV': 6.0,
 'NY': 0,
 'OH': 2.0,
 'OK': 4.0,
 'OR': 5.0,
 'PA': 1.0,
 'RI': 1.0,
 'SC': 3.0,
 'SD': 3.0,
 'TN': 3.0,
 'TX': 5.0,
 'UT': 5.0,
 'VA': 2.0,
 'VT': 1.0,
 'WA': 6.0,
 'WI': 2.0,
 'WV': 3.0,
 'WY': 4.0,
 'c2': 6,
 'c8': 5,
 'cb': 4,
 'dh': 5,
 'dj': 4,
 'dn': 3,
 'dp': 2,
 'dq': 2,
 'dr': 1,
 'f2': 2}

### Make a list of all nodes covered in the path length and then find those nodes which are states and less than or equal to 3 hops

In [21]:
# Keys view over every node reached from NY (states and geohash codes alike).
neighbor_states = neighbor_path_length.keys()

In [22]:
# Only nodes linked from the 'state' concept node are states.
state_list = list(GRAPH_ID.successors('state'))
for state in state_list:
  # Print reachable states whose path length from NY is at most 3.
  if state in neighbor_states and neighbor_path_length[state] <= 3:
    print(state)


CT
DC
DE
IA
IL
IN
KY
MA
MD
ME
MI
NC
NE
NH
NJ
NY
OH
PA
RI
SC
SD
TN
VA
VT
WI
WV


In [23]:
# A state counts as next-door when it is reachable in at most this many edges
# (e.g. state -> geohash -> near geohash -> state).
MAX_NEIGHBOR_HOPS = 3
# Set lookup avoids scanning the whole state list for every reachable node.
state_set = set(state_list)
for state_1 in state_list:
  # No weight argument is given, so every edge costs 1 and the lengths are hop counts.
  neighbor_path_length = nx.single_source_dijkstra_path_length(GRAPH_ID, state_1)
  neighbor_state_list = neighbor_path_length.keys()
  next_door_list = [
      state_2 for state_2 in neighbor_state_list
      if state_2 != state_1
      and state_2 in state_set
      and neighbor_path_length[state_2] <= MAX_NEIGHBOR_HOPS
  ]
  # States with no nearby states are omitted from the printout.
  if next_door_list:
    print(state_1, next_door_list)

AL ['GA', 'MS', 'FL', 'KY', 'NC', 'SC', 'TN', 'WV']
AR ['KS', 'MO', 'OK', 'AZ', 'NM', 'UT', 'IA', 'NE', 'SD', 'LA', 'TX']
AZ ['NM', 'UT', 'AR', 'KS', 'MO', 'OK', 'CA', 'NV', 'CO', 'WY']
CA ['NV', 'AZ', 'NM', 'UT', 'ID', 'OR']
CO ['WY', 'AZ', 'NM', 'UT', 'IA', 'NE', 'SD', 'ID', 'OR', 'MT']
CT ['MA', 'NH', 'NJ', 'NY', 'PA', 'RI', 'VT', 'DC', 'DE', 'MD', 'VA', 'IL', 'IN', 'MI', 'OH', 'WI', 'ME']
DC ['DE', 'MD', 'VA', 'CT', 'MA', 'NH', 'NJ', 'NY', 'PA', 'RI', 'VT', 'KY', 'NC', 'SC', 'TN', 'WV']
DE ['DC', 'MD', 'VA', 'CT', 'MA', 'NH', 'NJ', 'NY', 'PA', 'RI', 'VT', 'KY', 'NC', 'SC', 'TN', 'WV']
FL ['AL', 'GA', 'MS']
GA ['AL', 'MS', 'FL', 'KY', 'NC', 'SC', 'TN', 'WV']
IA ['NE', 'SD', 'AR', 'KS', 'MO', 'OK', 'CO', 'WY', 'IL', 'IN', 'MI', 'OH', 'WI', 'MN', 'ND']
ID ['OR', 'CA', 'NV', 'CO', 'WY', 'WA']
IL ['IN', 'MI', 'OH', 'WI', 'CT', 'MA', 'NH', 'NJ', 'NY', 'PA', 'RI', 'VT', 'IA', 'NE', 'SD', 'KY', 'NC', 'SC', 'TN', 'WV']
IN ['IL', 'MI', 'OH', 'WI', 'CT', 'MA', 'NH', 'NJ', 'NY', 'PA', 'RI', 'V

In [25]:
# Node sequence of the cheapest NY -> CA route under the 'distance' weighting.
nx.dijkstra_path(GRAPH_ID, source='NY', target='CA', weight='distance')

['NY', 'dr', 'dp', '9z', '9y', '9w', '9q', 'CA']

In [26]:
# Node sequence of the cheapest OR -> CA route under the 'distance' weighting.
nx.dijkstra_path(GRAPH_ID, source='OR', target='CA', weight='distance')

['OR', '9r', '9q', 'CA']

In [27]:
# Show every node: the two concept nodes, then the states, then the geohash codes.
GRAPH_ID.nodes()


NodeView(('state', 'geohash', 'AK', 'AL', 'AR', 'AZ', 'CA', 'CO', 'CT', 'DC', 'DE', 'FL', 'GA', 'HI', 'IA', 'ID', 'IL', 'IN', 'KS', 'KY', 'LA', 'MA', 'MD', 'ME', 'MI', 'MN', 'MO', 'MS', 'MT', 'NC', 'ND', 'NE', 'NH', 'NJ', 'NM', 'NV', 'NY', 'OH', 'OK', 'OR', 'PA', 'PR', 'RI', 'SC', 'SD', 'TN', 'TX', 'UT', 'VA', 'VT', 'WA', 'WI', 'WV', 'WY', 'be', 'dj', '9y', '9w', '9q', '9x', 'dr', 'dq', 'dh', '8e', '9z', '9r', 'dp', 'dn', '9v', 'f2', 'cb', 'c8', 'de', 'c2'))

In [28]:
# No weight='distance' here: networkx falls back to the 'weight' attribute,
# which no edge in this graph sets, so every edge costs 1 and the result is
# a plain hop count (geohash -> state edges are no longer free).
nx.single_source_dijkstra_path_length(GRAPH_ID, 'NY')

{'9q': 6,
 '9r': 5,
 '9v': 5,
 '9w': 5,
 '9x': 4,
 '9y': 4,
 '9z': 3,
 'AL': 5,
 'AR': 5,
 'AZ': 6,
 'CA': 7,
 'CO': 5,
 'CT': 2,
 'DC': 3,
 'DE': 3,
 'FL': 6,
 'GA': 5,
 'IA': 4,
 'ID': 6,
 'IL': 3,
 'IN': 3,
 'KS': 5,
 'KY': 4,
 'LA': 6,
 'MA': 2,
 'MD': 3,
 'ME': 3,
 'MI': 3,
 'MN': 5,
 'MO': 5,
 'MS': 5,
 'MT': 6,
 'NC': 4,
 'ND': 5,
 'NE': 4,
 'NH': 2,
 'NJ': 2,
 'NM': 6,
 'NV': 7,
 'NY': 0,
 'OH': 3,
 'OK': 5,
 'OR': 6,
 'PA': 2,
 'RI': 2,
 'SC': 4,
 'SD': 4,
 'TN': 4,
 'TX': 6,
 'UT': 6,
 'VA': 3,
 'VT': 2,
 'WA': 7,
 'WI': 3,
 'WV': 4,
 'WY': 5,
 'c2': 6,
 'c8': 5,
 'cb': 4,
 'dh': 5,
 'dj': 4,
 'dn': 3,
 'dp': 2,
 'dq': 2,
 'dr': 1,
 'f2': 2}