In [4]:
# Import libraries
import geopandas as gpd
import pandas as pd
import numpy as np
from google.colab import drive
from pathlib import Path
import sys
import os
import requests
import requests_cache
import time

# Install a global requests cache under this name so that repeated,
# identical HTTP requests can be answered from the cache
requests_cache.install_cache(cache_name='covid_exposure_match_unnamed_buildings')

In [5]:
# Mount Google Drive, forcing a remount if it is already attached
drive.mount('/content/drive', force_remount=True)

# Project folder on Drive; appended to sys.path so Python can import from it
base = Path('/content/drive/MyDrive/covid_exposure')
sys.path.append(str(base))

# Footprint GeoJSON, derived from `base` so the project folder is spelled out
# in one place only. Kept as a plain string, as before.
path = str(base / 'UC_Davis_Building_Footprints_2024-02-09.geojson')

Mounted at /content/drive


In [6]:
# Read the three inputs from Drive:
#   footprints         - building footprints (GeoJSON at `path`)
#   building_dict      - existing building dictionary
#   unmatched_building - records whose building is still unmatched (latin-1 encoded CSV)
footprints = gpd.read_file(path)
building_dict = pd.read_csv(
    '/content/drive/MyDrive/covid_exposure/building_dictionary.csv',
    sep=",",
)
unmatched_building = pd.read_csv(
    '/content/drive/MyDrive/covid_exposure/unmatched_buildings.csv',
    encoding='latin-1',
)

In [7]:
# Restrict the unmatched records to the three descriptive columns
unmatched_building = unmatched_building.loc[:, ['campus_building', 'worksite', 'location']]

# One row per distinct campus_building value; the original row labels are kept
unmatched_series = unmatched_building['campus_building'].drop_duplicates()
unmatched_build = pd.DataFrame({"campus_building": unmatched_series})

In [8]:
# Discard footprints that have no building name, then keep only the name
# and the two coordinate columns
named_footprints = footprints.dropna(subset=['FullBldgNa'])
footprints_coord = named_footprints[['FullBldgNa', 'LatY', 'LongX']]

# Google API

Use the Google Maps Geocoding API to get the coordinates (lat, long) of each unmatched building -> match them against the footprint coordinates within a set radius -> add FullBldgNa (footprints) as target and campus_building (unmatched_series) as variation to building_dict -> sort building_dict

In [12]:
import googlemaps

In [13]:
# Prefer a key supplied through the GOOGLE_MAPS_API_KEY environment variable so
# the credential does not have to be saved inside the notebook. The placeholder
# is kept as a fallback for pasting a key in manually.
google_api_key = os.environ.get('GOOGLE_MAPS_API_KEY', 'INSERT_API_KEY')

# Initialize Google Maps Client
gmaps = googlemaps.Client(key=google_api_key)

In [14]:
#test
#test = unmatched_build[0:25]
#geocode_result = gmaps.geocode('Walter A. Robinson Welcome Center' + ' Davis CA US')

[{'address_components': [{'long_name': '550',
    'short_name': '550',
    'types': ['street_number']},
   {'long_name': 'Alumni Lane', 'short_name': 'Alumni Ln', 'types': ['route']},
   {'long_name': 'Davis',
    'short_name': 'Davis',
    'types': ['locality', 'political']},
   {'long_name': 'Yolo County',
    'short_name': 'Yolo County',
    'types': ['administrative_area_level_2', 'political']},
   {'long_name': 'California',
    'short_name': 'CA',
    'types': ['administrative_area_level_1', 'political']},
   {'long_name': 'United States',
    'short_name': 'US',
    'types': ['country', 'political']},
   {'long_name': '95616', 'short_name': '95616', 'types': ['postal_code']}],
  'formatted_address': '550 Alumni Ln, Davis, CA 95616, USA',
  'geometry': {'location': {'lat': 38.5345563, 'lng': -121.7473162},
   'location_type': 'ROOFTOP',
   'viewport': {'northeast': {'lat': 38.5358663302915,
     'lng': -121.7461258697085},
    'southwest': {'lat': 38.5331683697085, 'lng': -121.74

In [15]:
# Geocode every unmatched building name and store the returned coordinates in
# the 'lat' / 'long' columns of unmatched_build (rows without a result keep NaN).
for index, row in unmatched_build.iterrows():
    print(index, row)

    building_name = row['campus_building']

    # A blank campus_building is read from the CSV as NaN (a float). Concatenating
    # it with the suffix below would raise TypeError and abort the whole run, so
    # report it like any other unresolved address and move on.
    if not isinstance(building_name, str):
        print("Geocode result not found for address:", building_name)
        continue

    # Geocode the address (the same city/state suffix is appended to every query)
    geocode_result = gmaps.geocode(building_name + ' Davis CA US')

    # Check if result is found
    if geocode_result:
        # Use the first candidate returned by the geocoder
        location = geocode_result[0]['geometry']['location']
        unmatched_build.loc[index, 'lat'] = location['lat']
        unmatched_build.loc[index, 'long'] = location['lng']
    else:
        # Handle case where result is not found
        print("Geocode result not found for address:", building_name)

0 campus_building    Scrubs Café
Name: 0, dtype: object
3 campus_building    100 Dairy Road
Name: 3, dtype: object
4 campus_building    1000 Old Davis Road
Name: 4, dtype: object
5 campus_building    112 A Street
Name: 5, dtype: object
6 campus_building    1333 Research Park Drive
Name: 6, dtype: object
10 campus_building    1441 Research Park Drive
Name: 10, dtype: object
14 campus_building    1450 Drew Avenue
Name: 14, dtype: object
15 campus_building    1450 Drew Circle
Name: 15, dtype: object
16 campus_building    1515 Newton Court
Name: 16, dtype: object
20 campus_building    1544 Newton Court
Name: 20, dtype: object
29 campus_building    1605 Tilia Street
Name: 29, dtype: object
33 campus_building    1632 Da Vinci Court
Name: 33, dtype: object
34 campus_building    1632 Davinci Court
Name: 34, dtype: object
35 campus_building    1815 Research Park
Name: 35, dtype: object
36 campus_building    1850 Research Park Drive
Name: 36, dtype: object
72 campus_building    202 Cousteau Plac

# Sklearn BallTree algorithm

* Unsupervised nearest neighbor: output the nearest building in the UC Davis footprints within a 30 meter radius, using the haversine metric
* Haversine metric: great-circle distance between two points on a sphere
* scikit-learn's haversine metric works in radians, so the lat/long coordinates must be converted from degrees first
* https://scikit-learn.org/stable/modules/neighbors.html

In [24]:
from sklearn.neighbors import BallTree

# The haversine metric works on radians, so convert the footprint
# latitude/longitude columns from degrees first
footprint_coords_deg = footprints_coord[["LatY", "LongX"]].to_numpy()
footprints_radians = np.deg2rad(footprint_coords_deg)

# Index the UC Davis building footprints for radius queries
tree = BallTree(footprints_radians, metric='haversine')

In [25]:
# Express the search radius as an angle: distance on the sphere divided by
# the sphere's radius gives radians, which is what the tree is queried with
test_radius = 30          # search radius, in meters
earth_radius = 6_371_000  # Earth radius, in meters
radius = test_radius / earth_radius

In [26]:
# Geocoded coordinates of the unmatched buildings, converted to radians so
# they are in the same units as the points the tree was built from
unmatched_coords_deg = unmatched_build[["lat", "long"]].to_numpy()
locations_radians = np.deg2rad(unmatched_coords_deg)

In [27]:
# Query all footprints within the radius of each unmatched building.
# `ind` holds, per query point, the positions of the neighbouring footprints;
# `results` holds the matching distances (in radians).
# sort_results=True orders every neighbour list by increasing distance, so the
# first entry is the nearest footprint; by default the order is not sorted.
ind, results = tree.query_radius(X=locations_radians, r=radius,
                                 return_distance=True, sort_results=True)

In [28]:
#print(ind)
#print(results * earth_radius)

In [40]:
# Build one record per unmatched building: the nearest footprint inside the
# search radius (target), the unmatched name (variation) and the distance in
# meters. Buildings with no footprint in range get target/distance of None.
results_list = []

# `ind` / `results` are in the same order as the rows of unmatched_build,
# so the enumeration position identifies the unmatched building
for index, (indices, distances) in enumerate(zip(ind, results)):
    # get name of the unmatched campus building
    unmatched_building_name = unmatched_build.iloc[index]['campus_building']

    if len(indices) > 0:
        # query_radius only orders neighbours by distance when asked to, so
        # select the minimum explicitly instead of trusting position 0
        nearest = int(np.argmin(distances))

        # position of the closest building in footprints_coord
        closest_building_index = indices[nearest]

        # distance to the closest building, converted from radians to meters
        closest_building_distance = distances[nearest] * earth_radius

        # get name of the closest building from footprints
        closest_building_name = footprints_coord['FullBldgNa'].iloc[closest_building_index]
    else:
        # when no neighbors are found within the radius
        closest_building_name = None
        closest_building_distance = None

    # Append results to the list
    results_list.append({'target': closest_building_name,
                         'variation': unmatched_building_name,
                         'distance': closest_building_distance})

# convert list of dictionaries into a df
results_df = pd.DataFrame(results_list)
results_df

Unnamed: 0,target,variation,distance
0,Scrubs Cafe,Scrubs Café,12.051592
1,,100 Dairy Road,
2,Animal Resource Service R2,1000 Old Davis Road,28.222169
3,Davis 116 A Street,112 A Street,16.091604
4,Administrative Services West,1333 Research Park Drive,21.766394
...,...,...,...
233,West Village Sol 110 A Sage Street,West Village - 1715 Tilia Street,5.979258
234,West Village Sol 110 A Sage Street,West Village 215 Sage Street,5.979258
235,,West Village Square & Community College,
236,West Village Sol 215 Sage Street,Western Cooling Efficiency Center,6.047015


In [82]:
# Buildings with no footprint inside the radius have a null distance and
# still need to be matched by hand; report their count and share of all rows
missing_count = results_df['distance'].isna().sum()
print("Manual input:", missing_count, ", Percentage:", missing_count / results_df.shape[0])

Manual input: 128 , Percentage: 0.5378151260504201


In [83]:
# Sanity check: coordinates of the footprints whose street address mentions
# "Alumni", to compare by eye against a geocoded point
# (38.5345563, 'lng': -121.7473162)
on_alumni = footprints["StreetAddr"].fillna("").str.contains("Alumni")
footprints[on_alumni][['LatY', 'LongX']]

Unnamed: 0,LatY,LongX
1131,38.535195,-121.748216
1293,38.534811,-121.747233
1306,38.534535,-121.746942


# Append target, variation into building_dictionary.csv

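* Append the new (target, variation) pairs to the building dictionary
* Sort by target in ascending order
* Keep unmatched buildings as rows with an empty target and their variation, for manual input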

In [75]:
# Append the new (target, variation) pairs to the existing dictionary.
# pd.concat is used because the DataFrame.append method is deprecated.
new_entries = results_df[['target', 'variation']]
final_building_dict = pd.concat([building_dict, new_entries]).sort_values(by='target')
final_building_dict

Unnamed: 0,target,variation
0,260 Cousteau,260 Cousteau Place
1,Academic Surge Building,Academic Surge
2,Academic Surge Building,Academic Surge HRVIP Lab
9,Activities and Recreation Center,Activities Recreation & Center
7,Activities and Recreation Center,ARC
...,...,...
227,,Veterinary Medicine MPT
228,,Veterinary Medicine Teaching Hospital
229,,Veterinary Medicine VM3A
230,,Veterinary Small Animal Clinic


In [78]:
# Write the extended building dictionary to Drive under a new file name
final_building_dict.to_csv(
    '/content/drive/MyDrive/covid_exposure/building_dictionary_1.csv',
    index=False,
)

# Write the geocoded coordinates to Drive as well,
# so the API doesn't have to be run again
unmatched_build.to_csv(
    '/content/drive/MyDrive/covid_exposure/unmatched_building_coordinates.csv',
    index=False,
)