In [21]:
import pandas as pd
import sqlite3
import ast

In [22]:
# Listings table from the local SQLite file. `conn` is not closed in this cell.
conn = sqlite3.connect("housing_info.db")
df_master = pd.read_sql_query(sql='SELECT * FROM df_master', con=conn)

# Map regions; the first CSV column becomes the index.
full_map_coords = pd.read_csv(
    'data//full_map_coordinates.csv',
    index_col=[0],
)

In [None]:
# Turn the stored Python-literal text into real list/dict objects.
# Only str values are parsed: anything else (a value already parsed by an earlier
# run of this cell, or a missing value) is passed through unchanged, so the cell
# can be re-run without ast.literal_eval raising on non-string input.
df_master['geocode_json'] = df_master['geocode_json'].apply(
    lambda raw: ast.literal_eval(raw) if isinstance(raw, str) else raw
)

In [51]:
# Peek at the parsed geocoding payload of the row labelled 0.
df_master.loc[0, 'geocode_json']

[{'address_components': [{'long_name': '1301',
    'short_name': '1301',
    'types': ['subpremise']},
   {'long_name': '4488', 'short_name': '4488', 'types': ['street_number']},
   {'long_name': 'Juneau Street',
    'short_name': 'Juneau St',
    'types': ['route']},
   {'long_name': 'Coquitlam',
    'short_name': 'Coquitlam',
    'types': ['locality', 'political']},
   {'long_name': 'Metro Vancouver',
    'short_name': 'Metro Vancouver',
    'types': ['administrative_area_level_2', 'political']},
   {'long_name': 'British Columbia',
    'short_name': 'BC',
    'types': ['administrative_area_level_1', 'political']},
   {'long_name': 'Canada',
    'short_name': 'CA',
    'types': ['country', 'political']},
   {'long_name': 'V5C 0M4',
    'short_name': 'V5C 0M4',
    'types': ['postal_code']}],
  'formatted_address': '4488 Juneau St #1301, Coquitlam, BC V5C 0M4, Canada',
  'geometry': {'location': {'lat': 49.26337940000001, 'lng': -123.0037234},
   'location_type': 'ROOFTOP',
   'viewpo

In [46]:
# Coordinates of the first geocoding result for the row labelled 0.
df_master.loc[0, 'geocode_json'][0]['geometry']['location']

{'lat': 49.26337940000001, 'lng': -123.0037234}

In [52]:
def extract_lat_lon(row):
    """Return ``(lat, lng)`` from the first geocoding result stored in a row.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must expose ``row['geocode_json']``: a list of result dicts, each
        carrying ``geometry -> location -> {'lat', 'lng'}``.

    Returns
    -------
    tuple
        ``(lat, lng)``. ``(None, None)`` is returned when the result list is
        empty or missing, or when any level of the nested structure is absent.

    Raises
    ------
    TypeError
        If ``geocode_json`` is still an unparsed string, so that a skipped
        parsing step fails loudly instead of yielding all-null coordinates.
    """
    results = row['geocode_json']
    if isinstance(results, str):
        raise TypeError("geocode_json is an unparsed string; parse it before extracting coordinates")

    # No results at all (empty list, None, NaN): there is no element 0 to read.
    if not isinstance(results, (list, tuple)) or not results:
        return None, None

    first = results[0]
    geometry = first.get('geometry') if isinstance(first, dict) else None
    if not isinstance(geometry, dict):
        return None, None

    location = geometry.get('location', {})
    if not isinstance(location, dict):
        return None, None

    return location.get('lat'), location.get('lng')

# Run extract_lat_lon on every row, then unzip the (lat, lng) pairs into two columns.
df_master['latitude'], df_master['longitude'] = zip(
    *df_master.apply(extract_lat_lon, axis=1)
)


In [55]:
# Display the two extracted coordinate columns.
df_master.loc[:, ['latitude', 'longitude']]

Unnamed: 0,latitude,longitude
0,49.263379,-123.003723
1,49.220089,-122.950739
2,49.250485,-122.900122
3,49.274787,-122.981492
4,49.265688,-123.003635
...,...,...
14910,49.279445,-123.119668
14911,49.256891,-123.097177
14912,49.209577,-123.139504
14913,49.266163,-123.095995


In [103]:
from shapely.geometry import Point
from shapely.wkt import loads

def create_location_pt(row):
    """Build a ``Point`` from a row, passing longitude first and latitude second."""
    lon = row['longitude']
    lat = row['latitude']
    return Point(lon, lat)

df_master['location_pt'] = df_master.apply(create_location_pt, axis = 1)

# read_csv returns the geometry column as text, so it has to be parsed with
# shapely.wkt.loads before any point/polygon test can run on a fresh kernel.
# Only values that are still strings are parsed, so re-running this cell after
# the column already holds geometry objects leaves them untouched.
full_map_coords['geometry'] = full_map_coords['geometry'].apply(
    lambda geom: loads(geom) if isinstance(geom, str) else geom
)

def point_in_polygons(point, polygons, names):
    """Name the polygon that holds ``point``, else the one with the closest boundary.

    ``polygons`` and ``names`` are walked in parallel. The first polygon for
    which ``point.within(polygon)`` is true wins immediately. When none
    contains the point, the name paired with the smallest
    ``point.distance(polygon.boundary)`` is returned (the earliest entry on
    ties). There is no distance cut-off: ``None`` comes back only when no
    candidate produced a distance below infinity, e.g. for empty inputs.
    """
    closest_name, closest_dist = None, float('inf')

    for candidate, label in zip(polygons, names):
        if point.within(candidate):
            return label

        # Not inside: remember this polygon if its boundary is the nearest so far.
        gap = point.distance(candidate.boundary)
        if gap < closest_dist:
            closest_name, closest_dist = label, gap

    return closest_name

# Label every listing with the containing (or, failing that, nearest) map polygon.
df_master['Polygon_Name'] = df_master['location_pt'].apply(
    point_in_polygons,
    args=(full_map_coords['geometry'], full_map_coords['name']),
)

  return lib.distance(a, b, **kwargs)


In [104]:
df_master['Polygon_Name'].isna().sum()  # rows with no polygon name; "681" is presumably a count from an earlier run (recorded output here is 0) - TODO confirm

0

In [95]:
# First few listings that still have no polygon name.
df_master.loc[df_master['Polygon_Name'].isna()].head()

Unnamed: 0,mls_number,address,city,home_type,yr_built,home_age,garage,garage_size,taxes,avg_price_sqft,...,difference_in_days,price_difference_abs,price_difference_pct,geocode_json,postal_code,index_col,latitude,longitude,location_pt,Polygon_Name
3086,R2614946,6 - 315 Highland Way,coquitlam,Townhouse,1978.0,46.0,No,2.0,"$1,514",$693,...,-680.0,27200.0,4.542418,"[{'address_components': [{'long_name': '6', 's...",V3H 3V6,3086,49.281295,-122.822238,POINT (-122.8222377 49.2812953),
3615,R2686498,305 - 1120 Tsatsu Shores Drive,delta,Apartment/Condominium,1996.0,28.0,Yes,1.0,"$2,434",$907,...,127.0,-49000.0,-6.132666,"[{'address_components': [{'long_name': '305', ...",V4M 4G3,3615,49.023328,-123.101063,POINT (-123.1010628 49.02332759999999),
3772,R2652477,308 - 1120 Tsatsu Shores Drive,delta,Apartment/Condominium,1996.0,28.0,Yes,2.0,"$2,434",$907,...,24.0,0.0,0.0,"[{'address_components': [{'long_name': '308', ...",V4M 4G3,3772,49.023328,-123.101063,POINT (-123.1010628 49.02332759999999),
4534,R2755942,12 - 23080 Dyke Road,richmond,House,1980.0,44.0,No,1.0,$200,$423,...,161.0,0.0,0.0,"[{'address_components': [{'long_name': '12', '...",V6V 1E1,4534,49.168941,-122.967189,POINT (-122.9671893 49.1689413),
4612,R2717875,5 - 23000 Dyke Road,richmond,House,2020.0,4.0,No,1.0,,$241,...,151.0,-6000.0,-1.851852,"[{'address_components': [{'long_name': '5', 's...",V6V 2H3,4612,49.168804,-122.967478,POINT (-122.9674779 49.16880440000001),
