In [59]:
import geopandas as gpd
import fiona
import folium
import json
from shapely.geometry import Point
from geopy.geocoders import Nominatim
from pyproj import Transformer
import pandas as pd
import warnings
import requests


In [60]:
# Path to the geodatabase
gdb_path = "property_data/Non_Sensitive.gdb"

In [62]:
# List all available layers in the geodatabase
layers = fiona.listlayers(gdb_path)
print("Available layers:", layers)

# Load a specific layer
layer_name = layers[0]
gdf = gpd.read_file(gdb_path, layer=layer_name)

# Display various aspects the GeoDataFrame
print(len(gdf))
print(gdf.head())
print(gdf.info())
print(gdf.columns)
print(gdf.describe())

Available layers: ['GEOGRAPHIC_LOT_FABRIC']


  return ogr_read(


292834
       OGF_ID   CLASS_SUBTYPE  CLASS_SUBTYPE_NUM LOT_IDENT CONCESSION_IDENT  \
0  54816323.0  Road Allowance               2608      None                    
1  54816326.0  Road Allowance               2608      None                    
2  54816386.0  Road Allowance               2608      None                    
3  54816407.0  Road Allowance               2608      None                    
4  54816416.0  Road Allowance               2608      None                    

  GEOGRAPHIC_TOWNSHIP_NAME ROAD_ALLOWANCE_STATUS_FLG RELATED_FEATURE_DESCR  \
0             SOUTH CROSBY                      None                  None   
1             SOUTH CROSBY                      None                  None   
2             SOUTH CROSBY                      None                  None   
3             SOUTH CROSBY                      None                  None   
4             SOUTH CROSBY                      None                  None   

   LOCATION_ACCURACY LOCATION_DESCR VERIFICATION_

In [58]:
for index, row in gdf.iterrows():
    print(f"Index: {index}")
    print(f"Geometry: {row['geometry']}")  # The geometry column
    print(f"Attributes: {row}")  # Access all attributes for that row
    print()  # Add spacing between rows for clarity

    if index > 2:
        break

print()
for g, geometry in enumerate(gdf['geometry']):
    print(g, geometry)
    if g > 2:
        break

Index: 0
Geometry: POINT (-75.2246660730139 45.845085305433884)
Attributes: id_provinc                          80090037981585000000000
code_mun                                              80090
arrond                                                 None
rejet                                                     0
date_entree                                            2021
mat18                                    037981585000000000
geometry       POINT (-75.2246660730139 45.845085305433884)
Name: 0, dtype: object

Index: 1
Geometry: POINT (-75.21270721326675 45.84037760642377)
Attributes: id_provinc                          80090047876934100000000
code_mun                                              80090
arrond                                                 None
rejet                                                     0
date_entree                                            2021
mat18                                    047876934100000000
geometry       POINT (-75.21270721326675 45.

In [14]:
# Step 1: Check the CRS of the GeoDataFrame
print(gdf.crs)  # Should be EPSG:3005 or similar projected CRS

# Step 2: Calculate the centroid in the original CRS (EPSG:3005 or projected CRS)
index = 5
centroid_projected = gdf.geometry.centroid[index]  # Calculate centroid in original projected CRS

# Step 3: Create a GeoDataFrame from the centroid point
centroid_gdf = gpd.GeoDataFrame(geometry=[centroid_projected], crs=gdf.crs)

# Step 4: Reproject the centroid to WGS 84 (EPSG:4326 for latitude/longitude)
centroid_wgs84 = centroid_gdf.to_crs(epsg=4326)

# Step 5: Extract the (longitude, latitude) coordinates
center = centroid_wgs84.geometry[0].coords[0]  # Get lon, lat directly
print(center)  # This will now be in (longitude, latitude) format


EPSG:4326



  centroid_projected = gdf.geometry.centroid[index]  # Calculate centroid in original projected CRS


(-96.28618369921544, 51.18831957694115)


In [67]:
pids = gdf.PID
pids = pids[~pids.isna()]

print(len(pids))

1519890


In [20]:
# Convert to GeoJSON if needed
# gdf.to_file('bc_parcel_polygons.geojson', driver='GeoJSON')


In [68]:
def get_geocode_data(address, api_key):
    # Define the Geocoding API endpoint
    endpoint = "https://maps.googleapis.com/maps/api/geocode/json"
    
    # Define the parameters for the API request
    params = {
        'address': address,
        'key': api_key
    }
    
    # Make the request to the Geocoding API
    response = requests.get(endpoint, params=params)

    print(response)
    
    # Parse the JSON response
    if response.status_code == 200:
        geocode_data = response.json()
        
        if geocode_data['status'] == 'OK':
            # Extract the location details
            latitude = geocode_data['results'][0]['geometry']['location']['lat']
            longitude = geocode_data['results'][0]['geometry']['location']['lng']
            address_components = geocode_data['results'][0]['address_components']
            
            # Return the results
            return latitude, longitude, address_components
        else:
            print("Error:", geocode_data['status'])
            return None
    else:
        print("Request failed with status code:", response.status_code)
        return None

def processAddressComponents(address_components):
    processed_address_components = {}
    for comp in address_components:
        processed_address_components[comp['types'][0]] = comp['long_name']
    return processed_address_components

def filterGDF(longitude, latitude, processed_address_components):
    if 'administrative_area_level_2' in processed_address_components:
        filtered_gdf = gdf[gdf.processed_district == processed_address_components['administrative_area_level_2'].lower()]
    else:
        raise AttributeError('Processed address components missing administrative_area_level_2')

    if 'administrative_area_level_3' in processed_address_components:
        filtered_gdf = filtered_gdf[filtered_gdf.processed_municipality == processed_address_components['administrative_area_level_3'].lower()]
    else:
        warnings.warn('administrative_area_level_3 missing from processed address components. Compression might be lower resulting in longer filtering times')
        
    filtered_gdf = filtered_gdf.to_crs(epsg=4326) #convert to longitude and latitude
    target_point = Point(longitude, latitude)
    filtered_gdf['distance'] = filtered_gdf.geometry.distance(target_point)
    
    # Sort to get minimum
    filtered_gdf = filtered_gdf.sort_values(by = ['distance'])
    # Alternatively, could get min with 
    # filtered_gdf.loc[filtered_gdf['distance'].idxmin()]
    
    filtered_gdf = filtered_gdf.reset_index(drop=True)

    print(f"Total: {len(gdf)}")
    print(f"Remaining: {len(filtered_gdf)}")
    print(f"Compression: {len(filtered_gdf)/len(gdf) * 100}%")
    print(f"Closest point distance:", filtered_gdf.loc[0,'distance'])
    return filtered_gdf

def style_function(feature):
    return {
        'fillColor': 'blue',  # Fill color for the polygon
        'color': 'black',    # Border color for the polygon
        'weight': 5,          # Border weight
        'fillOpacity': 0.8,   # Opacity of the fill
    }

def displayMap(GDF, index = None, pid = None):
    length = len(GDF)
    if length == 1:
        loc = GDF
    elif length > 1 and index is not None and pid is None:
        loc = GDF[GDF.index == index] #This type of indexing to keep it as a geopandas obj
    elif length > 1 and index is None and pid is not None:
        loc = GDF[GDF['PID'] == pid]
    elif length > 1 and index is None and pid is None:
        raise ValueError("One of index or pid must be provided to displayMap")
    elif length > 1 and index is not None and pid is not None:
        raise ValueError("Only one of index or pid must be provided to displayMap")

    # Convert all datetime columns to strings for JSON serialization
    for col in loc.columns:
        if pd.api.types.is_datetime64_any_dtype(loc[col]):
            loc.loc[:, col] = loc[col].astype(str)
    
    centroid = loc.geometry.centroid.iloc[0]
    longitude = centroid.x
    latitude = centroid.y
    loc_geojson = json.loads(loc.to_json())

    # fields = ['PID','OWNER_TYPE','MUNICIPALITY','REGIONAL_DISTRICT','FEATURE_AREA_SQM'] #for BC
    fields = []
    # print(loc_geojson)
    
    m = folium.Map(location=[latitude, longitude], zoom_start=16)
    folium.GeoJson(
        loc_geojson,
        style_function=style_function,
        tooltip=folium.GeoJsonTooltip(fields=fields)  # Change this field based on your data
    ).add_to(m)
    
    # Save and display the map
    # m.save('filtered_gdb_map.html')
    display(m)

In [72]:
displayMap(gdf, index = 190000)

  loc.loc[:, col] = loc[col].astype(str)
  loc.loc[:, col] = loc[col].astype(str)
  loc.loc[:, col] = loc[col].astype(str)

  centroid = loc.geometry.centroid.iloc[0]


In [73]:
# Sample usage
api_key = 'AIzaSyDR116R2PRPMu81WsGWkLR6j5sMoB8PT_0' #FILL IN JUST NOT WHEN UPLOADING TO GITHUB
address = '35 arklow place, london ontario'
geocode_info = get_geocode_data(address, api_key)

if geocode_info:
    latitude, longitude, address_components = geocode_info    
    processed_address_components = processAddressComponents(address_components)
    print(processed_address_components)
    
    filtered_gdf = filterGDF(longitude, latitude, processed_address_components)
    displayMap(filtered_gdf, index=0)

<Response [200]>
{'street_number': '35', 'route': 'Arklow Place', 'neighborhood': 'Fanshawe', 'locality': 'London', 'administrative_area_level_3': 'London', 'administrative_area_level_2': 'Middlesex County', 'administrative_area_level_1': 'Ontario', 'country': 'Canada', 'postal_code': 'N5X 0B8'}


AttributeError: 'GeoDataFrame' object has no attribute 'processed_district'

In [74]:
gdf.columns

Index(['OGF_ID', 'CLASS_SUBTYPE', 'CLASS_SUBTYPE_NUM', 'LOT_IDENT',
       'CONCESSION_IDENT', 'GEOGRAPHIC_TOWNSHIP_NAME',
       'ROAD_ALLOWANCE_STATUS_FLG', 'RELATED_FEATURE_DESCR',
       'LOCATION_ACCURACY', 'LOCATION_DESCR', 'VERIFICATION_STATUS_FLG',
       'VERIFICATION_STATUS_DATE', 'SYSTEM_CALCULATED_AREA',
       'USER_CALCULATED_METRIC', 'GENERAL_COMMENTS',
       'GEOMETRY_UPDATE_DATETIME', 'EFFECTIVE_DATETIME', 'Shape_Length',
       'Shape_Area', 'geometry'],
      dtype='object')

In [84]:
gdf.LOCATION_DESCR

0         None
1         None
2         None
3         None
4         None
          ... 
292829    None
292830    None
292831    None
292832    None
292833    None
Name: LOCATION_DESCR, Length: 292834, dtype: object

In [107]:
gdf['processed_district'] = gdf['REGIONAL_DISTRICT'].str.lower()

# Define the words to remove (in lowercase)
words_to_remove = ['regional', 'district', 'of']

# Create a regex pattern that matches any of the words to remove
pattern = r'\b(?:' + '|'.join(words_to_remove) + r')\b'

# Remove the specified words from the REGIONAL_DISTRICT column
gdf['processed_district'] = gdf['processed_district'].str.replace(pattern, '', regex=True).str.strip()


In [260]:
gdf['processed_municipality'] = gdf['MUNICIPALITY'].str.lower()

# Define the words to remove (in lowercase)
words_to_remove = ['the','corporation','city', 'of','township','district','village']

# Create a regex pattern that matches any of the words to remove
pattern = r'\b(?:' + '|'.join(words_to_remove) + r')\b'

# Remove the specified words from the REGIONAL_DISTRICT column
gdf['processed_municipality'] = gdf['processed_municipality'].str.replace(pattern, '', regex=True).str.strip()
gdf['processed_municipality'] = gdf['processed_municipality'].str.replace(',', '', regex=True).str.strip()


In [8]:
import pandas as pd

In [9]:
d1 = pd.read_csv('landwise-dashboard/public/demo/weather2017-2019.csv')
d2 = pd.read_csv('landwise-dashboard/public/demo/weather.csv')

In [10]:
d = pd.concat([d1,d2])

In [11]:
d.drop_duplicates(inplace = True)
d.sort_values(by=['datetime'],inplace = True)


In [18]:
d.to_csv('weather.csv', index=False)  


In [17]:
len(d.datetime.unique())/365

7.736986301369863