In [1]:
import pandas as pd
import sqlite3
import src.data_enrichment as enrich

import geopandas as gpd

In [2]:
# Load the master housing table from the local SQLite database.
conn = sqlite3.connect("housing_info.db")
df_master = pd.read_sql_query('SELECT * FROM df_master', conn)

# Vancouver local-area boundaries, reprojected to EPSG:4269.
# The CRS is given as an authority string: the older {'init': ...} dict form
# is deprecated and makes pyproj emit a FutureWarning on every run.
t_geodata = gpd.read_file('opendata/local-area-boundary.shp')
t_geodata['geometry'] = t_geodata['geometry'].to_crs('EPSG:4269')

# Tag every row so the city survives the later concatenation with other cities.
t_geodata['City'] = 'Vancouver'

  in_crs_string = _prepare_from_proj_string(in_crs_string)


In [3]:
# Neighbourhood -> coordinates lookup table; any row with a missing value is dropped.
coords_mapping = (
    pd.read_csv('data/coords_mapping.csv')
    .dropna()
)

In [21]:
import json

# Parse the municipal-boundary GeoJSON into plain Python objects.
# NOTE(review): `test` is not read by any later visible cell and is rebound by
# a scratch cell further down — confirm whether this load is still needed.
with open("opendata/Municipal_Boundary.geojson") as boundary_file:
    test = json.load(boundary_file)


Surrey: load the city boundary file and convert its coordinates from EPSG:26910 to EPSG:4269

In [4]:
import json
from pyproj import Proj, transform

# Raw Surrey boundary (GeoJSON-style dict with a 'features' list).
with open("opendata/surrey_city_boundary.json") as boundary_file:
    json_data = json.load(boundary_file)

# Source (EPSG:26910) and target (EPSG:4269) projections.
# NOTE(review): the init= keyword is the deprecated pyproj spelling and is the
# likely source of the FutureWarning printed under this cell; it presumably
# also pins (x, y) = (lng, lat) axis order — confirm before modernising.
proj_26910, proj_4269 = (
    Proj(init=epsg_code) for epsg_code in ('epsg:26910', 'epsg:4269')
)

# Function to transform coordinates
def transform_coordinates(coords, transformer=None):
    """Flatten a nested coordinate array and reproject every position.

    Parameters
    ----------
    coords : list
        A list of ``[x, y]`` (optionally ``[x, y, z]``) positions, or an
        arbitrarily nested list of such lists, as found in a GeoJSON
        ``coordinates`` member.
    transformer : object, optional
        Any object exposing ``transform(x, y) -> (x_new, y_new)``. When
        omitted, a pyproj ``Transformer`` from EPSG:26910 to EPSG:4269 in
        (x, y) = (easting/lng, northing/lat) order is created.

    Returns
    -------
    list of dict
        One ``{'lat': ..., 'lng': ...}`` per input position, in traversal
        order. Nesting is discarded, so all rings of a multi-ring geometry
        end up in the same flat list.
    """
    if transformer is None:
        # Built once per top-level call and handed down the recursion. The
        # module-level pyproj.transform() used previously is deprecated and
        # sets up a new transformation on every single point.
        from pyproj import Transformer
        transformer = Transformer.from_crs('EPSG:26910', 'EPSG:4269', always_xy=True)

    flattened_coords = []
    for coord in coords:
        if isinstance(coord[0], (list, tuple)):  # still a container of positions
            flattened_coords.extend(transform_coordinates(coord, transformer))
        else:
            # Only x and y are used; a trailing elevation value is ignored
            # instead of breaking the unpacking.
            x, y = coord[0], coord[1]
            x_new, y_new = transformer.transform(x, y)
            flattened_coords.append({'lat': y_new, 'lng': x_new})
    return flattened_coords
# Function to extract and transform relevant information from each feature
def extract_and_transform_feature_info(feature):
    """Reduce one GeoJSON feature to its name and reprojected vertex list.

    Reads ``feature['properties']`` and ``feature['geometry']['coordinates']``,
    passes the coordinates through ``transform_coordinates`` and returns
    ``{'name': <properties['NAME'] or None>, 'coordinates': <result>}``.
    """
    props = feature['properties']
    reprojected = transform_coordinates(feature['geometry']['coordinates'])
    return {'name': props.get('NAME'), 'coordinates': reprojected}

# One record (name + reprojected vertices) per feature in the Surrey file.
features = json_data['features']
data = list(map(extract_and_transform_feature_info, features))

# Assemble the Surrey frame: title-case the names, tag the city, and fix the
# column order to name / City / coordinates.
surrey_df = (
    pd.DataFrame(data)
    .assign(name=lambda frame: frame['name'].str.title(), City='Surrey')
    [['name', 'City', 'coordinates']]
)

  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x_new, y_new = transform(proj_26910, proj_4269, x, y)


In [5]:
import json
def convert_to_json(data_string):
    """Decode a JSON document that may contain ``#`` comment lines.

    Lines whose first non-blank character is ``#`` are discarded, every
    ``'\\n'`` is removed from what remains, and the result is parsed with
    ``json.loads``.

    Parameters
    ----------
    data_string : str | list | dict
        The raw text to decode. A value that is already a ``list`` or
        ``dict`` is returned unchanged, so applying this function to a column
        that was decoded on a previous run is a no-op rather than an
        AttributeError.

    Returns
    -------
    The decoded JSON value.

    Raises
    ------
    json.JSONDecodeError
        If the cleaned text is not valid JSON.
    """
    # Already decoded (e.g. the cell was re-executed): nothing to do.
    if isinstance(data_string, (list, dict)):
        return data_string

    # Removing comments
    kept_lines = [
        line for line in data_string.splitlines(True)
        if not line.strip().startswith('#')
    ]

    # Removing '\n' characters, then load as JSON
    return json.loads(''.join(kept_lines).replace('\n', ''))

# Replace the raw text in the 'JSON' column with its decoded value, element by element.
coords_mapping['JSON'] = coords_mapping['JSON'].map(convert_to_json)

In [6]:
from shapely.geometry import Polygon

def create_polygon(json):
    """Build a Polygon from a sequence of ``{'lat': ..., 'lng': ...}`` mappings.

    Each vertex is handed to ``Polygon`` as a ``(lng, lat)`` tuple, in the
    order the mappings appear in ``json``.
    """
    return Polygon([(vertex["lng"], vertex["lat"]) for vertex in json])

# Turn each vertex list into a polygon: the mapped neighbourhoods first, then Surrey.
coords_mapping['polygon'] = coords_mapping['JSON'].apply(create_polygon)
surrey_df['geometry'] = surrey_df['coordinates'].apply(create_polygon)

# Present the mapping table under the name / geometry / City column labels.
coords_df = pd.DataFrame({
    'name': coords_mapping['Neighborhood'],
    'geometry': coords_mapping['polygon'],
    'City': coords_mapping['City'],
})

# Stack Vancouver, the mapped neighbourhoods and Surrey into one frame with a
# fresh 0..n-1 index.
# NOTE(review): t_geodata is rebuilt from itself, so executing this cell a
# second time appends the other two frames again.
t_geodata = pd.concat(
    [t_geodata, coords_df, surrey_df],
    axis=0,
).reset_index(drop=True)

In [8]:
# Keep only the three columns used for plotting; any other column carried in
# by the concatenated frames (e.g. Surrey's raw 'coordinates') is dropped here.
t_geodata = t_geodata[['name','geometry','City']]

In [9]:
# Import necessary libraries
import json
from bokeh.io import output_notebook, show
from bokeh.plotting import figure
from bokeh.models import GeoJSONDataSource, HoverTool, ColumnDataSource
from bokeh.transform import factor_cmap
from bokeh.palettes import Category20

# Ensure you have the 'City' column in your GeoDataFrame `t_geodata`

# Serialise the GeoDataFrame once: the GeoJSON string feeds Bokeh, the parsed
# dict is walked below to collect the boundary vertices.
json_data = t_geodata.to_json()
t_geodata_json = json.loads(json_data)

# Input GeoJSON source that contains features for plotting.
geosource = GeoJSONDataSource(geojson=json_data)

# Get list of unique cities
cities = t_geodata['City'].unique().tolist()

# Create a color map.
# Category20 only defines palettes for 3..20 colours, so indexing it with the
# raw city count raises KeyError outside that range; clamp the lookup and trim
# the palette back to the number of cities.
palette_size = min(max(len(cities), 3), 20)
city_palette = Category20[palette_size][:len(cities)]
color_map = factor_cmap('City', palette=city_palette, factors=cities)

# Initialize the figure (height/width replace plot_height/plot_width, which
# were removed in Bokeh 3).
p = figure(title='GVA neighbourhoods', height=600, width=950)
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None

# Add patch renderer to figure.
p.patches('xs', 'ys', source=geosource,
          fill_color=color_map, line_color='white', line_width=0.25, fill_alpha=1)

# Extract every boundary vertex from the GeoJSON, remembering which city and
# neighbourhood it belongs to so the hover tooltip can resolve @City / @name
# on the vertex markers as well as on the patches.
edge_x, edge_y, edge_city, edge_name = [], [], [], []
for feature in t_geodata_json['features']:
    geometry = feature['geometry']
    if geometry is None:
        continue  # row without a geometry: nothing to draw
    if geometry['type'] == 'Polygon':
        polygons = [geometry['coordinates']]
    elif geometry['type'] == 'MultiPolygon':
        polygons = geometry['coordinates']
    else:
        continue
    properties = feature.get('properties') or {}
    for rings in polygons:
        # rings[0] is the exterior ring, rings[1:] are interior rings (holes)
        for ring in rings:
            for point in ring:
                edge_x.append(point[0])
                edge_y.append(point[1])
                edge_city.append(properties.get('City'))
                edge_name.append(properties.get('name'))

# Create a ColumnDataSource for the edge points
edge_source = ColumnDataSource(
    data=dict(x=edge_x, y=edge_y, City=edge_city, name=edge_name)
)

# Add Scatter markers for edge points (scatter draws circles by default;
# circle(size=...) is deprecated in recent Bokeh releases).
p.scatter(x='x', y='y', source=edge_source, size=5, color='red', alpha=0.7)

# Add hover tool with custom formatting for coordinates
hover = HoverTool()
hover.tooltips = [("Coordinates", "(@x{0.000000}, @y{0.000000})"), ("City", "@City"), ("Neighbourhood", "@name")]
p.add_tools(hover)

# Display figure in Jupyter Notebook.
output_notebook()

# Display figure.
show(p)


In [None]:
# Scratch cell: a hand-entered list of lat/lng vertices and its reversed copy.
# NOTE(review): this rebinds `test`, discarding the GeoJSON loaded into the
# same name by an earlier cell.
test =[
{"lat": 49.251664, "lng": -122.786532},
{"lat": 49.247880, "lng": -122.786532},

{"lat": 49.247880, "lng": -122.786532},  # same vertex as the previous entry
{"lat": 49.247880, "lng": -122.782744},
{"lat": 49.246267, "lng": -122.782744},
{"lat": 49.245721, "lng": -122.783045},
{"lat": 49.244832, "lng": -122.784010},
{"lat": 49.243482, "lng": -122.785386},
{"lat": 49.242724, "lng": -122.786728},
{"lat": 49.242199, "lng": -122.787517},
{"lat": 49.241665, "lng": -122.788018}

]
# test
# Same vertices in the opposite order; `test` itself is left unchanged.
list(reversed(test))

# NOTE(review): `asdf` is not defined anywhere in the visible notebook, so this
# line raises NameError and stops a "Run All" here. Possibly a deliberate stop
# before the scraping cells — confirm before removing.
asdf


In [None]:
import requests
from bs4 import BeautifulSoup

url = "https://www03.cmhc-schl.gc.ca/hmip-pimh/en/TableMapChart/TableMatchingCriteria?GeographyType=MetropolitanMajorArea&GeographyId=2410&CategoryLevel1=Population%2C%20Households%20and%20Housing%20Stock&CategoryLevel2=Household%20Income&ColumnField=HouseholdIncomeRange&RowField=Neighbourhood&SearchTags%5B0%5D.Key=Households&SearchTags%5B0%5D.Value=Number&SearchTags%5B1%5D.Key=Statistics&SearchTags%5B1%5D.Value=AverageAndMedian"

# Start from empty results so a failed request can never leave `headers` /
# `rows` undefined, or silently reuse values from an earlier run of this cell.
headers, rows = [], []

# Send a GET request to the URL. Without a timeout, requests waits
# indefinitely on an unresponsive server.
response = requests.get(url, timeout=30)

# Check if the request was successful
if response.status_code == 200:
    # Parse the HTML content
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find the table element; fail with a clear message if the page layout
    # changed instead of an AttributeError on None further down.
    table = soup.find('table')
    if table is None:
        raise ValueError("No <table> element found in the response")

    # Extract table headers
    headers = [header.text.strip() for header in table.find_all('th')]

    # Extract table rows; 'th' is included so the first (label) column is kept
    rows = [
        [cell.text.strip() for cell in row.find_all(['td', 'th'])]
        for row in table.find_all('tr')
    ]

else:
    print("Failed to retrieve data. Status code:", response.status_code)

In [None]:
# Raw scrape: one list per table row, with the header row first.
income_df = pd.DataFrame(rows)

# Label the top-left cell before promoting the first row to the header.
# A single .iloc write is used instead of chained indexing
# (income_df[0][0] = ...), which may write to a temporary copy and is a
# no-op under pandas copy-on-write.
income_df.iloc[0, 0] = 'City'
new_header = income_df.iloc[0]  # grab the first row for the header
income_df = income_df[1:]  # take the data less the header row
income_df.columns = new_header  # set the header row as the df header

income_df.to_csv('data/income_df.csv')

In [None]:
# Preview the first rows only rather than rendering the whole scraped table.
income_df.head(10)