In [64]:
import os
from pyrosm import OSM
import osmnx as ox
import geopandas as gpd
import pandas as pd
import uuid
import matplotlib.pyplot as plt
import folium
import branca.colormap as cm
import json
#project specific packages
import services.network_bands as network_bands
import services.batch_csv as batch_csv
import services.census_merge as census_merge
import services.pandas_aux as pdaux

In [None]:
#https://build.nisra.gov.uk/en/custom/data?d=HOUSEHOLD&v=DZ21&v=HH_LIFESTAGE_AGG15

In [None]:
#set base directory for data file paths.
base_dir = os.getcwd()

In [None]:
# create network graph and edges.
base_road_path = f'{base_dir}\\testEnvironment\\Data\\belfast_slightly_trimmed.osm.pbf'
G, nodes, edges = network_bands.load_osm_network(file_path=base_road_path, network_type='driving', graph_type='networkx')

In [None]:
#Start locations
start_locations = pd.read_csv(f'{base_dir}\\testEnvironment\\Data\\libraries_belfast_2024.csv')
print(start_locations.columns)
#Ensure data is converted to a dataframe
start_locations_gdf = network_bands.csv_to_gdf(start_locations, 'X COORDINATE', 'Y COORDINATE', 29902, 4326)

In [None]:
#Network graph creation.
#Create the network areas and contours - Considering making this into a Class with basic GUI, but for now fine as this.
start_locations_nearest_node = network_bands.nearest_node_and_name(G, start_locations=start_locations_gdf,  
                                                                   location_name = 'Static Library Name')

#Create service areas for each distance. Remember, these overlap, good for counting checking how close 
#input custom distances as a list.
search_distances = [1000,2000,3000]
#Create individual network areas around start locations. alpha area of 500 quite good for Belfast
network_areas = network_bands.network_areas(nearest_node_dict=start_locations_nearest_node, graph=G, 
                                                    search_distances=search_distances, alpha_value=500, weight = 'length', progress=True)
#Create network area contours by dissolving and differencing polygons based on distance.
network_contours = network_bands.network_contours(network_areas, dissolve_cat='distance',aggfunc='first', show_graph=True)

In [None]:
#Load in data zones from 2021 census
#Ensure evrything's in 4326 for network analysis, probably can change it back to tm65.
data_zones = gpd.read_file(f'{base_dir}\\testEnvironment\\Data\\DZ2021.shp')
data_zones.to_crs(4326, inplace=True)
#extract only belfast datazones
belfast_zones = data_zones[data_zones['LGD2014_nm'] == 'Belfast']

#Load in house data 
pointer = gpd.read_file(f'{base_dir}\\testEnvironment\\Data\\pointer_randomised.shp')
pointer['uuid'] = pointer.apply(lambda i: uuid.uuid4(), axis=1)
pointer.to_crs(4326, inplace=True)
belfast_zones.to_crs(pointer.crs, inplace=True)

In [None]:
# Perform a spatial join of pointer households and datazones in Belfast to calculate households in each datazone
joined_gdf = gpd.sjoin(pointer, belfast_zones, how='left', predicate='intersects' )
#number of points found within each datazone
datazone_pointer_count = joined_gdf.groupby('DZ2021_cd')['DZ2021_cd'].count().rename('actual_households').reset_index()
belfast_zones = pd.merge(belfast_zones, datazone_pointer_count, how = 'left')

In [None]:
#Load the Census data, file_paths in file_paths.
file_paths = [
    '/testEnvironment/Data/census_data/ni-2021-usual-residents.csv',
    '/testEnvironment/Data/census_data/ni-2021-households.csv',
    '/testEnvironment/Data/census_data/ni-2021-employment-deprivation.csv'
]
#extract each one from dataframe
loaded_csv = batch_csv.batch_csv_read(file_paths)

#check data is loaded loaded
print(loaded_csv.keys())

#force rename to maintain consistency of important join value column.
loaded_csv['ni-2021-employment-deprivation'].rename(columns={'Census 2021 Data Zone Code':'Geography code',
                                                             'Count':'employment_deprivation_count'}, inplace=True)

#OSNI data has irregular capitalisation, avoids this bug by forcing lower case. Some are 'Geography Code', 'geography Code' etc.
# Need to incorporate this properly into function
for key, df in loaded_csv.items():
    df.columns = df.columns.str.lower()
    
joined_census_data = census_merge.join_census_csv(loaded_csv, 'geography code',  drop=True,join_type='left')
#dropping some extraneous columns as they are not needed and clutter the dataset
joined_census_data.drop(columns=['household deprivation (employment) code','household deprivation (employment) label'], inplace=True)

In [None]:
# Merge the data zones with the 
belfast_zones_census = pd.merge(belfast_zones, joined_census_data, left_on='DZ2021_cd', right_on='geography code', how='left', suffixes=('_left', '_right'))

# Drop the duplicate columns from the merged dataframe
census_merge.drop_dupe_cols(belfast_zones_census, ('_left', '_right'))

In [None]:
#Calculate CENSUS METRICS PER HOUSE in pointer data
#join the census data loaded 
#Need to force these to numeric. Ensure coerce for any nulls
belfast_zones_census['all households'] = pd.to_numeric(belfast_zones_census['all households'], errors = 'coerce')
belfast_zones_census['all usual residents'] = pd.to_numeric(belfast_zones_census['all usual residents'], errors = 'coerce')
## Calculate your metrics. Average resident per house etc.
# actual household data has been extracted by spatial join of the randomised poitner dataset. 
#average residents per household
belfast_zones_census['avg_resi_house'] = (belfast_zones_census['all usual residents'] / belfast_zones_census['actual_households'])
#actual residents based off pointer
belfast_zones_census['actual_total_residents'] = (belfast_zones_census['avg_resi_house'] * belfast_zones_census['actual_households'])
#average number of employment deprived people per household. - Super relevant for this analysis.
belfast_zones_census['avg_emp_dep_per_house'] = (belfast_zones_census['employment_deprivation_count'] / belfast_zones_census['actual_households'])*belfast_zones_census['avg_resi_house']
#average number of employmenet deprived people per resident.
belfast_zones_census['avg_emp_dep_per_capita'] = (belfast_zones_census['employment_deprivation_count'] / belfast_zones_census['all usual residents'])*belfast_zones_census['avg_resi_house']

#Force to gdf, currently will be a panda series
belfast_zones_census = gpd.GeoDataFrame(belfast_zones_census)


In [None]:
# Spatial join to find which network contour each household falls into
households_with_contour = gpd.sjoin(pointer, network_contours, how="left", predicate="within")
households_with_contour = households_with_contour.drop(columns='index_right').reset_index(drop=True)
# Assuming 'distance' is the column in network_contours indicating the distance threshold
households_with_contour['distance'] = households_with_contour['distance']

# Spatial join to attach zone information to each household
households_with_zones = gpd.sjoin(households_with_contour, belfast_zones_census, how="left", predicate="within").drop(columns='index_right').reset_index(drop=True)

# Group by census zone and distance, then count the number of households
household_counts = households_with_zones.groupby(['DZ2021_cd', 'distance']).size().unstack(fill_value=0)

# Join this back to the belfast_census_zones to add the counts as new columns
belfast_census_zones = belfast_zones_census.merge(household_counts, on='DZ2021_cd', how='left')

pdaux.fill_na_with_zero(belfast_census_zones, ['1000.0','2000.0','3000.0'])


In [66]:
#Numbers will be off due to randomisation of pointer dataset.
belfast_census_data_geojson = json.loads(belfast_census_zones.to_json())

# Create the Folium map centered around the average coordinates of your geometries, can ignore warning
map_center = belfast_census_zones.geometry.centroid.unary_union.centroid
m = folium.Map(location=[map_center.y, map_center.x], zoom_start=12)

# define style



# Create a color scale
# Convert column names to string if they are not
belfast_census_zones.columns = belfast_census_zones.columns.astype(str)

# Now access the column
min_households = belfast_census_zones['1000.0'].min()
max_households = belfast_census_zones['1000.0'].max()

color_scale = cm.linear.YlOrRd_09.scale(min_households, max_households)

def households_1000_style(feature):
    # Correctly access properties within the 'properties' key of the feature
    households_1000 = feature['properties'].get('1000.0', 0)  # Provide a default value in case key is missing
    return {
        'fillColor': color_scale(households_1000),
        'color': 'black',  # Outline color
        'weight': 1,  # Outline weight
        'fillOpacity': 0.7
    }

#defines the highlight colour
def highlight_function(feature):
    return {
        'fillColor': '#green',
        'color': 'green',
        'weight': 3,
        'fillOpacity': 0.25
    }

# Creates a geojson layer to then be added to the folium map
belfast_1000m_count = folium.GeoJson(
    belfast_census_data_geojson,
    name='Houses within 1000m',
    style_function=households_1000_style,  # Use the updated style function
    highlight_function=highlight_function,
    tooltip=folium.GeoJsonTooltip(
        fields=['DZ2021_cd', 'DZ2021_nm', 'actual_households', '1000.0','2000.0', '3000.0'],
        aliases=['Data Zone:', 'Data Zone Name:', 'Households:', 'Households within 1km:', 'Households within 2km:', 'Households within 3km:'],
        localize=True
    )
)
belfast_1000m_count.add_to(m)

color_scale.caption = 'Households within 1000m'
color_scale.add_to(m)

# layer control
folium.LayerControl().add_to(m)

# Save the map to an HTML file
m.save('test.html', cdn_resources='cdn')

m


  map_center = belfast_census_zones.geometry.centroid.unary_union.centroid
