In [None]:
from pyrosm import OSM
import osmnx as ox
import network_bands
import geopandas as gpd
import pandas as pd
import uuid
import os
import matplotlib.pyplot as plt


In [None]:
#https://build.nisra.gov.uk/en/custom/data?d=HOUSEHOLD&v=DZ21&v=HH_LIFESTAGE_AGG15



In [None]:
#set base directory for data file paths.
#os.getcwd() is the kernel's current working directory, so every data path built from base_dir
#below resolves relative to wherever the notebook is launched from.
base_dir = os.getcwd()

In [None]:
# create network graph and edges.
# os.path.join builds the path with the separator of the running OS; the previous hard-coded
# '\\' separators only resolved on Windows.
base_road_path = os.path.join(base_dir, 'testEnvironment', 'Data', 'belfast_super_trimmed.osm.pbf')
# load_osm_network's result is unpacked into three names: G, nodes and edges.
G, nodes, edges = network_bands.load_osm_network(file_path=base_road_path, network_type='driving', graph_type='networkx')

In [None]:
#Start locations
# os.path.join keeps the path valid on every OS (hard-coded '\\' separators only resolve on Windows).
start_locations = pd.read_csv(os.path.join(base_dir, 'testEnvironment', 'Data', 'libraries_belfast_2024.csv'))

#Convert the table to a geodataframe using its 'X COORDINATE' / 'Y COORDINATE' columns.
#NOTE(review): 29902 and 4326 are presumably the source and target EPSG codes - confirm against network_bands.csv_to_gdf.
start_locations_gdf = network_bands.csv_to_gdf(start_locations, 'X COORDINATE', 'Y COORDINATE', 29902, 4326)

In [None]:
#input custom distances as a list.
search_distances = [1000,2000,3000]

#Look up the nearest graph node (and name) for each start location.
#NOTE(review): description taken from the helper's name - confirm in network_bands.
start_locations_nearest_node = network_bands.nearest_node_and_name(
    G,
    start_locations=start_locations_gdf,
    location_name='Static Library Name',
)

#Create service areas for each distance. Remember, these overlap.
#progress=True will print ongoing progress.
alpha_areas = network_bands.single_source_polygon(
    nearest_node_dict=start_locations_nearest_node,
    graph=G,
    search_distances=search_distances,
    alpha_value=500,
    weight='distance',
    progress=True,
)

In [None]:
#import pointer data
# os.path.join keeps the path valid on every OS (hard-coded '\\' separators only resolve on Windows).
pointer = gpd.read_file(os.path.join(base_dir, 'testEnvironment', 'Data', 'pointer_trimmed_for_trimmed_library.shp'))
#Ensures that the pointer and start location CRS are the same (should be 4326 if using osm data)
if pointer.crs != start_locations_gdf.crs:
    pointer = pointer.to_crs(start_locations_gdf.crs)
# assign each house a uuid - useful later down the line.
# One uuid4 per row, generated directly: the earlier row-wise apply passed each row to a lambda
# that ignored it, which is slower and does not yield a column on an empty table.
pointer['uuid'] = [uuid.uuid4() for _ in range(len(pointer))]


In [None]:
#Load in data zones from 2021 census
# os.path.join replaces the previous mix of '\\' and '/' separators so the path resolves on every OS.
data_zones = gpd.read_file(os.path.join(base_dir, 'testEnvironment', 'Data', 'DZ2021.shp'))
#extract only belfast datazones (rows whose 'LGD2014_nm' value is exactly 'Belfast')
belfast_zones = data_zones[data_zones['LGD2014_nm'] == 'Belfast']

In [None]:
#Overview map: network edges, pointer records and start locations drawn on one shared axis.
fig, ax = plt.subplots(figsize=(12,12))

#zorder sets the draw order (higher values are drawn on top): edges (2) < pointer (12) < start locations (13).
edges.plot(ax=ax, zorder=2)
pointer.plot(ax=ax, color='black', zorder = 12, markersize=2)
start_locations_gdf.plot(ax=ax, color='red', zorder=13, markersize=150)


In [None]:
#Census CSVs to load; each path starts at the project folder (leading '/'), not at the drive root.
file_paths = [
    f'/testEnvironment/Data/census_data/{census_table}.csv'
    for census_table in (
        'ni-2021-usual-residents',
        'ni-2021-households',
        'ni-2021-employment-deprivation',
    )
]

#TODO: probably move this function to services.
def mass_csv_read(file_paths:list):
    """ Read several CSV files into a dictionary of dataframes for subsequent analysis and joining.

    File paths should be from the parent folder onwards. Do not include C:/User etc.
    Each path is resolved relative to the current working directory (os.getcwd()).
    A leading '/' or '\\' is optional: it is stripped before the path is joined, so
    '/data/stored/here/mydata.csv' and 'data/stored/here/mydata.csv' load the same file.

    Parameters:
        file_paths (list): A list of file paths, each string should look like '/data/stored/here/mydata.csv'.

    Returns:
        dict: one dataframe per path, keyed by the file name without its extension
        (e.g. 'mydata'). If two paths share a file name, the later one replaces the earlier.
    """
    base_dir = os.getcwd()
    csv_loaded = {}
    for file_path in file_paths:
        key = os.path.splitext(os.path.basename(file_path))[0]
        # Strip leading separators so os.path.join treats the path as relative to base_dir;
        # plain string concatenation produced a broken path when the leading '/' was missing.
        relative_path = file_path.lstrip('/\\')
        csv_loaded[key] = pd.read_csv(os.path.join(base_dir, relative_path))
    return csv_loaded
#read every census CSV; the result is a dictionary of dataframes keyed by file name without the extension.

loaded_csv = mass_csv_read(file_paths)


In [None]:
#check data is loaded
print(loaded_csv.keys())

#OSNI data has irregular capitalisation. Some are 'Geography Code', 'geography Code' etc.
#Lower-case every header first so the rename below matches whatever capitalisation the file used.
for census_df in loaded_csv.values():
    census_df.columns = census_df.columns.str.lower()

#force rename to maintain consistency of important join value column.
#rename() returns a new dataframe, so the dictionary entry is replaced rather than edited in place.
loaded_csv['ni-2021-employment-deprivation'] = loaded_csv['ni-2021-employment-deprivation'].rename(
    columns={'census 2021 data zone code':'geography code'}
)

In [None]:
#Likely move this function to services too.
def join_ni_census(dict_of_df:dict, join_column:str, join_type='left'):
    """ Join a dictionary of OSNI census dataframes into one dataframe on a shared column.

    The first dataframe in dict_of_df is the base (left side); every other dataframe is
    merged onto it in dictionary order. Before each merge, any column of the incoming
    dataframe whose name already exists in the running result (other than join_column)
    is dropped, so pandas does not create suffixed duplicate columns. The dropped column
    names are printed once all merges are done. The input dataframes are not modified.

    Parameters:
        dict_of_df (dict): dictionary of dataframes, a result of the mass_csv_read() function.
        join_column (str): column name to join by. Must be present in every dataframe.
        join_type: type of join - SQL-like, see pd.merge() docs (passed as `how`).

    Returns:
        The joined dataframe. If dict_of_df holds a single dataframe it is returned as is.

    Raises:
        ValueError: if dict_of_df is empty.
    """
    if not dict_of_df:
        raise ValueError('dict_of_df must contain at least one dataframe to join.')

    joined_df = next(iter(dict_of_df.values()))
    columns_dropped = []
    # Iterate the dictionary that was passed in (the previous version read the notebook-level
    # loaded_csv variable, so the function ignored its own argument).
    for df in dict_of_df.values():
        if df is joined_df: #ensure it doesn't join self
            continue
        #clean the data first. Dropping the overlapping columns up front keeps the column names tidy.
        columns_to_drop = [
            column for column in df.columns
            if column in joined_df.columns and column != join_column
        ]
        df_trimmed = df.drop(columns=columns_to_drop)
        columns_dropped.append(columns_to_drop)
        joined_df = pd.merge(joined_df, df_trimmed, on=join_column, how=join_type)
    print(f'The following columns were duplicates from the right join and not included: {columns_dropped}')

    return joined_df

#left-join every loaded census table onto the first one, matching rows on the 'geography code' column.
joined_census_data = join_ni_census(loaded_csv, 'geography code', 'left')


In [None]:
#display the joined census table (the notebook renders the cell's last expression).
joined_census_data