In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import geopandas as gpd
import networkx as nx
import shapely
import folium
import geojson

In [2]:
# Defining classes for the dataframes
class AmenityPoint:
    """Plain record for an amenity represented by a point geometry.

    Every constructor argument is stored unchanged on the instance under the
    attribute of the same name.
    """

    def __init__(self, geometry, lat, lon, amenity, name, addr_city):
        # Where the amenity is
        self.geometry, self.lat, self.lon = geometry, lat, lon
        # What the amenity is
        self.amenity, self.name, self.addr_city = amenity, name, addr_city

class AmenityPolygon:
    """Plain record for an amenity represented by a polygon geometry.

    Every constructor argument is stored unchanged on the instance under the
    attribute of the same name.
    """

    def __init__(self, geometry, lat, lon, amenity, name, addr_city):
        # Where the amenity is
        self.geometry, self.lat, self.lon = geometry, lat, lon
        # What the amenity is
        self.amenity, self.name, self.addr_city = amenity, name, addr_city

In [3]:
# Read the building footprints GeoJSON into a GeoDataFrame.
# NOTE(review): `buildingfootprints_gdf` is not referenced by any later cell visible
# in this notebook — confirm it is still needed.

buildingfootprints_gdf = gpd.read_file('manila_building_footprints.geojson')

# Preview the first rows as a quick sanity check of the load
buildingfootprints_gdf.head()

Unnamed: 0,id,geometry
0,0,"POLYGON ((120.95954 14.62190, 120.95945 14.621..."
1,1,"POLYGON ((120.96084 14.62342, 120.96095 14.623..."
2,2,"POLYGON ((120.96181 14.60838, 120.96172 14.608..."
3,3,"POLYGON ((120.96087 14.62855, 120.96103 14.628..."
4,4,"POLYGON ((120.96158 14.62105, 120.96162 14.621..."


In [4]:
# Build the amenities GeoDataFrame; the CSV stores each geometry as WKT text
from shapely import wkt

manila_amenities_df = pd.read_csv('manila_amenities.csv')
manila_amenities_df['geometry'] = manila_amenities_df['geometry'].map(wkt.loads)
manila_amenities_gdf = gpd.GeoDataFrame(manila_amenities_df, crs='epsg:4326')

# Split the rows by geometry type
manila_amenities_point_gdf = manila_amenities_gdf.loc[manila_amenities_gdf['geometry'].geom_type == 'Point']
manila_amenities_polygon_gdf = manila_amenities_gdf.loc[manila_amenities_gdf['geometry'].geom_type == 'Polygon']
manila_amenities_multipoly_gdf = manila_amenities_gdf.loc[manila_amenities_gdf['geometry'].geom_type == 'MultiPolygon']

# Fold the multipolygons into the polygon frame: polygons first, then multipolygons,
# renumbered 0..n-1
manila_amenities_polygon_gdf = gpd.GeoDataFrame(
    pd.concat(
        [manila_amenities_polygon_gdf, manila_amenities_multipoly_gdf],
        ignore_index=True,
    )
)

# Renumber the point frame 0..n-1 as well
manila_amenities_point_gdf = manila_amenities_point_gdf.reset_index(drop=True)

# Placeholder column; a later cell fills it with the indices of the points inside each polygon
manila_amenities_polygon_gdf['amenity_points'] = None

In [5]:
# For each polygon in the polygon dataframe, find all the points from the point dataframe
# that intersect it, and store them in the 'amenity_points' column as a list of point
# index labels (ascending, i.e. the order a row-by-row scan of the point frame produces).
#
# The point frame's spatial index is queried once per polygon instead of calling
# intersects() from Python for every polygon/point pair.
for i, polygon in manila_amenities_polygon_gdf.iterrows():
    try:
        # sindex.query returns integer positions into the point frame
        hit_positions = manila_amenities_point_gdf.sindex.query(polygon['geometry'], predicate='intersects')
        # Translate positions to index labels so the stored values match the frame's index
        points_within_polygon = sorted(manila_amenities_point_gdf.index[hit_positions].tolist())
    except Exception as e:
        # Best effort: report the failure and leave this polygon without points
        print(f"Error processing polygon {i}: {e}")
        points_within_polygon = []
    manila_amenities_polygon_gdf.at[i, 'amenity_points'] = points_within_polygon

In [6]:
# List the distinct amenity categories present among the polygon amenities
manila_amenities_polygon_gdf['amenity'].unique()

array(['education', 'finance', 'government offices', 'grocery', 'health',
       'malls', 'residential areas', 'security'], dtype=object)

In [7]:
# Buckle up. We're trying to create a network out of this monstrosity of a dataframe

# Create a networkx graph

def create_network(amenities_polygon_gdf, amenities_point_gdf):
    """Build an edgeless networkx graph with one node per amenity polygon and point.

    Polygon nodes are keyed ``polygon_<index>`` and point nodes ``point_<index>``,
    where ``<index>`` is the row's index label in its frame.

    Args:
        amenities_polygon_gdf: Frame of polygon amenities. Rows need the columns
            'geometry', 'amenity', 'name', 'addr_city' and 'amenity_points';
            otherwise the row is skipped with a printed message.
        amenities_point_gdf: Frame of point amenities. Rows need the columns
            'geometry', 'amenity', 'name' and 'addr_city'. The columns 'y'/'x'
            are used for lat/lon when present; otherwise the geometry's own
            y/x coordinates are used.

    Returns:
        The new graph. It contains nodes only; no edges are added here.
    """
    amenities_network = nx.Graph()

    # Add polygon nodes
    polygon_columns = ('geometry', 'amenity', 'name', 'addr_city', 'amenity_points')
    for index, row in amenities_polygon_gdf.iterrows():
        if not all(column in row for column in polygon_columns):
            print(f"Skipping row {index} in amenities_polygon_gdf due to missing data.")
            continue

        geometry = row['geometry']
        centroid = geometry.centroid
        contained = row['amenity_points']
        amenities_network.add_node(
            f"polygon_{index}",
            polygon_index=index,
            geometry=geometry,
            lat=centroid.y,
            lon=centroid.x,
            amenity=row['amenity'],
            name=row['name'],
            addr_city=row['addr_city'],
            # Store a copy: merging nodes later extends this list, and that must not
            # write through to the list held in the dataframe cell. A cell that was
            # never filled (e.g. still None) becomes an empty list.
            amenity_points=list(contained) if isinstance(contained, (list, tuple)) else [],
        )

    # Add point nodes
    point_columns = ('geometry', 'amenity', 'name', 'addr_city')
    for index, row in amenities_point_gdf.iterrows():
        if not all(column in row for column in point_columns):
            print(f"Skipping row {index} in amenities_point_gdf due to missing data.")
            continue

        geometry = row['geometry']
        amenities_network.add_node(
            f"point_{index}",
            point_index=index,
            geometry=geometry,
            # Prefer the explicit coordinate columns, but do not fail without them
            lat=row['y'] if 'y' in row else geometry.y,
            lon=row['x'] if 'x' in row else geometry.x,
            amenity=row['amenity'],
            name=row['name'],
            addr_city=row['addr_city'],
            is_in_polygon=False,
        )

    return amenities_network

In [25]:
from shapely.ops import unary_union
from shapely.geometry import Polygon, MultiPolygon, LineString
from geopy.distance import geodesic

def combine_contiguous_amenities_by_polygon(graph, max_distance, max_perimeter, lines):
    """Merge nearby polygon amenities of the same type into single graph nodes.

    Every node whose geometry is a Polygon when it is visited looks for other
    Polygon/MultiPolygon nodes with the same 'amenity' value such that

    * the geodesic distance between the two centroids is below ``max_distance``, and
    * no third node's geometry intersects the straight line between the centroids.

    Each such pair is recorded in ``lines``. Candidates are then absorbed nearest
    first: when the union's ``length`` is below ``max_perimeter`` the visiting node
    takes the union geometry, the combined name, the new centroid and both nodes'
    'amenity_points', and the absorbed node is removed from the graph.

    Point nodes intersecting a visited polygon get ``is_in_polygon = True``.

    Args:
        graph: networkx graph of amenity nodes; modified in place.
        max_distance: Centroid-to-centroid threshold in metres.
        max_perimeter: Upper bound on the merged geometry's ``length``, expressed
            in the geometry's coordinate units. NOTE(review): centroid x/y are fed
            to geodesic() as lon/lat, so this looks like degrees, not metres — confirm.
        lines: List that receives one centroid-to-centroid LineString per
            candidate pair (used for visualisation).

    Returns:
        The same ``graph`` object.
    """
    # shapely spells the type 'MultiPolygon'; the former 'Multipolygon' never matched
    polygon_types = ('Polygon', 'MultiPolygon')

    for node_key, node_data in list(graph.nodes.items()):
        if 'geometry' not in node_data or node_data['geometry'].geom_type != 'Polygon':
            continue

        # The snapshot above still lists nodes that an earlier merge removed. Such a
        # node keeps flagging the points it contains, but it must not search for merge
        # partners or add connection lines any more.
        still_in_graph = node_key in graph.nodes

        other_node_distances = []
        contained_points = []
        for other_node_key, other_node_data in list(graph.nodes.items()):
            if 'geometry' not in other_node_data:
                continue
            other_geometry = other_node_data['geometry']

            if other_geometry.geom_type in polygon_types:
                if not still_in_graph or node_key == other_node_key:
                    continue
                if node_data['amenity'] != other_node_data['amenity']:
                    continue

                centroid = node_data['geometry'].centroid
                other_centroid = other_geometry.centroid
                distance = geodesic((centroid.y, centroid.x), (other_centroid.y, other_centroid.x)).meters
                if distance >= max_distance:
                    continue

                # Check if any other amenities intersect the line between centroids
                line_between_centroids = LineString([centroid, other_centroid])
                amenities_intersecting = any(
                    graph.nodes[amenity_key]['geometry'].intersects(line_between_centroids)
                    for amenity_key in graph.nodes
                    if amenity_key != node_key and amenity_key != other_node_key
                )
                if not amenities_intersecting:
                    # Ranking uses the planar centroid-to-geometry distance
                    other_node_distances.append((other_node_key, centroid.distance(other_geometry)))
                    connect_lines(node_data['geometry'], other_geometry, lines)

            elif other_geometry.geom_type == 'Point':
                if node_data['geometry'].intersects(other_geometry) and not other_node_data.get('is_in_polygon', False):
                    contained_points.append(other_node_key)

        other_node_distances.sort(key=lambda pair: pair[1])

        for new_node_key, _ in other_node_distances:
            if new_node_key not in graph.nodes:
                continue
            target = graph.nodes[node_key]
            absorbed = graph.nodes[new_node_key]
            combined_node = shapely.ops.unary_union([target['geometry'], absorbed['geometry']])
            if combined_node.length < max_perimeter:
                target['geometry'] = combined_node
                target['name'] = combine_names(target.get('name'), absorbed.get('name'))
                target['lat'] = combined_node.centroid.y
                target['lon'] = combined_node.centroid.x
                # Build a new list: a missing/None entry counts as empty, and lists that
                # may be shared with other objects are not extended in place
                target['amenity_points'] = (
                    list(target.get('amenity_points') or [])
                    + list(absorbed.get('amenity_points') or [])
                )
                graph.remove_node(new_node_key)

        for point_key in contained_points:
            graph.nodes[point_key]['is_in_polygon'] = True

    return graph

# TEMPORARY SOLUTION FOR NULL NAMES
def combine_names(name1, name2):
    """Join two amenity names with ", ", ignoring any value that is not a string.

    Returns the lone string when only one argument is a string, and None when
    neither is (for example when both names are null).
    """
    usable = [name for name in (name1, name2) if isinstance(name, str)]
    if not usable:
        return None
    return ", ".join(usable)



#FUNCTION FOR VISUALIZATION
def connect_lines(p1, p2, lines):
    """Append the segment joining the centroids of ``p1`` and ``p2`` to ``lines``.

    ``lines`` is modified in place; nothing is returned.
    """
    lines.append(LineString([p1.centroid, p2.centroid]))

In [10]:
#ORIGINAL
# NOTABLE HYPERPARAMETERS:
# Amenities are connected based on a radius from the centroid of the polygon
# Aggregated polygon length (perimeter) is used to check if zone is sufficiently large  

def combine_contiguous_amenities_by_polygon2(graph, max_distance, max_perimeter):
    """Merge each polygon amenity with at most one nearby polygon of the same type.

    Every node whose geometry is a Polygon when it is visited collects the other
    Polygon/MultiPolygon nodes with the same 'amenity' whose geometry lies within
    ``max_distance`` of this node's centroid (planar distance in coordinate units).
    The nearest candidate whose union with this node has ``length`` below
    ``max_perimeter`` is absorbed: the visiting node takes the union geometry, the
    joined name, the new centroid and both 'amenity_points' lists, and the absorbed
    node is removed. Point nodes intersecting a visited polygon get
    ``is_in_polygon = True``.

    Args:
        graph: networkx graph of amenity nodes; modified in place. Every node is
            expected to carry a 'geometry' attribute.
        max_distance: Threshold for the centroid-to-geometry distance.
        max_perimeter: Upper bound on the merged geometry's ``length``.

    Returns:
        The same ``graph`` object.
    """
    # shapely spells the type 'MultiPolygon'; the former 'Multipolygon' never matched
    polygon_types = ('Polygon', 'MultiPolygon')

    for node_key, node_data in list(graph.nodes(data=True)):
        if node_data['geometry'].geom_type != 'Polygon':
            continue

        # The snapshot above still lists nodes that an earlier merge removed. Such a
        # node keeps flagging the points it contains, but must not merge:
        # graph.nodes[node_key] would raise KeyError for it.
        still_in_graph = node_key in graph.nodes

        other_node_distances = []
        contained_points = []
        for other_key, other_data in list(graph.nodes(data=True)):
            other_geometry = other_data['geometry']
            if other_geometry.geom_type in polygon_types:
                if not still_in_graph or other_key == node_key:
                    continue
                if node_data['amenity'] != other_data['amenity']:
                    continue
                distance = node_data['geometry'].centroid.distance(other_geometry)
                if distance < max_distance:
                    other_node_distances.append((other_key, distance))
            elif other_geometry.geom_type == 'Point':
                if node_data['geometry'].intersects(other_geometry) and not other_data['is_in_polygon']:
                    contained_points.append(other_key)

        other_node_distances.sort(key=lambda pair: pair[1])

        for new_node, _ in other_node_distances:
            target = graph.nodes[node_key]
            absorbed = graph.nodes[new_node]
            combined_node = shapely.ops.unary_union([target['geometry'], absorbed['geometry']])
            if combined_node.length >= max_perimeter:
                continue

            target['geometry'] = combined_node
            # Temporary for null values: any non-string name is written as 'null',
            # on either side, instead of failing on str + NaN
            first_name = target['name'] if isinstance(target['name'], str) else 'null'
            second_name = absorbed['name'] if isinstance(absorbed['name'], str) else 'null'
            target['name'] = first_name + ', ' + second_name
            target['lat'] = combined_node.centroid.y
            target['lon'] = combined_node.centroid.x
            # Build a new list rather than extending one that may be shared elsewhere
            target['amenity_points'] = (
                list(target.get('amenity_points') or [])
                + list(absorbed.get('amenity_points') or [])
            )
            graph.remove_node(new_node)
            # Only one merge per visited node
            break

        for point in contained_points:
            graph.nodes[point]['is_in_polygon'] = True

    return graph

In [11]:
def plot_network_on_map(amenities_network, initial_location=[0, 0], zoom_start=10):
    """Draw the polygon amenities of a network on a folium map.

    Each Polygon node, and each part of a MultiPolygon node, is drawn from its
    exterior ring only, coloured by the node's 'amenity' value. Point nodes and
    nodes without a 'geometry' attribute are not drawn.

    Args:
        amenities_network: Graph whose nodes carry 'geometry' and 'amenity'.
        initial_location: ``[lat, lon]`` map centre. ``None`` or the ``[0, 0]``
            placeholder selects central Manila, which keeps existing calls that
            pass ``[0, 0]`` centred where they were before.
        zoom_start: Initial zoom level handed to folium.

    Returns:
        The folium map.
    """
    manila_center = (14.599512, 120.984222)
    if initial_location is None or list(initial_location) == [0, 0]:
        map_center = manila_center
    else:
        map_center = tuple(initial_location)
    m = folium.Map(location=map_center, zoom_start=zoom_start, tiles='openstreetmap')

    # Colours for visualization
    amenity_colors = {
        'education': 'green',
        'finance': 'blue',
        'government offices': 'red',
        'grocery': 'orange',
        'health': 'magenta',
        'malls': 'yellow',
        'residential areas': 'brown',
        'security': 'gray'
    }
    # Used for a missing or unlisted amenity so a new category cannot abort the plot
    fallback_color = 'purple'

    for node, data in amenities_network.nodes(data=True):
        if 'geometry' not in data:
            continue
        geometry = data['geometry']

        if geometry.geom_type == 'Polygon':
            polygons = [geometry]
        elif geometry.geom_type == 'MultiPolygon':
            polygons = geometry.geoms
        else:
            # Points (and any other geometry type) are not drawn
            continue

        color = amenity_colors.get(data.get('amenity'), fallback_color)
        for polygon in polygons:
            # Geometry coordinates are (x=lon, y=lat); folium expects [lat, lon]
            coordinates = [[point[1], point[0]] for point in polygon.exterior.coords]
            folium.Polygon(locations=coordinates, fill=True, color=color, fill_opacity=0.4).add_to(m)

    return m


In [12]:
# Extract the graph into a geojson for loading into QGIS

def graph_to_geojson(graph, filename):
    """Convert the graph's nodes into a GeoJSON FeatureCollection.

    Each node that has a 'geometry' attribute becomes one feature whose properties
    are the node's remaining attributes. A node is left out entirely when any of
    those properties is a float NaN.

    ``filename`` is accepted but not used here: nothing is written to disk, the
    collection is only returned.
    """
    def _is_nan(value):
        return isinstance(value, float) and np.isnan(value)

    features = []
    for _, attrs in graph.nodes(data=True):
        if 'geometry' not in attrs:
            continue

        # Convert the geometry to a GeoJSON-compatible form
        geometry = shapely.geometry.shape(attrs['geometry'])
        # Everything except the geometry goes into the feature's properties
        properties = {key: value for key, value in attrs.items() if key != 'geometry'}
        if any(_is_nan(value) for value in properties.values()):
            continue

        features.append(geojson.Feature(geometry=geometry, properties=properties))

    return geojson.FeatureCollection(features)

In [26]:
# Build the amenity graph: one node per polygon and per point, no edges
manila_amenities_network = create_network(manila_amenities_polygon_gdf, manila_amenities_point_gdf)

# Snapshot of the polygons before any merging
# NOTE(review): zoom_start=100 looks unusually high for a web map — confirm the intended zoom level
before_map = plot_network_on_map(manila_amenities_network, initial_location=[0, 0], zoom_start=100)
before_map.save('before_map.html') # Save the map to an HTML file

connected_lines = [] # This is to visualize which polygons are connected

# Merge nearby same-type polygons; the graph is modified in place
# NOTE(review): this comment used to say max_distance was adjusted to 50 according to
# testing, but the call passes max_distance=70 — confirm which value is intended
combine_contiguous_amenities_by_polygon(manila_amenities_network, max_distance=70, max_perimeter=5000, lines=connected_lines)

# Same plot again, now showing the merged polygons
after_map = plot_network_on_map(manila_amenities_network, initial_location=[0, 0], zoom_start=100)

# Convert the LineString coordinates to a format that Folium understands
# (LineString coords are (x, y) = (lon, lat); folium wants [lat, lon])
for line in connected_lines:
    line_coords = [[coord[1], coord[0]] for coord in line.coords]
    folium.PolyLine(locations=line_coords, color='black').add_to(after_map)

after_map.save('after_map.html') # Save the map to an HTML file

# graph_to_geojson only returns the FeatureCollection (its filename argument is not
# used inside the function), so the file is written here
feature_collection = graph_to_geojson(manila_amenities_network, 'output.geojson')
with open('output.geojson', 'w', encoding='utf-8') as f:
    f.write(geojson.dumps(feature_collection, indent=2))