# Intro
This notebook introduces some of the data and concepts we will be working with.

In [None]:
import pandas as pd
import geopandas as gpd
pd.set_option('display.max_columns', None)
from shapely.geometry import Point, LineString, Polygon
import rasterio
from rasterio.plot import show
from rasterio.mask import mask
import os

## Paths to some of the data

In [None]:
# Root folder holding every input dataset used in this notebook
path = "../../synced-data/population-exploration/"


def _in_data_dir(relative_name):
    """Return `relative_name` joined onto the shared data folder `path`."""
    return os.path.join(path, relative_name)


# Village boundary polygons (shapefile)
village_boundaries_filename = _in_data_dir("Rwanda Village Boundaries/Village.shp")

# Road network layers, one GeoJSON file per highway class
highway_primary_filename = _in_data_dir("rwanda_highway-primary.geojson")
highway_residential_filename = _in_data_dir("rwanda_highway-residential.geojson")
highway_secondary_filename = _in_data_dir("rwanda_highway-secondary.geojson")
highway_tertiary_filename = _in_data_dir("rwanda_highway-tertiary.geojson")
highway_trunk_filename = _in_data_dir("rwanda_highway-trunk.geojson")
highway_unclassified_filename = _in_data_dir("rwanda_highway-unclassified.geojson")

# 2020 population raster (GeoTIFF)
rwa_pop_2020_filename = _in_data_dir("rwa_ppp_2020.tif")



## Reading and Visualizing some of the data

In [None]:
# `village_boundaries_filename` already starts with `path`, so it is passed
# straight to the reader. Joining `path` onto it a second time only resolved
# because the relative `path` happens to be exactly two directories deep.
village_boundaries = gpd.read_file(village_boundaries_filename)
village_boundaries

In [None]:
#village_boundaries.explore()

In [None]:
# `highway_trunk_filename` already includes `path`; do not prefix it again.
highway_trunk = gpd.read_file(highway_trunk_filename)
#highway_trunk.explore()

In [None]:
# Open the population raster for interactive inspection (e.g. with `show`).
# NOTE(review): this dataset handle is never closed in the visible notebook, and
# `extract_population` re-opens the file from its path rather than reusing it.
# Call `rwa_pop_2020.close()` once it is no longer needed.
rwa_pop_2020 = rasterio.open(rwa_pop_2020_filename)
#show(rwa_pop_2020)


In [None]:
# Read in bridges CSV as dataframe and list column names.
# `bridges_filename` is already a full path under `path`, so it is used as-is
# instead of being joined onto `path` a second time.
bridges_filename = os.path.join(path, "rct-all-bridges.csv")
bridges = pd.read_csv(bridges_filename)
print(bridges.columns)

In [None]:
# Check CRS of village boundaries gdf (the target CRS for the bridge points)
print(f'The CRS of the village boundaries gdf is: {village_boundaries.crs}')

# Create lat/lon variables
lon = bridges['GPS (Longitude)']
lat = bridges['GPS (Latitude)']

# Build a GeoDataFrame of bridges by turning each lon/lat pair into a Shapely
# Point. The coordinates are declared as EPSG:4326 here; they are NOT yet in the
# village CRS at this step.
# NOTE(review): assumes the GPS columns hold WGS84 decimal degrees - confirm
# against the data source.
bridges_points = gpd.GeoDataFrame(bridges,
                                  geometry=gpd.points_from_xy(x=lon, y=lat),
                                  crs='EPSG:4326')

# Reproject the bridges into the CRS of the village boundaries. Reassigning the
# result (rather than `inplace=True`) keeps the lineage of `bridges_points`
# explicit and avoids mutating a frame in place.
bridges_points = bridges_points.to_crs(village_boundaries.crs)

# Check that reprojection was successful
print(f'The CRS of the bridges gdf is: {bridges_points.crs}')

# Visualize bridges point data
bridges_points.explore()

In [None]:
# Check linear unit of projection. `buffer` works in CRS units, so the distance
# below only means "2 km" when the CRS is projected with metre units.
print(bridges_points.crs.axis_info)

# Fail loudly on a geographic CRS: buffering by 2000 *degrees* would silently
# produce meaningless polygons. The metre unit itself still has to be confirmed
# from the axis info printed above.
if not bridges_points.crs.is_projected:
    raise ValueError(
        "bridges_points is in a geographic CRS; reproject it to a metre-based "
        "projected CRS before buffering by a distance given in metres."
    )

# Create 2 km buffers around bridges
buf = bridges_points.buffer(distance=2000)

# Add bridges point attribute information to buffers: copy the point frame and
# swap its point geometry for the buffer polygons (the row index is unchanged)
bridges_buffers = bridges_points.copy()
bridges_buffers['geometry'] = buf

# Check whether attribute information copied over correctly
bridges_buffers.head(3)

# Visualize buffers
# bridges_buffers.explore()

In [None]:
# Create list to store new combined geometries (one entry per bridge buffer,
# in the same order as the rows of bridges_buffers)
merged_buffers = []

# Iterate through each buffer polygon in bridges_buffers. Only the geometry is
# needed, so iterate the geometry column instead of building a row per bridge.
for buffer_geom in bridges_buffers.geometry:
    # Find villages that intersect with this buffer
    intersecting_villages = village_boundaries[village_boundaries.intersects(buffer_geom)]

    # Combine the buffer geometry with the intersecting villages' geometries.
    # With no intersecting village the union is just the buffer itself.
    combined_geom = buffer_geom.union(intersecting_villages.geometry.union_all())

    # Append to the list of merged buffers
    merged_buffers.append({"geometry": combined_geom})

# Convert the merged buffers list to a new GeoDataFrame. The index of
# bridges_buffers is reused explicitly: the merge below aligns rows by index,
# and a default 0..n-1 index would silently attach attributes to the wrong
# geometry whenever bridges_buffers is not indexed 0..n-1 (e.g. after filtering).
merged_buffers_gdf = gpd.GeoDataFrame(
    merged_buffers,
    geometry="geometry",
    crs=bridges_buffers.crs,
    index=bridges_buffers.index,
)

# Merge attributes from the original bridges_buffers based on index
merged_buffers_gdf = merged_buffers_gdf.merge(bridges_buffers.drop(columns='geometry'), left_index=True, right_index=True, how='left')

# Check whether attribute information copied over correctly. A bare expression
# in the middle of a cell is not rendered, so display it explicitly
# (`display` is provided by the IPython/Jupyter kernel).
display(merged_buffers_gdf.head(3))

# Visualize buffers
merged_buffers_gdf.explore()

In [None]:
# Plot villages, expanded buffers, and the original bridge buffers
import matplotlib.pyplot as plt

# Create the plot
fig, ax = plt.subplots(figsize=(10, 10))

# Layers are drawn from the largest extent to the smallest so that no layer is
# hidden underneath a later one.

# Plot villages (background)
village_boundaries.plot(ax=ax, color='gray', alpha=0.5)

# Plot the expanded buffers that include the intersecting village polygons
merged_buffers_gdf.plot(ax=ax, color='blue', alpha=0.5)

# Plot the original 2 km buffers (buffer polygons) on top
bridges_buffers.plot(ax=ax, color='green')

# Set title and labels. All layers share the village CRS, so label the axes
# according to whether that CRS is projected or geographic.
ax.set_title("Bridge Buffers and Expanded Buffers with Villages")
if village_boundaries.crs.is_projected:
    ax.set_xlabel("Easting")
    ax.set_ylabel("Northing")
else:
    ax.set_xlabel("Longitude")
    ax.set_ylabel("Latitude")

# Show the plot
plt.show()

In [None]:
# # Query WorldPop API to determine population count within expanded buffers
# import requests
# from rasterio.mask import mask

# # Step 1: Define API Query Parameters for Rwanda
# worldpop_api_url = "https://hub.worldpop.org/rest/data/pop"
# params = {
#     "iso3": "RWA",  # Rwanda's ISO3 country code
#     "year": 2020,  # Year of data
#     "resolution": "100m",  # Resolution (depends on availability)
#     "unadj": 0,  # Adjusted or unadjusted population
#     "type": "gpwv4"  # WorldPop dataset type (check API for latest)
# }

# # Step 2: Query the WorldPop API
# response = requests.get(worldpop_api_url, params=params)

# if response.status_code == 200:
#     worldpop_data = response.json()
#     print("WorldPop API response received successfully.")
    
# else:
#     print(f"Failed to fetch data: {response.status_code}")
#     print("Response Content:", response.text)
#     exit()

# # Step 3: Extract the Population Raster URL
# if "data" in worldpop_data and len(worldpop_data["data"]) > 0:
#     raster_url = worldpop_data["data"][0]["download"]
#     print(f"Raster URL: {raster_url}")
# else:
#     print("No population raster found for Rwanda.")
#     exit()

# # Step 4: Download the Raster
# raster_path = "RWA_WorldPop_2020.tif"
# raster_response = requests.get(raster_url, stream=True)

# if raster_response.status_code == 200:
#     with open(raster_path, "wb") as f:
#         for chunk in raster_response.iter_content(chunk_size=1024):
#             f.write(chunk)
#     print("Raster downloaded successfully.")
# else:
#     print("Failed to download the raster.")
#     exit()

In [None]:
import numpy as np

# Copy input geodataframe to avoid modifying the original: the result of
# extract_population is assigned back to `expanded_buffers` further down in
# this cell, so `merged_buffers_gdf` keeps its current contents.
expanded_buffers = merged_buffers_gdf.copy()

# Clip raster and compute population sum
def extract_population(raster_path, gdf):
    """Sum the raster values that fall inside each geometry of ``gdf``.

    Parameters
    ----------
    raster_path : str or path-like
        Raster file to open with ``rasterio.open``. Presumably a population
        count grid (people per pixel) - confirm against the data source.
    gdf : geopandas.GeoDataFrame
        Geometries to summarise. It is reprojected to the raster CRS before
        masking, so it needs a CRS of its own.

    Returns
    -------
    geopandas.GeoDataFrame
        The reprojected frame (in the raster CRS, not the input CRS) with an
        added ``population`` column. The frame passed in is not modified.
        ``population`` is ``NaN`` for rows whose extraction failed, so that a
        failure can be told apart from a genuine population of zero.

    Notes
    -----
    Pixels outside the geometry, nodata pixels, non-finite values and negative
    values all contribute 0 to the sum. All bands returned by the mask are
    summed together. Raster metadata and any per-row errors are printed.
    """
    with rasterio.open(raster_path) as src:
        # Print information from raster profile
        print("CRS:", src.crs)
        print("Raster shape:", src.shape)
        print("Number of Bands:", src.count)

        # Ensure the GeoDataFrame is in the same CRS as the raster. `to_crs`
        # returns a new frame, so the caller's object is left untouched.
        gdf = gdf.to_crs(src.crs)

        # Iterate over each geometry and extract the raster values under it
        results = []
        for idx, geom in gdf.geometry.items():
            try:
                # filled=False returns a masked array in which both the pixels
                # outside the geometry and the raster's nodata pixels are
                # masked, whatever the nodata value is.
                out_image, _ = mask(src, [geom], crop=True, filled=False)

                # Masked pixels count as zero; accumulate in float64 so large
                # sums are not limited by the raster's own dtype.
                values = np.ma.filled(out_image.astype(np.float64), 0.0)

                # Drop negative placeholders and NaN/inf so they cannot distort
                # or poison the total.
                values = np.where(np.isfinite(values) & (values > 0), values, 0.0)

                total_population = float(values.sum())

            except Exception as e:
                # Best effort: keep processing the remaining geometries, but
                # record NaN rather than 0 so the failure stays visible.
                print(f"Error at geometry {idx}: {e}")
                total_population = np.nan

            results.append(total_population)

        # Add population data to the GeoDataFrame
        gdf["population"] = results

    return gdf

# Run the population extraction for every expanded buffer
expanded_buffers = extract_population(raster_path=rwa_pop_2020_filename, gdf=expanded_buffers)

# Optional export of the results
# expanded_buffers.to_file(os.path.join(path, "expanded_buffers_with_population.geojson"), driver="GeoJSON")

# Preview the geometry next to the newly added population column
population_preview = expanded_buffers[["geometry", "population"]].head()
print(population_preview)

In [None]:
# Show the first rows of the expanded buffers, including the `population`
# column added by extract_population
expanded_buffers.head()
#expanded_buffers.explore()