In [10]:
import os
from pathlib import Path
import rasterio
import geopandas as gpd
import numpy as np
from rasterstats import zonal_stats
import pandas as pd
from rasterio.warp import reproject, Resampling

In [11]:
# Input locations: one population raster and one urban-mask raster per year,
# plus the district boundary polygons used as zones.
pop_folder = Path('/home/h99g576/zambia/Population')
urban_folder = Path('/home/h99g576/zambia/Land_Cover')
districts = gpd.read_file('/home/h99g576/zambia/Data/district.shp')
# Label the boundaries as EPSG:4326. set_crs only tags the geometries (it does
# not transform coordinates) and raises if the file declares a different CRS.
# NOTE(review): zonal_stats does not reproject, so the population rasters are
# presumably EPSG:4326 as well - confirm.
districts = districts.set_crs(epsg=4326)


def _urban_share(urban_sum, total_sum):
    """Return urban_sum / total_sum, or 0 when the total is missing or not positive.

    A missing (None) urban sum is counted as 0 so that a zone without any valid
    urban pixel does not raise a TypeError in the division.
    """
    if total_sum and total_sum > 0:
        return (urban_sum or 0) / total_sum
    return 0


# One record per (district, year); converted to a DataFrame once at the end.
results = []

for year in range(2001, 2021):
    # Load the population raster and keep its grid definition.
    pop_path = pop_folder / f'zmb_ppp_{year}_UNadj.tif'
    with rasterio.open(pop_path) as src:
        pop = src.read(1)
        pop_meta = src.meta
        nodata_val = src.nodata

    # Warp the urban mask onto the population grid. The destination starts as
    # zeros so any cell the warp does not write is treated as non-urban instead
    # of holding uninitialized memory.
    urban_path = urban_folder / f'modis_urban_mask_{year}.tif'
    urban_resampled = np.zeros_like(pop)
    with rasterio.open(urban_path) as urban_src:
        urban = urban_src.read(1)
        reproject(
            source=urban,
            destination=urban_resampled,
            src_transform=urban_src.transform,
            src_crs=urban_src.crs,
            dst_transform=pop_meta['transform'],
            dst_crs=pop_meta['crs'],
            resampling=Resampling.nearest,  # categorical mask: never interpolate
        )

    # Keep population only where the mask equals 1; everything else becomes 0.
    urban_mask = (urban_resampled == 1)
    urban_pop_grid = np.where(urban_mask, pop, 0)

    # Zonal sums straight from the in-memory arrays. Passing `affine` lets
    # rasterstats georeference an ndarray, so no temporary GeoTIFF has to be
    # written (and possibly left behind if a later step fails).
    total_stats = zonal_stats(
        vectors=districts,
        raster=pop,
        affine=pop_meta['transform'],
        stats=['sum'],
        nodata=nodata_val,
        geojson_out=False
    )

    urban_stats = zonal_stats(
        vectors=districts,
        raster=urban_pop_grid,
        affine=pop_meta['transform'],
        stats=['sum'],
        nodata=nodata_val,
        geojson_out=False
    )

    # Pair each district with its stats by position; zonal_stats returns one
    # entry per input feature in order, so this does not depend on the
    # GeoDataFrame's index labels.
    for district_name, total_stat, urban_stat in zip(
        districts['NAME_2'], total_stats, urban_stats
    ):
        total_pop = total_stat['sum']
        urban_pop = urban_stat['sum']

        results.append({
            'district': district_name,
            'year': year,
            'total_pop': total_pop,
            'urban_pop': urban_pop,
            'pct_urban': _urban_share(urban_pop, total_pop)
        })

# Build the final table in one go from the collected records.
urban_df = pd.DataFrame(results)

urban_df.head()

Unnamed: 0,district,year,total_pop,urban_pop,pct_urban
0,Chibombo,2001,290989.5,9850.777344,0.033853
1,Chisamba,2001,98511.9375,2.780307,2.8e-05
2,Chitambo,2001,72652.632812,0.0,0.0
3,Itezhi-tezhi,2001,52810.480469,34.378117,0.000651
4,Kabwe,2001,191999.625,34625.625,0.180342


In [12]:
# Persist the district-by-year table to the working directory. The default
# to_csv settings are kept, so the DataFrame index is written as the first column.
urban_df.to_csv(path_or_buf='urban_pop.csv')