In [1]:
import geopandas as gpd
import matplotlib.pyplot as plt
from libpysal.weights import Queen
from esda.getisord import G_Local
import numpy as np
from scipy.interpolate import griddata  # For IDW-like interpolation
import rasterio
from rasterio.transform import from_bounds


In [None]:
from pathlib import Path

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import rasterio
from esda.getisord import G_Local
from libpysal.weights import KNN  # Switch to KNN for speed (less neighbors)
from rasterio.transform import from_bounds
from scipy.interpolate import griddata  # For IDW-like interpolation

# --- Load inputs ---------------------------------------------------------------
# Point layer produced by preprocessing; it must carry a 'Malaria_Prevalence_2020' column.
# NOTE(review): both paths below are absolute and machine-specific. Move them to a
# configurable data directory before sharing this notebook.
full_gdf = gpd.read_file(r'C:\Users\Hp\Documents\capstone_project\notebooks\data\processed\merged_gdf.geojson')

# Nigeria admin-1 (state) boundaries.
nigeria_gdf = gpd.read_file(r'C:\Users\Hp\Documents\capstone_project\data\raw\nga_admin_boundaries.geojson\nga_admin1.geojson')  # Adjust path if needed

# Reproject the point layer into the CRS of the boundary layer so both overlay correctly.
full_gdf = full_gdf.to_crs(nigeria_gdf.crs)

# --- Getis-Ord Gi* hot-spot statistic -------------------------------------------
RANDOM_SEED = 42  # fixes the random permutations behind the pseudo p-values

# k-nearest-neighbour weights (k=5) keep the weights matrix sparse for many points.
# NOTE(review): neighbour distances are measured in CRS units (degrees if the boundary
# layer is EPSG:4326) -- confirm that is acceptable for this study area.
w = KNN.from_dataframe(full_gdf, k=5)  # Faster than Queen for large n

# Seed NumPy's global RNG so the permutation-based p_sim is repeatable between runs.
np.random.seed(RANDOM_SEED)

# star=True includes each observation in its own neighbourhood sum, which is what
# distinguishes Gi* from Gi; G_Local defaults to star=False (plain Gi).
gi = G_Local(full_gdf['Malaria_Prevalence_2020'], w, star=True)  # Continuous var from paper/DHS cov
full_gdf['gi_star'] = gi.Gs   # local G statistic per observation
full_gdf['gi_p'] = gi.p_sim   # permutation-based pseudo p-value per observation

# Hot-spot map: points coloured by their 'gi_star' value, drawn over the state outlines.

# savefig does not create missing folders, so make sure the output folder exists first.
Path('data/outputs').mkdir(parents=True, exist_ok=True)

fig, ax = plt.subplots(figsize=(10, 12))
nigeria_gdf.boundary.plot(ax=ax, color='black', linewidth=1)  # Country/states outline
full_gdf.plot(column='gi_star', cmap='Reds', legend=True, ax=ax, markersize=20, alpha=0.8)
ax.set_title('Malaria Hotspots (Gi*) over Nigeria')
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
plt.savefig('data/outputs/hotspots_with_nigeria_map.png')
plt.show()

# Intensity map: continuous risk surface interpolated from the point prevalence values.
# NOTE: griddata(method='linear') is piecewise-linear interpolation over a Delaunay
# triangulation of the points, not inverse-distance weighting. Grid nodes outside the
# convex hull of the input points are returned as NaN.

# Regular grid over the boundary layer's bounding box (coarser res for speed: 0.1 deg ~11km).
# np.arange excludes the stop value, so the last node lies just inside maxx / maxy.
bounds = nigeria_gdf.total_bounds  # [minx, miny, maxx, maxy]
grid_res = 0.1  # Coarser to reduce compute time (from 0.05)
x_grid = np.arange(bounds[0], bounds[2], grid_res)
y_grid = np.arange(bounds[1], bounds[3], grid_res)
x_mesh, y_mesh = np.meshgrid(x_grid, y_grid)  # both shaped (len(y_grid), len(x_grid))

# Sample coordinates as an (n, 2) array of (x, y), read through the vectorised
# GeoSeries accessors instead of a per-row Python lambda.
points = np.column_stack([full_gdf.geometry.x.to_numpy(), full_gdf.geometry.y.to_numpy()])
values = full_gdf['Malaria_Prevalence_2020'].to_numpy(dtype=float)  # Continuous prevalence for intensity

# Drop rows without a usable coordinate or value: a single NaN sample would otherwise
# turn every triangle it belongs to into NaN on the output grid.
usable = np.isfinite(values) & np.isfinite(points).all(axis=1)
points, values = points[usable], values[usable]

# rescale=True normalises both axes to the unit cube before triangulating.
interp_grid = griddata(points, values, (x_mesh, y_mesh), method='linear', rescale=True)

# Plot the interpolated surface together with the state outlines.

# savefig does not create missing folders, so make sure the output folder exists first.
Path('data/outputs').mkdir(parents=True, exist_ok=True)

# interp_grid holds values AT the x_grid / y_grid nodes, so each pixel is centred on a
# node and the image spans half a cell beyond the first and last node. Using the raw
# layer bounds instead would stretch and shift the image, because np.arange stops
# before the max bound.
half_cell = grid_res / 2
grid_extent = (
    x_grid[0] - half_cell,   # left
    x_grid[-1] + half_cell,  # right
    y_grid[0] - half_cell,   # bottom
    y_grid[-1] + half_cell,  # top
)

fig, ax = plt.subplots(figsize=(10, 12))
nigeria_gdf.boundary.plot(ax=ax, color='black', linewidth=1)
# origin='lower' because row 0 of interp_grid is the southernmost row (y_grid ascends).
im = ax.imshow(interp_grid, extent=grid_extent, cmap='YlOrRd', origin='lower')
plt.colorbar(im, ax=ax, label='Interpolated Malaria Prevalence Intensity')
ax.set_title('Malaria Risk Intensity Map (Linear Interpolation) over Nigeria')
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
plt.savefig('data/outputs/intensity_map.png')
plt.show()

# Optional: save the interpolated surface as a single-band GeoTIFF (for GIS use).

# rasterio.open(..., 'w') does not create missing folders.
Path('data/outputs').mkdir(parents=True, exist_ok=True)

# from_bounds builds a north-up transform (row 0 = northern edge), while interp_grid
# has row 0 at the southern edge because y_grid ascends. Flip the rows so the raster is
# not written upside-down, and cast to the float32 dtype declared for the dataset.
raster_north_up = np.flipud(interp_grid).astype('float32')
n_rows, n_cols = raster_north_up.shape

# Grid values sit on the x_grid / y_grid nodes, so the raster's outer edges lie half a
# cell beyond the first and last node rather than on the layer's bounding box.
half_cell = grid_res / 2
transform = from_bounds(
    x_grid[0] - half_cell,   # west
    y_grid[0] - half_cell,   # south
    x_grid[-1] + half_cell,  # east
    y_grid[-1] + half_cell,  # north
    n_cols,
    n_rows,
)

# nodata=NaN marks the cells the interpolation left empty (outside the convex hull).
with rasterio.open('data/outputs/intensity_raster.tif', 'w', driver='GTiff', height=n_rows,
                   width=n_cols, count=1, dtype='float32', crs=nigeria_gdf.crs,
                   transform=transform, nodata=np.nan) as dst:
    dst.write(raster_north_up, 1)
print("Intensity raster saved!")

# Not executed: `w = Queen.from_dataframe(full_gdf)  # Or KNN(k=5)`.
# NOTE(review): this indented line trailed the cell and appears to be a leftover from an
# earlier Queen-based run. As code it raises IndentationError and would replace the KNN
# weights; full_gdf is a point layer (its geometries are read via .x / .y), so confirm
# contiguity weights are meaningful before restoring it.
