The premise of this notebook is taking travel time data from all countries and joining data to hex ids. These hex-ids will be uploaded to a database with the associated travel times coorelated travel times in appropriate columns. 

We will also create one geojson file with the geometry of each hex that is used. We may create a tileset with this down the rod if it's useful. We may have to do this with PM tiles. Alternatively we may go direct to the database to generate the hex layer.

# Imports

In [9]:
import os
import json
import pandas as pd
import geopandas as gpd
pd.set_option('display.max_columns', None)
import numpy as np
from shapely.geometry import LineString, Point, Polygon
import h3

output_folder_path ="/Volumes/samsung-4tb/b2p/impact-model/cleaned_data/hex_data/"

# Functions

In [86]:
def convert_h3_col(df, column_name, level):
    if level > 8:
        print(f"Cannot convert from level 8 to higher precision level {level}")
        return df
    elif level == 8:
        print("h3 indices are already level 8")
        return df
    elif level < 8:
        try:
            df[column_name] = df[column_name].apply(lambda h3_index: h3.h3_to_parent(h3_index, level))
            print(f"Converted column {column_name} to level {level}")
            return df
        except Exception as e:
            print(f"Error converting column {column_name}: {e}")
            return df
    else:
        print(f"Conversion to {level} not possible")
        return df

In [73]:
def merge_df_by_h3(df, data_name, level):
    # Columns to drop
    drop_columns = ["row", "col", "x", "y", "x_ras", "y_ras"]

    # Columns to sum
    sum_columns = [ "population", "pop_0_4", "females_0_4", "males_0_4", "pop_5_9", "females_5_9", "males_5_9", "pop_10_14", "females_10_14", "males_10_14", "pop_0_9", "females_0_9", "males_0_9", "pop_15_49", "females_15_49", "males_15_49", "pop_50_64", "females_50_64", "males_50_64", "pop_65_plus", "females_65_plus", "males_65_plus", "births", "pregnancies", ]

    # Columns for weighted average based on population
    weighted_avg_columns = [ "travel_time", "travel_time_no_sites", "time_delta_no_sites", "rwi", "underweight", "female_educational_attainment_mean", "male_educational_attainment_mean", ]
    # if population is 0 or travel_time is NaN or infinity drop the row
    df = df.dropna(subset=["population", "travel_time"])
    df = df[~df["population"].isin([0])]
    df = df[~df["travel_time"].isin([np.inf])]
    df = df[~df["travel_time"].isin([np.nan])]

    # Create new columns for the weighted values to use later
    convert_h3_col(df, "h3_index", level)

    for col in weighted_avg_columns:
        if col in df.columns:
            # Precompute the weighted value
            df[f"{col}_weighted"] = df[col] * df["population"]

    # Remove columns to drop
    df_cleaned = df.drop(columns=drop_columns, errors="ignore")

    # Create an aggregation dictionary
    agg_dict = {}

    # Set up sum aggregation
    for col in sum_columns:
        if col in df_cleaned.columns:
            agg_dict[col] = "sum"

    # Set up weighted aggregation
    for col in weighted_avg_columns:
        if col in df_cleaned.columns:
            agg_dict[f"{col}_weighted"] = "sum"

    # Group by h3_index and apply aggregations
    grouped = df_cleaned.groupby("h3_index").agg(agg_dict)
    print("Grouped by h3_index and applied aggregations.")

    # Calculate weighted averages from the aggregated data
    for col in weighted_avg_columns:
        if col in df_cleaned.columns:
            weighted_col = f"{col}_weighted"
            if weighted_col in grouped.columns:
                # Calculate the weighted average
                grouped[col] = grouped[weighted_col] / grouped["population"]
                # Drop the intermediate weighted column
                grouped = grouped.drop(columns=[weighted_col])

    # Reset index to make h3_index a column again
    result = grouped.reset_index()

    result = result.rename(columns={
        'travel_time': f'travel_time_{data_name}',
        'travel_time_no_sites': f'travel_time_no_sites_{data_name}',
        'time_delta_no_sites': f'time_delta_no_sites_{data_name}',
    })

    print(f"Finished processing {data_name}.")

    return result

In [None]:
def join_country_dataframes(df_list):
    if not df_list:
        return pd.DataFrame()
    
    if len(df_list) == 1:
        return df_list[0]
    
    # Define column groups
    highest_value_columns = [
        "population", "pop_0_4", "females_0_4", "males_0_4", "pop_5_9", 
        "females_5_9", "males_5_9", "pop_10_14", "females_10_14", "males_10_14", 
        "pop_0_9", "females_0_9", "males_0_9", "pop_15_49", "females_15_49", 
        "males_15_49", "pop_50_64", "females_50_64", "males_50_64", "pop_65_plus", 
        "females_65_plus", "males_65_plus", "births", "pregnancies"
    ]
    
    avg_columns = [
        "rwi", "underweight", "female_educational_attainment_mean", 
        "male_educational_attainment_mean"
    ]
    
    # Create a base dataframe with all h3_indices
    all_h3_indices = set()
    for df in df_list:
        all_h3_indices.update(df['h3_index'])
    
    # Create the result dataframe with h3_index column
    result_df = pd.DataFrame({'h3_index': list(all_h3_indices)})
    
    # For each column type, create a separate dataframe and then merge
    for column in highest_value_columns:
        temp_dfs = []
        for df in df_list:
            if column in df.columns:
                temp_df = df[['h3_index', column]].copy()
                temp_dfs.append(temp_df)
        
        if temp_dfs:
            # Concatenate all dataframes with this column
            combined = pd.concat(temp_dfs)
            # Group by h3_index and get the maximum value
            grouped = combined.groupby('h3_index')[column].max().reset_index()
            # Merge with result
            result_df = pd.merge(result_df, grouped, on='h3_index', how='left')
    
    # Process average columns
    for column in avg_columns:
        temp_dfs = []
        for df in df_list:
            if column in df.columns:
                temp_df = df[['h3_index', column]].copy()
                temp_dfs.append(temp_df)
        
        if temp_dfs:
            # Concatenate all dataframes with this column
            combined = pd.concat(temp_dfs)
            # Group by h3_index and get the average value
            grouped = combined.groupby('h3_index')[column].mean().reset_index()
            # Merge with result
            result_df = pd.merge(result_df, grouped, on='h3_index', how='left')
    
    # Process any remaining columns (first non-null value)
    all_columns = set()
    for df in df_list:
        all_columns.update(df.columns)
    
    remaining_columns = all_columns - set(['h3_index']) - set(highest_value_columns) - set(avg_columns)
    
    for column in remaining_columns:
        temp_dfs = []
        for df in df_list:
            if column in df.columns:
                temp_df = df[['h3_index', column]].copy()
                temp_dfs.append(temp_df)
        
        if temp_dfs:
            # Concatenate all dataframes with this column
            combined = pd.concat(temp_dfs)
            # Group by h3_index and get the first non-null value
            grouped = combined.groupby('h3_index')[column].first().reset_index()
            # Merge with result
            result_df = pd.merge(result_df, grouped, on='h3_index', how='left')
    
    return result_df

In [None]:
def add_geometry_from_h3(df):
    # Function to convert h3 index to polygon
    def h3_to_polygon(h3_index):
        try:
            # Get boundary coordinates in (lat, lng) format
            coords = h3.h3_to_geo_boundary(h3_index, geo_json=True)
            # Convert to (lng, lat) format for Shapely
            return Polygon(coords)
        except Exception as e:
            print(f"Error processing h3_index {h3_index}: {e}")
            return None
    
    # Create geometry column
    geometries = df['h3_index'].apply(h3_to_polygon)
    
    # Create GeoDataFrame
    gdf = gpd.GeoDataFrame(df, geometry=geometries, crs="EPSG:4326")
    
    return gdf

In [74]:
# country_path_example = "ivory_coast/"

def process_country_hex (country_path, country_name, level):
    # load_data
    local_path = "/Volumes/samsung-4tb/b2p/impact-model/no_order_1_less_than_500m_with_top_sites/model_outputs/"

    all_education_path = "joined_data/travel_time_to_all_education_facilities_fixed.parquet"
    all_health_path = "joined_data/travel_time_to_health_centers_optimal.parquet"
    health_centers_path = "joined_data/travel_time_to_health_centers_optimal.parquet"
    health_posts_path = "joined_data/travel_time_to_health_posts_optimal.parquet"
    major_hospitals_path = "joined_data/travel_time_to_major_hospitals_optimal.parquet"
    major_roads_path = "joined_data/travel_time_to_major_roads_optimal.parquet"
    primary_schools_path = "joined_data/travel_time_to_primary_schools_fixed.parquet"
    secondary_schools_path = "joined_data/travel_time_to_secondary_schools_fixed.parquet"
    semi_dense_urban_path = "joined_data/travel_time_to_semi_dense_urban_optimal.parquet"

    all_education = pd.read_parquet(os.path.join(local_path, country_path, all_education_path))
    all_health = pd.read_parquet(os.path.join(local_path, country_path, all_health_path))
    health_centers = pd.read_parquet(os.path.join(local_path, country_path, health_centers_path))
    health_posts = pd.read_parquet(os.path.join(local_path, country_path, health_posts_path))
    major_hospitals = pd.read_parquet(os.path.join(local_path, country_path, major_hospitals_path))
    major_roads = pd.read_parquet(os.path.join(local_path, country_path, major_roads_path))
    primary_schools = pd.read_parquet(os.path.join(local_path, country_path, primary_schools_path))
    secondary_schools = pd.read_parquet(os.path.join(local_path, country_path, secondary_schools_path))
    semi_dense_urban = pd.read_parquet(os.path.join(local_path, country_path, semi_dense_urban_path))

    # Merge all dataframes by h3_index and handle value merging rules
    all_education = merge_df_by_h3(all_education, "all_education", level)
    all_health = merge_df_by_h3(all_health, "all_health", level)
    health_centers = merge_df_by_h3(health_centers, "health_centers", level)
    health_posts = merge_df_by_h3(health_posts, "health_posts", level)
    major_hospitals = merge_df_by_h3(major_hospitals, "major_hospitals", level)
    major_roads = merge_df_by_h3(major_roads, "major_roads", level)
    primary_schools = merge_df_by_h3(primary_schools, "primary_schools", level)
    secondary_schools = merge_df_by_h3(secondary_schools, "secondary_schools", level)
    semi_dense_urban = merge_df_by_h3(semi_dense_urban, "semi_dense_urban", level)

    df_list = [ all_education, all_health, health_centers, health_posts, major_hospitals, major_roads, primary_schools, secondary_schools, semi_dense_urban ]
    
    merged_df = join_country_dataframes(df_list)

    merged_gdf = add_geometry_from_h3(merged_df)
    # add a column for country name
    merged_gdf["country_name"] = country_name

    return merged_gdf

In [51]:
def adjust_precision(gdf):
    result = gdf.copy()
    one_decimal = [
    'female_educational_attainment_mean', 
    'male_educational_attainment_mean'
    ]

    # Columns with three decimal points
    three_decimals = [
        'rwi', 
        'underweight'
    ]

    # Columns to convert to integer type
    int_columns = [
        'births', 'females_0_4', 'females_0_9', 'females_10_14', 'females_15_49', 
        'females_50_64', 'females_5_9', 'females_65_plus', 'males_0_4', 'males_0_9', 
        'males_10_14', 'males_15_49', 'males_50_64', 'males_5_9', 'males_65_plus', 
        'pop_0_4', 'pop_0_9', 'pop_10_14', 'pop_15_49', 'pop_50_64', 'pop_5_9', 
        'pop_65_plus', 'population', 'pregnancies', 'time_delta_no_sites_all_education', 
        'time_delta_no_sites_all_health', 'time_delta_no_sites_health_centers', 
        'time_delta_no_sites_health_posts', 'time_delta_no_sites_major_hospitals', 
        'time_delta_no_sites_major_roads', 'time_delta_no_sites_primary_schools', 
        'time_delta_no_sites_secondary_schools', 'time_delta_no_sites_semi_dense_urban', 
        'travel_time_all_education', 'travel_time_all_health', 'travel_time_health_centers', 
        'travel_time_health_posts', 'travel_time_major_hospitals', 'travel_time_major_roads', 
        'travel_time_no_sites_all_education', 'travel_time_no_sites_all_health', 
        'travel_time_no_sites_health_centers', 'travel_time_no_sites_health_posts', 
        'travel_time_no_sites_major_hospitals', 'travel_time_no_sites_major_roads', 
        'travel_time_no_sites_primary_schools', 'travel_time_no_sites_secondary_schools', 
        'travel_time_no_sites_semi_dense_urban', 'travel_time_primary_schools', 
        'travel_time_secondary_schools', 'travel_time_semi_dense_urban'
    ]
    
    for col in result.columns:
        if col.startswith("travel_time") or col.startswith("time_delta"):
            # change all inf values to 9999
            print(f"{col}: min = {result[col].min()}, max = {result[col].max()}")
            result[col] = result[col].replace([np.inf], 9999)
            print(f"{col}: new min = {result[col].min()}, new max = {result[col].max()}")
    
    print("Applying one decimal place columns")
    for col in one_decimal:
        if col in result.columns:
            result[col] = result[col].round(1)
    
    print ("Applying three decimal places columns")
    for col in three_decimals:
        if col in result.columns:
            result[col] = result[col].round(3)
    
    print ("Converting to integer (with safe handling of NaN values)")
    for col in int_columns:
        if col in result.columns:
            # Fill NaNs with 0 before converting to integer
            if result[col].isna().any():
                result[col] = result[col].fillna(0).astype(int)
            else:
                result[col] = result[col].astype(int)
    
    print("dropping populations that are 0")
    result = result[result['population'] != 0]
    result = result.replace(9999, None)

    return result

# Hex 8 processing

## Country Processing

In [7]:
civ = process_country_hex("ivory_coast/", "civ", 8)
eth = process_country_hex("ethiopia/", "eth", 8)
ken = process_country_hex("kenya/", "ken", 8)
rwa = process_country_hex("rwanda/", "rwa", 8)
tan = process_country_hex("tanzania/", "tan", 8)
uga = process_country_hex("uganda/", "uga", 8)
zmb = process_country_hex("zambia/", "zmb", 8)

NameError: name 'process_country_hex' is not defined

In [76]:
# merge all countries
all_countries = pd.concat([civ, eth, ken, rwa, tan, uga, zmb], ignore_index=True)
all_countries

Unnamed: 0,h3_index,population,pop_0_4,females_0_4,males_0_4,pop_5_9,females_5_9,males_5_9,pop_10_14,females_10_14,males_10_14,pop_0_9,females_0_9,males_0_9,pop_15_49,females_15_49,males_15_49,pop_50_64,females_50_64,males_50_64,pop_65_plus,females_65_plus,males_65_plus,births,pregnancies,rwi,underweight,female_educational_attainment_mean,male_educational_attainment_mean,travel_time_no_sites_all_health,time_delta_no_sites_semi_dense_urban,travel_time_health_posts,travel_time_major_roads,travel_time_no_sites_secondary_schools,travel_time_secondary_schools,travel_time_no_sites_health_centers,travel_time_no_sites_major_roads,time_delta_no_sites_secondary_schools,time_delta_no_sites_all_health,travel_time_health_centers,time_delta_no_sites_health_centers,time_delta_no_sites_major_roads,travel_time_semi_dense_urban,time_delta_no_sites_major_hospitals,travel_time_all_health,travel_time_no_sites_primary_schools,travel_time_no_sites_semi_dense_urban,time_delta_no_sites_health_posts,travel_time_no_sites_all_education,travel_time_major_hospitals,travel_time_no_sites_major_hospitals,travel_time_primary_schools,time_delta_no_sites_primary_schools,travel_time_all_education,time_delta_no_sites_all_education,travel_time_no_sites_health_posts,geometry,country_name
0,887512209bfffff,5.990220,1.004511,0.494525,0.509987,0.889155,0.433623,0.455532,0.858690,0.418766,0.439924,1.893666,0.928148,0.965518,2.668025,1.339517,1.328508,0.402525,0.192652,0.209873,0.167314,0.080078,0.087236,0.312739,0.418208,-0.276731,0.164609,2.953700,5.127717,174.999985,0.0,358.000000,59.000000,,,174.999985,59.000000,,0.0,174.999985,0.0,0.0,144.000000,,174.999985,inf,144.000000,0.0,inf,,,1607.999878,inf,1599.000122,inf,358.000000,"POLYGON ((-6.50082 7.36543, -6.50476 7.36330, ...",civ
1,8875ae4635fffff,22.113190,3.970983,2.019156,1.951827,3.199136,1.619498,1.579637,2.427142,1.228692,1.198450,7.170118,3.638654,3.531464,10.155474,4.969656,5.185819,1.643627,0.887022,0.756606,0.716827,0.386852,0.329975,0.113316,0.151532,-0.768019,0.155533,2.438673,4.796358,858.928040,0.0,341.928009,857.928040,,,858.928040,857.928040,,0.0,858.928040,0.0,0.0,880.928040,,858.928040,952.927979,880.928040,0.0,884.928040,,,952.927979,0.0,884.928040,0.0,341.928009,"POLYGON ((-8.16296 6.42187, -8.16688 6.41973, ...",civ
2,88753244dbfffff,40.797909,7.351212,3.608194,3.743018,6.394514,3.307443,3.087070,4.455016,2.304274,2.150742,13.745726,6.915637,6.830088,17.495735,9.427015,8.068719,3.539936,1.939281,1.600655,1.561497,0.855434,0.706063,0.138954,0.185815,-0.603251,0.143881,1.945259,3.354836,739.631836,0.0,271.301636,376.631836,430.934601,430.934601,739.631836,376.631836,0.0,0.0,739.631836,0.0,0.0,399.631836,0.0,739.631836,367.631836,399.631836,0.0,367.631836,1154.301514,1154.301514,367.631836,0.0,367.631836,0.0,271.301636,"POLYGON ((-5.39022 9.68681, -5.39424 9.68467, ...",civ
3,8875ab8c3bfffff,8.688528,1.497071,0.816595,0.680476,1.254670,0.627162,0.627509,0.957096,0.478415,0.478680,2.751741,1.443756,1.307985,4.355628,2.113554,2.242074,0.492966,0.211655,0.281311,0.131098,0.056287,0.074811,0.054424,0.072778,-0.399614,0.142701,3.301325,4.907494,,0.0,181.000000,64.000000,,,,64.000000,,,,,0.0,1047.000000,0.0,,,1047.000000,0.0,1426.000000,897.000000,897.000000,,,1426.000000,0.0,181.000000,"POLYGON ((-7.33689 5.25591, -7.34074 5.25381, ...",civ
4,8875ad3897fffff,25.432112,4.264758,2.099556,2.165201,3.775002,1.840992,1.934009,3.645661,1.777915,1.867745,8.039759,3.940549,4.099211,11.327381,5.687062,5.640319,1.708961,0.817923,0.891038,0.710349,0.339979,0.370370,1.638586,2.191190,-0.139363,0.160427,2.820049,4.570149,590.508972,0.0,272.508972,24.508980,853.409912,853.409912,590.508972,24.508980,0.0,0.0,590.508972,0.0,0.0,250.508987,0.0,590.508972,956.508972,250.508987,0.0,559.508972,998.508972,998.508972,956.508972,0.0,559.508972,0.0,272.508972,"POLYGON ((-6.22794 6.03084, -6.23181 6.02875, ...",civ
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2066209,8896315601fffff,2.047997,0.340260,0.167654,0.172606,0.296184,0.147490,0.148694,0.261033,0.128600,0.132433,0.636444,0.315143,0.321301,0.929758,0.460995,0.468763,0.139431,0.084975,0.054456,0.081332,0.050700,0.030631,0.073004,0.103383,-0.571243,0.123782,4.141656,4.841606,861.000000,0.0,953.000000,981.000000,998.000000,998.000000,861.000000,981.000000,0.0,0.0,861.000000,0.0,0.0,828.000000,0.0,861.000000,479.000000,828.000000,0.0,479.000000,996.000000,996.000000,479.000000,0.0,479.000000,0.0,953.000000,"POLYGON ((31.07198 -14.27235, 31.07398 -14.267...",zmb
2066210,889606a35dfffff,10.445472,1.814088,0.898212,0.915876,1.558978,0.769205,0.789773,1.389145,0.678345,0.710800,3.373066,1.667417,1.705649,4.795680,2.382257,2.413423,0.610882,0.330809,0.280073,0.276698,0.157964,0.118734,0.433651,0.614106,-0.058632,0.147348,5.901771,6.371011,602.395996,,748.395935,710.395935,1144.395996,1144.395996,602.395996,710.395935,0.0,0.0,602.395996,0.0,0.0,,,602.395996,206.395996,,0.0,206.395996,,,206.395996,0.0,206.395996,0.0,748.395935,"POLYGON ((29.93108 -13.61891, 29.93309 -13.614...",zmb
2066211,88961c1aa1fffff,15.577436,2.736472,1.348312,1.388160,2.271756,1.124131,1.147625,2.053756,1.015151,1.038605,5.008228,2.472443,2.535785,7.286330,3.650328,3.636002,0.881108,0.457705,0.423403,0.348015,0.202460,0.145554,0.485639,0.687727,-0.427487,0.169140,5.783052,7.096989,227.415543,0.0,292.415558,148.415558,703.415527,703.415527,227.415543,148.415558,0.0,0.0,227.415543,0.0,0.0,676.415527,0.0,227.415543,233.415527,676.415527,0.0,233.415527,1066.415527,1066.415527,233.415527,0.0,233.415527,0.0,292.415558,"POLYGON ((29.26507 -11.22173, 29.26708 -11.216...",zmb
2066212,889631126bfffff,60.235184,10.007640,4.930985,5.076655,8.711281,4.337925,4.373356,7.677429,3.782336,3.895092,18.718920,9.268909,9.450010,27.345816,13.558668,13.787146,4.100915,2.499270,1.601645,2.392107,1.491189,0.900918,6.460267,9.148563,-0.475685,0.126585,4.520392,5.459009,284.377960,0.0,121.299278,39.377949,269.377960,269.377960,284.377960,39.377949,0.0,0.0,284.377960,0.0,0.0,1380.377930,0.0,284.377960,84.377953,1380.377930,0.0,84.377953,256.377960,256.377960,84.377953,0.0,84.377953,0.0,121.299278,"POLYGON ((30.90233 -14.56360, 30.90433 -14.558...",zmb


In [77]:
# write to parquet and geojson
all_countries.to_parquet(os.path.join(output_folder_path, "all_countries_merged_hex8_high_precision.parquet"), index=False)
print("parquet file saved")
all_countries.to_file(os.path.join(output_folder_path, "all_countries_merged_hex8_high_precision.geojson"), driver='GeoJSON')
print("geojson file saved")

parquet file saved
geojson file saved


In [None]:
# run if dataset is not loaded
# all_countries = gpd.read_parquet(os.path.join(output_folder_path, "all_countries_merged_hex8_high_precision.parquet"))

In [78]:
all_countries = adjust_precision(all_countries)

travel_time_no_sites_all_health: min = 0.0, max = inf
travel_time_no_sites_all_health: new min = 0.0, new max = 9999.0
time_delta_no_sites_semi_dense_urban: min = 0.0, max = inf
time_delta_no_sites_semi_dense_urban: new min = 0.0, new max = 9999.0
travel_time_health_posts: min = 0.0, max = 3188.287841796875
travel_time_health_posts: new min = 0.0, new max = 3188.287841796875
travel_time_major_roads: min = 0.0, max = 2851.287841796875
travel_time_major_roads: new min = 0.0, new max = 2851.287841796875
travel_time_no_sites_secondary_schools: min = 0.0, max = inf
travel_time_no_sites_secondary_schools: new min = 0.0, new max = 9999.0
travel_time_secondary_schools: min = 0.0, max = 3189.2880859375
travel_time_secondary_schools: new min = 0.0, new max = 3189.2880859375
travel_time_no_sites_health_centers: min = 0.0, max = inf
travel_time_no_sites_health_centers: new min = 0.0, new max = 9999.0
travel_time_no_sites_major_roads: min = 0.0, max = inf
travel_time_no_sites_major_roads: new min =

In [79]:
all_countries.to_parquet(os.path.join(output_folder_path, "all_countries_merged_hex8.parquet"), index=False)
print ("parquet file written")
all_countries.to_file(os.path.join(output_folder_path, "all_countries_merged_hex8.geojson"), driver='GeoJSON')
print ("geojson file written")

parquet file written
geojson file written


In [80]:
all_countries_tiny = all_countries[["h3_index", "geometry"]]

In [81]:
all_countries_tiny.to_parquet(os.path.join(output_folder_path, "all_countries_merged_tiny_hex8.parquet"), index=False)
print("parquet file written")
all_countries_tiny.to_file(os.path.join(output_folder_path, "all_countries_merged_tiny_hex8.geojson"), driver='GeoJSON')
print("geojson file written")

parquet file written
geojson file written


# Hex 4 Processing

In [85]:
civ = process_country_hex("ivory_coast/", "civ", 4)
eth = process_country_hex("ethiopia/", "eth", 4)
ken = process_country_hex("kenya/", "ken", 4)
rwa = process_country_hex("rwanda/", "rwa", 4)
tan = process_country_hex("tanzania/", "tan", 4)
uga = process_country_hex("uganda/", "uga", 4)
zmb = process_country_hex("zambia/", "zmb", 4)

Grouped by h3_index and applied aggregations.
Finished processing all_education.
Grouped by h3_index and applied aggregations.
Finished processing all_health.
Grouped by h3_index and applied aggregations.
Finished processing health_centers.
Grouped by h3_index and applied aggregations.
Finished processing health_posts.
Grouped by h3_index and applied aggregations.
Finished processing major_hospitals.
Grouped by h3_index and applied aggregations.
Finished processing major_roads.
Grouped by h3_index and applied aggregations.
Finished processing primary_schools.
Grouped by h3_index and applied aggregations.
Finished processing secondary_schools.
Grouped by h3_index and applied aggregations.
Finished processing semi_dense_urban.
Grouped by h3_index and applied aggregations.
Finished processing all_education.
Grouped by h3_index and applied aggregations.
Finished processing all_health.
Grouped by h3_index and applied aggregations.
Finished processing health_centers.
Grouped by h3_index and 

In [87]:
# merge all countries
all_countries4 = pd.concat([civ, eth, ken, rwa, tan, uga, zmb], ignore_index=True)
all_countries4

Unnamed: 0,h3_index,population,pop_0_4,females_0_4,males_0_4,pop_5_9,females_5_9,males_5_9,pop_10_14,females_10_14,males_10_14,pop_0_9,females_0_9,males_0_9,pop_15_49,females_15_49,males_15_49,pop_50_64,females_50_64,males_50_64,pop_65_plus,females_65_plus,males_65_plus,births,pregnancies,rwi,underweight,female_educational_attainment_mean,male_educational_attainment_mean,travel_time_no_sites_all_health,time_delta_no_sites_semi_dense_urban,travel_time_health_posts,travel_time_major_roads,travel_time_no_sites_secondary_schools,travel_time_secondary_schools,travel_time_no_sites_health_centers,travel_time_no_sites_major_roads,time_delta_no_sites_secondary_schools,time_delta_no_sites_all_health,travel_time_health_centers,time_delta_no_sites_health_centers,time_delta_no_sites_major_roads,travel_time_semi_dense_urban,time_delta_no_sites_major_hospitals,travel_time_all_health,travel_time_no_sites_primary_schools,travel_time_no_sites_semi_dense_urban,time_delta_no_sites_health_posts,travel_time_no_sites_all_education,travel_time_major_hospitals,travel_time_no_sites_major_hospitals,travel_time_primary_schools,time_delta_no_sites_primary_schools,travel_time_all_education,time_delta_no_sites_all_education,travel_time_no_sites_health_posts,geometry,country_name
0,8454497ffffffff,35112.632812,6417.594727,3100.625977,3316.968506,5655.285645,2770.228027,2885.057373,3945.955078,1932.731323,2013.223755,12072.879883,5870.854004,6202.025879,15039.880859,8010.537109,7029.344238,2785.423584,1445.170654,1340.252930,1268.492310,656.419556,612.072815,584.473999,781.584534,-0.532973,0.168598,1.398552,2.772113,,14.054911,227.702927,172.277878,,1096.768066,,205.000809,,,1125.433838,,32.722919,668.821411,,1125.433838,,682.876282,17.954235,,1153.444214,,1123.703125,,1096.826294,,245.657166,"POLYGON ((-6.32384 9.37963, -6.52106 9.27389, ...",civ
1,8475a95ffffffff,40954.570312,7056.649902,3849.131104,3207.518799,5914.061523,2956.211914,2957.849609,4511.402832,2255.076660,2256.325928,12970.710938,6805.342773,6165.368164,20530.847656,9962.524414,10568.323242,2323.663330,997.666931,1325.996460,617.946533,265.315887,352.630646,317.517151,424.597992,-0.305215,0.147890,2.316012,3.613592,,,391.741058,958.420593,,1318.563965,,971.313965,,,1319.563965,,12.893368,872.117004,,1319.563965,287.739990,,4.911486,287.739990,1253.195801,,284.032013,3.707983,284.032013,3.707983,396.652557,"POLYGON ((-6.92278 5.59624, -7.11195 5.49335, ...",civ
2,8475189ffffffff,60712.929688,9102.460938,4539.844727,4562.616699,8741.062500,4270.768555,4470.294434,8221.963867,4016.497314,4205.466797,17843.523438,8810.613281,9032.911133,26875.492188,13711.602539,13163.889648,5253.223145,2653.792480,2599.430664,2518.726807,1300.760254,1217.966675,1951.986938,2610.283447,-0.233316,0.148477,3.342605,5.187007,,,279.498596,789.842834,,969.698242,,803.577637,,,952.291992,,13.734830,406.919556,,952.291992,,,6.900201,,1213.668213,,1106.343384,,870.554932,,286.398804,"POLYGON ((-4.69885 6.44812, -4.72371 6.26066, ...",civ
3,8475127ffffffff,80992.312500,13691.023438,6766.850098,6924.173828,11987.442383,5870.711914,6116.730469,11301.281250,5530.130371,5771.151367,25678.464844,12637.561523,13040.904297,36201.101562,18121.585938,18079.515625,5508.011719,2677.917480,2830.093994,2303.452881,1120.654419,1182.798462,2838.386475,3795.615967,-0.392412,0.177578,2.176699,4.147830,,34.892273,284.450012,474.570862,1543.160522,1543.160522,,490.091064,0.000000,,1125.997070,,15.520217,439.830383,0.000000,1125.997070,,474.722687,8.338957,,1547.322144,1547.322144,1275.133911,,1154.426270,,292.788971,"POLYGON ((-6.88795 7.59150, -7.08161 7.48667, ...",civ
4,8475a57ffffffff,36716.152344,6718.574219,3289.697266,3428.877197,5740.378418,2749.248535,2991.129639,4158.069824,1995.413086,2162.656738,12458.953125,6038.945801,6420.006836,16236.397461,8248.928711,7987.468750,2650.295166,1341.025635,1309.269531,1212.437622,611.770020,600.667664,857.810791,1147.102539,-0.383832,0.164788,1.888396,4.155733,709.201355,3.582616,235.499756,281.540436,,1376.216675,709.201355,286.818939,,3.178201,706.023193,3.178201,5.278489,645.624512,,706.023193,,649.207092,3.221197,450.678741,1195.329102,,1205.133545,,442.049866,8.628878,238.720963,"POLYGON ((-7.56531 8.14883, -7.76080 8.04280, ...",civ
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2426,849623bffffffff,398263.531250,60593.066406,30279.503906,30313.562500,54927.089844,27762.095703,27164.996094,49215.972656,25479.179688,23736.792969,115520.156250,58041.597656,57478.558594,211459.250000,101587.343750,109871.906250,16422.695312,7590.164062,8832.532227,5645.465332,3346.767090,2298.698486,10203.860352,14449.968750,0.759524,0.105764,8.922813,9.764507,81.692444,0.348521,104.136192,55.077335,137.398087,137.218094,81.692444,55.269817,0.179991,0.490323,81.202126,0.490323,0.192484,108.802628,0.172928,81.202126,52.411858,109.151146,0.402981,50.515079,175.165894,175.338821,51.900387,0.511468,50.033573,0.481504,104.539169,"POLYGON ((28.70901 -15.75321, 28.80980 -15.514...",zmb
2427,849618dffffffff,13144.328125,2340.691162,1157.639282,1183.051880,1929.105835,944.869690,984.236145,1688.152832,816.137146,872.015686,4269.796875,2102.509033,2167.288086,5957.826660,2979.104492,2978.722168,828.578064,457.180969,371.397095,399.974213,230.067474,169.906738,245.052429,347.025543,-0.384548,0.230254,5.909522,7.111406,251.190842,12.187391,253.437302,335.554504,918.893616,909.765747,251.190842,346.363129,9.127781,11.463492,239.727356,11.463492,10.808599,930.810974,15.843481,239.727356,83.167747,942.998413,0.676458,83.244492,959.185425,975.028931,83.031288,0.136455,83.108032,0.136455,254.113754,"POLYGON ((29.16346 -10.50983, 29.26166 -10.271...",zmb
2428,8496235ffffffff,8122.585449,1316.466431,658.724792,657.741638,1232.206543,610.490845,621.715637,1117.910645,561.657166,556.253418,2548.673096,1269.215698,1279.457275,3784.920654,1815.561035,1969.359619,456.544800,226.630966,229.913849,214.536697,125.234406,89.302292,103.616829,146.734650,-0.487303,0.111825,7.932514,8.557478,631.435425,0.000000,367.188019,235.238342,,1433.035400,631.435425,258.880981,,22.752441,608.682983,22.752441,23.642639,1476.118896,,608.682983,588.986023,1476.118896,26.265594,588.986633,1200.035034,,563.146240,25.839771,563.146851,25.839771,393.453613,"POLYGON ((29.43943 -15.20481, 29.53926 -14.965...",zmb
2429,84975d5ffffffff,264.853271,39.954796,19.307247,20.647549,36.487347,17.701221,18.786125,33.976273,16.820942,17.155331,76.442139,37.008469,39.433674,124.585312,60.916782,63.668530,18.324450,10.478309,7.846140,11.525094,6.642344,4.882750,2.289688,3.242490,-0.298497,0.123041,5.996464,6.986239,1520.506470,,,,,,1520.506470,,,0.000000,1520.506470,0.000000,,,,1520.506470,162.788910,,,162.788910,,,162.788910,0.000000,162.788910,0.000000,,"POLYGON ((23.72895 -17.91426, 23.83479 -17.680...",zmb


In [88]:
# write to parquet and geojson
all_countries4.to_parquet(os.path.join(output_folder_path, "all_countries_merged_hex4_high_precision.parquet"), index=False)
print("parquet file saved")
all_countries4.to_file(os.path.join(output_folder_path, "all_countries_merged_hex4_high_precision.geojson"), driver='GeoJSON')
print("geojson file saved")

parquet file saved
geojson file saved


In [89]:
# run if dataset is not loaded
# all_countries = gpd.read_parquet(os.path.join(output_folder_path, "all_countries_merged_hex4_high_precision.parquet"))

In [90]:
all_countries4 = adjust_precision(all_countries4)

travel_time_no_sites_all_health: min = 46.154109954833984, max = inf
travel_time_no_sites_all_health: new min = 46.154109954833984, new max = 9999.0
time_delta_no_sites_semi_dense_urban: min = 0.0, max = inf
time_delta_no_sites_semi_dense_urban: new min = 0.0, new max = 9999.0
travel_time_health_posts: min = 8.649688720703125, max = 3188.287841796875
travel_time_health_posts: new min = 8.649688720703125, new max = 3188.287841796875
travel_time_major_roads: min = 9.73422908782959, max = 2851.287841796875
travel_time_major_roads: new min = 9.73422908782959, new max = 2851.287841796875
travel_time_no_sites_secondary_schools: min = 18.05055809020996, max = 3189.2880859375
travel_time_no_sites_secondary_schools: new min = 18.05055809020996, new max = 3189.2880859375
travel_time_secondary_schools: min = 18.05055809020996, max = 3189.2880859375
travel_time_secondary_schools: new min = 18.05055809020996, new max = 3189.2880859375
travel_time_no_sites_health_centers: min = 46.154109954833984, m

In [91]:
all_countries4.to_parquet(os.path.join(output_folder_path, "all_countries_merged_hex4.parquet"), index=False)
print ("parquet file written")
all_countries4.to_file(os.path.join(output_folder_path, "all_countries_merged_hex4.geojson"), driver='GeoJSON')
print ("geojson file written")

parquet file written
geojson file written


In [92]:
all_countries_tiny4 = all_countries4[["h3_index", "geometry"]]

In [93]:
all_countries_tiny4.to_parquet(os.path.join(output_folder_path, "all_countries_merged_tiny_hex4.parquet"), index=False)
print("parquet file written")
all_countries_tiny4.to_file(os.path.join(output_folder_path, "all_countries_merged_tiny_hex4.geojson"), driver='GeoJSON')
print("geojson file written")

parquet file written
geojson file written


# Hex 6 Processing

In [96]:
civ = process_country_hex("ivory_coast/", "civ", 6)
eth = process_country_hex("ethiopia/", "eth", 6)
ken = process_country_hex("kenya/", "ken", 6)
rwa = process_country_hex("rwanda/", "rwa", 6)
tan = process_country_hex("tanzania/", "tan", 6)
uga = process_country_hex("uganda/", "uga", 6)
zmb = process_country_hex("zambia/", "zmb", 6)

# merge all countries
all_countries6 = pd.concat([civ, eth, ken, rwa, tan, uga, zmb], ignore_index=True)
all_countries6

Converted column h3_index to level 6
Grouped by h3_index and applied aggregations.
Finished processing all_education.
Converted column h3_index to level 6
Grouped by h3_index and applied aggregations.
Finished processing all_health.
Converted column h3_index to level 6
Grouped by h3_index and applied aggregations.
Finished processing health_centers.
Converted column h3_index to level 6
Grouped by h3_index and applied aggregations.
Finished processing health_posts.
Converted column h3_index to level 6
Grouped by h3_index and applied aggregations.
Finished processing major_hospitals.
Converted column h3_index to level 6
Grouped by h3_index and applied aggregations.
Finished processing major_roads.
Converted column h3_index to level 6
Grouped by h3_index and applied aggregations.
Finished processing primary_schools.
Converted column h3_index to level 6
Grouped by h3_index and applied aggregations.
Finished processing secondary_schools.
Converted column h3_index to level 6
Grouped by h3_in

Unnamed: 0,h3_index,population,pop_0_4,females_0_4,males_0_4,pop_5_9,females_5_9,males_5_9,pop_10_14,females_10_14,males_10_14,pop_0_9,females_0_9,males_0_9,pop_15_49,females_15_49,males_15_49,pop_50_64,females_50_64,males_50_64,pop_65_plus,females_65_plus,males_65_plus,births,pregnancies,rwi,underweight,female_educational_attainment_mean,male_educational_attainment_mean,travel_time_no_sites_all_health,time_delta_no_sites_semi_dense_urban,travel_time_health_posts,travel_time_major_roads,travel_time_no_sites_secondary_schools,travel_time_secondary_schools,travel_time_no_sites_health_centers,travel_time_no_sites_major_roads,time_delta_no_sites_secondary_schools,time_delta_no_sites_all_health,travel_time_health_centers,time_delta_no_sites_health_centers,time_delta_no_sites_major_roads,travel_time_semi_dense_urban,time_delta_no_sites_major_hospitals,travel_time_all_health,travel_time_no_sites_primary_schools,travel_time_no_sites_semi_dense_urban,time_delta_no_sites_health_posts,travel_time_no_sites_all_education,travel_time_major_hospitals,travel_time_no_sites_major_hospitals,travel_time_primary_schools,time_delta_no_sites_primary_schools,travel_time_all_education,time_delta_no_sites_all_education,travel_time_no_sites_health_posts,geometry,country_name
0,867506357ffffff,11.945259,1.923827,1.004196,0.919631,1.867733,0.876077,0.991655,1.518722,0.712371,0.806352,3.791559,1.880273,1.911286,4.922103,2.614136,2.307967,1.064976,0.651043,0.413933,0.647898,0.396075,0.251824,0.229551,0.306966,-0.015234,0.168101,1.634306,3.629200,,,265.000000,220.000000,,,,220.000000,,,,,0.000000,,,,227.000000,,0.000000,227.000000,,,227.000000,0.000000,227.000000,0.000000,265.000000,"POLYGON ((-3.69546 8.18997, -3.69942 8.16214, ...",civ
1,8675a8937ffffff,1249.150146,215.233978,117.401855,97.832123,180.384033,90.167038,90.216995,137.601715,68.781807,68.819908,395.618011,207.568893,188.049118,626.208801,303.865692,322.343079,70.873756,30.429712,40.444046,18.847910,8.092366,10.755543,4.302091,5.752946,-0.257584,0.162242,2.854004,4.545713,,0.407445,395.512695,1276.043213,inf,1535.000000,,1277.481323,inf,,,,1.438042,557.492310,,,inf,557.899780,0.375489,,,,1546.000000,inf,1342.043213,,395.888214,"POLYGON ((-7.01144 6.43700, -7.03875 6.42217, ...",civ
2,86544b607ffffff,141.532043,26.309265,12.474882,13.834384,23.533339,10.795362,12.737978,16.448463,7.545343,8.903121,49.842606,23.270245,26.572361,60.536339,31.789280,28.747059,9.958049,4.737256,5.220793,4.746580,2.258049,2.488531,1.959779,2.620704,-0.468760,0.186642,0.660028,2.411162,,,405.350647,525.350647,,,,525.350647,,,,,0.000000,,,,,,0.000000,,,,,,,,405.350647,"POLYGON ((-7.30642 8.87806, -7.33456 8.86286, ...",civ
3,8675a0357ffffff,3823.519043,686.609558,349.125580,337.483978,553.152039,280.022125,273.129913,419.669250,212.449127,207.220108,1239.761597,629.147705,610.613892,1755.949707,859.286865,896.662903,284.194214,153.372009,130.822205,123.944183,66.889359,57.054825,130.564728,174.596939,-0.379799,0.172048,1.692339,3.821920,,,116.297592,1275.182617,732.338623,732.338623,,,0.000000,,,,,1308.514771,,,688.338684,,0.681670,114.935135,,,688.338684,0.000000,114.935135,0.000000,116.979263,"POLYGON ((-8.08362 7.70762, -8.11147 7.69247, ...",civ
4,86750e467ffffff,4903.661621,719.734619,368.296967,351.437653,653.152893,318.713348,334.439575,618.384216,301.708008,316.676178,1372.887573,687.010315,685.877197,2420.130859,1185.814819,1234.316162,343.537506,158.856689,184.680801,148.721497,69.463387,79.258102,135.959976,181.811707,-0.012951,0.132493,3.274731,5.212858,,0.279066,32.762909,17.493961,,,,17.570345,,,,,0.076382,585.723816,,,1135.025635,586.002869,0.579221,1135.025635,,,1134.723755,0.301780,1134.723755,0.301780,33.342133,"POLYGON ((-3.22007 6.15118, -3.22410 6.12417, ...",civ
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
86226,86967180fffffff,36.604301,5.530122,2.742241,2.787880,4.915339,2.478294,2.437045,4.754682,2.376869,2.377813,10.445461,5.220535,5.224925,17.021837,9.157583,7.864254,2.602421,1.562255,1.040166,1.779902,1.068602,0.711300,0.687349,0.973373,-0.436909,0.156647,5.742478,5.464236,1023.986572,,1041.417725,,1113.986572,1113.986572,1023.986572,,0.000000,0.000000,1023.986572,0.000000,,,,1023.986572,586.971741,,0.000000,586.971741,,,586.971741,0.000000,586.971741,0.000000,1041.417725,"POLYGON ((22.32327 -14.27225, 22.33829 -14.238...",zmb
86227,8696ac577ffffff,1036.633545,185.435211,92.476082,92.959129,159.141373,78.688629,80.452751,138.311142,67.649216,70.661919,344.576599,171.164703,173.411880,476.796082,235.852264,240.943832,52.651241,28.283356,24.367887,24.298536,12.433175,11.865360,49.675518,70.346878,-0.657487,0.168623,4.448072,6.268044,395.576080,,773.772278,307.061646,490.780884,436.635071,395.576080,357.997864,54.145828,17.557182,378.018890,17.557182,50.936222,1066.418945,,378.018890,383.527008,,15.191144,351.637268,,,281.423187,102.103813,238.257248,113.380020,788.963440,"POLYGON ((31.96221 -9.18231, 31.97575 -9.14827...",zmb
86228,86961c367ffffff,1177.725220,206.883759,101.926239,104.957512,171.722382,84.968918,86.753456,155.288147,76.755524,78.532623,378.606140,186.895157,191.710968,550.886230,275.953888,274.932373,66.636108,34.628155,32.007954,26.308653,15.297139,11.011514,14.476465,20.500525,-0.537620,0.167227,5.832980,7.374228,651.937134,9.756443,360.462372,885.819031,681.497070,671.150574,651.937134,896.747803,10.346558,93.521271,558.415833,93.521271,10.928732,874.036133,9.756443,558.415833,343.746918,883.792542,93.421623,343.746918,939.28009,949.036499,275.927460,67.819466,275.927460,67.819466,453.884003,"POLYGON ((28.64462 -11.25574, 28.65877 -11.221...",zmb
86229,86961e147ffffff,1254.496582,224.827072,108.248878,116.578194,182.437851,89.639786,92.798073,162.330414,78.770248,83.560158,407.264923,197.888657,209.376266,570.845825,284.808350,286.037506,76.909081,43.003803,33.905277,37.146320,20.258579,16.887739,18.774067,26.586477,-0.250388,0.199815,5.941327,7.234296,476.705627,3.309443,96.621483,45.546375,691.705627,689.614990,476.705627,46.331097,2.090612,2.090612,474.614990,2.090612,0.784721,832.614990,3.440032,474.614990,99.386841,835.924438,3.170039,99.386841,836.61499,840.054993,96.128784,3.258066,96.128784,3.258066,99.791519,"POLYGON ((29.71009 -10.37004, 29.72404 -10.335...",zmb


In [97]:
# write to parquet and geojson
all_countries6.to_parquet(os.path.join(output_folder_path, "all_countries_merged_hex6_high_precision.parquet"), index=False)
print("parquet file saved")
all_countries6.to_file(os.path.join(output_folder_path, "all_countries_merged_hex6_high_precision.geojson"), driver='GeoJSON')
print("geojson file saved")

parquet file saved
geojson file saved


In [98]:
all_countries6 = adjust_precision(all_countries6)

travel_time_no_sites_all_health: min = 6.0, max = inf
travel_time_no_sites_all_health: new min = 6.0, new max = 9999.0
time_delta_no_sites_semi_dense_urban: min = 0.0, max = inf
time_delta_no_sites_semi_dense_urban: new min = 0.0, new max = 9999.0
travel_time_health_posts: min = 4.0, max = 3188.287841796875
travel_time_health_posts: new min = 4.0, new max = 3188.287841796875
travel_time_major_roads: min = 0.0, max = 2851.287841796875
travel_time_major_roads: new min = 0.0, new max = 2851.287841796875
travel_time_no_sites_secondary_schools: min = 9.919168472290039, max = inf
travel_time_no_sites_secondary_schools: new min = 9.919168472290039, new max = 9999.0
travel_time_secondary_schools: min = 9.919168472290039, max = 3189.2880859375
travel_time_secondary_schools: new min = 9.919168472290039, new max = 3189.2880859375
travel_time_no_sites_health_centers: min = 6.0, max = inf
travel_time_no_sites_health_centers: new min = 6.0, new max = 9999.0
travel_time_no_sites_major_roads: min = 0.

In [99]:
all_countries6.to_parquet(os.path.join(output_folder_path, "all_countries_merged_hex6.parquet"), index=False)
print ("parquet file written")
all_countries6.to_file(os.path.join(output_folder_path, "all_countries_merged_hex6.geojson"), driver='GeoJSON')
print ("geojson file written")

parquet file written
geojson file written


In [100]:
all_countries_tiny6 = all_countries6[["h3_index", "geometry"]]

In [101]:
all_countries_tiny6.to_parquet(os.path.join(output_folder_path, "all_countries_merged_tiny_hex6.parquet"), index=False)
print("parquet file written")
all_countries_tiny6.to_file(os.path.join(output_folder_path, "all_countries_merged_tiny_hex6.geojson"), driver='GeoJSON')
print("geojson file written")

parquet file written
geojson file written


# mbtiles investigation

SyntaxError: invalid syntax (3966932415.py, line 2)

In [20]:
!mb-util --silent  ./all_countries_merged_hex8.mbtiles
!find extracted_tiles -type f -exec du -h {} \; | sort -hr | head -n 10

Usage: mb-util [options] input output
    
    Examples:

    Export an mbtiles file to a directory of files:
    $ mb-util world.mbtiles dumps # when the 2nd argument is "dumps", then dumps the metatdata.json

    Export an mbtiles file to a directory of files:
    $ mb-util world.mbtiles tiles # tiles must not already exist
    
    Import a directory of tiles into an mbtiles file:
    $ mb-util tiles world.mbtiles # mbtiles file must not already exist

Options:
  -h, --help            show this help message and exit
  --scheme=SCHEME       Tiling scheme of the tiles. Default is "xyz" (z/x/y),
                        other options are "tms" which is also z/x/y but uses a
                        flipped y coordinate, and "wms" which replicates the
                        MapServer WMS TileCache directory structure
                        "z/000/000/x/000/000/y.png"
  --image_format=FORMAT
                        The format of the image tiles, either png, jpg, webp
                    

# Cleaning Population Data out of tiles

In [None]:
hex8_all = gpd.read_parquet(os.path.join(output_folder_path, "all_countries_merged_hex8.parquet")) 

In [23]:
# drop columns that start with pop_, females_, males_
drop_columns = [col for col in hex8_all.columns if col.startswith(('pop_', 'females_', 'males_'))]
drop_columns.append("country_name")
drop_columns.append("geometry-type")
hex8_no_pop = hex8_all.drop(columns=drop_columns, errors="ignore")


In [24]:
hex8_no_pop.to_parquet(os.path.join(output_folder_path, "all_countries_merged_hex8_no_pop.parquet"), index=False)
hex8_no_pop.to_file(os.path.join(output_folder_path, "all_countries_merged_hex8_no_pop.geojson"), driver='GeoJSON')
print ("geojson file written")
print ("parquet file written")

geojson file written
parquet file written


In [25]:
for c in hex8_no_pop.columns:
    print (c)

h3_index
population
births
pregnancies
rwi
underweight
female_educational_attainment_mean
male_educational_attainment_mean
travel_time_no_sites_all_health
time_delta_no_sites_semi_dense_urban
travel_time_health_posts
travel_time_major_roads
travel_time_no_sites_secondary_schools
travel_time_secondary_schools
travel_time_no_sites_health_centers
travel_time_no_sites_major_roads
time_delta_no_sites_secondary_schools
time_delta_no_sites_all_health
travel_time_health_centers
time_delta_no_sites_health_centers
time_delta_no_sites_major_roads
travel_time_semi_dense_urban
time_delta_no_sites_major_hospitals
travel_time_all_health
travel_time_no_sites_primary_schools
travel_time_no_sites_semi_dense_urban
time_delta_no_sites_health_posts
travel_time_no_sites_all_education
travel_time_major_hospitals
travel_time_no_sites_major_hospitals
travel_time_primary_schools
time_delta_no_sites_primary_schools
travel_time_all_education
time_delta_no_sites_all_education
travel_time_no_sites_health_posts
geom

In [16]:
print(output_folder_path)

/Volumes/samsung-4tb/b2p/impact-model/cleaned_data/hex_data/


In [17]:
!tippecanoe -o all_countries_merged_hex8.mbtiles -f -P -zg -Z8 -pt -pf -pk -ab -L hex8-impact-data:/Volumes/samsung-4tb/b2p/impact-model/cleaned_data/hex_data/all_countries_merged_hex8_no_pop.geojson

/Volumes/samsung-4tb/b2p/impact-model/cleaned_data/hex_data/all_countries_merged_hex8_no_pop.geojson:256498: Found ] at top level: 
/Volumes/samsung-4tb/b2p/impact-model/cleaned_data/hex_data/all_countries_merged_hex8_no_pop.geojson:258704: Reached EOF without all containers being closed: in JSON object {"type":"FeatureCollection","crs":{"type":"name","properties":{"name":"urn:ogc:def:crs:OGC:1.3:CRS84"}},"features":[]}
2064793 features, 435098915 bytes of geometry and attributes, 35408746 bytes of string pool, 0 bytes of vertices, 0 bytes of nodes
Choosing a maxzoom of -z6 for features typically 3168 feet (966 meters) apart, and at least 1391 feet (424 meters) apart
Choosing a maxzoom of -z7 for resolution of about 3038 feet (926 meters) within features
Can't use 7 for maxzoom because minzoom is 8
  99.9%  8/124/120  


In [18]:
!tippecanoe -o all_countries_merged_hex8.pmtiles -f -P -zg -Z8 -pt -pf -pk -ab -L hex8-impact-data:/Volumes/samsung-4tb/b2p/impact-model/cleaned_data/hex_data/all_countries_merged_hex8_no_pop.geojson

/Volumes/samsung-4tb/b2p/impact-model/cleaned_data/hex_data/all_countries_merged_hex8_no_pop.geojson:256498: Found ] at top level: 
/Volumes/samsung-4tb/b2p/impact-model/cleaned_data/hex_data/all_countries_merged_hex8_no_pop.geojson:258704: Reached EOF without all containers being closed: in JSON object {"type":"FeatureCollection","crs":{"type":"name","properties":{"name":"urn:ogc:def:crs:OGC:1.3:CRS84"}},"features":[]}
2064793 features, 435098915 bytes of geometry and attributes, 35408746 bytes of string pool, 0 bytes of vertices, 0 bytes of nodes
Choosing a maxzoom of -z6 for features typically 3168 feet (966 meters) apart, and at least 1391 feet (424 meters) apart
Choosing a maxzoom of -z7 for resolution of about 3038 feet (926 meters) within features
Can't use 7 for maxzoom because minzoom is 8
  99.9%  8/148/130  
  100.0%  8/149/126  