# Human Population Size on an Earth-like Planet--a Computer Experiment

In [1]:
import sim_world_functions as swf

from pathlib import Path
import sys

import networkx as nx
from pyvis.network import Network
import numpy as np
import polars as pl
from polars import DataFrame
import pyarrow
from scipy import stats
from pert import PERT
from shapely.geometry import Point
import geopandas
import plotly.express as px

In [2]:
rng = np.random.default_rng()
pl.Config.set_fmt_str_lengths(n=100)
DATA_PATH = Path("../data/")

The response variable is the arithmetic mean of the normalized coefficients of variation for the population sizes.

$$
z_{i} = \frac{1}{T + 1}
\sum_{t=0}^{T}
    \frac{
        \sqrt{
            \hat{V} \left(
            \mathrm{logpopsize}_{i,t}
            \right)
        }
    }
    {
        \underset{j}{\mathrm{median}}\left(\mathrm{logpopsize}_{i,j,t}\right)
    } 
         
                
$$

where $i=1,2,\dots,k$ indexes the treatment, $j=1,2,\dots,n_i$ indexes the replicate for the $i^{\text{th}}$ treatment, and $t=0,1,\dots,T$ indexes the time.

$\hat{V}$ denotes the unbiased sample variance, taken across the replicates $j$, of the natural logarithms of the population sizes observed for treatment $i$ at time $t$.

## Demographic Balancing Equation:

$$
x_{i,t} = x_{i,t-1} + b_{i, t-1} + \sum_{j}{m_{j, i, t-1}} - \sum_{j}{m_{i,j,t-1}} - d_{i, t-1}
$$

where

$$
\begin{aligned}
x_{i,t} &= \text{population size at location } i \text{ at time } t\\
b_{i, t-1} &= \text{number of births at location } i \text{ at time } t-1 \\
m_{j, i, t-1} &= \text{number of people immigrating to } i \text{ from } j \text{ at time } t-1 \\
m_{i, j, t-1} &= \text{number of people emigrating from } i \text{ to } j \text{ at time } t-1 \\
d_{i, t-1} &= \text{number of deaths at location } i \text{ at time } t-1
\end{aligned}
$$

In [3]:
SIMULATION_YEARS = 10
# Because the maximum sustainable population density 
# (per square mile) on the planet for a hunter-gatherer society
# is about 20, keep the below densities kind of small to start.
# https://en.wikipedia.org/wiki/Hunter-gatherer#:~:text=One%20group%2C%20the%20Chumash%2C%20had,21.6%20persons%20per%20square%20mile.

# All of the following initial average
# population densities per square mile
# should be set between 0 and 5.
BOREAL_FORESTS_TAIGA_INITIAL_AVG_POP_DENSITY = 0.1
DESERTS_AND_XERIC_SHRUBLANDS_INITIAL_AVG_POP_DENSITY = 0.1
FLOODED_GRASSLANDS_AND_SAVANNAS_INITIAL_AVG_POP_DENSITY = 0.5
MANGROVES_INITIAL_AVG_POP_DENSITY = 0.5
MEDITERRANEAN_FORESTS_WOODLANDS_AND_SCRUB_INITIAL_AVG_POP_DENSITY = 0.5
MONTANE_GRASSLANDS_AND_SHRUBLANDS_INITIAL_AVG_POP_DENSITY = 0.5
TEMPERATE_BROADLEAF_AND_MIXED_FORESTS_INITIAL_AVG_POP_DENSITY = 0.5
TEMPERATE_CONIFER_FORESTS_INITIAL_AVG_POP_DENSITY = 0.5
TEMPERATE_GRASSLANDS_SAVANNAS_AND_SHRUBLANDS_INITIAL_AVG_POP_DENSITY = 0.5
TROPICAL_AND_SUBTROPICAL_CONIFEROUS_FORESTS_INITIAL_AVG_POP_DENSITY = 0.5
TROPICAL_AND_SUBTROPICAL_DRY_BROADLEAF_FORESTS_INITIAL_AVG_POP_DENSITY = 0.5
TROPICAL_AND_SUBTROPICAL_GRASSLANDS_SAVANNAS_AND_SHRUBLANDS_INITIAL_AVG_POP_DENSITY = 0.5
TROPICAL_AND_SUBTROPICAL_MOIST_BROADLEAF_FORESTS_INITIAL_AVG_POP_DENSITY = 0.5
TUNDRA_INITIAL_AVG_POP_DENSITY = 0.1

# The following are the initial carrying capacities for each biome in
# units of people per square mile.
# These should be in the interval [0, 20].
# (The current population density of the Western Sahara Desert
# is about 3.5 people per square mile.)
# These are called unaided because the actual initial
# carrying capacities will be calculated later as a function
# of other relevant variables.
BOREAL_FORESTS_TAIGA_INITIAL_UNAIDED_CARRYING_CAP = 10
DESERTS_AND_XERIC_SHRUBLANDS_INITIAL_UNAIDED_CARRYING_CAP = 3
FLOODED_GRASSLANDS_AND_SAVANNAS_INITIAL_UNAIDED_CARRYING_CAP = 20
MANGROVES_INITIAL_UNAIDED_CARRYING_CAP = 20
MEDITERRANEAN_FORESTS_WOODLANDS_AND_SCRUB_INITIAL_UNAIDED_CARRYING_CAP = 20
MONTANE_GRASSLANDS_AND_SHRUBLANDS_INITIAL_UNAIDED_CARRYING_CAP = 20
TEMPERATE_BROADLEAF_AND_MIXED_FORESTS_INITIAL_UNAIDED_CARRYING_CAP = 20
TEMPERATE_CONIFER_FORESTS_INITIAL_UNAIDED_CARRYING_CAP = 20
TEMPERATE_GRASSLANDS_SAVANNAS_AND_SHRUBLANDS_INITIAL_UNAIDED_CARRYING_CAP = 20
TROPICAL_AND_SUBTROPICAL_CONIFEROUS_FORESTS_INITIAL_UNAIDED_CARRYING_CAP = 20
TROPICAL_AND_SUBTROPICAL_DRY_BROADLEAF_FORESTS_INITIAL_UNAIDED_CARRYING_CAP = 20
TROPICAL_AND_SUBTROPICAL_GRASSLANDS_SAVANNAS_AND_SHRUBLANDS_INITIAL_UNAIDED_CARRYING_CAP = 20
TROPICAL_AND_SUBTROPICAL_MOIST_BROADLEAF_FORESTS_INITIAL_UNAIDED_CARRYING_CAP = 20
TUNDRA_INITIAL_UNAIDED_CARRYING_CAP = 10
# Specifying the following as distributions saves
# us from having to specify them separately
# for each location.  The most likely value becomes
# a parameter for a PERT distribution.
########################################################
# TRANSPORTATION_TECH_LEVEL_IN_USE
########################################################
# Valid interval: [0, 0.5]
BOREAL_FORESTS_TAIGA_INITIAL_MOST_LIKELY_TRANSPORTATION_TECH_LEVEL_IN_USE = 0.1
DESERTS_AND_XERIC_SHRUBLANDS_INITIAL_MOST_LIKELY_TRANSPORTATION_TECH_LEVEL_IN_USE = 0
FLOODED_GRASSLANDS_AND_SAVANNAS_INITIAL_MOST_LIKELY_TRANSPORTATION_TECH_LEVEL_IN_USE = 0
MANGROVES_INITIAL_MOST_LIKELY_TRANSPORTATION_TECH_LEVEL_IN_USE = 0
MEDITERRANEAN_FORESTS_WOODLANDS_AND_SCRUB_INITIAL_MOST_LIKELY_TRANSPORTATION_TECH_LEVEL_IN_USE = 0
MONTANE_GRASSLANDS_AND_SHRUBLANDS_INITIAL_MOST_LIKELY_TRANSPORTATION_TECH_LEVEL_IN_USE = 0
TEMPERATE_BROADLEAF_AND_MIXED_FORESTS_INITIAL_MOST_LIKELY_TRANSPORTATION_TECH_LEVEL_IN_USE = 0
TEMPERATE_CONIFER_FORESTS_INITIAL_MOST_LIKELY_TRANSPORTATION_TECH_LEVEL_IN_USE = 0
TEMPERATE_GRASSLANDS_SAVANNAS_AND_SHRUBLANDS_INITIAL_MOST_LIKELY_TRANSPORTATION_TECH_LEVEL_IN_USE = 0
TROPICAL_AND_SUBTROPICAL_CONIFEROUS_FORESTS_INITIAL_MOST_LIKELY_TRANSPORTATION_TECH_LEVEL_IN_USE = 0
TROPICAL_AND_SUBTROPICAL_DRY_BROADLEAF_FORESTS_INITIAL_MOST_LIKELY_TRANSPORTATION_TECH_LEVEL_IN_USE = 0
TROPICAL_AND_SUBTROPICAL_GRASSLANDS_SAVANNAS_AND_SHRUBLANDS_INITIAL_MOST_LIKELY_TRANSPORTATION_TECH_LEVEL_IN_USE = 0
TROPICAL_AND_SUBTROPICAL_MOIST_BROADLEAF_FORESTS_INITIAL_MOST_LIKELY_TRANSPORTATION_TECH_LEVEL_IN_USE = 0
TUNDRA_INITIAL_MOST_LIKELY_TRANSPORTATION_TECH_LEVEL_IN_USE = 0.3

########################################################
# HEALTHCARE_TECH_LEVEL_IN_USE
########################################################
# Valid interval: [0, 0.3]
BOREAL_FORESTS_TAIGA_INITIAL_MOST_LIKELY_HEALTHCARE_TECH_LEVEL_IN_USE = 0
DESERTS_AND_XERIC_SHRUBLANDS_INITIAL_MOST_LIKELY_HEALTHCARE_TECH_LEVEL_IN_USE = 0
FLOODED_GRASSLANDS_AND_SAVANNAS_INITIAL_MOST_LIKELY_HEALTHCARE_TECH_LEVEL_IN_USE = 0
MANGROVES_INITIAL_MOST_LIKELY_HEALTHCARE_TECH_LEVEL_IN_USE = 0
MEDITERRANEAN_FORESTS_WOODLANDS_AND_SCRUB_INITIAL_MOST_LIKELY_HEALTHCARE_TECH_LEVEL_IN_USE = 0
MONTANE_GRASSLANDS_AND_SHRUBLANDS_INITIAL_MOST_LIKELY_HEALTHCARE_TECH_LEVEL_IN_USE = 0
TEMPERATE_BROADLEAF_AND_MIXED_FORESTS_INITIAL_MOST_LIKELY_HEALTHCARE_TECH_LEVEL_IN_USE = 0
TEMPERATE_CONIFER_FORESTS_INITIAL_MOST_LIKELY_HEALTHCARE_TECH_LEVEL_IN_USE = 0
TEMPERATE_GRASSLANDS_SAVANNAS_AND_SHRUBLANDS_INITIAL_MOST_LIKELY_HEALTHCARE_TECH_LEVEL_IN_USE = 0
TROPICAL_AND_SUBTROPICAL_CONIFEROUS_FORESTS_INITIAL_MOST_LIKELY_HEALTHCARE_TECH_LEVEL_IN_USE = 0
TROPICAL_AND_SUBTROPICAL_DRY_BROADLEAF_FORESTS_INITIAL_MOST_LIKELY_HEALTHCARE_TECH_LEVEL_IN_USE = 0
TROPICAL_AND_SUBTROPICAL_GRASSLANDS_SAVANNAS_AND_SHRUBLANDS_INITIAL_MOST_LIKELY_HEALTHCARE_TECH_LEVEL_IN_USE = 0
TROPICAL_AND_SUBTROPICAL_MOIST_BROADLEAF_FORESTS_INITIAL_MOST_LIKELY_HEALTHCARE_TECH_LEVEL_IN_USE = 0
TUNDRA_INITIAL_MOST_LIKELY_HEALTHCARE_TECH_LEVEL_IN_USE = 0



# Now, create objects for the relevant distributions
# using the supplied parameters.
INITIAL_HEALTHCARE_TECH_LEVEL_IN_USE = PERT(min_val=0, ml_val=0, max_val=1, lamb=40)
INITIAL_WARFARE_TECH_LEVEL_IN_USE = PERT(min_val=0, ml_val=0, max_val=1, lamb=40)
INITIAL_AGRICULTURAL_TECH_LEVEL_IN_USE = PERT(min_val=0, ml_val=0, max_val=1, lamb=40)
INITIAL_HOUSING_TECH_LEVEL_IN_USE = PERT(min_val=0, ml_val=0, max_val=1, lamb=40)
# Should be a positive number. Larger values indicate increased relevance.
RELEVANCE_OF_DIST_BASED_ON_TRANSPORTATION_TECH_LEVEL = 5

# Get Data from Other Scripts

In [4]:
# ETL for distance matrix:
# Read the pre-made matrix and keep only its strict upper triangle
# (k=1 leaves the diagonal at zero).
dist_mat = np.triu(
    np.loadtxt(DATA_PATH / "dist_matrix.csv", delimiter=","),
    k=1
)
# The number of world locations is dictated by the pre-made dist_mat.
num_world_locations = dist_mat.shape[0]
# Mirror the upper triangle onto the lower triangle so that the
# matrix is symmetric with zeros on the diagonal.
# https://stackoverflow.com/questions/16444930/copy-upper-triangle-to-lower-triangle-in-a-python-matrix
dist_mat = dist_mat + dist_mat.T

## Get geographic data that goes along with the distance matrix.

In [5]:
# This takes 3 min on my computer.
# https://ecoregions.appspot.com/
ecoregions_2017_with_more_points = geopandas.read_file(filename=Path(DATA_PATH, "ecoregions_2017_with_more_points.shp"))

In [6]:
# Convert to an equal-area CRS so that we can find
# the areas of the ecoregions.
# https://gis.stackexchange.com/questions/285266/geopandas-proj4-reproject-to-global-equal-area-projection
ecoregions_2017_with_more_points.to_crs(crs="+proj=eck4 +lon_0=0 +x_0=0 +y_0=0 +datum=WGS84 +ellps=WGS84 +units=m +no_defs", inplace=True)
# Get the area of each ecoregion in square meters.
ecoregions_2017_with_more_points["ecoregion_area_in_sq_m"] = ecoregions_2017_with_more_points.area
# Create a polars dataframe without the geometry info
# for performance reasons.
ecoregions_2017_with_more_points_no_geo = pl.from_pandas(
    data=ecoregions_2017_with_more_points.loc[:, ["ECO_NAME", "BIOME_NAME", "REALM", "ecoregion_area_in_sq_m"]]
)
# Convert the area of each ecoregion to square miles.
ecoregions_2017_with_more_points_no_geo = (ecoregions_2017_with_more_points_no_geo
    .with_columns([
        (pl.col("ecoregion_area_in_sq_m") / (1609.34**2)).alias("ecoregion_area_in_sq_mi")
    ])
    .drop("ecoregion_area_in_sq_m")
)

In [7]:
(ecoregions_2017_with_more_points_no_geo
    .filter((pl.col("REALM") == "Antarctica") | (pl.col("REALM") == "N/A"))
    .select(["ECO_NAME", "BIOME_NAME"])
    .unique()
)

ECO_NAME,BIOME_NAME
str,str
"""Northeast Antarctic Peninsula tundra""","""Tundra"""
"""Marie Byrd Land tundra""","""Tundra"""
"""Ellsworth Land tundra""","""Tundra"""
"""Prince Charles Mountains tundra""","""Tundra"""
"""South Orkney Islands tundra""","""Tundra"""
"""Transantarctic Mountains tundra""","""Tundra"""
"""Rock and Ice""","""N/A"""
"""East Antarctic tundra""","""Tundra"""
"""Ellsworth Mountains tundra""","""Tundra"""
"""Enderby Land tundra""","""Tundra"""


In [8]:
(ecoregions_2017_with_more_points_no_geo
    .filter(pl.col("REALM") == "N/A")
    .select("ECO_NAME")
    .unique()
)

ECO_NAME
str
"""Rock and Ice"""


How many ecoregions are there?

In [9]:
(ecoregions_2017_with_more_points_no_geo
    .select("ECO_NAME")
    .unique()
    .select(pl.col("ECO_NAME").count())
)

ECO_NAME
u32
847


What are the names of the biomes?

In [10]:
(ecoregions_2017_with_more_points_no_geo
    .select("BIOME_NAME")
    .unique()
    .sort("BIOME_NAME")
)

BIOME_NAME
str
"""Boreal Forests/Taiga"""
"""Deserts & Xeric Shrublands"""
"""Flooded Grasslands & Savannas"""
"""Mangroves"""
"""Mediterranean Forests, Woodlands & Scrub"""
"""Montane Grasslands & Shrublands"""
"""N/A"""
"""Temperate Broadleaf & Mixed Forests"""
"""Temperate Conifer Forests"""
"""Temperate Grasslands, Savannas & Shrublands"""


Use ecoregions_2017_with_more_points_no_geo to store data about each location.

In [11]:
# Each row in ecoregions_2017_with_more_points_no_geo corresponds to a different
# world location.  Some of the locations are in the same ecoregion.
ecoregions_2017_with_more_points_no_geo.head()

ECO_NAME,BIOME_NAME,REALM,ecoregion_area_in_sq_mi
str,str,str,f64
"""Adelie Land tundra""","""Tundra""","""Antarctica""",68.246199
"""Admiralty Islands lowland rain forests""","""Tropical & Subtropical Moist Broadleaf Forests""","""Australasia""",815.701401
"""Aegean and Western Turkey sclerophyllous and mixed forests""","""Mediterranean Forests, Woodlands & Scrub""","""Palearctic""",51643.0957
"""Aegean and Western Turkey sclerophyllous and mixed forests""","""Mediterranean Forests, Woodlands & Scrub""","""Palearctic""",51643.0957
"""Aegean and Western Turkey sclerophyllous and mixed forests""","""Mediterranean Forests, Woodlands & Scrub""","""Palearctic""",51643.0957


In [12]:
# What are the areas of each location
# if each ecoregion in which they are located
# is divided into equal area sections according
# to the number of locations in that ecoregion?
ecoregions_2017_with_more_points_no_geo = (ecoregions_2017_with_more_points_no_geo
    .with_columns([
        (pl.col("ecoregion_area_in_sq_mi") / pl.count("ECO_NAME").over("ECO_NAME"))
        .alias("location_area_in_sq_mi")
    ])
)

Google says that the total land area of the earth is about $57268900$ square miles.
What is our total land area?

In [13]:
(ecoregions_2017_with_more_points_no_geo
    .unique()
    .group_by("BIOME_NAME")
    .agg([
        pl.sum("ecoregion_area_in_sq_mi").alias("total_biome_area_in_sq_mi")
    ])
    .select(pl.col("total_biome_area_in_sq_mi").sum().alias("total_land_area"))
)

total_land_area
f64
56624000.0


## Incorporation of Factors

In [14]:
# Store info. based on treatments for use later.
# Initial average population density (people per square mile) for
# each biome, keyed by the BIOME_NAME value it applies to.
_initial_density_by_biome = {
    "Boreal Forests/Taiga": BOREAL_FORESTS_TAIGA_INITIAL_AVG_POP_DENSITY,
    "Deserts & Xeric Shrublands": DESERTS_AND_XERIC_SHRUBLANDS_INITIAL_AVG_POP_DENSITY,
    "Flooded Grasslands & Savannas": FLOODED_GRASSLANDS_AND_SAVANNAS_INITIAL_AVG_POP_DENSITY,
    "Mangroves": MANGROVES_INITIAL_AVG_POP_DENSITY,
    "Mediterranean Forests, Woodlands & Scrub": MEDITERRANEAN_FORESTS_WOODLANDS_AND_SCRUB_INITIAL_AVG_POP_DENSITY,
    "Montane Grasslands & Shrublands": MONTANE_GRASSLANDS_AND_SHRUBLANDS_INITIAL_AVG_POP_DENSITY,
    "Temperate Broadleaf & Mixed Forests": TEMPERATE_BROADLEAF_AND_MIXED_FORESTS_INITIAL_AVG_POP_DENSITY,
    "Temperate Conifer Forests": TEMPERATE_CONIFER_FORESTS_INITIAL_AVG_POP_DENSITY,
    "Temperate Grasslands, Savannas & Shrublands": TEMPERATE_GRASSLANDS_SAVANNAS_AND_SHRUBLANDS_INITIAL_AVG_POP_DENSITY,
    "Tropical & Subtropical Coniferous Forests": TROPICAL_AND_SUBTROPICAL_CONIFEROUS_FORESTS_INITIAL_AVG_POP_DENSITY,
    "Tropical & Subtropical Dry Broadleaf Forests": TROPICAL_AND_SUBTROPICAL_DRY_BROADLEAF_FORESTS_INITIAL_AVG_POP_DENSITY,
    "Tropical & Subtropical Grasslands, Savannas & Shrublands": TROPICAL_AND_SUBTROPICAL_GRASSLANDS_SAVANNAS_AND_SHRUBLANDS_INITIAL_AVG_POP_DENSITY,
    "Tropical & Subtropical Moist Broadleaf Forests": TROPICAL_AND_SUBTROPICAL_MOIST_BROADLEAF_FORESTS_INITIAL_AVG_POP_DENSITY,
    "Tundra": TUNDRA_INITIAL_AVG_POP_DENSITY,
}

# Don't have people in Antarctica or in the "N/A" realm to start.
# This condition comes first so that it takes precedence over the
# biome-based branches below.
_pop_size_expr = pl.when(
    (pl.col("REALM") == "N/A") | (pl.col("REALM") == "Antarctica")
).then(
    pl.lit(value=0, dtype=pl.Int64)
)

# Every other location starts with (biome density) x (location area).
# There is deliberately no .otherwise() branch, as in the original chain.
for _biome_name, _density in _initial_density_by_biome.items():
    _pop_size_expr = (_pop_size_expr
        .when(pl.col("BIOME_NAME") == _biome_name)
        .then(_density * pl.col("location_area_in_sq_mi"))
    )

ecoregions_2017_with_more_points_no_geo = (ecoregions_2017_with_more_points_no_geo
    .with_columns([
        _pop_size_expr.round(0).alias("pop_size")
    ])
)

ecoregions_2017_with_more_points_no_geo.head()

ECO_NAME,BIOME_NAME,REALM,ecoregion_area_in_sq_mi,location_area_in_sq_mi,pop_size
str,str,str,f64,f64,f64
"""Adelie Land tundra""","""Tundra""","""Antarctica""",68.246199,68.246199,0.0
"""Admiralty Islands lowland rain forests""","""Tropical & Subtropical Moist Broadleaf Forests""","""Australasia""",815.701401,815.701401,408.0
"""Aegean and Western Turkey sclerophyllous and mixed forests""","""Mediterranean Forests, Woodlands & Scrub""","""Palearctic""",51643.0957,17214.365233,8607.0
"""Aegean and Western Turkey sclerophyllous and mixed forests""","""Mediterranean Forests, Woodlands & Scrub""","""Palearctic""",51643.0957,17214.365233,8607.0
"""Aegean and Western Turkey sclerophyllous and mixed forests""","""Mediterranean Forests, Woodlands & Scrub""","""Palearctic""",51643.0957,17214.365233,8607.0


# Construct Complete Graph and Initialize it With Starting Attributes for Experiment

In [15]:
pre_world = nx.complete_graph(num_world_locations)

In [16]:
# Store the distance between each pair of world locations
# as the "weight" attribute of the edge joining them.
# https://stackoverflow.com/questions/17051589/parsing-through-edges-in-networkx-graph
# https://trenton3983.github.io/files/projects/2020-05-21_intro_to_network_analysis_in_python/2020-05-21_intro_to_network_analysis_in_python.html
# https://stackoverflow.com/questions/40128692/networkx-how-to-add-weights-to-an-existing-g-edges
for v1, v2 in pre_world.edges():
    pre_world.edges[v1, v2]["weight"] = dist_mat[v1, v2]

In [19]:
ecoregions_2017_with_more_points_no_geo.columns

['ECO_NAME',
 'BIOME_NAME',
 'REALM',
 'ecoregion_area_in_sq_mi',
 'location_area_in_sq_mi',
 'pop_size']

In [None]:
# Update initial node attributes
# Build one attribute dict per world location (node), then attach all of
# them to the graph in a single call at the end of the cell.
# NOTE(review): `initial_avg_carrying_capacity_per_location` and
# `INITIAL_TRANSPORTATION_TECH_LEVEL_IN_USE` are not defined anywhere in the
# visible part of this notebook -- confirm they exist before running.
pre_world_node_attributes = {}
for node_id in range(num_world_locations):
    pre_world_node_attributes[node_id] = {
        # Row node_id of the dataframe is taken to describe node node_id.
        "pop_size": ecoregions_2017_with_more_points_no_geo[node_id, "pop_size"], 
        "carrying_capacity": round(initial_avg_carrying_capacity_per_location),
        "transportation_technology_level_in_use": INITIAL_TRANSPORTATION_TECH_LEVEL_IN_USE.rvs().item(),
        # The remaining scalar values are fixed placeholders shared by every node.
        "sortino_ratio": 0.5,
        # 0 = no knowledge
        # 1 = perfect knowledge
        "knowledge_of_neighbors": np.zeros(shape=num_world_locations),
        # calculate this as a softargmax of sortino ratios 
        "proportion_desirous_to_emigrate": 0.2,
        "emigration_success_rate": 0.5,
        # One slot per world location; filled in by the neighbor loop below.
        "energy_hills_to_neighbors": np.zeros(shape=num_world_locations)
    }
    for neighbor in pre_world.neighbors(node_id):
        # Calculate stuff we need to store in the current 
        # node in relation to its neighbors.
        # NOTE(review): the arguments below depend only on node_id, not on
        # neighbor; if swf.weight_0_more is deterministic this call could be
        # hoisted out of the inner loop -- confirm before changing.
        relevance_of_dist = swf.weight_0_more(
            x=pre_world_node_attributes[node_id]["transportation_technology_level_in_use"],
            b=RELEVANCE_OF_DIST_BASED_ON_TRANSPORTATION_TECH_LEVEL
        )
        # Scale the edge weight (distance) by the relevance factor.
        energy_hill_to_neighbor = relevance_of_dist * pre_world[node_id][neighbor]["weight"] 
        # Add info. to attribute dict for node_id.
        pre_world_node_attributes[node_id]["energy_hills_to_neighbors"][neighbor] = energy_hill_to_neighbor

# Set our node attributes.
nx.set_node_attributes(pre_world, pre_world_node_attributes)

# Test Run

In [None]:
# Test looping structure: advance every location's population by one
# logistic-growth step per simulated year.
# https://stackoverflow.com/questions/53508805/simple-way-for-modifying-attributes-of-single-nodes-in-networkx-2-1?rq=3
for t in range(SIMULATION_YEARS):
    for node_id in range(num_world_locations):
        node_state = pre_world_node_attributes[node_id]
        node_state["pop_size"] = swf.logistic_growth(
            previous_pop=node_state["pop_size"],
            r=0.05,
            carrying_cap=node_state["carrying_capacity"]
        )

# Push the updated attribute dicts back onto the graph.
nx.set_node_attributes(pre_world, pre_world_node_attributes)

In [None]:
# Compute betweenness centralities for the world graph (using the edge
# "weight" attribute) and for the biome graph (unweighted).
# NOTE(review): `pre_biomes` and `BIOMES` are not defined anywhere in the
# visible part of this notebook -- confirm they exist before running.
pre_world_betweenness_centralities = nx.betweenness_centrality(
    G=pre_world,
    weight="weight"
)

pre_biomes_betweenness_centralities = nx.betweenness_centrality(
    G=pre_biomes
)

# Get a node with a maximal betweenness centrality.
# This node will hold our starting population.
# https://stackoverflow.com/a/280156/8423001
# max() over a dict iterates its keys; key=...get ranks them by value.
starting_node = max(
    pre_world_betweenness_centralities, 
    key=pre_world_betweenness_centralities.get
)

# Same selection, applied to the biome graph.
starting_node_biome_id = max(
    pre_biomes_betweenness_centralities, 
    key=pre_biomes_betweenness_centralities.get
)

# Look up the biome stored under that id in BIOMES.
starting_node_biome = BIOMES[starting_node_biome_id]

In [None]:
sorted(list(pre_biomes_betweenness_centralities.values()))

In [None]:
# https://stackoverflow.com/a/3071441/8423001
(
    stats.rankdata(
        a=list(pre_biomes_betweenness_centralities.values()),
        method="dense"
    )
    # Because the ranks start at 1 but Python is 0-indexed,
    # subtract 1.
    - 1
)

In [None]:
sum(np.array(list(pre_biomes_betweenness_centralities.values())) <= 0.01)

In [None]:
stats.rankdata(
        a=[-2, 0, 3, 3, 3],
        method="max"
    )

In [None]:
# def stochastic_func(
#     x,
#     b,
#     corr
# ):
#     rng = np.random.default_rng()
#     std_x = np.std(x)
#     if std_x == 0:
#         y = rng.choice(np.arange(b + 1))
#     else:
#         x_normalized = (x - np.mean(x))/np.std(x)
 
#         y_normalized = corr * x_normalized
#         std_ints = np.std(np.arange(b + 1))
#         mean_ints = (1 + b)/2
#         y = y_normalized * std_ints + mean_ints
#     return y

Copula Stuff

In [None]:
def gaussian_copula(*args, **kwargs):
    """Get the value of a Gaussian Copula.

    See:
    https://en.wikipedia.org/wiki/Copula_(probability_theory)#Gaussian_copula

    Args:
        *args: The point at which to evaluate the copula, one
            positional argument per dimension.  Each should be a
            real number in [0, 1].
        **kwargs: Passed on to scipy.stats.multivariate_normal.cdf.
            Should contain a key=value combination where the key
            is cov.

    Returns:
        The value of the copula at the supplied point.  This is 0.0
        whenever at least one of the supplied coordinates is 0.
    """
    # Map each coordinate to the standard normal scale once and reuse it.
    quantiles = stats.norm.ppf(q=args)

    # The multivariate_normal.cdf returns nan when the corresponding
    # probability law is at least two dimensional and at least one of
    # the values supplied to x is -inf.  However, we think that it is
    # reasonable for it just to return 0 instead of nan.
    if (quantiles == float("-inf")).any():
        return 0.0

    return stats.multivariate_normal.cdf(
        x=quantiles,
        mean=np.zeros(shape=len(args)),
        allow_singular=True,
        **kwargs
    )

In [None]:
def bivariate_discrete_copula_pmf(C, u:int, v:int, R:int, S:int, **kwargs) -> float:
    """Get the value of the probability mass function
    at (u, v) using the copula function C.

    source: https://doi.org/10.1515/demo-2020-0022
    see: equation 7.1

    Args:
        C: function. The copula.  It is called as C(a, b, **kwargs)
            with a and b in [0, 1].
        u: int. Must be in {0, 1, ..., R - 1}.
        v: int. Must be in {0, 1, ..., S - 1}.
        R: int. The number of possible values of u.
        S: int. The number of possible values of v.
        **kwargs: additional name=value pairs that are passed to C.

    Returns:
        float. The probability mass at (u, v), guaranteed to lie
        in [0, 1].

    Raises:
        ValueError: if u or v is outside of its valid range.
        RuntimeError: if the computed mass falls outside of [0, 1] by
            more than machine epsilon.
    """
    if (u < 0) or (u > (R - 1)):
        raise ValueError("u must be in {0, 1, ..., R - 1}")
    if (v < 0) or (v > (S - 1)):
        raise ValueError("v must be in {0, 1, ..., S - 1}")

    # Corners of the grid cell that belongs to (u, v).
    u_low, u_high = u/R, (u + 1)/R
    v_low, v_high = v/S, (v + 1)/S

    # The mass of the cell is the C-volume of the rectangle.
    pmf = C(u_high, v_high, **kwargs) \
        - C(u_low, v_high, **kwargs) \
        - C(u_high, v_low, **kwargs) \
        + C(u_low, v_low, **kwargs)

    tolerance = sys.float_info.epsilon
    if (pmf < (0 - tolerance)) or (pmf > (1 + tolerance)):
        raise RuntimeError("C appears to be an invalid copula.")

    # Round-off can leave a value a hair outside of [0, 1] that still
    # passes the check above.  Clamp it so that callers never receive
    # a negative probability or one greater than 1.
    return min(max(pmf, 0.0), 1.0)

In [None]:
def make_conditional_pmf(C, R:int, S:int, **kwargs):
    """Make conditional PMF array.  
    
    For all u in {0, 1, ..., R - 1},
    determine the conditional distribution:
    P(V=v|U=u).
    Save this as a two-dimensional array
    where the (i, j) entry in the array
    represents P(V=j|U=i).

    Args:
        C: function. This is the function for a copula.

        R: int. The number of possible values of U
            (the number of rows in the result).

        S: int. The number of possible values of V
            (the number of columns in the result).

        **kwargs: additional name=value pairs that can
            be passed to C.
  
    Returns:
        numpy.ndarray of shape (R, S).
    """
    # Masses are converted to integer counts on this scale before
    # each row is renormalized.
    scale = 2 ** (32 - 1)

    conditional_pmf_array = np.empty(shape=(R, S))
    for u in range(R):
        for v in range(S):
            # Save a preliminary value using the copula that the
            # caller supplied.
            conditional_pmf_array[u, v] = bivariate_discrete_copula_pmf(
                C=C, 
                u=u, 
                v=v, 
                R=R, 
                S=S, 
                **kwargs
            )
        # Now, after getting part of the array filled out,
        # do some rescaling to make sure we are
        # constructing a valid probability distribution.
        # int64 is used because a mass of exactly 1.0 scales to 2**31,
        # which does not fit in an int32.
        probs_as_ints = (conditional_pmf_array[u, :] * scale).astype(np.int64)
        probs_as_probs = (probs_as_ints / probs_as_ints.sum())
        conditional_pmf_array[u, :] = probs_as_probs

    return conditional_pmf_array

In [None]:
num_biomes = len(BIOMES)
corr = np.array([
    [1, 0.9],
    [0.9, 1]
])

conditional_pmf = make_conditional_pmf(
    C=gaussian_copula,
    R=NUM_WORLD_LOCATIONS,
    S=num_biomes,
    cov=corr
)

In [None]:
def get_correlated_ranks(
    conditional_pmf,
    rng
):
    """Given a bivariate conditional_pmf formatted
    as an array, return 0-index-based ranks.

    One Y rank is drawn for every X rank (row) according to that
    row's distribution.  The whole draw is repeated until every
    Y rank appears at least once.

    Args:
        conditional_pmf: numpy.ndarray. Row x is the distribution
            of the Y rank given that the X rank is x.
        rng: numpy.random._generator.Generator
    
    Returns:
        numpy.ndarray. The order of the elements
        in the 1-dimensional array is significant.

    Raises:
        NotImplementedError: if conditional_pmf has fewer rows
            than columns.
        ValueError: if some Y rank has zero probability in every
            row, because then no draw could ever contain every
            Y rank.
    """
    num_x_ranks, num_y_ranks = conditional_pmf.shape[0], conditional_pmf.shape[1]

    if num_x_ranks < num_y_ranks:
        raise NotImplementedError(
            "num_y_ranks must be <= num_x_ranks\n "
            "Please make sure that conditional_pmf has a "
            "number of rows greater than or equal to its "
            "number of columns.  Also, make sure that "
            "each row is a valid probability distribution."
        )

    # Without this guard the resampling loop below would never end.
    if not (conditional_pmf > 0).any(axis=0).all():
        raise ValueError(
            "Every column of conditional_pmf must have positive "
            "probability in at least one row."
        )

    y_ranks = np.empty(shape=num_x_ranks, dtype=int)
    all_y_ranks = np.arange(num_y_ranks)

    # Repeatedly generate possible realizations of 
    # ranks for the Y random variable
    # until surjectivity is achieved.
    while True:
        for x_rank in range(num_x_ranks):
            # Choose y_ranks[x_rank] based on 
            # the conditional PMF for 
            # the current value of x_rank.
            y_ranks[x_rank] = rng.choice(
                # Choose from all of the possible
                # Y ranks.
                a=num_y_ranks, 
                # Weight the choice according to
                # the conditional_pmf.
                p=conditional_pmf[x_rank, :], 
                size=1,
                replace=True,
                shuffle=False
            ).item()

        # Test for surjectivity after building out y_ranks
        if np.isin(element=all_y_ranks, test_elements=y_ranks).all():
            return y_ranks

In [None]:
biome_indices_for_world_locations = get_correlated_ranks(
    conditional_pmf=conditional_pmf,
    rng=rng
)

biomes_for_world_locations = [BIOMES[b] for b in biome_indices_for_world_locations]

In [None]:
# https://realpython.com/iterate-through-dictionary-python/#iterating-through-dictionaries-comprehension-examples
{n: {"biome": biomes_for_world_locations[n]} for n in range(NUM_WORLD_LOCATIONS)}

In [None]:
# We plan on assigning biomes to the nodes in our world.
# But, we must consider that some biomes are more likely
# to be connected.  Thus, we assign the biomes randomly
# while taking account of the betweenness centralities.
# With probability 0.5, we assign neighbors the same
# biome, while with probability 0.5, we assign neighbors
# a new biome of similar betweenness centrality.
1.0 / NUM_WORLD_LOCATIONS
sorted(pre_biomes_betweenness_centralities.values())
# Given a value of the ECDF of pre_world_betweenness_centralities
# generate an appropriately positioned random rank
# within pre_biomes_betweenness_centralities.
# First, rank the pre_world_betweenness_centralities.
sorted(pre_world_betweenness_centralities.values())
# Second, find the value of the ECDF for each rank.

In [None]:
np.quantile(
    a=list(pre_world_betweenness_centralities.values()),
    q=0.5
)

In [None]:
# Show the ids of the biomes adjacent to the starting biome.
# The loop variable is deliberately not called `id` so that the builtin
# id() is not shadowed for the rest of the session.
for neighbor_biome_id in pre_biomes.neighbors(starting_node_biome_id):
    print(neighbor_biome_id)

In [None]:
# Set the initial "biome" attribute of every world location.
# The values dict already covers every node, so a single call is enough;
# repeating the identical call once per node only repeated the same work.
# https://realpython.com/iterate-through-dictionary-python/#iterating-through-dictionaries-comprehension-examples
nx.set_node_attributes(
    G=pre_world,
    values={n: {"biome": biomes_for_world_locations[n]} for n in range(NUM_WORLD_LOCATIONS)}
)

In [None]:
# https://github.com/WestHealth/pyvis/issues/48
world_layout = nx.spring_layout(G=pre_world, iterations=1, threshold=0.01)

In [None]:
world.from_nx(nx_graph=pre_world, show_edge_weights=True)
for node in world.nodes:
    node["x"] = world_layout[node["id"]][0] * 1000
    node["y"] = world_layout[node["id"]][1] * 1000
world.toggle_physics(False)
world.show("fast_world.html")

In [None]:
world.from_nx(nx_graph=pre_world, show_edge_weights=True)
world.show("world.html")