# Human Population Size on an Earth-like Planet--a Computer Experiment

In [1]:
from pathlib import Path
import sys

import networkx as nx
from pyvis.network import Network
import numpy as np
from scipy import stats
# from pert import PERT
import plotly.express as px

In [None]:
# Notebook-wide NumPy random generator. No seed is passed, so the
# random draws differ from run to run; pass a seed to default_rng()
# when a reproducible run is needed.
rng = np.random.default_rng()

The response variable is total population size after SIMULATION_YEARS.

In [2]:
# https://en.wikipedia.org/wiki/Global_200
# The WWF has identified 142 terrestrial ecoregions.
# Setting NUM_WORLD_LOCATIONS to 37067 makes each location roughly
# the size of Rhode Island (based on the total land area of the
# earth), but that many nodes takes forever to visualize and
# does not look very good.
NUM_WORLD_LOCATIONS = 100
SIMULATION_YEARS = 10_000
INITIAL_POPULATION_PROPORTION = 1e-4
# Total land area of the planet, in square miles.
TOTAL_LAND_AREA = 57_268_900
# Average maximum sustainable population density (people per square
# mile) on the planet for a hunter-gatherer society.
# https://en.wikipedia.org/wiki/Hunter-gatherer#:~:text=One%20group%2C%20the%20Chumash%2C%20had,21.6%20persons%20per%20square%20mile.
INITIAL_MAX_POP_DENSITY = 20
# Planet-wide carrying capacity split evenly across the locations.
INITIAL_AVG_CARRYING_CAPACITY_PER_LOCATION = (
    INITIAL_MAX_POP_DENSITY * TOTAL_LAND_AREA / NUM_WORLD_LOCATIONS
)

# The 14 terrestrial biome types; a biome's position in this list
# doubles as its integer id.
# https://en.wikipedia.org/wiki/Global_200#Global_200:_Terrestrial
BIOMES = [
    "tropical and subtropical moist broadleaf forests",
    "tropical and subtropical dry broadleaf forests",
    "tropical and subtropical coniferous forests",
    "temperate broadleaf and mixed forests",
    "temperate coniferous forests",
    "boreal forests/taiga",
    "tropical and subtropical grasslands, savannas, and shrublands",
    "temperate grasslands, savannas, and shrublands",
    "flooded grasslands and savannas",
    "montane grasslands and shrublands",
    "tundra",
    "Mediterranean forests, woodlands, and scrub or sclerophyll forests",
    "deserts and xeric shrublands",
    "mangrove",
]

print(
    f"INITIAL_AVG_CARRYING_CAPACITY_PER_LOCATION: {INITIAL_AVG_CARRYING_CAPACITY_PER_LOCATION}"
)

INITIAL_AVG_CARRYING_CAPACITY_PER_LOCATION: 11453780.0


In [3]:
# Associate each biome with a color.
# Build a new list instead of calling .extend() on
# px.colors.qualitative.Prism: that list belongs to plotly, so extending
# it in place would make the shared palette grow every time this cell
# is re-run.
my_biome_colors = [
    *px.colors.qualitative.Prism,
    *px.colors.qualitative.Pastel2,
]

In [4]:
# Give each biome the palette color that sits at the same position.
# Indexing (rather than zip) keeps the IndexError if the palette is
# ever shorter than BIOMES.
biomes_with_colors = {
    biome: my_biome_colors[position]
    for position, biome in enumerate(BIOMES)
}

biomes_with_colors

{'tropical and subtropical moist broadleaf forests': 'rgb(95, 70, 144)',
 'tropical and subtropical dry broadleaf forests': 'rgb(29, 105, 150)',
 'tropical and subtropical coniferous forests': 'rgb(56, 166, 165)',
 'temperate broadleaf and mixed forests': 'rgb(15, 133, 84)',
 'temperate coniferous forests': 'rgb(115, 175, 72)',
 'boreal forests/taiga': 'rgb(237, 173, 8)',
 'tropical and subtropical grasslands, savannas, and shrublands': 'rgb(225, 124, 5)',
 'temperate grasslands, savannas, and shrublands': 'rgb(204, 80, 62)',
 'flooded grasslands and savannas': 'rgb(148, 52, 110)',
 'montane grasslands and shrublands': 'rgb(111, 64, 112)',
 'tundra': 'rgb(102, 102, 102)',
 'Mediterranean forests, woodlands, and scrub or sclerophyll forests': 'rgb(179,226,205)',
 'deserts and xeric shrublands': 'rgb(253,205,172)',
 'mangrove': 'rgb(203,213,232)'}

In [5]:
# Node attributes for the biome graph, keyed by the biome's
# position in BIOMES (which serves as the node id).
pre_biomes_node_attributes = {
    node_id: {"biome": biome, "color": biomes_with_colors[biome]}
    for node_id, biome in enumerate(BIOMES)
}
pre_biomes_node_attributes

{0: {'biome': 'tropical and subtropical moist broadleaf forests',
  'color': 'rgb(95, 70, 144)'},
 1: {'biome': 'tropical and subtropical dry broadleaf forests',
  'color': 'rgb(29, 105, 150)'},
 2: {'biome': 'tropical and subtropical coniferous forests',
  'color': 'rgb(56, 166, 165)'},
 3: {'biome': 'temperate broadleaf and mixed forests',
  'color': 'rgb(15, 133, 84)'},
 4: {'biome': 'temperate coniferous forests', 'color': 'rgb(115, 175, 72)'},
 5: {'biome': 'boreal forests/taiga', 'color': 'rgb(237, 173, 8)'},
 6: {'biome': 'tropical and subtropical grasslands, savannas, and shrublands',
  'color': 'rgb(225, 124, 5)'},
 7: {'biome': 'temperate grasslands, savannas, and shrublands',
  'color': 'rgb(204, 80, 62)'},
 8: {'biome': 'flooded grasslands and savannas', 'color': 'rgb(148, 52, 110)'},
 9: {'biome': 'montane grasslands and shrublands',
  'color': 'rgb(111, 64, 112)'},
 10: {'biome': 'tundra', 'color': 'rgb(102, 102, 102)'},
 11: {'biome': 'Mediterranean forests, woodlands, a

In [None]:
# We actually don't care about making the PyVis graph
# for just the biomes, so delete this part later.
biomes = Network(
    directed=False,
    neighborhood_highlight=False,
    select_menu=True,
    filter_menu=True,
    # https://pyvis.readthedocs.io/en/latest/tutorial.html
    bgcolor="#222222",
    font_color="white",
    cdn_resources="in_line",
)

pre_biomes = nx.Graph()

# https://stackoverflow.com/a/47555011/8423001
# add_nodes_from() attaches attributes when it is given
# (node, attribute_dict) pairs, and dict.items() already yields exactly
# those pairs.  Passing the dict itself would only add its keys as
# bare nodes.
pre_biomes.add_nodes_from(pre_biomes_node_attributes.items())

# For all vertices a and b in pre_biomes, a and b
# are adjacent if a and b are geographically adjacent
# when looking at a map of biomes on Earth. 
# The integer ids are positions in BIOMES (0 is the first entry,
# 13 is "mangrove").  The graph is undirected, so each pair
# is listed once, with the smaller id first.
pre_biomes.add_edges_from(
    [
        (0, 1),
        (0, 2),
        (0, 3),
        (0, 4),
        (0, 6),
        (0, 9),
        (0, 12),
        (0, 13),
        (1, 2),
        (1, 6),
        (1, 8),
        (1, 9),
        (1, 12),
        (1, 13),
        (2, 3),
        (2, 4),
        (2, 6),
        (2, 8),
        (2, 9),
        (2, 12),
        (2, 13),
        (3, 4),
        (3, 5),
        (3, 6),
        (3, 7),
        (3, 8),
        (3, 9),
        (3, 11),
        (3, 12),
        (3, 13),
        (4, 5),
        (4, 6),
        (4, 7),
        (4, 8),
        (4, 9),
        (4, 10),
        (4, 11),
        (4, 12),
        (4, 13),
        (5, 7),
        (5, 8),
        (5, 9),
        (5, 10),
        (6, 7),
        (6, 8),
        (6, 9),
        (6, 12),
        (6, 13),
        (7, 8),
        (7, 9),
        (7, 11),
        (7, 12),
        (8, 9),
        (8, 11),
        (8, 12),
        (8, 13),
        (9, 11),
        (9, 12),
        (9, 13),
        (11, 12),
        (11, 13),
        (12, 13)
    ]
)

In [None]:
# Load the NetworkX biome graph into the PyVis network for rendering.
# https://github.com/WestHealth/pyvis/issues/45
# PyVis does not inherit all properties from Networkx object!
biomes.from_nx(nx_graph=pre_biomes)

In [None]:
# Render the interactive biome graph to biomes.html.
biomes.show("biomes.html")

In [6]:
# PyVis container that will render the world graph.
world = Network(
    directed=True,
    neighborhood_highlight=True, 
    select_menu=False, 
    filter_menu=False,
    cdn_resources="in_line"
)

# Random, connected small-world graph with one node per world location.
# No seed is passed, so the topology differs from run to run.
# NOTE(review): the NetworkX docs say that an odd k joins each node to
# only k - 1 ring neighbors, so k=5 should behave like k=4 -- confirm
# that this is intended.
pre_world = nx.connected_watts_strogatz_graph(
    n=NUM_WORLD_LOCATIONS,
    k=5,
    p=0.5
)

In [None]:
# Make edge weights between the world locations
# to represent the ease of travel between those locations.
# https://trenton3983.github.io/files/projects/2020-05-21_intro_to_network_analysis_in_python/2020-05-21_intro_to_network_analysis_in_python.html
# https://stackoverflow.com/questions/40128692/networkx-how-to-add-weights-to-an-existing-g-edges
for v1, v2 in pre_world.edges:
    # Here, the weights represent the ease of travelling between nodes.
    # A high weight indicates that travel is easy.
    # Draw from the notebook-wide generator so that seeding `rng`
    # is enough to make the weights reproducible.
    pre_world[v1][v2]["weight"] = stats.expon.rvs(scale=1, random_state=rng)

# Make the graph directed to indicate
# allowable population movements.
pre_world = pre_world.to_directed()

In [None]:
# Make the ease of travel different for one of the two directed edges
# connecting the same pair of nodes, to simulate ocean currents.
#
# Only the v1 < v2 direction is considered, i.e. the upper triangle
# (exclusive of the main diagonal) of the adjacency matrix.  Walking
# the edges that actually exist, instead of probing every possible
# pair of locations, keeps the work proportional to the number of
# edges rather than to NUM_WORLD_LOCATIONS ** 2.
for v1, v2, current_weight in pre_world.edges(data="weight"):
    if v1 < v2 and current_weight < 1:
        # Only attribute values change here, never the graph structure,
        # so updating while iterating over the edges is safe.
        pre_world[v1][v2]["weight"] = stats.expon.rvs(
            scale=0.2,
            random_state=rng
        )


In [None]:
# nx.betweenness_centrality() uses its `weight` attribute to compute
# weighted shortest paths, i.e. it interprets the values as distances
# (larger = farther apart).  The "weight" attribute on pre_world stores
# the ease of travel (larger = closer), which is the opposite meaning.
# Store the reciprocal as a separate "distance" attribute and use that,
# so that easy routes count as short ones.
for _, _, edge_attributes in pre_world.edges(data=True):
    edge_attributes["distance"] = 1.0 / edge_attributes["weight"]

pre_world_betweenness_centralities = nx.betweenness_centrality(
    G=pre_world,
    weight="distance"
)

# The biome graph is unweighted, so every edge counts as one hop.
pre_biomes_betweenness_centralities = nx.betweenness_centrality(
    G=pre_biomes
)

# Get a node with a maximal betweenness centrality.
# This node will hold our starting population.
# https://stackoverflow.com/a/280156/8423001
starting_node = max(
    pre_world_betweenness_centralities,
    key=pre_world_betweenness_centralities.get
)

# Likewise, pick a biome with a maximal betweenness centrality
# in the biome adjacency graph.
starting_node_biome_id = max(
    pre_biomes_betweenness_centralities,
    key=pre_biomes_betweenness_centralities.get
)

starting_node_biome = BIOMES[starting_node_biome_id]

In [None]:
# Biome betweenness centralities in ascending order.
sorted(pre_biomes_betweenness_centralities.values())

In [None]:
# Dense ranks of the biome betweenness centralities, listed in the
# same order as the dictionary's values.
# https://stackoverflow.com/a/3071441/8423001
(
    stats.rankdata(
        a=list(pre_biomes_betweenness_centralities.values()),
        method="dense"
    )
    # Because the ranks start at 1 but Python is 0-indexed,
    # subtract 1.
    - 1
)

In [None]:
# Number of biomes whose betweenness centrality is at most 0.01.
(np.array(list(pre_biomes_betweenness_centralities.values())) <= 0.01).sum()

In [None]:
# Scratch example of how method="max" handles ties: every tied value
# receives the largest rank of its group.
stats.rankdata(
        a=[-2, 0, 3, 3, 3],
        method="max"
    )

In [None]:
# def stochastic_func(
#     x,
#     b,
#     corr
# ):
#     rng = np.random.default_rng()
#     std_x = np.std(x)
#     if std_x == 0:
#         y = rng.choice(np.arange(b + 1))
#     else:
#         x_normalized = (x - np.mean(x))/np.std(x)
 
#         y_normalized = corr * x_normalized
#         std_ints = np.std(np.arange(b + 1))
#         mean_ints = (1 + b)/2
#         y = y_normalized * std_ints + mean_ints
#     return y

Copula Stuff

In [None]:
def gaussian_copula(*args, **kwargs):
    """Get the value of a Gaussian Copula.

    https://en.wikipedia.org/wiki/Copula_(probability_theory)#Gaussian_copula

    Args:
        *args: the point at which to evaluate the copula, one
            real number in [0, 1] per dimension.
        **kwargs: forwarded to scipy.stats.multivariate_normal.cdf.
            Should contain a key=value combination where the key
            is cov.

    Returns:
        float. The value of the copula at the given point.
        It is 0.0 whenever at least one coordinate is 0.
    """
    # Map the coordinates to standard-normal quantiles once and
    # reuse the result below.
    quantiles = stats.norm.ppf(q=args)

    # The multivariate_normal.cdf returns nan when the corresponding
    # probability law is at least two dimensional and at least one of
    # the values supplied to x is -inf.  However, we think that it is
    # reasonable for it just to return 0 instead of nan.
    if np.isneginf(quantiles).any():
        return 0.0

    return stats.multivariate_normal.cdf(
        x=quantiles,
        mean=np.zeros(shape=len(args)),
        allow_singular=True,
        **kwargs
    )

In [None]:
def bivariate_discrete_copula_pmf(C, u:int, v:int, R:int, S:int, **kwargs) -> float:
    """Evaluate the bivariate discrete PMF induced by the copula C at (u, v).

    The probability is the mass that C assigns to the grid cell
    [u/R, (u + 1)/R] x [v/S, (v + 1)/S].

    source: https://doi.org/10.1515/demo-2020-0022
    see: equation 7.1

    Args:
        C: function. The copula; called as C(a, b, **kwargs).
        u: index of the first variable, in {0, 1, ..., R - 1}.
        v: index of the second variable, in {0, 1, ..., S - 1}.
        R: number of values the first variable can take.
        S: number of values the second variable can take.
        **kwargs: additional name=value pairs passed to C.

    Returns:
        The probability mass at (u, v).

    Raises:
        ValueError: if u or v is outside its valid range.
        RuntimeError: if the mass falls outside [0, 1] by more than
            machine epsilon.
    """
    if u < 0 or u > R - 1:
        raise ValueError("u must be in {0, 1, ..., R - 1}")
    if v < 0 or v > S - 1:
        raise ValueError("v must be in {0, 1, ..., S - 1}")

    # Corners of the grid cell in copula coordinates.
    u_low, u_high = u/R, (u + 1)/R
    v_low, v_high = v/S, (v + 1)/S

    # Inclusion-exclusion over the four corners of the cell.
    mass = C(u_high, v_high, **kwargs)
    mass = mass - C(u_low, v_high, **kwargs)
    mass = mass - C(u_high, v_low, **kwargs)
    mass = mass + C(u_low, v_low, **kwargs)

    tolerance = sys.float_info.epsilon
    if mass < -tolerance or mass > 1 + tolerance:
        raise RuntimeError("C appears to be an invalid copula.")

    return mass

In [None]:
def make_conditional_pmf(C, R:int, S:int, **kwargs):
    """Make conditional PMF array.

    For all u in {0, 1, ..., R - 1},
    determine the conditional distribution:
    P(V=v|U=u).
    Save this as a two-dimensional array
    where the (i, j) entry in the array
    represents P(V=j|U=i).

    Args:
        C: function. This is the function for a copula.
            It is called as C(a, b, **kwargs).

        R: number of values that U can take (rows of the result).

        S: number of values that V can take (columns of the result).

        **kwargs: additional name=value pairs that can
            be passed to C.

    Returns:
        numpy.ndarray of shape (R, S).
    """
    # Each probability is quantized to a multiple of 2 ** -31 before
    # the row is renormalized.
    scale = 2 ** (32 - 1)

    conditional_pmf_array = np.empty(shape=(R, S))
    for u in range(R):
        for v in range(S):
            # Save a preliminary value: the joint probability of (u, v)
            # under the copula that the caller supplied.
            conditional_pmf_array[u, v] = bivariate_discrete_copula_pmf(
                C=C,
                u=u,
                v=v,
                R=R,
                S=S,
                **kwargs
            )
        # Now, after getting part of the array filled out,
        # do some rescaling to make sure we are
        # constructing a valid probability distribution.
        # The cast truncates toward zero, which also discards the tiny
        # negative values that bivariate_discrete_copula_pmf tolerates.
        # int64 is used because a probability of exactly 1 scales to
        # 2 ** 31, which does not fit in an int32.
        probs_as_ints = (conditional_pmf_array[u, :] * scale).astype(np.int64)
        conditional_pmf_array[u, :] = probs_as_ints / probs_as_ints.sum()

    return conditional_pmf_array

In [None]:
num_biomes = len(BIOMES)
# Passed to the Gaussian copula as `cov`.  With ones on the diagonal
# the off-diagonal entry, 0.9, is the correlation between the two
# variables.
corr = np.array([
    [1, 0.9],
    [0.9, 1]
])

# Rows (R) correspond to world locations and columns (S) to biomes.
conditional_pmf = make_conditional_pmf(
    C=gaussian_copula,
    R=NUM_WORLD_LOCATIONS,
    S=num_biomes,
    cov=corr
)

In [None]:
def get_correlated_ranks(
    conditional_pmf,
    rng
):
    """Given a bivariate conditional_pmf formatted
    as an array, return 0-index-based ranks.

    One Y rank is drawn for every X rank (row), using that row as the
    probability distribution.  The draw is repeated until every
    possible Y rank occurs at least once.

    Args:
        conditional_pmf: numpy.ndarray.  Row i is the distribution of
            the Y rank given that the X rank is i.
        rng: numpy.random._generator.Generator

    Returns:
        numpy.ndarray. The order of the elements
        in the 1-dimensional array is significant:
        element i is the Y rank drawn for X rank i.

    Raises:
        NotImplementedError: if there are fewer rows than columns.
        ValueError: if some Y rank has probability zero in every row,
            because then no draw can ever contain every Y rank.
    """
    num_x_ranks, num_y_ranks = conditional_pmf.shape[0], conditional_pmf.shape[1]

    if num_x_ranks < num_y_ranks:
        raise NotImplementedError(
"num_y_ranks must be <= num_x_ranks\n \
Please make sure that conditional_pmf has a \
number of rows greater than or equal to its \
number of columns.  Also, make sure that \
each row is a valid probability distribution."
        )

    # Without this guard the rejection loop below would never end.
    if not (conditional_pmf > 0).any(axis=0).all():
        raise ValueError(
            "every column of conditional_pmf must have a positive "
            "probability in at least one row"
        )

    y_ranks = np.empty(shape=num_x_ranks, dtype=int)
    all_y_ranks = np.arange(num_y_ranks)

    # Repeatedly generate possible realizations of
    # ranks for the Y random variable
    # until surjectivity is achieved.
    while True:
        for x_rank in range(num_x_ranks):
            # Choose y_ranks[x_rank] based on
            # the conditional PMF for
            # the current value of x_rank.
            y_ranks[x_rank] = rng.choice(
                # Choose from all of the possible
                # Y ranks.
                a=num_y_ranks,
                # Weight the choice according to
                # the conditional_pmf.
                p=conditional_pmf[x_rank, :],
                size=1,
                replace=True,
                shuffle=False
            ).item()

        # Test for surjectivity after building out y_ranks
        if np.isin(element=all_y_ranks, test_elements=y_ranks).all():
            return y_ranks

In [None]:
# Draw one biome index per world location; element i belongs to
# world-location rank i.
biome_indices_for_world_locations = get_correlated_ranks(
    conditional_pmf=conditional_pmf,
    rng=rng
)

# Translate the biome indices into biome names.
biomes_for_world_locations = [BIOMES[b] for b in biome_indices_for_world_locations]

In [None]:
# Preview the {node: {"biome": name}} mapping for the world locations.
# https://realpython.com/iterate-through-dictionary-python/#iterating-through-dictionaries-comprehension-examples
{n: {"biome": biomes_for_world_locations[n]} for n in range(NUM_WORLD_LOCATIONS)}

In [None]:
# We plan on assigning biomes to the nodes in our world.
# But, we must consider that some biomes are more likely
# to be connected.  Thus, we assign the biomes randomly
# while taking account of the betweenness centralities.
# With probability 0.5, we assign neighbors the same
# biome, while with probability 0.5, we assign neighbors
# a new biome of similar betweenness centrality.
#
# Scratch cell: only the value of the last expression is displayed.
1.0 / NUM_WORLD_LOCATIONS
sorted(pre_biomes_betweenness_centralities.values())
# Given a value of the ECDF of pre_world_betweenness_centralities,
# generate an appropriately positioned random rank
# within pre_biomes_betweenness_centralities.
# First, rank the pre_world_betweenness_centralities.
sorted(pre_world_betweenness_centralities.values())
# Second, find the value of the ECDF for each rank.

In [None]:
# Median (0.5 quantile) of the world betweenness centralities.
np.quantile(
    a=list(pre_world_betweenness_centralities.values()),
    q=0.5
)

In [None]:
# List the biomes adjacent to the starting biome.
# The loop variable is not called `id` because, at notebook top
# level, that would shadow the builtin id() for every later cell.
for neighbor_biome_id in pre_biomes.neighbors(starting_node_biome_id):
    print(neighbor_biome_id)

In [None]:
# Set the initial parameters of every world location.
# nx.set_node_attributes() takes a {node: {attribute: value}} mapping
# and updates all of the listed nodes, so one call covers the whole
# graph; calling it again for each node would only redo the same work.
nx.set_node_attributes(
    G=pre_world,
    # https://realpython.com/iterate-through-dictionary-python/#iterating-through-dictionaries-comprehension-examples
    values={
        n: {"biome": biomes_for_world_locations[n]}
        for n in range(NUM_WORLD_LOCATIONS)
    }
    # TODO: also give each node a "carrying_capacity"
    # attribute (e.g. 1000000).
)

In [None]:
# Precompute node positions with NetworkX so that PyVis does not have
# to run its own physics simulation.
# https://github.com/WestHealth/pyvis/issues/48
# Only a single layout iteration is requested, and no seed is passed,
# so the positions differ from run to run.
world_layout = nx.spring_layout(G=pre_world, iterations=1, threshold=0.01)

In [None]:
# Render the world with fixed, precomputed node positions.
world.from_nx(nx_graph=pre_world, show_edge_weights=True)
for node in world.nodes:
    # Scale the layout coordinates by 1000 before handing
    # them to PyVis as the node's position.
    position = world_layout[node["id"]]
    node["x"] = position[0] * 1000
    node["y"] = position[1] * 1000
# With the positions fixed there is nothing for the physics
# simulation to do, so switch it off.
world.toggle_physics(False)
world.show("fast_world.html")

In [None]:
# Render the world again, this time letting PyVis lay out the nodes
# with its physics simulation.
# Start from a fresh Network: if `world` was already populated for
# fast_world.html, loading pre_world into it a second time would add
# every directed edge again, and the nodes would keep their pinned
# positions and the disabled physics setting.
world = Network(
    directed=True,
    neighborhood_highlight=True,
    select_menu=False,
    filter_menu=False,
    cdn_resources="in_line"
)
world.from_nx(nx_graph=pre_world, show_edge_weights=True)
world.show("world.html")