# 1. Calculation of indicator record value: 

This notebook contains a QA of the queries implemented for the impact calculation in the main LG application. The main purpose is to update these calculations according to the improvements on the methodology: https://docs.google.com/document/d/1IDuYWOllQ2fTf2ZeBUmOtEZpqht2rqMot3G9W3CjEjE/edit#

As part of new strategy the indicator record entity will include:

    - Indicator record value: value of impact in my geometry
    - Indicator recod scaler: equivalent to total commodity production in my location
    - Pointer: h3 table and column name to distribute the impact

In this notebook we will also be covering two approaches to compute the indicator record value:

    a. Get the total indicator record value in my geometry by summing the impact in all the hexagons within my geometry.
    b. Get the indicator record value in my geometry by computing the average risk in all the hexagons within my geometry and multiply that value by the total volume.

In [1]:
# import libraries
from psycopg2.pool import ThreadedConnectionPool

import pandas as pd
from tqdm import tqdm
import json

In [2]:
#set env
## env file for gcs upload
env_path = ".env"
with open(env_path) as f:
    env = {}
    for line in f:
        env_key, _val = line.split("=", 1)
        env_value = _val.split("\n")[0]
        env[env_key] = env_value
        
#list(env.keys())

# set conexion to local ddbb
postgres_thread_pool = ThreadedConnectionPool(
    1, 
    50,
    host=env['API_POSTGRES_HOST'],
    port=env['API_POSTGRES_PORT'],
    user=env['API_POSTGRES_USERNAME'],
    password=env['API_POSTGRES_PASSWORD']
)

#get list of sourcing records to iterate:
conn = postgres_thread_pool.getconn()
cursor = conn.cursor()

##  Compute indicator record value as sum of impacts in area:

As summary the formulas to compute each of the landgriffon impact indicators are shown below:

    Probability purchase area (ppa) = (ha / total ha) * Purchase Volume (tonnes)
    
### Water impacts 

        water imapct (m3/yr) = (BWF * 0.001 / Prod all crops) * ppa
    
    equal to:
    
        water imapct (m3/yr) = (BWF * 0.001 / Prod all crops) * (ha / total ha) * Volume
    

### Land impact:

        Land impact (ha/yr)  = (Harvested area (ha) / Production) *  ppa
        
     equal to:
   
        Land impact (ha/yr)  = (Harvested area (ha) / Production) *  (Ha / total ha) * Volume
    
### Deforestation:

        Deforestation impact (ha/yr) = land impact (ha) * deforestation mask (unitless)
    
    equal to:
    
        Deforestation impact (ha/yr) = (Harvested area (ha) / Production) *  ppa * deforestation mask (unitless)
        Deforestation impact (ha/yr) = (Harvested area (ha) / Production) *  (Ha / total ha) * Volume * deforestation mask (unitless)
        
### Carbon:

        Carbon impact (tCO2e/yr) = net forest carbo emisions (t CO2e) * Deforestation impact (ha)
    
    equal to: 
    
        Carbon impact (tCO2e/yr) = net forest carbo emisions (t CO2e) * (Harvested area (ha) / Production) *  ppa * deforestation mask (unitless)
        Carbon impact (tCO2e/yr) = net forest carbo emisions (t CO2e) * (Harvested area (ha) / Production) *  (Ha / total ha) * Volume * deforestation mask (unitless)
     
    
### Biodiversity:

        Biodiversity  impact  (PDF/yr)  = PSL(PDF m⁻²) * 10⁴(m² ha⁻¹) * Deforestation impact (ha)
    
    equal to: 
    
        Biodiversity  impact  (PDF/yr)  = PSL(PDF m⁻²) * 10⁴(m² ha⁻¹) * (Harvested area (ha) / Production) *  ppa * deforestation mask (unitless)
        Biodiversity  impact  (PDF/yr)  = PSL(PDF m⁻²) * 10⁴(m² ha⁻¹) * (Harvested area (ha) / Production) *   (Ha / total ha) * Volume * deforestation mask (unitless)
         

In [3]:
def psql(query):
    try:
        cursor.execute(query)
        return cursor.fetchall()
    except Exception as e:
        conn.rollback()
        print(e)


In [125]:
SQL_GET_H3_UNCOMPACT_GEO_REGION = """
CREATE OR REPLACE FUNCTION get_h3_uncompact_geo_region(geo_region_id uuid, h3_resolution int)
RETURNS TABLE (h3index h3index) AS 
$$
    SELECT h3_uncompact(geo_region."h3Compact"::h3index[], h3_resolution) h3index
    FROM geo_region WHERE geo_region.id = geo_region_id
$$ 
LANGUAGE SQL;
"""

SQL_SUM_H3_GRID_OVER_GEO_REGION = SQL_GET_H3_UNCOMPACT_GEO_REGION+"""
CREATE OR REPLACE FUNCTION sum_h3_grid_over_georegion(
    geo_region_id uuid, 
    h3_resolution int,
    h3_table_name varchar, 
    h3_column_name varchar
)
RETURNS float AS
$$
    DECLARE
        sum float;
    BEGIN
        EXECUTE format(
            'SELECT sum(h3grid.%I)
                FROM
                    get_h3_uncompact_geo_region($1, $2) geo_region
                    INNER JOIN %I h3grid ON h3grid.h3index = geo_region.h3index;
            ', h3_column_name, h3_table_name)
            USING geo_region_id, h3_resolution
            INTO sum;
        RETURN sum;
    END;
$$
LANGUAGE plpgsql;
"""

SQL_SUMPROD_H3_GRIDS_OVER_GEOREGION = SQL_GET_H3_UNCOMPACT_GEO_REGION+"""
CREATE OR REPLACE FUNCTION sumprod_h3_grids_over_georegion(
    geo_region_id uuid,
    h3_resolution int,
    h3_table_name_1 varchar,
    h3_column_name_1 varchar,
    h3_table_name_2 varchar,
    h3_column_name_2 varchar
)
RETURNS float AS
$$
    DECLARE
        sumprod float;
    BEGIN
        EXECUTE format(
            'SELECT sum(h3grid_1.%I * h3grid_2.%I)
                FROM
                    get_h3_uncompact_geo_region($1, $2) geo_region
                    INNER JOIN %I h3grid_1 ON h3grid_1.h3index = geo_region.h3index
                    INNER JOIN %I h3grid_2 ON h3grid_2.h3index = geo_region.h3index;
            ', h3_column_name_1, h3_column_name_2, h3_table_name_1, h3_table_name_2)
            USING geo_region_id, h3_resolution
            INTO sumprod;
        RETURN sumprod;
    END;
$$
LANGUAGE plpgsql;
"""

SQL_GET_H3_TABLE_COLUMN_FOR_MATERIAL = """
CREATE OR REPLACE FUNCTION get_h3_table_column_for_material(material_id uuid, h3_data_type material_to_h3_type_enum)
RETURNS TABLE (h3_table_name varchar, h3_column_name varchar, h3_resolution int) AS
$$
    SELECT h3_data."h3tableName", h3_data."h3columnName", h3_data."h3resolution"
    FROM h3_data
        INNER JOIN material_to_h3 ON material_to_h3."h3DataId" = h3_data.id
    WHERE material_to_h3."materialId" = material_id 
        AND material_to_h3.type = h3_data_type
    LIMIT 1;
$$
LANGUAGE SQL;
"""

SQL_SUM_MATERIAL_OVER_GEO_REGION = SQL_SUM_H3_GRID_OVER_GEO_REGION+SQL_GET_H3_TABLE_COLUMN_FOR_MATERIAL+"""
CREATE OR REPLACE FUNCTION sum_material_over_georegion(
    geo_region_id uuid, 
    material_id uuid,
    h3_data_type material_to_h3_type_enum
)
RETURNS float AS
$$
    DECLARE
        h3_table_name varchar;
        h3_column_name varchar;
        h3_resolution integer;
        sum float;

    BEGIN
        -- Get h3data table name and column name for given material
        SELECT * INTO h3_table_name, h3_column_name, h3_resolution
        FROM get_h3_table_column_for_material(material_id, h3_data_type);

        -- Sum table column over region
        SELECT sum_h3_grid_over_georegion(geo_region_id, h3_resolution, h3_table_name, h3_column_name) 
        INTO sum;
        RETURN sum;
    END;
$$
LANGUAGE plpgsql;
"""

SQL_SUM_WEIGHTED_DEFORESTATION_OVER_GEO_REGION = """
CREATE OR REPLACE FUNCTION sum_weighted_deforestation_over_georegion(
    geo_region_id uuid, 
    material_id uuid,
    h3_data_type material_to_h3_type_enum
)
RETURNS float AS
$$
    DECLARE
        material_h3_table_name varchar;
        material_h3_column_name varchar;
        h3_resolution integer;
        deforestation_h3_table_name varchar := 'h3_grid_deforestation_global';
        deforestation_h3_column_name varchar := 'hansenLoss2019';
        sum float;

    BEGIN
        -- Get h3data table name and column name for given material
        SELECT * INTO material_h3_table_name, material_h3_column_name, h3_resolution
        FROM get_h3_table_column_for_material(material_id, h3_data_type);

        -- Sum table column over region
        EXECUTE format(
            'SELECT sum(h3grid_mat.%I * h3grid_def.%I)
                FROM
                    get_h3_uncompact_geo_region($1, $2) geo_region
                    INNER JOIN %I h3grid_mat ON h3grid_mat.h3index = geo_region.h3index
                    INNER JOIN %I h3grid_def ON h3grid_def.h3index = geo_region.h3index;
            ', material_h3_column_name, deforestation_h3_column_name, material_h3_table_name, deforestation_h3_table_name)
            USING geo_region_id, h3_resolution
            INTO sum;
        RETURN sum;
    END;
$$
LANGUAGE plpgsql;
"""

SQL_SUM_WEIGHTED_BIODIVERSITY_OVER_GEO_REGION = """
CREATE OR REPLACE FUNCTION sum_weighted_biodiversity_over_georegion(
    geo_region_id uuid, 
    material_id uuid,
    h3_data_type material_to_h3_type_enum
)
RETURNS float AS
$$
    DECLARE
        material_h3_table_name varchar;
        material_h3_column_name varchar;
        h3_resolution integer;
        deforestation_h3_table_name varchar := 'h3_grid_deforestation_global';
        deforestation_h3_column_name varchar := 'hansenLoss2019';
        bio_h3_table_name varchar := 'h3_grid_bio_global';
        bio_h3_column_name varchar := 'lciaPslRPermanentCrops';
        sum float;

    BEGIN
        -- Get h3data table name and column name for given material --
        SELECT * INTO material_h3_table_name, material_h3_column_name, h3_resolution
        FROM get_h3_table_column_for_material(material_id, h3_data_type);

        -- Sum deforestation times biodiversity where material is produced --
        EXECUTE format(
                'SELECT sum(h3grid_mat.%I * h3grid_def.%I * h3grid_bio.%I)
                    FROM get_h3_uncompact_geo_region($1, $2) geo_region
                        INNER JOIN %I h3grid_mat ON h3grid_mat.h3index = geo_region.h3index
                        INNER JOIN %I h3grid_def ON h3grid_def.h3index = geo_region.h3index
                        INNER JOIN %I h3grid_bio ON h3grid_bio.h3index = geo_region.h3index;', 
                material_h3_column_name, 
                deforestation_h3_column_name, 
                bio_h3_column_name,
                material_h3_table_name,
                deforestation_h3_table_name,
                bio_h3_table_name
            )
            USING geo_region_id, h3_resolution
            INTO sum;
        RETURN sum;
    END;
$$
LANGUAGE plpgsql;
"""


In [126]:

sourcing_locations = pd.read_sql_query(
    SQL_SUM_MATERIAL_OVER_GEO_REGION \
    + SQL_SUM_WEIGHTED_DEFORESTATION_OVER_GEO_REGION \
    + SQL_SUM_WEIGHTED_BIODIVERSITY_OVER_GEO_REGION \
    + """
    SELECT
        id,
        sum_material_over_georegion("geoRegionId", "materialId", 'producer') as production,
        sum_material_over_georegion("geoRegionId", "materialId", 'harvest') as harvested_area,
        sum_weighted_deforestation_over_georegion("geoRegionId", "materialId", 'harvest') as raw_deforestation,
        sum_weighted_biodiversity_over_georegion("geoRegionId", "materialId", 'harvest') as raw_biodiversity
    FROM
        sourcing_location
    """, conn)

sourcing_locations

Unnamed: 0,id,production,harvested_area,raw_deforestation,raw_biodiversity
0,c476c48c-a26c-485a-8348-a1ee3006b50f,31973.94,31494.0,29655.25,413836.9
1,ac9c7d06-4a8d-4d69-964b-ab74c61db274,13.30938,4144.1,4144.1,1894.969
2,66821c49-6f38-423a-9929-8e826bd94f6f,525978.9,761829.9,586438.5,6418257.0
3,c5ed9e7c-1897-4331-a6ce-c6989b2dcb37,98064.74,18729.13,18617.7,174455.3
4,2a0174d4-fa70-41b4-9c6c-4bfd0ca6b917,79511.8,26490.87,14916.8,79090.62
5,93912807-a517-4a2d-855b-28521479b905,37175010.0,25243840.0,1008164.0,3784941.0
6,13e41077-eb35-4114-80b6-319f01789297,3775885.0,5318659.0,5066399.0,29749670.0
7,5494d53c-cd0c-4eb4-bcee-14a93017647b,25177.53,19161.79,19062.7,84118.93
8,01ca2a2c-8ba9-486a-9c7f-713d97e67395,1305452.0,867657.6,719653.3,11106950.0
9,01b00a66-820c-4002-bcfa-45e57315b523,171.0592,1168384.0,954642.8,9483591.0


In [127]:
sourcing_records = pd.read_sql_query(
    SQL_SUM_MATERIAL_OVER_GEO_REGION \
    + SQL_SUM_WEIGHTED_DEFORESTATION_OVER_GEO_REGION \
    + SQL_SUM_WEIGHTED_BIODIVERSITY_OVER_GEO_REGION \
    + """
    SELECT
        sr.id,
        sr.tonnage,
        sr.year,
        sl.id,
        sl.production,
        sl.harvested_area,
        sl.raw_deforestation,
        sl.raw_biodiversity
    FROM
        sourcing_records sr
        INNER JOIN
            (
                SELECT
                    id,
                    sum_material_over_georegion("geoRegionId", "materialId", 'producer') as production,
                    sum_material_over_georegion("geoRegionId", "materialId", 'harvest') as harvested_area,
                    sum_weighted_deforestation_over_georegion("geoRegionId", "materialId", 'harvest') as raw_deforestation,
                    sum_weighted_biodiversity_over_georegion("geoRegionId", "materialId", 'harvest') as raw_biodiversity
                FROM
                    sourcing_location
            ) as sl
            on sr."sourcingLocationId" = sl.id
""", conn)

sourcing_records['land_per_ton'] = sourcing_records['harvested_area'] / sourcing_records['production']

sourcing_records['deforestation_per_ha_landuse'] = sourcing_records['raw_deforestation'] / sourcing_records['harvested_area']
sourcing_records['bio_per_ha_landuse'] = sourcing_records['raw_biodiversity'] / sourcing_records['harvested_area']
sourcing_records['land_use'] = sourcing_records['land_per_ton'] * sourcing_records['tonnage']

sourcing_records['deforestation'] = sourcing_records['deforestation_per_ha_landuse'] * sourcing_records['land_use']
sourcing_records['biodiversity_loss'] = sourcing_records['bio_per_ha_landuse'] * sourcing_records['land_use']

# Farm impact scaler = production
# Land use change impact scaler = harvested_area

sourcing_records.to_csv('test_impact_calc.csv')

sourcing_records


Unnamed: 0,id,tonnage,year,id.1,production,harvested_area,raw_deforestation,raw_biodiversity,land_per_ton,deforestation_per_ha_landuse,bio_per_ha_landuse,land_use,deforestation,biodiversity_loss
0,a697bc74-e4c4-40b2-8b1f-68b7ae57d57a,650.0,2010,c476c48c-a26c-485a-8348-a1ee3006b50f,31973.941406,31494.000000,29655.245346,413836.895791,0.984990,0.941616,13.140182,640.243245,602.863101,8412.912842
1,e65f3c07-8e79-4f7c-8049-e1af61bb9e8a,657.0,2011,c476c48c-a26c-485a-8348-a1ee3006b50f,31973.941406,31494.000000,29655.245346,413836.895791,0.984990,0.941616,13.140182,647.138172,609.355473,8503.513442
2,7f1cfe9c-b5f8-46d6-936e-1293fd86e13c,664.0,2012,c476c48c-a26c-485a-8348-a1ee3006b50f,31973.941406,31494.000000,29655.245346,413836.895791,0.984990,0.941616,13.140182,654.033099,615.847845,8594.114042
3,0404e1ab-3de0-43c3-adcd-02141feffd9b,671.0,2013,c476c48c-a26c-485a-8348-a1ee3006b50f,31973.941406,31494.000000,29655.245346,413836.895791,0.984990,0.941616,13.140182,660.928027,622.340217,8684.714641
4,5b73c867-8cda-4314-9b4a-ff9218d96993,678.0,2014,c476c48c-a26c-485a-8348-a1ee3006b50f,31973.941406,31494.000000,29655.245346,413836.895791,0.984990,0.941616,13.140182,667.822954,628.832589,8775.315241
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
490,105e0457-c50f-42c7-9828-065e8c3ba3cb,752.0,2016,a4b8f62f-3138-43fc-8b95-96fb058f82b6,96946.023438,62144.902344,46080.337963,547870.855561,0.641026,0.741498,8.816022,482.051402,357.440284,4249.775997
491,8302bfe5-d1fd-419a-9987-c5c3fa3712f0,760.0,2017,a4b8f62f-3138-43fc-8b95-96fb058f82b6,96946.023438,62144.902344,46080.337963,547870.855561,0.641026,0.741498,8.816022,487.179609,361.242840,4294.986380
492,a80e8c95-6331-4ce1-8d6e-54bff80664b8,768.0,2018,a4b8f62f-3138-43fc-8b95-96fb058f82b6,96946.023438,62144.902344,46080.337963,547870.855561,0.641026,0.741498,8.816022,492.307815,365.045396,4340.196763
493,e8ad2eee-fc2d-4bc8-adca-1cd01ed171fa,776.0,2019,a4b8f62f-3138-43fc-8b95-96fb058f82b6,96946.023438,62144.902344,46080.337963,547870.855561,0.641026,0.741498,8.816022,497.436022,368.847953,4385.407146


In [118]:
query1 = SQL_SUM_MATERIAL_OVER_GEO_REGION \
    + SQL_SUM_WEIGHTED_DEFORESTATION_OVER_GEO_REGION \
    + SQL_SUM_WEIGHTED_BIODIVERSITY_OVER_GEO_REGION \
    + """
EXPLAIN ANALYZE 
SELECT 
    sum_material_over_georegion('68ed9c70-0f01-495f-9a53-68e5cb35c7ca', '0d7b1be5-dc86-47b8-ba3a-25190a275011', 'producer'),
    sum_weighted_deforestation_over_georegion('68ed9c70-0f01-495f-9a53-68e5cb35c7ca', '0d7b1be5-dc86-47b8-ba3a-25190a275011', 'harvest'),
    sum_weighted_bio_over_georegion('68ed9c70-0f01-495f-9a53-68e5cb35c7ca', '0d7b1be5-dc86-47b8-ba3a-25190a275011', 'harvest')
"""
print(psql(query1))

"""
EXPLAIN ANALYZE
SELECT sum(prod."earthstat2000GlobalRubberProduction") as value
FROM
    (SELECT h3_uncompact(geo_region."h3Compact"::h3index[], 6) h3index
        FROM geo_region WHERE geo_region.id = '68ed9c70-0f01-495f-9a53-68e5cb35c7ca'
    ) geom
    INNER JOIN h3_grid_earthstat2000_global_prod prod
    on geom.h3index = prod.h3index
"""
#print(psql(query2))

%timeit psql("SELECT sum_material_over_georegion('68ed9c70-0f01-495f-9a53-68e5cb35c7ca', '0d7b1be5-dc86-47b8-ba3a-25190a275011', 'producer')")
%timeit psql("""SELECT sum(prod."earthstat2000GlobalRubberProduction") as value FROM (SELECT h3_uncompact(geo_region."h3Compact"::h3index[], 6) h3index FROM geo_region WHERE geo_region.id = '68ed9c70-0f01-495f-9a53-68e5cb35c7ca') geom INNER JOIN h3_grid_earthstat2000_global_prod prod on geom.h3index = prod.h3index""")


function sum_weighted_bio_over_georegion(unknown, unknown, unknown) does not exist
LINE 154:     sum_weighted_bio_over_georegion('68ed9c70-0f01-495f-9a53...
              ^
HINT:  No function matches the given name and argument types. You might need to add explicit type casts.

None
function sum_material_over_georegion(unknown, unknown, unknown) does not exist
LINE 1: SELECT sum_material_over_georegion('68ed9c70-0f01-495f-9a53-...
               ^
HINT:  No function matches the given name and argument types. You might need to add explicit type casts.

function sum_material_over_georegion(unknown, unknown, unknown) does not exist
LINE 1: SELECT sum_material_over_georegion('68ed9c70-0f01-495f-9a53-...
               ^
HINT:  No function matches the given name and argument types. You might need to add explicit type casts.

function sum_material_over_georegion(unknown, unknown, unknown) does not exist
LINE 1: SELECT sum_material_over_georegion('68ed9c70-0f01-495f-9a53-...
               ^


[('producer',), ('harvest',)]