# Review methodology for the deforestation and carbon equations:
# Include new indicators:

### UPDATED DEFORESTATION AND CLIMATE RISK FORMULAS:

Deforestation risk:

    Deforestation risk (ha) = Idef(unitless) * Ilanduse(ha) =>
    
    where:
    
        
        
        Idef (unitless) = Sum(Buffered_deforestation_per_ha_human_land_use * Production_commodity) / Sum(Production_commodity)
        
        Ilanduse (ha) = Sum(harvest_area_in_georegion)/sum(production_in_georegion) * Purchased_volume
    

Carbon risk:

    Carbon risk (tco2e) = Icarbonemissions (tco2e/ha) * Ilanduse(ha) =>
    
    where:
        
        Icarbonemissions (tco2e/ha) = Sum(Buffered_carbon_emissions_per_ha_human_land_use * 3.66 * Production_commodity) / Sum(Production_commodity)
        
        Ilanduse (ha) = Sum(harvest_area_in_georegion)/sum(production_in_georegion) * Purchased_volume
        
NOTE: Multiply the carbon emissions by 3.66 (approximately 44/12, the CO2-to-carbon mass ratio) to express the data in tCO2e

        
### NEW INDICATORS:

Water quality:

    Water_quality (Mm3/yr) =  GWF (m3/t) * Purchase volume (t) /1000


Natural ecosystems conversion risk:


    Natural ecosystems conversion risk (ha) = Inaturalconversionrisk (unitless) * Ilanduse(ha) =>
    
    where:
        
        Inaturalconversionrisk (unitless) = Sum(Natural ecosystems conversion risk per hectare of land use (unitless) * Production_commodity) / Sum(Production_commodity)
        
        Ilanduse (ha) = Sum(harvest_area_in_georegion)/sum(production_in_georegion) * Purchased_volume


Greenhouse gas emissions from farm production:

    -TBD
    

In [1]:
#import libraries

from psycopg2.pool import ThreadedConnectionPool
import pandas as pd

In [2]:
# Load settings from the env file (also used for the gcs upload) into `env`.
# Each non-empty, non-comment line is expected to look like KEY=VALUE.
env_path = ".env"
env = {}
with open(env_path) as f:
    for line in f:
        entry = line.strip()
        # Blank lines and '#' comments carry no KEY=VALUE pair; without this
        # guard the unpacking below fails with ValueError on such lines.
        if not entry or entry.startswith("#"):
            continue
        # Split on the first '=' only, so values may themselves contain '='.
        env_key, _val = line.split("=", 1)
        # Drop the line terminator (including the '\r' of CRLF files).
        env_value = _val.rstrip("\r\n")
        env[env_key] = env_value
        
#list(env.keys())

# Open a pooled connection to the local database using the credentials
# read from the env file.
connection_settings = {
    "host": env['API_POSTGRES_HOST'],
    "port": env['API_POSTGRES_PORT'],
    "user": env['API_POSTGRES_USERNAME'],
    "password": env['API_POSTGRES_PASSWORD'],
}
postgres_thread_pool = ThreadedConnectionPool(1, 50, **connection_settings)


# Take one connection from the pool, plus a cursor on it, for the cells below.
conn = postgres_thread_pool.getconn()
cursor = conn.cursor()

In [3]:
def psql(query):
    """Run ``query`` on the module-level ``cursor`` and return its rows.

    Args:
        query: SQL text handed unchanged to ``cursor.execute``.

    Returns:
        The list produced by ``cursor.fetchall()``; an empty list when the
        statement produced no result set (for example DDL); ``None`` when the
        statement failed. On failure the error is printed and the transaction
        on the module-level ``conn`` is rolled back.
    """
    try:
        cursor.execute(query)
        # DB-API cursors expose description=None when the statement returned
        # no result set. Calling fetchall() in that case raises, which would
        # send a statement that actually succeeded into the rollback below.
        if cursor.description is None:
            return []
        return cursor.fetchall()
    except Exception as e:
        # Best-effort notebook helper: reset the transaction so that later
        # queries can run, and report the problem instead of raising.
        conn.rollback()
        print(e)
        return None

In [7]:
# Query definitions.
# SQL_GET_H3_UNCOMPACT_GEO_REGION: DDL that (re)creates the SQL function
# get_h3_uncompact_geo_region(geo_region_id, h3_resolution). The function
# selects the geo_region row with the given id, casts its "h3Compact" column
# to h3index[] and passes it to h3_uncompact at the requested resolution,
# returning the result as a single-column table named h3index.
SQL_GET_H3_UNCOMPACT_GEO_REGION = """
CREATE OR REPLACE FUNCTION get_h3_uncompact_geo_region(geo_region_id uuid, h3_resolution int)
RETURNS TABLE (h3index h3index) AS 
$$
    SELECT h3_uncompact(geo_region."h3Compact"::h3index[], h3_resolution) h3index
    FROM geo_region WHERE geo_region.id = geo_region_id
$$ 
LANGUAGE SQL;
"""

# SQL_GET_H3_TABLE_COLUMN_FOR_INDICATORS: DDL that (re)creates
# get_h3_table_column_for_indicators(shortName). The function joins h3_data to
# "indicator" and returns the h3 table name, column name and resolution for
# the indicator whose "nameCode" equals shortName.
# Assumes there is just one h3_data column per indicator: LIMIT 1 keeps a
# single row and there is no ORDER BY to choose among several matches.
SQL_GET_H3_TABLE_COLUMN_FOR_INDICATORS = """
CREATE OR REPLACE FUNCTION get_h3_table_column_for_indicators(shortName text)
RETURNS TABLE (h3_table_name varchar, h3_column_name varchar, h3_resolution int) AS
$$
    SELECT h3_data."h3tableName", h3_data."h3columnName", h3_data."h3resolution"
    FROM h3_data
        INNER JOIN "indicator" ind ON ind."id" = h3_data."indicatorId"
    WHERE ind."nameCode" = shortName 
    LIMIT 1;
$$
LANGUAGE SQL;
"""
# SQL_GET_H3_TABLE_COLUMN_FOR_MATERIAL: DDL that (re)creates
# get_h3_table_column_for_material(material_id, h3_data_type). The function
# joins h3_data to material_to_h3 and returns the h3 table name, column name
# and resolution linked to the given material for the given
# material_to_h3_type_enum value (the main query below passes 'producer' and
# 'harvest'). LIMIT 1 keeps a single row; there is no ORDER BY.
SQL_GET_H3_TABLE_COLUMN_FOR_MATERIAL = """
CREATE OR REPLACE FUNCTION get_h3_table_column_for_material(material_id uuid, h3_data_type material_to_h3_type_enum)
RETURNS TABLE (h3_table_name varchar, h3_column_name varchar, h3_resolution int) AS
$$
    SELECT h3_data."h3tableName", h3_data."h3columnName", h3_data."h3resolution"
    FROM h3_data
        INNER JOIN material_to_h3 ON material_to_h3."h3DataId" = h3_data.id
    WHERE material_to_h3."materialId" = material_id 
        AND material_to_h3.type = h3_data_type
    LIMIT 1;
$$
LANGUAGE SQL;
"""
# SQL_SUM_H3_GRID_OVER_GEO_REGION: the uncompact DDL above followed by DDL that
# (re)creates sum_h3_grid_over_georegion(geo_region_id, h3_resolution,
# h3_table_name, h3_column_name). The plpgsql function builds a dynamic query
# with format(): %I quotes the column and table names as identifiers, while
# the region id and resolution are bound through USING as $1/$2. It sums the
# column over the rows of the h3 table whose h3index belongs to the
# uncompacted geo region.
# This function is in use: sum_material_over_georegion (defined in
# SQL_SUM_MATERIAL_OVER_GEO_REGION) calls it.
SQL_SUM_H3_GRID_OVER_GEO_REGION = SQL_GET_H3_UNCOMPACT_GEO_REGION+"""
CREATE OR REPLACE FUNCTION sum_h3_grid_over_georegion(
    geo_region_id uuid, 
    h3_resolution int,
    h3_table_name varchar, 
    h3_column_name varchar
)
RETURNS float AS
$$
    DECLARE
        sum float;
    BEGIN
        EXECUTE format(
            'SELECT sum(h3grid.%I)
                FROM
                    get_h3_uncompact_geo_region($1, $2) geo_region
                    INNER JOIN %I h3grid ON h3grid.h3index = geo_region.h3index;
            ', h3_column_name, h3_table_name)
            USING geo_region_id, h3_resolution
            INTO sum;
        RETURN sum;
    END;
$$
LANGUAGE plpgsql;
"""
# SQL_SUM_MATERIAL_OVER_GEO_REGION: the grid-sum DDL and the material lookup
# DDL followed by DDL that (re)creates sum_material_over_georegion(
# geo_region_id, material_id, h3_data_type). The plpgsql function resolves the
# h3 table, column and resolution for the material via
# get_h3_table_column_for_material, then delegates to
# sum_h3_grid_over_georegion to sum that column over the geo region.
SQL_SUM_MATERIAL_OVER_GEO_REGION = SQL_SUM_H3_GRID_OVER_GEO_REGION+SQL_GET_H3_TABLE_COLUMN_FOR_MATERIAL+"""
CREATE OR REPLACE FUNCTION sum_material_over_georegion(
    geo_region_id uuid, 
    material_id uuid,
    h3_data_type material_to_h3_type_enum
)
RETURNS float AS
$$
    DECLARE
        h3_table_name varchar;
        h3_column_name varchar;
        h3_resolution integer;
        sum float;

    BEGIN
        -- Get h3data table name and column name for given material
        SELECT * INTO h3_table_name, h3_column_name, h3_resolution
        FROM get_h3_table_column_for_material(material_id, h3_data_type);

        -- Sum table column over region
        SELECT sum_h3_grid_over_georegion(geo_region_id, h3_resolution, h3_table_name, h3_column_name) 
        INTO sum;
        RETURN sum;
    END;
$$
LANGUAGE plpgsql;
"""

## NOTE: Assumes that the deforestation / GHG emissions per hectare of human
## land use layers have been preprocessed and stored in the database.
## Updated deforestation, climate (carbon) risk and natural conversion formulas.
#
# SQL_GET_ANNUAL_LANDSCAPE_IMPACT_OVER_GEO_REGION: the indicator lookup DDL
# followed by DDL that (re)creates get_annual_landscape_impact_over_georegion(
# geo_region_id, shortName, material_id, h3_data_type). The plpgsql function
# looks up the h3 table/column of the indicator and of the material, then
# returns sum(indicator_value * material_value) over the h3 cells of the
# uncompacted geo region that are present in both tables (inner joins on
# h3index). This is the numerator of the production-weighted averages in the
# formulas; the division by total production is done later in pandas.
# The geo region is uncompacted at the material's resolution; the indicator's
# resolution is read into h3_indicator_resolution (declared varchar) and is
# not used afterwards.
# NOTE(review): this string does not include the DDL for
# get_h3_uncompact_geo_region, so that function must be created separately -
# the main query does so by prepending SQL_SUM_MATERIAL_OVER_GEO_REGION.

SQL_GET_ANNUAL_LANDSCAPE_IMPACT_OVER_GEO_REGION = SQL_GET_H3_TABLE_COLUMN_FOR_INDICATORS + """
CREATE OR REPLACE FUNCTION get_annual_landscape_impact_over_georegion(
    geo_region_id uuid,
    shortName text,
    material_id uuid,
    h3_data_type material_to_h3_type_enum
)
RETURNS float AS
$$
    DECLARE
        h3_resolution integer;
        indicator_h3_table_name varchar;
        indicator_h3_column_name varchar;
        h3_indicator_resolution varchar;
        material_h3_table_name varchar;
        material_h3_column_name varchar;
        sum float;
    BEGIN
    
        -- Get h3data table name, column
        SELECT * INTO indicator_h3_table_name, indicator_h3_column_name, h3_indicator_resolution
        FROM get_h3_table_column_for_indicators(shortName);
        
        -- Get h3data table name, column and resolution for the material production
        SELECT * INTO material_h3_table_name, material_h3_column_name, h3_resolution
        FROM get_h3_table_column_for_material(material_id, h3_data_type);
        
        -- Sum landscape impact values
        EXECUTE format(
            'SELECT sum(h3ind.%I * h3prod.%I )
                FROM
                    get_h3_uncompact_geo_region($1, $2) geo_region
                INNER JOIN %I h3ind ON h3ind.h3index = geo_region.h3index
                INNER JOIN %I h3prod ON h3ind.h3index = h3prod.h3index;
            ', indicator_h3_column_name, material_h3_column_name, indicator_h3_table_name, material_h3_table_name)
            USING geo_region_id, h3_resolution
            INTO sum;
        RETURN sum;
    END;
$$
LANGUAGE plpgsql;
"""

## NEW INDICATORS:

## Water quality - uses the same function as the blue water footprint.
# The formula has been slightly modified so that it depends on the indicator
# table.
#
# SQL_GET_INDICATOR_COEFFICIENT_IMPACT: DDL that (re)creates
# get_indicator_coefficient_impact(shortName, adminRegionId, material_id).
# The plpgsql function resolves the indicator id from "indicator"."nameCode",
# then reads the non-null "value" from indicator_coefficient for that
# indicator and material, matching either the given admin region or rows
# whose "adminRegionId" IS NULL, and returns value / 1000.
# NOTE(review): a region-specific row and a region-less row can both match and
# the query has no ORDER BY/LIMIT, so which row ends up in `value` is not
# pinned down - confirm whether the region-specific row should take precedence.
# NOTE(review): the inline SQL comment says the /1000 converts m3 to Mm3 -
# confirm the intended unit.

SQL_GET_INDICATOR_COEFFICIENT_IMPACT = """
CREATE OR REPLACE FUNCTION get_indicator_coefficient_impact(
    shortName text,
    adminRegionId uuid,
    material_id uuid)
RETURNS float AS
$$
    DECLARE
        indicator_id uuid;
        indicator_coeficient_table varchar := 'indicator_coefficient';
        indicator_coeficient_value varchar := 'value';
        value float;
    BEGIN
    
        --get indicatorId
        SELECT i."id" INTO indicator_id FROM "indicator" i
        WHERE i."nameCode" = shortName;
       
        -- get water footprint value by location, material and indicator
        EXECUTE format(
            'SELECT reduced.%I /1000 --convert the m3 to Mm3
                FROM (
                    SELECT ind_coef.%I
                    FROM %I ind_coef
                    WHERE (ind_coef."adminRegionId" = $1 
                        AND ind_coef."materialId" = $2 
                        AND ind_coef."indicatorId" = $3)
                    OR (ind_coef."materialId" = $2 
                        AND ind_coef."adminRegionId" IS NULL
                        AND ind_coef."indicatorId" = $3)  
                    )reduced
                WHERE reduced.%I is not null;',
                indicator_coeficient_value,
                indicator_coeficient_value,
                indicator_coeficient_table,
                indicator_coeficient_value
                )
            USING adminRegionId, material_id, indicator_id
            INTO value;
        RETURN value;
    END;
$$
LANGUAGE plpgsql;
"""

# SQL_GET_PERCENTAGE_WATER_STRESS_AREA: the indicator lookup DDL followed by
# DDL that (re)creates get_percentage_water_stress_area(geo_region_id,
# shortName). The plpgsql function looks up the h3 table/column/resolution of
# the indicator, then returns the share (0..1) of the geo region's h3 cells
# that have an indicator value and whose value is greater than 3. It returns
# NULL when no cell of the region has a value (the g_area > 0 filter leaves no
# row).
# ws_area and g_area are both bigint aggregates (sum of integers, count), so
# the numerator is cast to float: a plain bigint / bigint division truncates
# and would yield only 0 or 1.
SQL_GET_PERCENTAGE_WATER_STRESS_AREA = SQL_GET_H3_TABLE_COLUMN_FOR_INDICATORS + """
CREATE OR REPLACE FUNCTION get_percentage_water_stress_area(
geo_region_id uuid,
shortName text
)
RETURNS float AS
$$
    DECLARE
        aqueduct_h3_table_name varchar;
        aqueduct_h3_column_name varchar;
        h3_resolution integer;
        percentage float;

    BEGIN
    
        -- Get h3data table name, column
        SELECT * INTO aqueduct_h3_table_name, aqueduct_h3_column_name, h3_resolution
        FROM get_h3_table_column_for_indicators(shortName);
        
        
        EXECUTE format(
            'SELECT reduced.ws_area::float / reduced.g_area as percentage
            FROM
                (SELECT 
                    sum(case when aqueduct.%I > 3 then 1 else 0 end) ws_area, count(aqueduct.%I) g_area
                FROM get_h3_uncompact_geo_region($1, $2) geo_region
                INNER JOIN %I aqueduct ON aqueduct.h3index = geo_region.h3index) reduced
            WHERE reduced.g_area > 0;',
            aqueduct_h3_column_name,
            aqueduct_h3_column_name,
            aqueduct_h3_table_name
                )
            USING geo_region_id, h3_resolution
            INTO percentage;
        RETURN percentage;
    END;
$$
LANGUAGE plpgsql;
"""

In [8]:
# Create (or replace) every helper function and, in the same statement batch,
# fetch one row per sourcing record together with the per-location aggregates
# computed by those functions. Unquoted column aliases are folded to lower
# case by PostgreSQL, which is why the DataFrame columns used further down are
# spelled e.g. 'def_per_humanlanduse_annual'.
sourcing_records = pd.read_sql_query(
    "".join((
        SQL_SUM_MATERIAL_OVER_GEO_REGION,
        SQL_GET_H3_TABLE_COLUMN_FOR_INDICATORS,
        SQL_GET_ANNUAL_LANDSCAPE_IMPACT_OVER_GEO_REGION,
        SQL_GET_INDICATOR_COEFFICIENT_IMPACT,
        SQL_GET_PERCENTAGE_WATER_STRESS_AREA,
        """
    SELECT 
        sr.id,
        sr.tonnage,
        sl.production,
        sl.harvest,
        sl.def_per_humanLandUse_annual,
        --sl.nat_conversion_per_humanLandUse_annual,
        sl.emissions_per_humanLandUse_annual,
        --sl.raw_ghg_emissions,
        sl.raw_water_use,
        sl.water_stress_perct
        --sl.raw_water_quality
    FROM 
        sourcing_records sr
    INNER JOIN
        (
        
        SELECT 
            id,
            "geoRegionId",
            "locationCountryInput",
            sum_material_over_georegion("geoRegionId", "materialId", 'producer') as production,
            sum_material_over_georegion("geoRegionId", "materialId", 'harvest') as harvest,
            get_annual_landscape_impact_over_georegion("geoRegionId", 'DF_LUC_T', "materialId", 'producer') def_per_humanLandUse_annual,
            --get_annual_landscape_impact_over_georegion("geoRegionId", 'NECR', "materialId", 'producer') nat_conversion_per_humanLandUse_annual,
            get_annual_landscape_impact_over_georegion("geoRegionId",'GHG_LUC_T', "materialId", 'producer') emissions_per_humanLandUse_annual,
            --get_annual_deforestation_or_climate_risk_ghg_over_georegion("geoRegionId",'GHG', "materialId", 'producer') raw_ghg_emissions,
            get_indicator_coefficient_impact('UWU_T', "adminRegionId", "materialId") raw_water_use,
            get_percentage_water_stress_area("geoRegionId", 'UWUSR_T') as water_stress_perct
            --get_indicator_coefficient_impact('WQ', "adminRegionId", "materialId") raw_water_quality,
            
        FROM
            sourcing_location
        ) AS sl
        ON sr."sourcingLocationId" = sl."id"
        """,
    )),
    conn,
)

# LAND USE:
# Ilanduse = harvested area / production in the geo region * purchased volume.
tonnage = sourcing_records['tonnage']
production = sourcing_records['production']
sourcing_records['land_per_ton'] = sourcing_records['harvest'].div(production)
land_use = sourcing_records['land_per_ton'].mul(tonnage)
sourcing_records['land_use'] = land_use

# Assumption: all forest loss is due to human land use (crop / pasture /
# managed forest / urban), and all human land use within 50 km of the
# deforested pixel is equally responsible. The question answered is: what is
# the average number of hectares of forest lost per hectare of cropland in the
# local area / jurisdiction?
# NOTE: Should this be done with or without the buffer?

# DEFORESTATION:
# 1. Raw deforestation: the sum over pixels of deforestation per ha of human
#    land use times the commodity production (computed in SQL).
# 2. Divide by the total production to weight the deforestation by production.
# 3. Multiply by the land use impact of the purchased material.
raw_deforestation = sourcing_records['def_per_humanlanduse_annual'].div(production)
sourcing_records['raw_deforestation_risk'] = raw_deforestation
sourcing_records['deforestation_risk'] = sourcing_records['raw_deforestation_risk'].mul(land_use)

# CARBON:
# 1. Total carbon emissions in the geo region per hectare of human land use,
#    weighted by the commodity production (computed in SQL).
# 2. Divide by the total production.
# 3. Multiply by the land use impact.
# NOTE(review): the methodology notes mention a 3.66 factor to express the
# emissions in tCO2e; no such factor is applied here - confirm that it is
# applied when the layer is preprocessed.
raw_emissions = sourcing_records['emissions_per_humanlanduse_annual'].div(production)
sourcing_records['raw_emissions'] = raw_emissions
sourcing_records['emissions_risk'] = sourcing_records['raw_emissions'].mul(land_use)

# NATURAL ECOSYSTEMS CONVERSION RISK (pending - the input column is not selected yet):
#sourcing_records['raw_conversion_risk'] = sourcing_records['nat_conversion_per_humanLandUse_annual'] / sourcing_records['production'] 
#sourcing_records['natural_conversion_risk'] = sourcing_records['raw_conversion_risk'] * sourcing_records['land_use']

# WATER USE:
# Water footprint coefficient times the purchased volume; the unsustainable
# share is the part attributed to water-stressed area.
water_use = sourcing_records['raw_water_use'].mul(tonnage)
sourcing_records['water_use'] = water_use
sourcing_records['unustainable_water_use'] = water_use.mul(sourcing_records['water_stress_perct'])

# WATER QUALITY (pending - the input column is not selected yet):
#sourcing_records['water_quality'] = sourcing_records['raw_water_quality'] * sourcing_records['tonnage']

# GHG EMISSIONS: to be defined.

sourcing_records.head()

Unnamed: 0,id,tonnage,production,harvest,def_per_humanlanduse_annual,emissions_per_humanlanduse_annual,raw_water_use,water_stress_perct,land_per_ton,land_use,raw_deforestation_risk,deforestation_risk,raw_emissions,emissions_risk,water_use,unustainable_water_use
0,c180595c-6e7e-43c8-abbb-f82973482227,2194.0,0.0,0.0,0.0,0.0,0.361,0.0,,,,,,,792.034,0.0
1,b8accade-a19f-4a9b-a8d0-88062fa2b8e7,9065.0,0.0,0.0,0.0,0.0,0.361,0.0,,,,,,,3272.465,0.0
2,8a5b26b1-ef50-4315-b7c4-ae3e3d709f56,6711.0,0.0,0.0,0.0,0.0,0.361,0.0,,,,,,,2422.671,0.0
3,4229956b-b46a-4694-a5d1-6e63d8ab459b,4365.0,0.0,0.0,0.0,0.0,0.361,0.0,,,,,,,1575.765,0.0
4,d3c98154-0975-4a6b-96ff-74ed325d6652,173.0,0.0,0.0,0.0,0.0,0.361,0.0,,,,,,,62.453,0.0


In [9]:
# Show only the records with no missing value in any column. The result is
# not assigned, so `sourcing_records` itself keeps every row.
sourcing_records.dropna(axis=0, how='any')

Unnamed: 0,id,tonnage,production,harvest,def_per_humanlanduse_annual,emissions_per_humanlanduse_annual,raw_water_use,water_stress_perct,land_per_ton,land_use,raw_deforestation_risk,deforestation_risk,raw_emissions,emissions_risk,water_use,unustainable_water_use
11,e58227a3-9098-44a2-a044-84919376cf01,5221.0,8963.333984,2786.306152,88.501312,1.796853e+09,0.361,0.0,0.310856,1622.979178,0.009874,16.024817,200466.992655,3.253538e+08,1884.781,0.0
12,2e141139-f417-4ec7-abd2-09ca38548da6,3343.0,8963.333984,2786.306152,88.501312,1.796853e+09,0.361,0.0,0.310856,1039.191609,0.009874,10.260671,200466.992655,2.083236e+08,1206.823,0.0
13,935161b2-a1b9-4167-b433-37d646ec6b71,1998.0,8963.333984,2786.306152,88.501312,1.796853e+09,0.361,0.0,0.310856,621.090289,0.009874,6.132462,200466.992655,1.245081e+08,721.278,0.0
14,2e902171-ba5d-4786-8570-e3d59e39ef50,78.0,8963.333984,2786.306152,88.501312,1.796853e+09,0.361,0.0,0.310856,24.246768,0.009874,0.239405,200466.992655,4.860677e+06,28.158,0.0
15,0a42ebd6-674c-4fc5-8aea-b6de39ee0aef,522.0,8963.333984,2786.306152,88.501312,1.796853e+09,0.361,0.0,0.310856,162.266832,0.009874,1.602175,200466.992655,3.252914e+07,188.442,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14229,f741f19e-6d57-4ff7-b2b1-4db603e57699,9478.0,58911.917969,40602.492188,5.008143,6.122462e+07,0.965,0.0,0.689207,6532.301684,0.000085,0.555316,1039.256865,6.788739e+06,9146.270,0.0
14230,40722023-2616-4bf0-95ec-513361f2836c,7448.0,58911.917969,40602.492188,5.008143,6.122462e+07,0.965,0.0,0.689207,5133.211958,0.000085,0.436378,1039.256865,5.334726e+06,7187.320,0.0
14231,4f763e56-aaae-4250-bce9-9c1a7326e0de,8510.0,58911.917969,40602.492188,5.008143,6.122462e+07,0.965,0.0,0.689207,5865.149539,0.000085,0.498600,1039.256865,6.095397e+06,8212.150,0.0
14232,0d6dfdba-6e55-4947-bdc2-dedcc6f3bcfd,9962.0,58911.917969,40602.492188,5.008143,6.122462e+07,0.965,0.0,0.689207,6865.877756,0.000085,0.583673,1039.256865,7.135411e+06,9613.330,0.0


In [129]:
# Write the full table (including rows with missing values) to CSV.
output_csv_path = '../../datasets/raw/TRASE_data/carbon_deforestation_updated_values.csv'
sourcing_records.to_csv(output_csv_path)