# Review methodology for the deforestation and carbon equations:

Reviewed formulas to discuss:

Deforestation risk:

    Deforestation risk (ha) = Idef(unitless) * Ilanduse(ha) =>
    
    where:
    
        
        
        Idef (unitless) = Sum(buffered_def_in_georegion * Production_commodity) / (Sum(Human_land_use) * Sum(Production_commodity))
        
        Ilanduse (ha) = Sum(harvest_area_in_georegion)/sum(production_in_georegion) * Purchased_volume
    

Carbon risk:

    Carbon risk (tCO2e) = Icarbonemissions (tCO2e/ha) * Ilanduse (ha) =>
    
    where:
        
        Icarbonemissions (tCO2e/ha) = Sum(buffered_carbon_in_georegion * Production_commodity) / (Sum(human_area_in_georegion) * Sum(Production_commodity))
        
        Ilanduse (ha) = Sum(harvest_area_in_georegion)/sum(production_in_georegion) * Purchased_volume
        
       

        
    
    

In [8]:
#import libraries

from psycopg2.pool import ThreadedConnectionPool
import pandas as pd

In [10]:
# Load KEY=VALUE settings from the local .env file into the `env` dict.
# The API_POSTGRES_* entries are read later to open the database connection.
env_path = ".env"
env = {}
with open(env_path) as f:
    for line in f:
        stripped = line.strip()
        # Blank lines, "#" comments and lines without "=" carry no setting;
        # unpacking split("=", 1) on them would raise ValueError.
        if not stripped or stripped.startswith("#") or "=" not in line:
            continue
        env_key, _val = line.split("=", 1)
        # Drop only the line terminator (LF or CRLF); keep the rest of the value as written.
        env[env_key] = _val.rstrip("\r\n")
        
#list(env.keys())

# Open a threaded connection pool (minimum 1, maximum 50 connections) against
# the PostgreSQL server described by the API_POSTGRES_* entries of `env`.
postgres_thread_pool = ThreadedConnectionPool(
    minconn=1,
    maxconn=50,
    host=env['API_POSTGRES_HOST'],
    port=env['API_POSTGRES_PORT'],
    user=env['API_POSTGRES_USERNAME'],
    password=env['API_POSTGRES_PASSWORD'],
)

# Borrow one connection from the pool and create the cursor shared by the
# cells below.
conn = postgres_thread_pool.getconn()
cursor = conn.cursor()

In [11]:
def psql(query):
    """Run ``query`` on the shared module-level ``cursor`` and return its rows.

    Args:
        query: SQL text, passed unchanged to ``cursor.execute``.

    Returns:
        The list returned by ``cursor.fetchall()``. An empty list when the
        statement produced no result set (for example DDL such as
        ``CREATE FUNCTION``). ``None`` when execution failed; in that case the
        error is printed and the transaction on ``conn`` is rolled back.
    """
    try:
        cursor.execute(query)
        # A statement without a result set leaves ``description`` as None and
        # ``fetchall()`` would raise; that exception used to reach the handler
        # below and roll back the statement that had just succeeded.
        if cursor.description is None:
            return []
        return cursor.fetchall()
    except Exception as e:
        # Best-effort helper for interactive use: report, reset the
        # transaction so later queries can run, and signal failure with None.
        conn.rollback()
        print(e)
        return None

In [30]:
#define queries

# SQL script that (re)creates get_h3_uncompact_geo_region(geo_region_id, h3_resolution).
# The function selects the geo_region row with the given id, casts its
# "h3Compact" column to h3index[] and passes it with the requested resolution
# to h3_uncompact, returning the resulting h3 indexes as a one-column table.
SQL_GET_H3_UNCOMPACT_GEO_REGION = """
CREATE OR REPLACE FUNCTION get_h3_uncompact_geo_region(geo_region_id uuid, h3_resolution int)
RETURNS TABLE (h3index h3index) AS 
$$
    SELECT h3_uncompact(geo_region."h3Compact"::h3index[], h3_resolution) h3index
    FROM geo_region WHERE geo_region.id = geo_region_id
$$ 
LANGUAGE SQL;
"""

# Assuming that all the land indicators have the buffered version in the table.
# Assuming that all indicators are at resolution 6.
#
# SQL script that (re)creates get_h3_table_column_for_indicators(shortName).
# The function joins h3_data to "indicator" and returns the "h3tableName" and
# "h3columnName" of one h3_data row whose indicator "nameCode" equals shortName.
# NOTE(review): LIMIT 1 is used without ORDER BY, so if several h3_data rows
# exist for the same indicator the row returned is not specified — confirm
# there is only one row per indicator.
SQL_GET_H3_TABLE_COLUMN_FOR_INDICATORS = """
CREATE OR REPLACE FUNCTION get_h3_table_column_for_indicators(shortName text)
RETURNS TABLE (h3_table_name varchar, h3_column_name varchar) AS
$$
    SELECT h3_data."h3tableName", h3_data."h3columnName"
    FROM h3_data
        INNER JOIN "indicator" ind ON ind."id" = h3_data."indicatorId"
    WHERE ind."nameCode" = shortName 
    LIMIT 1;
$$
LANGUAGE SQL;
"""

# SQL script that (re)creates get_h3_table_column_for_material(material_id, h3_data_type).
# The function joins h3_data to material_to_h3 and returns the "h3tableName",
# "h3columnName" and "h3resolution" of one h3_data row linked to the given
# material with the given material_to_h3 type.
# NOTE(review): LIMIT 1 is used without ORDER BY, so with several matching
# rows the one returned is not specified — confirm the mapping is unique.
SQL_GET_H3_TABLE_COLUMN_FOR_MATERIAL = """
CREATE OR REPLACE FUNCTION get_h3_table_column_for_material(material_id uuid, h3_data_type material_to_h3_type_enum)
RETURNS TABLE (h3_table_name varchar, h3_column_name varchar, h3_resolution int) AS
$$
    SELECT h3_data."h3tableName", h3_data."h3columnName", h3_data."h3resolution"
    FROM h3_data
        INNER JOIN material_to_h3 ON material_to_h3."h3DataId" = h3_data.id
    WHERE material_to_h3."materialId" = material_id 
        AND material_to_h3.type = h3_data_type
    LIMIT 1;
$$
LANGUAGE SQL;
"""

# SQL script that (re)creates sum_h3_grid_over_georegion(geo_region_id,
# h3_resolution, h3_table_name, h3_column_name).
# SQL_GET_H3_UNCOMPACT_GEO_REGION is prepended so the helper function called
# inside the dynamic query is created by the same script.
# The function builds a query with format(): the column and table names are
# injected through %I, while the geo region id and the resolution are bound
# as $1/$2 via USING. It returns the sum of the given column over the rows of
# the given table whose h3index is among the geo region's uncompacted indexes.
SQL_SUM_H3_GRID_OVER_GEO_REGION = SQL_GET_H3_UNCOMPACT_GEO_REGION+"""
CREATE OR REPLACE FUNCTION sum_h3_grid_over_georegion(
    geo_region_id uuid, 
    h3_resolution int,
    h3_table_name varchar, 
    h3_column_name varchar
)
RETURNS float AS
$$
    DECLARE
        sum float;
    BEGIN
        EXECUTE format(
            'SELECT sum(h3grid.%I)
                FROM
                    get_h3_uncompact_geo_region($1, $2) geo_region
                    INNER JOIN %I h3grid ON h3grid.h3index = geo_region.h3index;
            ', h3_column_name, h3_table_name)
            USING geo_region_id, h3_resolution
            INTO sum;
        RETURN sum;
    END;
$$
LANGUAGE plpgsql;
"""

# SQL script that (re)creates sum_material_over_georegion(geo_region_id,
# material_id, h3_data_type).
# SQL_SUM_H3_GRID_OVER_GEO_REGION and SQL_GET_H3_TABLE_COLUMN_FOR_MATERIAL are
# prepended so both functions it calls are created by the same script.
# The function first looks up the h3 table, column and resolution registered
# for the material and data type, then returns the sum of that column over
# the geo region by delegating to sum_h3_grid_over_georegion.
SQL_SUM_MATERIAL_OVER_GEO_REGION = SQL_SUM_H3_GRID_OVER_GEO_REGION+SQL_GET_H3_TABLE_COLUMN_FOR_MATERIAL+"""
CREATE OR REPLACE FUNCTION sum_material_over_georegion(
    geo_region_id uuid, 
    material_id uuid,
    h3_data_type material_to_h3_type_enum
)
RETURNS float AS
$$
    DECLARE
        h3_table_name varchar;
        h3_column_name varchar;
        h3_resolution integer;
        sum float;

    BEGIN
        -- Get h3data table name and column name for given material
        SELECT * INTO h3_table_name, h3_column_name, h3_resolution
        FROM get_h3_table_column_for_material(material_id, h3_data_type);

        -- Sum table column over region
        SELECT sum_h3_grid_over_georegion(geo_region_id, h3_resolution, h3_table_name, h3_column_name) 
        INTO sum;
        RETURN sum;
    END;
$$
LANGUAGE plpgsql;
"""

# NOTE: Assumes that the deforestation / GHG-emission values per unit of human
# land use have already been preprocessed and stored in the database.
#
# SQL script that (re)creates
# get_annual_deforestation_or_climate_risk_over_georegion(geo_region_id,
# shortName, material_id, h3_data_type).
# The function looks up the indicator's h3 table/column by shortName and the
# material's h3 table/column/resolution, then returns the sum of
# (indicator value * material value) over the h3 cells of the geo region that
# are present in both tables. The resolution used to uncompact the geo region
# is the one returned by the material lookup.
# Unlike the other SQL_* scripts, this string does not prepend the definitions
# of the functions it calls (get_h3_uncompact_geo_region,
# get_h3_table_column_for_indicators, get_h3_table_column_for_material); they
# must already exist when the function is invoked.
SQL_GET_ANNUAL_DEFORESTATION_OR_CLIMATE_RISK_OVER_GEO_REGION = """
CREATE OR REPLACE FUNCTION get_annual_deforestation_or_climate_risk_over_georegion(
    geo_region_id uuid,
    shortName text,
    material_id uuid,
    h3_data_type material_to_h3_type_enum
)
RETURNS float AS
$$
    DECLARE
        h3_resolution integer;
        indicator_h3_table_name varchar;
        indicator_h3_column_name varchar;
        material_h3_table_name varchar;
        material_h3_column_name varchar;
        sum float;
    BEGIN
    
        -- Get h3data table name, column and resolution for the deforestation land indicator
        SELECT * INTO indicator_h3_table_name, indicator_h3_column_name
        FROM get_h3_table_column_for_indicators(shortName);
        
        -- Get h3data table name, column and resolution for the material production
        SELECT * INTO material_h3_table_name, material_h3_column_name, h3_resolution
        FROM get_h3_table_column_for_material(material_id, h3_data_type);
        
        -- Sum deforestation values
        EXECUTE format(
            'SELECT sum(h3ind.%I * h3prod.%I )
                FROM
                    get_h3_uncompact_geo_region($1, $2) geo_region
                INNER JOIN %I h3ind ON h3ind.h3index = geo_region.h3index
                INNER JOIN %I h3prod ON h3ind.h3index = h3prod.h3index;
            ', indicator_h3_column_name, material_h3_column_name, indicator_h3_table_name, material_h3_table_name)
            USING geo_region_id, h3_resolution
            INTO sum;
        RETURN sum;
    END;
$$
LANGUAGE plpgsql;
"""

In [37]:
# Send a single SQL script to the database: it first (re)creates the helper
# functions and then selects, for every sourcing record, its tonnage together
# with the production, harvest, deforestation and emissions sums computed for
# the sourcing location it belongs to.
sourcing_records = pd.read_sql_query(
    sql="".join([
        SQL_SUM_MATERIAL_OVER_GEO_REGION,
        SQL_GET_H3_TABLE_COLUMN_FOR_INDICATORS,
        SQL_GET_ANNUAL_DEFORESTATION_OR_CLIMATE_RISK_OVER_GEO_REGION,
        """
    SELECT 
        sr.id,
        sr.tonnage,
        sl.production,
        sl.harvest,
        sl.def_annual,
        sl.emissions_annual
    FROM 
        sourcing_records sr
    INNER JOIN
        (
        
        SELECT 
            id,
            "geoRegionId",
            "locationCountryInput",
            sum_material_over_georegion("geoRegionId", "materialId", 'producer') as production,
            sum_material_over_georegion("geoRegionId", "materialId", 'harvest') as harvest,
            get_annual_deforestation_or_climate_risk_over_georegion("geoRegionId", 'DF_LUC_T', "materialId", 'producer') def_annual,
            get_annual_deforestation_or_climate_risk_over_georegion("geoRegionId",'GHG_LUC_T', "materialId", 'producer') emissions_annual
        FROM
            sourcing_location
        ) AS sl
        ON sr."sourcingLocationId" = sl."id"
        """,
    ]),
    con=conn,
)

# Every ratio below divides by the production summed over the geo region.
# Treat a production of 0 as missing so those ratios come out as NaN instead
# of +/-inf: inf is not removed by dropna() and would otherwise be exported.
nonzero_production = sourcing_records['production'].where(sourcing_records['production'] != 0)

# LAND USE:
# Harvested area per unit of production in the geo region, scaled by the
# purchased tonnage of each sourcing record.
sourcing_records['land_per_ton'] = sourcing_records['harvest'] / nonzero_production
sourcing_records['land_use'] = sourcing_records['land_per_ton'] * sourcing_records['tonnage']

# Assuming that all forest loss is due to human land use (crop / pasture /
# managed forest / urban) and that all human land use within 50 km of the
# deforested pixel is equally responsible:
# what is the average number of hectares of forest lost per hectare of
# cropland in the local area / jurisdiction?
# NOTE: Should we do this with or without the buffer?

# DEFORESTATION:
# 1. Calculate the raw deforestation: the sum, at pixel level, of the
#    deforestation per ha of human land use times the production of the commodity.
# 2. Divide the raw deforestation by the total production to get the raw
#    deforestation weighted by the production.
# 3. Multiply that by the land use impact of the material.
sourcing_records['raw_deforestation_risk'] = sourcing_records['def_annual'] / nonzero_production
sourcing_records['deforestation_risk'] = sourcing_records['raw_deforestation_risk'] * sourcing_records['land_use']

# CARBON:
# 1. Calculate the total carbon emissions in the geo region per hectare of human land use.
# 2. Multiply the carbon emissions by the commodity production to weight the impact.
# 3. Multiply that by the land use impact.
# TODO: replace this divisor with the total human area, or the total
# pasture + crop area, in the geo region?
sourcing_records['raw_emissions'] = sourcing_records['emissions_annual'] / nonzero_production
sourcing_records['emissions_risk'] = sourcing_records['raw_emissions'] * sourcing_records['land_use']


sourcing_records.head()

Unnamed: 0,id,tonnage,production,harvest,def_annual,emissions_annual,land_per_ton,land_use,raw_deforestation_risk,deforestation_risk,raw_emissions,emissions_risk
0,cd7a0ba4-43cb-4625-8a31-573291bb6aa8,3773.0,0.0,0.0,0.0,0.0,,,,,,
1,32a90626-db9e-4916-96c7-d6facde801b9,3454.0,0.0,0.0,0.0,0.0,,,,,,
2,eb689989-d9d2-4b40-b557-681fe712f0f7,7210.0,0.0,0.0,0.0,0.0,,,,,,
3,20252614-e8ec-43d0-ac04-c2a6f89b4855,3316.0,0.0,0.0,0.0,0.0,,,,,,
4,c72d8cd8-cea0-4e63-a6e3-68a32c176228,9089.0,0.0,0.0,0.0,0.0,,,,,,


In [38]:
# Display only the rows without missing values. The result is not assigned,
# so sourcing_records itself keeps every row.
sourcing_records.dropna()

Unnamed: 0,id,tonnage,production,harvest,def_annual,emissions_annual,land_per_ton,land_use,raw_deforestation_risk,deforestation_risk,raw_emissions,emissions_risk
11,8003d601-5e4b-48ee-a544-5beb2f11f901,7598.0,8.963334e+03,2.786306e+03,88.501312,4.909429e+08,0.310856,2361.883891,0.009874,23.320544,54772.355114,1.293659e+08
12,5333ed1a-5c26-46fc-9368-099b343ebc31,5476.0,8.963334e+03,2.786306e+03,88.501312,4.909429e+08,0.310856,1702.247458,0.009874,16.807489,54772.355114,9.323610e+07
13,79f7ceaf-e4fe-44a2-bd7d-b0fba44777fa,7722.0,8.963334e+03,2.786306e+03,88.501312,4.909429e+08,0.310856,2400.430035,0.009874,23.701137,54772.355114,1.314772e+08
14,a7242c6f-83ad-4671-87ff-dd12b5e7e1c8,7139.0,8.963334e+03,2.786306e+03,88.501312,4.909429e+08,0.310856,2219.200987,0.009874,21.911735,54772.355114,1.215509e+08
15,1363fc28-eefc-42af-8c9a-04aca58cc84b,5737.0,8.963334e+03,2.786306e+03,88.501312,4.909429e+08,0.310856,1783.380874,0.009874,17.608576,54772.355114,9.767997e+07
...,...,...,...,...,...,...,...,...,...,...,...,...
16290,247fcce5-e30c-4e71-a2e7-435306857cd6,6294.0,2.097151e+05,3.353160e+04,348.261383,2.030740e+09,0.159891,1006.354978,0.001661,1.671193,9683.326106,9.744863e+06
16291,77ba2a4c-e626-405d-abc6-8ed04d631f0f,6108.0,1.071310e+06,8.540640e+04,922.701721,2.513853e+09,0.079721,486.938684,0.000861,0.419392,2346.522654,1.142613e+06
16292,225b1e4f-d26c-4544-b33f-0a54527bf121,4797.0,3.717501e+07,2.524384e+07,58.641193,1.142282e+08,0.679054,3257.422827,0.000002,0.005138,3.072715,1.000913e+04
16293,2d2e02ed-4b02-4dd9-853e-0b9656c66d09,2260.5,3.717501e+07,2.524384e+07,58.641193,1.142282e+08,0.679054,1535.001939,0.000002,0.002421,3.072715,4.716624e+03


In [129]:
# Write the full table (inputs and computed risk columns) to a CSV file.
sourcing_records.to_csv(
    path_or_buf='../../datasets/raw/TRASE_data/carbon_deforestation_updated_values.csv'
)