# Review methodology for the deforestation and carbon equations:

Reviewed formulas to discuss:

Deforestation risk:

    Deforestation risk (ha) = Idef(unitless) * Ilanduse(ha) =>
    
    where:
    
        Idef (unitless) = (Sum(buffered_def_in_georegion) / Sum(Human_area_in_georegion))
        Ilanduse (ha) = Sum(harvest_area_in_georegion)/sum(production_in_georegion) * Purchased_volume
        
        
To discuss:

    - Should we compute the human area in the georegion or the total crop+pasture area? Until we have confirmed this, I'll use the harvest area of the crop of interest in the georegion.
    

Carbon risk:

    Carbon risk (tco2e) = Icarbonemissions (tco2e/ha) * Ilanduse(ha) =>
    
    where:
        
        Icarbonemissions (tco2e/ha) = sum(buffered_carbon_in_georegion) (tco2e) / sum(human_area_in_georegion) (ha)
        
        Ilanduse (ha) = Sum(harvest_area_in_georegion)/sum(production_in_georegion) * Purchased_volume
        
        
To discuss:

    - confirm that the carbon units are ingested as tco2e
    - we are downloading Mg_CO2e_px_download
    - Has the data been normalised (divided by the 21-year period) during preprocessing to get the annual average carbon emissions?
    - in that case, should we multiply the carbon by the total crop produced in that pixel?
    - Should we compute the total human area in the georegion or the total crop+pasture area? Until we have confirmed this, I'll use the harvest area of the crop of interest in the georegion.
        
    
    

In [4]:
#import libraries

from psycopg2.pool import ThreadedConnectionPool
import pandas as pd

In [5]:
# Set up the environment.
# The .env file holds KEY=VALUE pairs; the API_POSTGRES_* entries read below
# are the connection settings for the database.
env_path = ".env"
with open(env_path) as f:
    env = {}
    for line in f:
        # Skip blank lines, comment lines and anything without a "=" separator.
        # Unpacking the result of split("=", 1) on such a line raises
        # ValueError, which previously aborted the whole cell.
        if not line.strip() or line.lstrip().startswith("#") or "=" not in line:
            continue
        env_key, _val = line.split("=", 1)
        # Drop only the line terminator; the rest of the value is kept verbatim.
        env_value = _val.rstrip("\n")
        env[env_key] = env_value

#list(env.keys())

# Open a connection pool (min 1, max 50 connections) against the database
# described by the API_POSTGRES_* settings.
postgres_thread_pool = ThreadedConnectionPool(
    1,
    50,
    host=env['API_POSTGRES_HOST'],
    port=env['API_POSTGRES_PORT'],
    user=env['API_POSTGRES_USERNAME'],
    password=env['API_POSTGRES_PASSWORD']
)

# Borrow one connection from the pool and open a cursor on it; both are used
# as module-level globals by the cells below.
# NOTE(review): the connection is never handed back with putconn() — fine for
# a one-off notebook session, confirm if this code is reused elsewhere.
conn = postgres_thread_pool.getconn()
cursor = conn.cursor()

In [6]:
def psql(query):
    """Run ``query`` on the module-level ``cursor`` and return its rows.

    Args:
        query: SQL text passed unchanged to ``cursor.execute``.

    Returns:
        The result of ``cursor.fetchall()`` when the statement produced a
        result set. ``None`` when the statement produced no result set, or
        when execution failed (the error is printed and the transaction on
        ``conn`` is rolled back).
    """
    try:
        cursor.execute(query)
        # DB-API: ``description`` is None when the last statement returned no
        # rows (e.g. CREATE FUNCTION). Calling fetchall() in that state raises,
        # and the handler below would then roll back a statement that had
        # actually succeeded.
        if cursor.description is None:
            return None
        return cursor.fetchall()
    except Exception as e:
        # Best-effort notebook helper: reset the aborted transaction so the
        # connection stays usable, and surface the error to the user.
        conn.rollback()
        print(e)
        return None

In [120]:
#define queries

# SQL script that (re)creates the Postgres function
# get_h3_uncompact_geo_region(geo_region_id uuid, h3_resolution int).
# The function looks up the geo_region row with the given id and returns the
# h3 indexes produced by h3_uncompact() on its "h3Compact" array at the
# requested resolution, as a single-column table named h3index.
SQL_GET_H3_UNCOMPACT_GEO_REGION = """
CREATE OR REPLACE FUNCTION get_h3_uncompact_geo_region(geo_region_id uuid, h3_resolution int)
RETURNS TABLE (h3index h3index) AS 
$$
    SELECT h3_uncompact(geo_region."h3Compact"::h3index[], h3_resolution) h3index
    FROM geo_region WHERE geo_region.id = geo_region_id
$$ 
LANGUAGE SQL;
"""

# SQL script that (re)creates the Postgres function
# get_h3_table_column_for_land_indicators(shortName text).
# It joins h3_data to "indicator" and returns, for the indicator whose
# "nameCode" equals shortName, one row of
# (h3_resolution, h3_table_name, h3_column_name).
# Only columns whose name matches '%Buffered%' are considered, i.e. this
# assumes every land indicator has a buffered version in the table.
# NOTE: the output column order (resolution first) differs from
# get_h3_table_column_for_material (resolution last); callers that use
# SELECT * INTO must list their variables in the matching order.
# NOTE(review): LIMIT 1 has no ORDER BY, so if several rows match, which one
# is returned is up to the database — confirm only one row is expected.
SQL_GET_H3_TABLE_COLUMN_FOR_LAND_INDICATORS = """
CREATE OR REPLACE FUNCTION get_h3_table_column_for_land_indicators(shortName text)
RETURNS TABLE (h3_resolution int, h3_table_name varchar, h3_column_name varchar) AS
$$
    SELECT h3_data."h3resolution", h3_data."h3tableName", h3_data."h3columnName"
    FROM h3_data
        INNER JOIN "indicator" ind ON ind."id" = h3_data."indicatorId"
    WHERE ind."nameCode" = shortName 
        AND h3_data."h3columnName" like '%Buffered%'
    LIMIT 1;
$$
LANGUAGE SQL;
"""

# SQL script that (re)creates the Postgres function
# get_h3_table_column_for_material(material_id uuid,
#                                  h3_data_type material_to_h3_type_enum).
# It joins h3_data to material_to_h3 and returns one row of
# (h3_table_name, h3_column_name, h3_resolution) for the given material and
# h3 data type.
# NOTE(review): LIMIT 1 has no ORDER BY, so if several rows match, which one
# is returned is up to the database — confirm only one row is expected.
SQL_GET_H3_TABLE_COLUMN_FOR_MATERIAL = """
CREATE OR REPLACE FUNCTION get_h3_table_column_for_material(material_id uuid, h3_data_type material_to_h3_type_enum)
RETURNS TABLE (h3_table_name varchar, h3_column_name varchar, h3_resolution int) AS
$$
    SELECT h3_data."h3tableName", h3_data."h3columnName", h3_data."h3resolution"
    FROM h3_data
        INNER JOIN material_to_h3 ON material_to_h3."h3DataId" = h3_data.id
    WHERE material_to_h3."materialId" = material_id 
        AND material_to_h3.type = h3_data_type
    LIMIT 1;
$$
LANGUAGE SQL;
"""

# SQL script that (re)creates the Postgres function
# sum_h3_grid_over_georegion(geo_region_id, h3_resolution,
#                            h3_table_name, h3_column_name) -> float.
# SQL_GET_H3_UNCOMPACT_GEO_REGION is prepended so that the
# get_h3_uncompact_geo_region helper called inside the function is created by
# the same script.
# The function builds its query dynamically: the column and table names are
# injected with format()'s %I placeholder, while the geo region id and the
# resolution are passed as $1/$2 through USING. It returns the sum of the
# given column over the h3 cells of the geo region (inner join on h3index).
SQL_SUM_H3_GRID_OVER_GEO_REGION = SQL_GET_H3_UNCOMPACT_GEO_REGION+"""
CREATE OR REPLACE FUNCTION sum_h3_grid_over_georegion(
    geo_region_id uuid, 
    h3_resolution int,
    h3_table_name varchar, 
    h3_column_name varchar
)
RETURNS float AS
$$
    DECLARE
        sum float;
    BEGIN
        EXECUTE format(
            'SELECT sum(h3grid.%I)
                FROM
                    get_h3_uncompact_geo_region($1, $2) geo_region
                    INNER JOIN %I h3grid ON h3grid.h3index = geo_region.h3index;
            ', h3_column_name, h3_table_name)
            USING geo_region_id, h3_resolution
            INTO sum;
        RETURN sum;
    END;
$$
LANGUAGE plpgsql;
"""

# SQL script that (re)creates the Postgres function
# sum_material_over_georegion(geo_region_id, material_id, h3_data_type) -> float.
# SQL_SUM_H3_GRID_OVER_GEO_REGION and SQL_GET_H3_TABLE_COLUMN_FOR_MATERIAL are
# prepended so that the two helpers the function calls are created by the same
# script.
# The function first resolves the h3 table name, column name and resolution
# for the material / data type, then delegates the summation over the geo
# region to sum_h3_grid_over_georegion.
SQL_SUM_MATERIAL_OVER_GEO_REGION = SQL_SUM_H3_GRID_OVER_GEO_REGION+SQL_GET_H3_TABLE_COLUMN_FOR_MATERIAL+"""
CREATE OR REPLACE FUNCTION sum_material_over_georegion(
    geo_region_id uuid, 
    material_id uuid,
    h3_data_type material_to_h3_type_enum
)
RETURNS float AS
$$
    DECLARE
        h3_table_name varchar;
        h3_column_name varchar;
        h3_resolution integer;
        sum float;

    BEGIN
        -- Get h3data table name and column name for given material
        SELECT * INTO h3_table_name, h3_column_name, h3_resolution
        FROM get_h3_table_column_for_material(material_id, h3_data_type);

        -- Sum table column over region
        SELECT sum_h3_grid_over_georegion(geo_region_id, h3_resolution, h3_table_name, h3_column_name) 
        INTO sum;
        RETURN sum;
    END;
$$
LANGUAGE plpgsql;
"""

# SQL script that (re)creates the Postgres function
# get_annual_deforestation_over_georegion(geo_region_id, shortName) -> float.
# The function resolves the (resolution, table, column) of the land indicator
# identified by shortName via get_h3_table_column_for_land_indicators, then
# sums h3_cell_area(h3index) * 100 * <column> over the h3 cells of the geo
# region and divides the total by 5.
# Unlike the SQL_SUM_* constants, this string does not prepend the definitions
# of the helpers it calls (get_h3_table_column_for_land_indicators and
# get_h3_uncompact_geo_region); they must be created before this function is
# executed.
# NOTE(review): the *100 presumably converts the cell area from km2 to
# hectares and the /5 presumably annualises a 5-year deforestation window —
# confirm both against the h3_cell_area unit and the ingested dataset.
SQL_GET_ANNUAL_DEFORESTATION_OVER_GEO_REGION = """
CREATE OR REPLACE FUNCTION get_annual_deforestation_over_georegion(
    geo_region_id uuid,
    shortName text
)
RETURNS float AS
$$
    DECLARE
        h3_resolution integer;
        deforestation_h3_table_name varchar;
        deforestation_h3_column_name varchar;
        sum float;
    BEGIN
    
        -- Get h3data table name, column and resolution for the deforestation land indicator
        SELECT * INTO h3_resolution, deforestation_h3_table_name, deforestation_h3_column_name
        FROM get_h3_table_column_for_land_indicators(shortName);
        
        -- Sum deforestation values
        EXECUTE format(
            'SELECT sum(h3_cell_area(h3def.h3index) *100* h3def.%I) /5
                FROM
                    get_h3_uncompact_geo_region($1, $2) geo_region
                INNER JOIN %I h3def ON h3def.h3index = geo_region.h3index;
            ', deforestation_h3_column_name, deforestation_h3_table_name)
            USING geo_region_id, h3_resolution
            INTO sum;
        RETURN sum;
    END;
$$
LANGUAGE plpgsql;
"""

# SQL script that (re)creates the Postgres function
# get_annual_carbon_emissions_over_georegion(geo_region_id, shortName) -> float.
# The function resolves the (resolution, table, column) of the land indicator
# identified by shortName via get_h3_table_column_for_land_indicators, then
# returns the plain sum of that column over the h3 cells of the geo region.
# No area weighting or division by a number of years is applied here, unlike
# in the deforestation function.
# This string does not prepend the definitions of the helpers it calls
# (get_h3_table_column_for_land_indicators and get_h3_uncompact_geo_region);
# they must be created before this function is executed.
# NOTE: the inline SQL comment "-- Sum deforestation values" below is a
# copy-paste leftover; the query sums the carbon emissions column.
# NOTE(review): "annual" in the function name presumably relies on the data
# having been annualised during preprocessing — confirm.
SQL_GET_ANNUAL_CARBON_EMISSIONS_OVER_GEO_REGION = """
CREATE OR REPLACE FUNCTION get_annual_carbon_emissions_over_georegion(
    geo_region_id uuid,
    shortName text
)
RETURNS float AS
$$
    DECLARE
        h3_resolution integer;
        carbon_h3_table_name varchar;
        carbon_h3_column_name varchar;
        sum float;
    BEGIN
    
        -- Get h3data table name, column and resolution for the carbon land indicator
        SELECT * INTO h3_resolution, carbon_h3_table_name, carbon_h3_column_name
        FROM get_h3_table_column_for_land_indicators(shortName);
        
        -- Sum deforestation values
        EXECUTE format(
            'SELECT sum(h3emissions.%I)
                FROM
                    get_h3_uncompact_geo_region($1, $2) geo_region
                INNER JOIN %I h3emissions ON h3emissions.h3index = geo_region.h3index;
            ', carbon_h3_column_name, carbon_h3_table_name)
            USING geo_region_id, h3_resolution
            INTO sum;
        RETURN sum;
    END;
$$
LANGUAGE plpgsql;
"""

In [123]:
# One script does two things: it (re)creates every helper SQL function, then
# selects one row per sourcing record with its tonnage and the per-georegion
# production, harvest area, annual deforestation and annual carbon emissions
# of the linked sourcing location.
sourcing_records = pd.read_sql_query(
    sql=(
        SQL_SUM_MATERIAL_OVER_GEO_REGION
        + SQL_GET_H3_TABLE_COLUMN_FOR_LAND_INDICATORS
        + SQL_GET_ANNUAL_DEFORESTATION_OVER_GEO_REGION
        + SQL_GET_ANNUAL_CARBON_EMISSIONS_OVER_GEO_REGION
        + """
    SELECT 
        sr.id,
        sr.tonnage,
        sl.production,
        sl.harvest,
        sl.def_annual,
        sl.emissions_annual
    FROM 
        sourcing_records sr
    INNER JOIN
        (
        
        SELECT 
            id,
            "geoRegionId",
            "locationCountryInput",
            sum_material_over_georegion("geoRegionId", "materialId", 'producer') as production,
            sum_material_over_georegion("geoRegionId", "materialId", 'harvest') as harvest,
            get_annual_deforestation_over_georegion("geoRegionId", 'DF_LUC_T') def_annual,
            get_annual_carbon_emissions_over_georegion("geoRegionId",'GHG_LUC_T') emissions_annual
        FROM
            sourcing_location
        ) AS sl
        ON sr."sourcingLocationId" = sl."id"
        """
    ),
    con=conn,
)

# Land use (Ilanduse): harvest area per unit of production in the georegion,
# scaled by the purchased tonnage.
sourcing_records['land_per_ton'] = sourcing_records['harvest'].div(sourcing_records['production'])
sourcing_records['land_use'] = sourcing_records['land_per_ton'].mul(sourcing_records['tonnage'])

# Working assumption: all forest loss is due to human land use
# (crop/pasture/managed forest/urban), and all human land use within 50 km of
# a deforested pixel is equally responsible. The question answered is: how
# many hectares of forest are lost, on average, per hectare of cropland in the
# local area/jurisdiction?
# Open question: should this be computed with or without the buffer?
#
# Both risks follow the same recipe:
#   1. take the georegion total of the indicator (annual deforestation or
#      annual carbon emissions) — already provided by the query above;
#   2. divide it by a land-use area of the georegion. The methodology asks for
#      the human land-use area; until that is confirmed, the harvest area of
#      the crop of interest is used instead (to be replaced by total human
#      area or total pasture+crop area);
#   3. multiply the resulting per-hectare rate by the land use of the
#      purchased material.
for per_ha_column, georegion_total_column, risk_column in (
    ('buffer_deforestation_per_ha_land_use', 'def_annual', 'deforestation_risk'),
    ('buffer_emissions_per_ha_land_use', 'emissions_annual', 'emissions_risk'),
):
    sourcing_records[per_ha_column] = sourcing_records[georegion_total_column].div(sourcing_records['harvest'])
    sourcing_records[risk_column] = sourcing_records[per_ha_column].mul(sourcing_records['land_use'])


sourcing_records.head()

Unnamed: 0,id,tonnage,production,harvest,def_annual,emissions_annual,land_per_ton,land_use,buffer_deforestation_per_ha_land_use,deforestation_risk,buffer_emissions_per_ha_land_use,emissions_risk
0,f8c29ccf-d291-4198-bb68-165fd9e9eb14,4595.934407,312659.09375,640832.125,5.278378,38270.945312,2.049619,9419.91604,8e-06,0.07759,0.059721,562.564013
1,976f6107-c9f4-4738-a998-7112d42ac6d5,2018.921879,312659.09375,640832.125,5.278378,38270.945312,2.049619,4138.021327,8e-06,0.034084,0.059721,247.125545
2,984e841c-9fec-43c1-919f-209b47aa73a4,3481.199091,312659.09375,640832.125,5.278378,38270.945312,2.049619,7135.132979,8e-06,0.05877,0.059721,426.115161
3,46b99e48-87d8-412f-9d1c-8250842292f9,1559.732732,312659.09375,640832.125,5.278378,38270.945312,2.049619,3196.858371,8e-06,0.026332,0.059721,190.918631
4,359bc160-3cbf-4092-bed9-63cabd6220e4,334.88481,312659.09375,640832.125,5.278378,38270.945312,2.049619,686.386383,8e-06,0.005654,0.059721,40.991478


In [129]:
# Write the sourcing records, including the computed risk columns, to CSV.
sourcing_records.to_csv(
    path_or_buf='../../datasets/raw/TRASE_data/carbon_deforestation_updated_values.csv'
)