# Distributed probability map

This notebook contains a prototype for the impact map using the scaler and the pointer:

The idea is to query the sourcing location data using the client's filters; then, for each geo region, we compute the following:

    distributed impact = (Value *  H3 prod values (pointer))/ Scaler
    



In [3]:
def psql(query):
    """Run ``query`` on the notebook's shared cursor and return its rows.

    Uses the module-level ``cursor`` and ``conn`` objects, which are looked up
    when the function is called (not when it is defined).

    Args:
        query: SQL text passed unchanged to ``cursor.execute``.

    Returns:
        The result of ``cursor.fetchall()`` when the statement produced a
        result set, an empty list when it produced none (e.g. DDL or an
        INSERT/UPDATE without RETURNING), or ``None`` when an exception was
        raised (the exception is printed and the transaction rolled back).
    """
    try:
        cursor.execute(query)
        # DB-API cursors report ``description is None`` after a statement that
        # returns no rows. Calling fetchall() in that state raises, which used
        # to land in the except branch and roll back a statement that had
        # actually succeeded.
        if cursor.description is None:
            return []
        return cursor.fetchall()
    except Exception as e:
        # Best-effort helper for interactive use: leave the connection usable
        # for the next query and surface the error text instead of raising.
        conn.rollback()
        print(e)

In [4]:
#import libraries
from psycopg2.pool import ThreadedConnectionPool
import pandas as pd
import json

In [5]:
# Load KEY=VALUE settings (PostgreSQL connection parameters, service ports)
# from the local .env file into the `env` dict used by the cells below.
env_path = ".env"
env = {}
with open(env_path) as f:
    for line in f:
        # Drop the line terminator (also the "\r" of CRLF files) so it never
        # ends up inside a value.
        line = line.rstrip("\r\n")
        # Blank lines, comment lines and lines without "=" carry no setting;
        # skipping them avoids a ValueError on the unpacking below.
        if not line.strip() or line.lstrip().startswith("#") or "=" not in line:
            continue
        # Split on the first "=" only, so values may themselves contain "=".
        env_key, env_value = line.split("=", 1)
        env[env_key] = env_value

# Show only the variable names: the values include credentials.
list(env.keys())

['API_SERVICE_PORT',
 'API_POSTGRES_HOST',
 'API_POSTGRES_PORT',
 'API_POSTGRES_USERNAME',
 'API_POSTGRES_PASSWORD',
 'API_POSTGRES_DATABASE',
 'CLIENT_SERVICE_PORT']

In [6]:
# Connect to the database through a threaded pool (1 to 50 connections).
# NOTE(review): API_POSTGRES_DATABASE is listed in .env but is not passed to the
# pool, so the driver falls back to its default database name -- confirm that
# this is intended.
_pg_settings = {
    "host": env['API_POSTGRES_HOST'],
    "port": env['API_POSTGRES_PORT'],
    "user": env['API_POSTGRES_USERNAME'],
    "password": env['API_POSTGRES_PASSWORD'],
}
postgres_thread_pool = ThreadedConnectionPool(1, 50, **_pg_settings)

# Borrow one connection from the pool and open the cursor on it; both are
# module-level names (`conn`, `cursor`) that other cells rely on.
conn = postgres_thread_pool.getconn()
cursor = conn.cursor()

In [7]:
# SQL that (re)defines get_h3_uncompact_geo_region(geo_region_id, h3_resolution).
# For the geo_region row with the given id, the function casts its "h3Compact"
# column to h3index[] and passes it, with h3_resolution, to h3_uncompact,
# returning the result as a single h3index column.
# NOTE(review): h3_uncompact is not defined in this notebook; presumably it
# comes from the H3 PostgreSQL extension -- confirm it is installed.
SQL_GET_H3_UNCOMPACT_GEO_REGION = """
CREATE OR REPLACE FUNCTION get_h3_uncompact_geo_region(geo_region_id uuid, h3_resolution int)
RETURNS TABLE (h3index h3index) AS 
$$
    SELECT h3_uncompact(geo_region."h3Compact"::h3index[], h3_resolution) h3index
    FROM geo_region WHERE geo_region.id = geo_region_id
$$ 
LANGUAGE SQL;
"""

# SQL that (re)defines get_h3_material_table_column(h3DataId).
# The function returns the "h3tableName" / "h3columnName" pair stored in the
# h3_data row with the given id (at most one row, because of LIMIT 1).
SQL_GET_H3_MATERIAL_TABLE_COLUMN = """
CREATE OR REPLACE FUNCTION get_h3_material_table_column(h3DataId uuid)
RETURNS TABLE (h3_table_name varchar, h3_column_name varchar) AS
$$
    SELECT h3_data."h3tableName", h3_data."h3columnName"
    FROM h3_data
    WHERE h3_data.id = h3DataId 
    LIMIT 1;
$$
LANGUAGE SQL;
"""

# SQL made of the two helper-function definitions above followed by the
# definition of get_h3_data_over_georegion(geo_region_id, h3DataId).
#
# The PL/pgSQL function:
#   1. reads the H3 table name and column name for h3DataId through
#      get_h3_material_table_column;
#   2. builds a dynamic query with format(), using %I so the table and column
#      names are inserted as quoted identifiers;
#   3. joins the cells returned by get_h3_uncompact_geo_region (resolution
#      fixed at 6 by the h3_resolution local) with that table on h3index and
#      returns (h3index, column value cast to float) for rows whose value > 0.
#
# NOTE(review): the in-SQL comment says "Sum table column over region", but the
# query does not aggregate; it returns one row per matching H3 cell.
# NOTE(review): the local variable `value float` declared in the function is
# never referenced in its body.
SQL_GET_H3_DATA_OVER_GEO_REGION = SQL_GET_H3_MATERIAL_TABLE_COLUMN+SQL_GET_H3_UNCOMPACT_GEO_REGION+"""
CREATE OR REPLACE FUNCTION get_h3_data_over_georegion(
    geo_region_id uuid, 
    h3DataId uuid
)
RETURNS TABLE (h3index h3index, value float) AS
$$
    DECLARE
        material_h3_table_name varchar;
        material_h3_column_name varchar;
        h3_resolution integer := 6;
        value float;

    BEGIN
        -- Get h3data table name and column name for given material
        SELECT * INTO material_h3_table_name, material_h3_column_name
        FROM get_h3_material_table_column(h3DataId);

        -- Sum table column over region
        RETURN QUERY EXECUTE format(
            'SELECT 
                h3grid.h3index,
                h3grid.%I::float
                FROM
                    get_h3_uncompact_geo_region($1, $2) geo_region
                    INNER JOIN %I h3grid ON h3grid.h3index = geo_region.h3index
            WHERE h3grid.%I > 0
            ', material_h3_column_name, material_h3_table_name, material_h3_column_name)
            USING geo_region_id, h3_resolution;
    END;
$$
LANGUAGE plpgsql;
"""


In [8]:
# Filters that could be provided by the client.
# NOTE(review): none of these variables is referenced by the impact-map query
# cell, whose only filter is sr.year = 2015 -- confirm whether they should be.
indicator_id = 'e2c00251-fe31-4330-8c38-604535d795dc'
# Palm oil; either all materials or specific ones can be selected.
material_id = '52370288-8c6c-4f31-919c-d30538d39ee3'
# Admin region id; its parent and children are also needed to filter the
# sourcing locations.
origins = '998257ec-534d-4221-bc95-05e88c992a35'
# suppliers = 'f634e464-c9ca-469d-85d6-df3cd1877f26'  # suppliers/producers

In [None]:
# Build the distributed impact map. For every 2015 sourcing record, each H3
# cell of the sourcing location's geo region gets
#     ir.value / ir.scaler * <cell value of the H3 layer linked via material_to_h3>
# and the contributions are summed per H3 cell (result columns: h3index, sum).
#
# SQL_GET_H3_DATA_OVER_GEO_REGION already contains the definition of
# get_h3_uncompact_geo_region, so that definition is not concatenated a second
# time. The query text has no placeholders, so it is a plain string rather
# than an f-string.
#
# Do not add `params=` to this call without escaping the SQL first: the
# function definitions contain format() specifiers (%I) that the driver would
# then try to interpret as query placeholders.
# NOTE(review): a record with ir.scaler = 0 makes the division fail in the
# database -- confirm that scaler is always non-zero.
df_impact_map = pd.read_sql_query(
    SQL_GET_H3_DATA_OVER_GEO_REGION
    + """
        SELECT
            h3data.h3index,
            sum(h3data.value)
        FROM sourcing_location sl
        INNER JOIN sourcing_records sr ON sr."sourcingLocationId" = sl.id 
        INNER JOIN indicator_record ir ON ir."sourcingRecordId" = sr.id
        INNER JOIN material_to_h3 mth ON mth."materialId" = sl."materialId", -- remove when h3DataId gets updated
        LATERAL (
            SELECT
                h3index,
                ir.value/ir.scaler * value as value
            FROM get_h3_data_over_georegion(sl."geoRegionId", mth."h3DataId")
        ) h3data
        WHERE sr.year=2015
        GROUP by h3data.h3index""",
    conn,
)
# Preview only the first rows; the full frame stays available as df_impact_map.
df_impact_map.head()

In [None]:
# Number of rows in the impact map (the query groups by h3index, so one row per H3 cell).
df_impact_map.shape[0]

In [None]:
# Turn the frame into the list of {'hexId', 'impact'} records to export.
# The two columns are iterated directly instead of using DataFrame.iterrows(),
# which builds a Series object for every row; the resulting records are the same.
impact_json = [
    {'hexId': hex_id, 'impact': float(impact)}
    for hex_id, impact in zip(df_impact_map['h3index'], df_impact_map['sum'])
]
print(len(impact_json))

In [None]:
# Write the impact records to disk as a JSON array.
output_path = '../../data/processed/h3_impact_distributes_v3.json'
with open(output_path, 'w') as out_file:
    json.dump(impact_json, out_file)