In [None]:
from utils.run_sql import run_sql

# Monthly emissions rollup from the asset table.
# The query sums activity and emissions_quantity and averages emissions_factor
# per country, original inventory sector, calendar month (YYYY-MM) and gas.
# Rows are restricted to start_time >= 2022-02-01, gas 'co2e_100yr' and
# most_granular = true. country_analysis is LEFT JOINed only to attach the
# country name, so rows whose iso3_country has no match keep a NULL name.
# NOTE(review): avg(emissions_factor) is an unweighted mean over the grouped
# rows — confirm that is the intended statistic.
asset_query = '''
    select ca.name country_name
        , ae.iso3_country
        , original_inventory_sector
        , to_char(start_time, 'YYYY-MM') as year_month
        , ae.gas
        , sum(activity) activity
        , avg(emissions_factor) mean_emissions_factor
        , sum(emissions_quantity) emissions_quantity

    from asset_emissions ae
    left join country_analysis ca
        on cast(ca.iso3_country as varchar) = cast(ae.iso3_country as varchar)

    where start_time >= '2022-02-01'
        and ae.gas = 'co2e_100yr'
        -- and original_inventory_sector in ('international-shipping','domestic-shipping','electricity-generation')
        and most_granular = true

    group by ca.name
        , ae.iso3_country
        , original_inventory_sector
        , to_char(start_time, 'YYYY-MM')
        , ae.gas
'''

# Execute the query through the project's run_sql helper.
# NOTE(review): presumably returns a pandas DataFrame (hence the _df name) — confirm.
asset_df = run_sql(asset_query)



In [None]:
import duckdb
import os
from dotenv import load_dotenv
from urllib.parse import quote_plus
from sqlalchemy import create_engine

load_dotenv()

# Read the PostgreSQL connection settings from the environment (populated by
# load_dotenv above) and fail fast with a readable message when any is missing.
# Without this check a missing user/password surfaces as an opaque TypeError
# inside quote_plus, and a missing host/port/database silently yields a URL
# containing the literal text "None".
required_env_vars = (
    "CLIMATETRACE_USER",
    "CLIMATETRACE_PASS",
    "CLIMATETRACE_HOST",
    "CLIMATETRACE_PORT",
    "CLIMATETRACE_DB",
)
missing_env_vars = [name for name in required_env_vars if not os.getenv(name)]
if missing_env_vars:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(missing_env_vars)
    )

# Percent-encode the credentials so special characters survive inside the URL.
user = quote_plus(os.environ["CLIMATETRACE_USER"])
password = quote_plus(os.environ["CLIMATETRACE_PASS"])
host = os.environ["CLIMATETRACE_HOST"]
port = os.environ["CLIMATETRACE_PORT"]
database = os.environ["CLIMATETRACE_DB"]

# Connection URL handed to DuckDB's postgres_scan (no SQLAlchemy engine is built).
# NOTE(review): the URL, password included, is interpolated into the SQL text
# below — confirm DuckDB error output from this cell is never shared verbatim.
postgres_url = f"postgresql://{user}:{password}@{host}:{port}/{database}"
parquet_path = "data/asset_parquet/asset_emissions_most_granular.parquet"
os.makedirs(os.path.dirname(parquet_path), exist_ok=True)


print("Running query and writing to parquet file, this may take an hour...")
# Use DuckDB to write directly from PostgreSQL to Parquet.
# COPY (SELECT ...) TO streams the joined rows straight into the Parquet file
# instead of first materialising them in an in-memory table.
con = duckdb.connect()
try:
    con.execute(f"""
        INSTALL postgres;
        LOAD postgres;

        COPY (
            SELECT ae.*,
                   sch.sector,
                   ca.name as country_name,
                   ca.continent,
                   ca.unfccc_annex,
                   ca.em_finance,
                   ca.eu,
                   ca.oecd,
                   ca.developed_un
            FROM postgres_scan('{postgres_url}', 'public', 'asset_emissions') ae
            LEFT JOIN postgres_scan('{postgres_url}', 'public', 'country_analysis') ca
                ON CAST(ca.iso3_country AS VARCHAR) = CAST(ae.iso3_country AS VARCHAR)
            LEFT JOIN (
                SELECT DISTINCT sector, subsector FROM postgres_scan('{postgres_url}', 'public', 'asset_schema')
            ) sch
                ON CAST(sch.subsector AS VARCHAR) = CAST(ae.original_inventory_sector AS VARCHAR)
            WHERE ae.start_time >= DATE '2022-02-01'
              AND ae.gas = 'co2e_100yr'
              AND ae.most_granular = TRUE
        ) TO '{parquet_path}' (FORMAT PARQUET);
    """)
finally:
    # Release the DuckDB connection even when the long-running export fails.
    con.close()

print("Parquet file exported!")

In [3]:
import duckdb

# Location of the exported Parquet file (adjust if it was written elsewhere)
parquet_path = "data/asset_parquet/asset_emissions_most_granular.parquet"

# Fresh in-memory DuckDB connection; the query below reads the file by path
con = duckdb.connect()

# Sanity check: how many rows actually landed in the export
row_count_sql = f"SELECT count(*) FROM '{parquet_path}' "
result = con.execute(row_count_sql).df()

print(result)


   count_star()
0      34778764


In [4]:
from utils.utils import map_region_condition

# Show what map_region_condition returns for the 'EU' selection
eu_condition = map_region_condition('EU')
print(eu_condition)

{'column_name': 'eu', 'column_value': True}


In [None]:
# Same lookup for the 'Asia' selection: print the whole mapping first,
# then each of its two fields on its own line
test = map_region_condition('Asia')

print(test)
for field in ('column_name', 'column_value'):
    print(test[field])