# Valle del Cauca Exports - Analysis

## Dependencies

In [None]:
import os
import sys
import duckdb

sys.path.append("../ingest")
import config # type: ignore

## Check for Files in Clean Data Directory

In [2]:
# Directory that holds the cleaned export files. A "." is prepended to the
# configured path -- presumably to re-anchor it relative to this notebook's
# folder; TODO confirm against the ingest config.
clean_subdir = config.Local_Dir.Exports["clean"]
clean_data = "." + clean_subdir

# Show which files are available before trying to load any of them.
print(os.listdir(clean_data))

['clean_exports.parquet']


## Duckdb Relation

In [3]:
# Open a DuckDB connection with default settings, then expose the cleaned
# exports parquet file as the relation that every query below reads from.
conn = duckdb.connect()

# The file location is the clean-data directory followed directly by the
# configured file name (plain string concatenation, no separator is added).
exports_parquet = clean_data + config.datasets["exports"]["filename-clean"]
rel = conn.from_parquet(exports_parquet)

TODO:
- [ ] Delete all records where MODAD != 198.
- [ ] Left-pad POSAR with zeros to 10 digits wherever its length is < 10 (i.e. the POSAR chapter is < 10).
- [ ] Delete records with the non-existent department code DPTO1 = 0.
- [ ] Drop all columns except: COD_PAI4, DPTO1, POSAR, FOBPES, AGRENA.

## Dataset Summary

In [4]:
# Column-level profile of the exports relation (type, min/max, approximate
# distinct count, mean, quartiles, row count and null percentage).
#
# `virtual_table_name` is the name under which `rel` is exposed to the SQL
# text, so the statement refers to that view instead of depending on DuckDB
# resolving the Python variable `rel` from the notebook namespace.
summary = rel.query(virtual_table_name="exports", sql_query="SUMMARIZE exports")
summary

┌─────────────┬─────────────┬───────────┬──────────────────┬───────────────┬────────────────────┬────────────────────┬────────────────────┬────────────────────┬────────────────────┬────────┬─────────────────┐
│ column_name │ column_type │    min    │       max        │ approx_unique │        avg         │        std         │        q25         │        q50         │        q75         │ count  │ null_percentage │
│   varchar   │   varchar   │  varchar  │     varchar      │     int64     │      varchar       │      varchar       │      varchar       │      varchar       │      varchar       │ int64  │  decimal(9,2)   │
├─────────────┼─────────────┼───────────┼──────────────────┼───────────────┼────────────────────┼────────────────────┼────────────────────┼────────────────────┼────────────────────┼────────┼─────────────────┤
│ FECH        │ USMALLINT   │ 2301      │ 2312             │            12 │ 2306.6872045242226 │ 3.396179255833571  │ 2304               │ 2307               │ 231

## Top Departments by Exports Value (COP)

- Antioquia = 5.
- Cesar = 20.
- Bogotá = 11.
- Guajira = 44.
- Cundinamarca = 25.
- Valle del Cauca = 76.
- Bolivar = 13.
- Atlantico = 8.
- Santander = 68.
- Casanare = 85.

In [26]:
# Ten departments with the largest total FOBPES.
#
# - MODAD = 198 applies the modality filter listed in this notebook's TODO.
# - DPTO1 = 0 is not an existing department code, so those rows are dropped.
#   The condition does not involve an aggregate, so it lives in WHERE (rows
#   are discarded before grouping) rather than in HAVING.
# - `rel` is exposed to the SQL text as the view `exports` through
#   `virtual_table_name`, so the query does not depend on DuckDB resolving
#   the Python variable name.
top_dptos = rel.query(
    virtual_table_name="exports",
    sql_query="""
    SELECT DPTO1, SUM(FOBPES) AS TOTAL_FOBPES
    FROM exports
    WHERE MODAD = 198
      AND DPTO1 <> 0
    GROUP BY DPTO1
    ORDER BY TOTAL_FOBPES DESC
    LIMIT 10;
    """,
)
top_dptos

┌───────┬───────────────────┐
│ DPTO1 │   TOTAL_FOBPES    │
│ uint8 │      double       │
├───────┼───────────────────┤
│     5 │ 274317387795845.0 │
│    20 │ 162419056332081.0 │
│    11 │ 137139376881722.0 │
│    44 │  97596421873233.0 │
│    25 │  88872208360794.0 │
│    76 │  81881358057711.0 │
│    13 │  70931585086010.0 │
│     8 │  58637885734123.0 │
│    68 │  45422345286743.0 │
│    85 │  42147727224518.0 │
├───────┴───────────────────┤
│ 10 rows         2 columns │
└───────────────────────────┘

## Top Valle del Cauca Exported Goods by Value (COP)
- SUGARS AND SUGAR CONFECTIONERY = **17**.
- ELECTRICAL MACHINERY AND EQUIPMENT AND PARTS THEREOF; SOUND RECORDERS AND REPRODUCERS; TELEVISION IMAGE AND SOUND RECORDERS AND REPRODUCERS, PARTS AND ACCESSORIES OF SUCH ARTICLES = **85**.
- PHARMACEUTICAL PRODUCTS = **30**.
- ANIMAL OR VEGETABLE FATS AND OILS AND THEIR CLEAVAGE PRODUCTS; PREPARED ANIMAL FATS; ANIMAL OR VEGETABLE WAXES = **15**.
- RUBBER AND ARTICLES THEREOF = **40**.
- ORGANIC CHEMICALS = **29**.
- SOAP, ORGANIC SURFACE-ACTIVE AGENTS; WASHING, LUBRICATING, POLISHING OR SCOURING PREPARATIONS; ARTIFICIAL OR PREPARED WAXES, CANDLES AND SIMILAR ARTICLES, MODELLING PASTES, DENTAL WAXES AND DENTAL PREPARATIONS WITH A BASIS OF PLASTER = **34**.

In [25]:
# Ten tariff lines (POSAR) with the largest total FOBPES exported from
# Valle del Cauca.
#
# - DPTO1 = 76 is the Valle del Cauca department code.
# - MODAD = 198 applies the modality filter listed in this notebook's TODO.
# - POSAR 901119000 is excluded. POSAR is stored as an integer, so this is
#   presumably tariff line 0901119000 without its leading zero -- TODO confirm
#   and record why this line is left out.
# - `rel` is exposed to the SQL text as the view `exports` through
#   `virtual_table_name`, so the query does not depend on DuckDB resolving
#   the Python variable name.
top_valle_exports = rel.query(
    virtual_table_name="exports",
    sql_query="""
    SELECT POSAR, SUM(FOBPES) AS TOTAL_FOBPES
    FROM exports
    WHERE DPTO1 = 76
      AND MODAD = 198
      AND POSAR <> 901119000
    GROUP BY POSAR
    ORDER BY TOTAL_FOBPES DESC
    LIMIT 10;
    """,
)
top_valle_exports

┌────────────┬─────────────────┐
│   POSAR    │  TOTAL_FOBPES   │
│   uint64   │     double      │
├────────────┼─────────────────┤
│ 1701999000 │ 5327639389373.0 │
│ 1704901000 │ 4488386164723.0 │
│ 8507100000 │ 3616734764075.0 │
│ 3004902900 │ 2879040231207.0 │
│ 1507909000 │ 2587768956666.0 │
│ 4011201000 │ 2225384669311.0 │
│ 8523520000 │ 1979038722351.0 │
│ 2918140000 │ 1954860744950.0 │
│ 1701140000 │ 1868642321006.0 │
│ 3401110000 │ 1392228264272.0 │
├────────────┴─────────────────┤
│ 10 rows            2 columns │
└──────────────────────────────┘

## Top Valle del Cauca Exported Goods by Aggregated Value (COP)

- SUGARS AND SUGAR CONFECTIONERY = **17**.
- ORGANIC CHEMICALS = **29**.
- RUBBER AND ARTICLES THEREOF = **40**.
- PHARMACEUTICAL PRODUCTS = **30**.
- ESSENTIAL OILS AND RESINOIDS; PERFUMERY, COSMETIC OR TOILET PREPARATIONS = **33**.
- ELECTRICAL MACHINERY AND EQUIPMENT AND PARTS THEREOF; SOUND RECORDERS AND REPRODUCERS; TELEVISION IMAGE AND SOUND RECORDERS AND REPRODUCERS, PARTS AND ACCESSORIES OF SUCH ARTICLES = **85**.

In [24]:
# Ten tariff lines (POSAR) with the largest total AGRENA exported from
# Valle del Cauca.
#
# - DPTO1 = 76 is the Valle del Cauca department code.
# - MODAD = 198 applies the modality filter listed in this notebook's TODO.
# - POSAR 901119000 is excluded. POSAR is stored as an integer, so this is
#   presumably tariff line 0901119000 without its leading zero -- TODO confirm
#   and record why this line is left out.
# - `rel` is exposed to the SQL text as the view `exports` through
#   `virtual_table_name`, so the query does not depend on DuckDB resolving
#   the Python variable name.
top_valle_agrena = rel.query(
    virtual_table_name="exports",
    sql_query="""
    SELECT POSAR, SUM(AGRENA) AS TOTAL_AGRENA
    FROM exports
    WHERE DPTO1 = 76
      AND MODAD = 198
      AND POSAR <> 901119000
    GROUP BY POSAR
    ORDER BY TOTAL_AGRENA DESC
    LIMIT 10;
    """,
)
top_valle_agrena

┌────────────┬──────────────┐
│   POSAR    │ TOTAL_AGRENA │
│   uint64   │    double    │
├────────────┼──────────────┤
│ 1704901000 │  798135554.0 │
│ 1701999000 │  489145670.0 │
│ 2918140000 │  477775397.0 │
│ 4011201000 │  415211786.0 │
│ 3004902900 │  254436279.0 │
│ 3306100000 │  240464356.0 │
│ 8523520000 │  230193767.0 │
│ 3304990000 │  199749003.0 │
│ 8544499020 │  154456470.0 │
│ 3306900000 │  150083670.0 │
├────────────┴──────────────┤
│ 10 rows         2 columns │
└───────────────────────────┘

## Top Valle del Cauca Exports Destination by Value (COP)

- United States = USA.
- Ecuador = ECU.
- Peru = PER.
- Chile = CHL.
- Mexico = MEX.
- China = CHN.
- Venezuela = VEN.
- Dominican Republic = DOM.
- Panama = PAN.
- Brazil = BRA.

In [23]:
# Ten destination countries (COD_PAI4) receiving the largest total FOBPES
# from Valle del Cauca.
#
# - DPTO1 = 76 is the Valle del Cauca department code.
# - MODAD = 198 applies the modality filter listed in this notebook's TODO.
# - POSAR 901119000 is excluded. POSAR is stored as an integer, so this is
#   presumably tariff line 0901119000 without its leading zero -- TODO confirm
#   and record why this line is left out.
# - `rel` is exposed to the SQL text as the view `exports` through
#   `virtual_table_name`, so the query does not depend on DuckDB resolving
#   the Python variable name.
top_valle_destinations = rel.query(
    virtual_table_name="exports",
    sql_query="""
    SELECT COD_PAI4, SUM(FOBPES) AS TOTAL_FOBPES
    FROM exports
    WHERE DPTO1 = 76
      AND MODAD = 198
      AND POSAR <> 901119000
    GROUP BY COD_PAI4
    ORDER BY TOTAL_FOBPES DESC
    LIMIT 10;
    """,
)
top_valle_destinations

┌──────────┬──────────────────┐
│ COD_PAI4 │   TOTAL_FOBPES   │
│ varchar  │      double      │
├──────────┼──────────────────┤
│ USA      │ 16424281381684.0 │
│ ECU      │ 11946354543325.0 │
│ PER      │  6793737554809.0 │
│ CHL      │  4703884940311.0 │
│ MEX      │  4480336657354.0 │
│ CHN      │  3936371561251.0 │
│ VEN      │  2434020564921.0 │
│ DOM      │  2387392178722.0 │
│ PAN      │  2275493878512.0 │
│ BRA      │  2262575454994.0 │
├──────────┴──────────────────┤
│ 10 rows           2 columns │
└─────────────────────────────┘

## Valle del Cauca Exports to Korea Value (COP)

In [21]:
# Total FOBPES exported from Valle del Cauca to destination code 'KOR'.
# The result has one row when matching records exist and none otherwise.
#
# - DPTO1 = 76 is the Valle del Cauca department code.
# - MODAD = 198 applies the modality filter listed in this notebook's TODO.
# - POSAR 901119000 is excluded. POSAR is stored as an integer, so this is
#   presumably tariff line 0901119000 without its leading zero -- TODO confirm
#   and record why this line is left out.
# - `rel` is exposed to the SQL text as the view `exports` through
#   `virtual_table_name`, so the query does not depend on DuckDB resolving
#   the Python variable name.
value_exports_to_korea = rel.query(
    virtual_table_name="exports",
    sql_query="""
    SELECT COD_PAI4, SUM(FOBPES) AS TOTAL_FOBPES
    FROM exports
    WHERE DPTO1 = 76
      AND COD_PAI4 = 'KOR'
      AND MODAD = 198
      AND POSAR <> 901119000
    GROUP BY COD_PAI4;
    """,
)
value_exports_to_korea

┌──────────┬────────────────┐
│ COD_PAI4 │  TOTAL_FOBPES  │
│ varchar  │     double     │
├──────────┼────────────────┤
│ KOR      │ 430441599012.0 │
└──────────┴────────────────┘

## Top Valle del Cauca Exported Goods to Korea by Value (COP)

- COPPER AND ARTICLES THEREOF = **74**.
- PAPER AND PAPERBOARD; ARTICLES OF PAPER PULP, OF PAPER OR PAPERBOARD = **48**.
- PREPARATIONS OF VEGETABLES, FRUIT, NUTS OR OTHER PARTS OF PLANTS = **20**.
- SUGARS AND SUGAR CONFECTIONERY = **17**.
- ALUMINIUM AND ARTICLES THEREOF = **76**.

In [22]:
# Ten tariff lines (POSAR) with the largest total FOBPES exported from
# Valle del Cauca to destination code 'KOR'.
#
# - DPTO1 = 76 is the Valle del Cauca department code.
# - MODAD = 198 applies the modality filter listed in this notebook's TODO.
# - POSAR 901119000 is excluded. POSAR is stored as an integer, so this is
#   presumably tariff line 0901119000 without its leading zero -- TODO confirm
#   and record why this line is left out.
# - `rel` is exposed to the SQL text as the view `exports` through
#   `virtual_table_name`, so the query does not depend on DuckDB resolving
#   the Python variable name.
top_exports_to_korea = rel.query(
    virtual_table_name="exports",
    sql_query="""
    SELECT POSAR, SUM(FOBPES) AS TOTAL_FOBPES
    FROM exports
    WHERE DPTO1 = 76
      AND COD_PAI4 = 'KOR'
      AND MODAD = 198
      AND POSAR <> 901119000
    GROUP BY POSAR
    ORDER BY TOTAL_FOBPES DESC
    LIMIT 10;
    """,
)
top_exports_to_korea

┌────────────┬────────────────┐
│   POSAR    │  TOTAL_FOBPES  │
│   uint64   │     double     │
├────────────┼────────────────┤
│ 7404000010 │ 221841854647.0 │
│ 4805939000 │  91760561314.0 │
│ 2008993000 │  26086622810.0 │
│ 1704901000 │  20952217511.0 │
│ 7602000000 │  12724974271.0 │
│ 4805929000 │  11742354754.0 │
│ 4802579000 │   8575497670.0 │
│ 4810990000 │   6798235878.0 │
│ 1701140000 │   6244112401.0 │
│ 1701130000 │   5354043940.0 │
├────────────┴────────────────┤
│ 10 rows           2 columns │
└─────────────────────────────┘