# Korea Imports - Analysis

## Dependencies

In [4]:
import os
import sys
import duckdb

sys.path.append("../ingest")
import config # type: ignore

## Check for Files in Clean Data Directory

In [5]:
# Build the path of the clean-data directory by prefixing "." to the
# configured location, then print the directory listing as a quick check of
# which files are available before loading anything.
clean_subdir = config.Local_Dir.Korea_Imports["clean"]
clean_data = "." + clean_subdir
clean_files = os.listdir(clean_data)
print(clean_files)

['clean_koreaimports.parquet']


## Duckdb Relation

In [None]:
# Full path of the clean parquet file: the directory resolved earlier in the
# notebook followed directly by the configured file name (plain string
# concatenation — no separator is inserted between the two parts).
clean_parquet_path = clean_data + config.datasets["korea-imports"]["filename-clean"]

# Connection opened with DuckDB's defaults; `rel` is the relation over the
# parquet file that every query below reads from.
conn = duckdb.connect()
rel = conn.from_parquet(clean_parquet_path)

TODO:
- [ ] Delete the `cmdDesc` column.
- [ ] Left-pad `cmdCode` with zeros in all records where it has fewer than 4 digits (this means the POSAR chapter is < 10, e.g. `901` should read `0901`).

## Dataset Summary

In [7]:
# Per-column profile of the imports relation (min, max, approximate distinct
# count, quartiles, null percentage, ...) produced by DuckDB's SUMMARIZE.
#
# `query()` exposes the relation to SQL as a view called `virtual_table_name`,
# so the statement refers to that view. Naming the view after the *input*
# (rather than after the result) avoids depending on DuckDB looking up the
# Python variable `rel` by name, and avoids creating a view called "summary"
# that is really an alias of the whole dataset.
summary = rel.query(
    virtual_table_name="korea_imports",
    sql_query="SUMMARIZE korea_imports",
)
summary

┌──────────────┬─────────────┬─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬────────────────────────────────────────────────────────┬───────────────┬────────────────────┬────────────────────┬────────────────────┬────────────────────┬───────────────────┬───────┬─────────────────┐
│ column_name  │ column_type │                                                                                         min                                                                                         │                          max                           │ approx_unique │        avg         │        std         │        q25         │        q50         │        q75        │ count │ null_percentage │
│   varchar    │   varchar   │                                                                                       varchar                                            

## Top Korea Imports from World by Value (USD)

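Chapter codes (the first two digits of `cmdCode`) of the commodity groups that appear in the results below:

- MINERAL FUELS, MINERAL OILS AND PRODUCTS OF THEIR DISTILLATION; BITUMINOUS SUBSTANCES; MINERAL WAXES = **27**.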
- ELECTRICAL MACHINERY AND EQUIPMENT AND PARTS THEREOF; SOUND RECORDERS AND REPRODUCERS; TELEVISION IMAGE AND SOUND RECORDERS AND REPRODUCERS, PARTS AND ACCESSORIES OF SUCH ARTICLES = **85**.
- NUCLEAR REACTORS, BOILERS, MACHINERY AND MECHANICAL APPLIANCES; PARTS THEREOF = **84**.
- VEHICLES; OTHER THAN RAILWAY OR TRAMWAY ROLLING STOCK, AND PARTS AND ACCESSORIES THEREOF = **87**.
- INORGANIC CHEMICALS; ORGANIC AND INORGANIC COMPOUNDS OF PRECIOUS METALS; OF RARE EARTH METALS, OF RADIO-ACTIVE ELEMENTS AND OF ISOTOPES = **28**.

In [8]:
# Ten commodity codes with the largest total import value, using only the
# rows whose partner is the aggregate 'World'.
#
# `query()` exposes the relation to SQL as a view called `virtual_table_name`,
# so the statement selects from that view. Naming the view after the *input*
# avoids depending on DuckDB looking up the Python variable `rel` by name, and
# avoids creating a view called "top_world_imports" that is really an alias of
# the whole dataset.
top_world_imports = rel.query(
    virtual_table_name="korea_imports",
    sql_query="""
    SELECT cmdCode, SUM(primaryValue) AS totalValue
    FROM korea_imports
    WHERE partnerDesc = 'World'
    GROUP BY cmdCode
    ORDER BY totalValue DESC
    LIMIT 10
    """,
)
top_world_imports

┌─────────┬───────────────┐
│ cmdCode │  totalValue   │
│ uint16  │    double     │
├─────────┼───────────────┤
│    2709 │ 86158418645.0 │
│    8542 │ 51685861319.0 │
│    2711 │ 41178396917.0 │
│    2710 │ 22926029085.0 │
│    2701 │ 20072226492.0 │
│    8486 │ 17336366723.0 │
│    8703 │ 14508706889.0 │
│    8517 │ 10954523971.0 │
│    2825 │ 10555594389.0 │
│    8507 │  8830420574.0 │
├─────────┴───────────────┤
│ 10 rows       2 columns │
└─────────────────────────┘

## Top Korea Imports from Selected Partners by Value (USD)

In [11]:
# Ten (partner, commodity code) pairs with the largest total import value,
# excluding the aggregate 'World' partner so only individual partners remain.
#
# `query()` exposes the relation to SQL as a view called `virtual_table_name`,
# so the statement selects from that view. Naming the view after the *input*
# avoids depending on DuckDB looking up the Python variable `rel` by name, and
# avoids creating a view called "top_partners_imports" that is really an alias
# of the whole dataset.
top_partners_imports = rel.query(
    virtual_table_name="korea_imports",
    sql_query="""
    SELECT partnerDesc, cmdCode, SUM(primaryValue) AS totalValue
    FROM korea_imports
    WHERE partnerDesc <> 'World'
    GROUP BY partnerDesc, cmdCode
    ORDER BY totalValue DESC
    LIMIT 10
    """,
)
top_partners_imports

┌─────────────┬─────────┬───────────────┐
│ partnerDesc │ cmdCode │  totalValue   │
│   varchar   │ uint16  │    double     │
├─────────────┼─────────┼───────────────┤
│ China       │    8542 │ 16816793018.0 │
│ USA         │    2709 │ 12316950059.0 │
│ China       │    2825 │  9033981475.0 │
│ USA         │    2711 │  8603559588.0 │
│ China       │    8507 │  8336929246.0 │
│ China       │    8517 │  5664000491.0 │
│ China       │    8471 │  4338042307.0 │
│ USA         │    8486 │  3882385885.0 │
│ USA         │    8703 │  3142367114.0 │
│ China       │    2841 │  2516297865.0 │
├─────────────┴─────────┴───────────────┤
│ 10 rows                     3 columns │
└───────────────────────────────────────┘

## Top Korea Imports from Colombia by Value (USD)

In [14]:
# Ten commodity codes with the largest total import value among the rows
# whose partner is 'Colombia'.
#
# `query()` exposes the relation to SQL as a view called `virtual_table_name`,
# so the statement selects from that view. Naming the view after the *input*
# avoids depending on DuckDB looking up the Python variable `rel` by name, and
# avoids creating a view called "top_colombia_imports" that is really an alias
# of the whole dataset.
top_colombia_imports = rel.query(
    virtual_table_name="korea_imports",
    sql_query="""
    SELECT partnerDesc, cmdCode, SUM(primaryValue) AS totalValue
    FROM korea_imports
    WHERE partnerDesc = 'Colombia'
    GROUP BY partnerDesc, cmdCode
    ORDER BY totalValue DESC
    LIMIT 10
    """,
)
top_colombia_imports

┌─────────────┬─────────┬─────────────┐
│ partnerDesc │ cmdCode │ totalValue  │
│   varchar   │ uint16  │   double    │
├─────────────┼─────────┼─────────────┤
│ Colombia    │    2701 │ 736306973.0 │
│ Colombia    │     901 │ 153016607.0 │
│ Colombia    │    7202 │  52079479.0 │
│ Colombia    │    7404 │  33488166.0 │
│ Colombia    │     603 │  27355414.0 │
│ Colombia    │    2101 │  13060543.0 │
│ Colombia    │    3808 │  12556460.0 │
│ Colombia    │     803 │   6569164.0 │
│ Colombia    │    7602 │   5004662.0 │
│ Colombia    │    2607 │   4807759.0 │
├─────────────┴─────────┴─────────────┤
│ 10 rows                   3 columns │
└─────────────────────────────────────┘