# Korea Imports - Transformation

## Dependencies

In [None]:
import os
import sys
import duckdb

    sys.path.append("../ingest")
import config  # type: ignore

## Duckdb Database

In [None]:
db_dir = config.Database.dir
db_filename = config.Database.filename
db_path = os.path.join(db_dir, db_filename)

ddb = duckdb.connect(database=db_path)

In [4]:
ddb.execute(
    """CREATE TABLE IF NOT EXISTS korea_imports
    AS SELECT * FROM '../data/korea-imports/clean/clean_koreaimports.parquet'"""
)

<duckdb.duckdb.DuckDBPyConnection at 0x7ab76a5c4bf0>

## Summary

In [5]:
ddb.sql("SUMMARIZE korea_imports")

┌──────────────┬─────────────┬───────────┬───────────────┬───────────────┬────────────────────┬────────────────────┬───────────────────┬──────────────────┬────────────────────┬───────┬─────────────────┐
│ column_name  │ column_type │    min    │      max      │ approx_unique │        avg         │        std         │        q25        │       q50        │        q75         │ count │ null_percentage │
│   varchar    │   varchar   │  varchar  │    varchar    │     int64     │      varchar       │      varchar       │      varchar      │     varchar      │      varchar       │ int64 │  decimal(9,2)   │
├──────────────┼─────────────┼───────────┼───────────────┼───────────────┼────────────────────┼────────────────────┼───────────────────┼──────────────────┼────────────────────┼───────┼─────────────────┤
│ partnerDesc  │ VARCHAR     │ Argentina │ World         │             5 │ NULL               │ NULL               │ NULL              │ NULL             │ NULL               │  5483 │    

## Top Korea Imports from World by Value (USD)

- MINERAL FUELS, MINERAL OILS AND PRODUCTS OF THEIR DISTILLATION; BITUMINOUS SUBSTANCES; MINERAL WAXES = **27**.
- ELECTRICAL MACHINERY AND EQUIPMENT AND PARTS THEREOF; SOUND RECORDERS AND REPRODUCERS; TELEVISION IMAGE AND SOUND RECORDERS AND REPRODUCERS, PARTS AND ACCESSORIES OF SUCH ARTICLES = **85**.
- NUCLEAR REACTORS, BOILERS, MACHINERY AND MECHANICAL APPLIANCES; PARTS THEREOF = **84**.
- VEHICLES; OTHER THAN RAILWAY OR TRAMWAY ROLLING STOCK, AND PARTS AND ACCESSORIES THEREOF = **87**.
- OPTICAL, PHOTOGRAPHIC, CINEMATOGRAPHIC, MEASURING, CHECKING, MEDICAL OR SURGICAL INSTRUMENTS AND APPARATUS; PARTS AND ACCESSORIES = **90**.
- INORGANIC CHEMICALS; ORGANIC AND INORGANIC COMPOUNDS OF PRECIOUS METALS; OF RARE EARTH METALS, OF RADIO-ACTIVE ELEMENTS AND OF ISOTOPES = **28**.
- ORES, SLAG AND ASH = **26**.
- IRON AND STEEL = **72**.
- ORGANIC CHEMICALS = **29**.
- PLASTICS AND ARTICLES THEREOF = **39**.

In [6]:
ddb.execute(
    """CREATE VIEW IF NOT EXISTS top_korea_imports
    AS SELECT cmdCode, SUM(primaryValue) totalValue
    FROM korea_imports
    WHERE partnerDesc = 'World'
    GROUP BY cmdCode
    ORDER BY totalValue desc;"""
)

<duckdb.duckdb.DuckDBPyConnection at 0x7ab76a5c4bf0>

In [7]:
ddb.sql("SELECT * FROM top_korea_imports")

┌─────────┬────────────────┐
│ cmdCode │   totalValue   │
│  uint8  │     double     │
├─────────┼────────────────┤
│      27 │ 171354533642.0 │
│      85 │ 119478238633.0 │
│      84 │  63072733926.0 │
│      87 │  22134904666.0 │
│      90 │  21958896343.0 │
│      28 │  21516316779.0 │
│      26 │  19707452376.0 │
│      72 │  16471724419.0 │
│      29 │  13572871176.0 │
│      39 │  12710272397.0 │
│       · │          ·     │
│       · │          ·     │
│       · │          ·     │
│      14 │     88596298.0 │
│      58 │     85992982.0 │
│       1 │     81420586.0 │
│      53 │     60518334.0 │
│      57 │     58638446.0 │
│      36 │     54708395.0 │
│      50 │     34511657.0 │
│      46 │     27726945.0 │
│      45 │      5077975.0 │
│      99 │          321.0 │
├─────────┴────────────────┤
│    97 rows (20 shown)    │
└──────────────────────────┘

## Top Korea Import from China and USA by Value (USD)

- ELECTRICAL MACHINERY AND EQUIPMENT AND PARTS THEREOF; SOUND RECORDERS AND REPRODUCERS; TELEVISION IMAGE AND SOUND RECORDERS AND REPRODUCERS, PARTS AND ACCESSORIES OF SUCH ARTICLES = **85**.
- NUCLEAR REACTORS, BOILERS, MACHINERY AND MECHANICAL APPLIANCES; PARTS THEREOF = **84**.
- MINERAL FUELS, MINERAL OILS AND PRODUCTS OF THEIR DISTILLATION; BITUMINOUS SUBSTANCES; MINERAL WAXES = **27**.
- INORGANIC CHEMICALS; ORGANIC AND INORGANIC COMPOUNDS OF PRECIOUS METALS; OF RARE EARTH METALS, OF RADIO-ACTIVE ELEMENTS AND OF ISOTOPES = **28**.
- OPTICAL, PHOTOGRAPHIC, CINEMATOGRAPHIC, MEASURING, CHECKING, MEDICAL OR SURGICAL INSTRUMENTS AND APPARATUS; PARTS AND ACCESSORIES = **90**.
- VEHICLES; OTHER THAN RAILWAY OR TRAMWAY ROLLING STOCK, AND PARTS AND ACCESSORIES THEREOF = **87**.
- IRON AND STEEL = **72**.
- ORGANIC CHEMICALS = **29**.
- PLASTICS AND ARTICLES THEREOF = **39**.
- IRON OR STEEL ARTICLES = **73**.

In [8]:
ddb.execute(
    """CREATE VIEW IF NOT EXISTS top_korea_imports_from_main_partners
    AS SELECT cmdCode, SUM(primaryValue) totalValue
    FROM korea_imports
    WHERE partnerDesc IN ('China', 'USA')
    GROUP BY cmdCode
    ORDER BY totalValue desc;"""
)

<duckdb.duckdb.DuckDBPyConnection at 0x7ab76a5c4bf0>

In [9]:
ddb.sql("SELECT * FROM top_korea_imports_from_main_partners")

┌─────────┬───────────────┐
│ cmdCode │  totalValue   │
│  uint8  │    double     │
├─────────┼───────────────┤
│      85 │ 56509198848.0 │
│      84 │ 25607072932.0 │
│      27 │ 23195785444.0 │
│      28 │ 14376306897.0 │
│      90 │  8352982797.0 │
│      87 │  7423606011.0 │
│      72 │  6660043279.0 │
│      29 │  6472001586.0 │
│      39 │  6331655357.0 │
│      73 │  5087310595.0 │
│       · │         ·     │
│       · │         ·     │
│       · │         ·     │
│      58 │    50395840.0 │
│      53 │    42824215.0 │
│       1 │    42481606.0 │
│      36 │    37205119.0 │
│      50 │    28858375.0 │
│      14 │    16292780.0 │
│      46 │    14369052.0 │
│      79 │     8344773.0 │
│      78 │     5410246.0 │
│      45 │     2137698.0 │
├─────────┴───────────────┤
│   95 rows (20 shown)    │
└─────────────────────────┘

## Top Korea Imports form Iberoamerican Partners by Value (USD)

- ORES, SLAG AND ASH = **26**.
- MINERAL FUELS, MINERAL OILS AND PRODUCTS OF THEIR DISTILLATION; BITUMINOUS SUBSTANCES; MINERAL WAXES = **27**.
- INORGANIC CHEMICALS; ORGANIC AND INORGANIC COMPOUNDS OF PRECIOUS METALS; OF RARE EARTH METALS, OF RADIO-ACTIVE ELEMENTS AND OF ISOTOPES = **28**.
- CEREALS = **10**.
- COPPER AND ARTICLES THEREOF = **74**.
- NUCLEAR REACTORS, BOILERS, MACHINERY AND MECHANICAL APPLIANCES; PARTS THEREOF = **84**.
- VEHICLES; OTHER THAN RAILWAY OR TRAMWAY ROLLING STOCK, AND PARTS AND ACCESSORIES THEREOF = **87**.
- FOOD INDUSTRIES, RESIDUES AND WASTES THEREOF; PREPARED ANIMAL FODDER = **23**.
- ELECTRICAL MACHINERY AND EQUIPMENT AND PARTS THEREOF; SOUND RECORDERS AND REPRODUCERS; TELEVISION IMAGE AND SOUND RECORDERS AND REPRODUCERS, PARTS AND ACCESSORIES OF SUCH ARTICLES = **85**.
- MEAT AND EDIBLE MEAT OFFAL = **02**.



In [10]:
ddb.execute(
    """CREATE VIEW IF NOT EXISTS top_korea_imports_from_iberoamerica
    AS SELECT cmdCode, SUM(primaryValue) totalValue
    FROM korea_imports
    WHERE partnerDesc NOT IN ('China', 'USA', 'World', 'Colombia')
    GROUP BY cmdCode
    ORDER BY totalValue desc;"""
)

<duckdb.duckdb.DuckDBPyConnection at 0x7ab76a5c4bf0>

In [11]:
ddb.sql("SELECT * FROM top_korea_imports_from_iberoamerica")

┌─────────┬──────────────┐
│ cmdCode │  totalValue  │
│  uint8  │    double    │
├─────────┼──────────────┤
│      26 │ 4361260632.0 │
│      27 │ 3989716153.0 │
│      28 │ 3327486088.0 │
│      10 │ 1979956803.0 │
│      74 │ 1889418782.0 │
│      84 │ 1149563256.0 │
│      87 │  789402179.0 │
│      23 │  720078555.0 │
│      85 │  716490028.0 │
│       2 │  693113371.0 │
│       · │         ·    │
│       · │         ·    │
│       · │         ·    │
│      91 │      24930.0 │
│      93 │      15064.0 │
│      43 │      11480.0 │
│      53 │       9474.0 │
│      37 │       7088.0 │
│      36 │       6354.0 │
│      46 │       4009.0 │
│      80 │        429.0 │
│      66 │        381.0 │
│      45 │         62.0 │
├─────────┴──────────────┤
│   94 rows (20 shown)   │
└────────────────────────┘

## Top Korea Imports from Colombia by Value (USD)

- MINERAL FUELS, MINERAL OILS AND PRODUCTS OF THEIR DISTILLATION; BITUMINOUS SUBSTANCES; MINERAL WAXES = **27**.
- COFFEE, TEA, MATE AND SPICES = **09**.
- IRON AND STEEL = **72**.
- COPPER AND ARTICLES THEREOF = **74**.
- TREES AND OTHER PLANTS, LIVE; BULBS, ROOTS AND THE LIKE; CUT FLOWERS AND ORNAMENTAL FOLIAGE = **06**.
- MISCELLANEOUS EDIBLE PREPARATIONS = **21**.
- CHEMICAL PRODUCTS N.E.C. = **38**.
- FRUIT AND NUTS, EDIBLE; PEEL OF CITRUS FRUIT OR MELONS = **08**.
- ORES, SLAG AND ASH = **26**.
- ALUMINIUM AND ARTICLES THEREOF = **76**.

In [12]:
ddb.execute(
    """CREATE VIEW IF NOT EXISTS top_korea_imports_from_colombia
    AS SELECT cmdCode, SUM(primaryValue) totalValue
    FROM korea_imports
    WHERE partnerDesc NOT IN ('China', 'USA', 'World', 'Colombia')
    GROUP BY cmdCode
    ORDER BY totalValue desc;"""
)

<duckdb.duckdb.DuckDBPyConnection at 0x7ab76a5c4bf0>

In [13]:
ddb.sql("SELECT * FROM top_korea_imports_from_colombia")

┌─────────┬──────────────┐
│ cmdCode │  totalValue  │
│  uint8  │    double    │
├─────────┼──────────────┤
│      26 │ 4361260632.0 │
│      27 │ 3989716153.0 │
│      28 │ 3327486088.0 │
│      10 │ 1979956803.0 │
│      74 │ 1889418782.0 │
│      84 │ 1149563256.0 │
│      87 │  789402179.0 │
│      23 │  720078555.0 │
│      85 │  716490028.0 │
│       2 │  693113371.0 │
│       · │         ·    │
│       · │         ·    │
│       · │         ·    │
│      91 │      24930.0 │
│      93 │      15064.0 │
│      43 │      11480.0 │
│      53 │       9474.0 │
│      37 │       7088.0 │
│      36 │       6354.0 │
│      46 │       4009.0 │
│      80 │        429.0 │
│      66 │        381.0 │
│      45 │         62.0 │
├─────────┴──────────────┤
│   94 rows (20 shown)   │
└────────────────────────┘

## Check all Tables

In [14]:
ddb.sql("SHOW ALL TABLES")

┌──────────┬─────────┬──────────────────────────────────────┬──────────────────────────────────────┬─────────────────────────────┬───────────┐
│ database │ schema  │                 name                 │             column_names             │        column_types         │ temporary │
│ varchar  │ varchar │               varchar                │              varchar[]               │          varchar[]          │  boolean  │
├──────────┼─────────┼──────────────────────────────────────┼──────────────────────────────────────┼─────────────────────────────┼───────────┤
│ db       │ main    │ korea_imports                        │ [partnerDesc, cmdCode, primaryValue] │ [VARCHAR, UTINYINT, DOUBLE] │ false     │
│ db       │ main    │ top_korea_imports                    │ [cmdCode, totalValue]                │ [UTINYINT, DOUBLE]          │ false     │
│ db       │ main    │ top_korea_imports_from_colombia      │ [cmdCode, totalValue]                │ [UTINYINT, DOUBLE]          │ false     │

## Close Connection

In [16]:
ddb.close()