This notebook downloads a compact dataset from Comtrade according to your own specifications.

Follow these steps to run your data requests:

    First Time Users: You will need to install a few python packages:

        1. Run the first cell below, < pip install -r requirements.txt > 
        2. Restart the kernel 

     If you've already used the Compactor, then proceed with the following steps:

        1. Click Run -> Run All Cells
        2. Wait a few seconds, while a data request form is generated
        3. Fill in the data request form
        4. Click submit and wait a few seconds while your data is compacted for you :) 
        5. Your data will be saved within the compactor_output folder. Navigate to the folder
           named after your username, then to the requested Classification Code, to retrieve
           your data file (the file name includes the requested years).
    
Troubleshooting: If the notebook appears "stuck" and produces no output for more than a few seconds, try:
    
    1. Restarting the kernel to start from a clean slate: go to Kernel -> Restart Kernel & Run All


In [9]:
%%capture captured_output
!pip install -r '/n/holystore01/LABS/hausmann_lab/lab/dev_notebooks/comtrade_compactor_requirements.txt'
# !pip install jupyter_black

In [2]:
# %load_ext jupyter_black

In [3]:
# %load_ext line_profiler

In [4]:
import logging
import gzip
import glob
import os.path
import pandas as pd
import comtradeapicall
import time
from datetime import date, timedelta, datetime

import pickle
import ipywidgets as widgets
from IPython.display import display, clear_output
from jupyter_ui_poll import ui_events

In [5]:
# DATA REQUEST FORM - FrontEnd

# Collect the values offered by the form's dropdowns

# classificationCode
classification_code_options = ["H0 (HS-92)", "H4 (HS-12)", "SITC"]

# reporterCode: individual (non-group) reporters from the Comtrade reference table
reporter_reference = comtradeapicall.getReference("reporter")
is_single_reporter = reporter_reference["isGroup"] == False
reporter_df = reporter_reference.loc[
    is_single_reporter, ["reporterCode", "reporterCodeIsoAlpha3"]
].rename(columns={"reporterCodeIsoAlpha3": "reporterISO3"})
iso3_reporter_options = reporter_df["reporterISO3"].drop_duplicates().tolist()

# partnerCode: individual (non-group) partners from the Comtrade reference table
partner_reference = comtradeapicall.getReference("partner")
partner_df = partner_reference[partner_reference["isGroup"] == False]
# remove groupings with exception of Other Asia, nes since Comtrade has relayed this can be treated as Taiwan
is_nes_grouping = partner_df["text"].str.contains(" nes")
is_other_asia = partner_df["text"].str.contains("Other Asia,")
partner_df = partner_df.loc[
    ~is_nes_grouping | is_other_asia, ["PartnerCode", "PartnerCodeIsoAlpha3"]
].rename(
    columns={"PartnerCode": "partnerCode", "PartnerCodeIsoAlpha3": "partnerISO3"}
)
iso3_partner_options = partner_df["partnerISO3"].drop_duplicates().tolist()

reexports_options = ["No", "Yes"]

# "All" (and "World" for partners) lead the alphabetically sorted ISO3 codes
iso3_reporter_options = ["All", *sorted(iso3_reporter_options)]
iso3_partner_options = ["All", "World", *sorted(iso3_partner_options)]

# Blank first entry of the multi-select lists; it is ignored when the request is translated
NO_SELECTION = " "

# Labels shown in the form; each label (other than the blank) must match a key of the
# corresponding label -> Comtrade code dictionary used when the request is translated.
flow_codes_options = [NO_SELECTION] + [
    "Import",
    "Export",
    "Domestic Export",
    "Foreign Import",
    "Import of goods for inward processing",
    "Import of goods after outward processing",
    "Re-import",
    "Re-export",
    "Export of goods after inward processing",
    "Export of goods for outward processing",
]

mode_of_transport_options = [NO_SELECTION] + [
    "TOTAL modes of transport",
    "Air",
    # water
    "Water",
    "Sea",
    "Inland waterway",
    "Water, not else classified",
    # land
    "Land",
    "Railway",
    "Road",
    "Land, not else classified",
    # everything else
    "Not elsewhere classified",
    "Pipelines and cables",
    "Pipelines",
    "Cables",
    "Pipelines and cables, not else classified",
    "Postal consignments, mail or courier shipment",
    "Self propelled goods",
    "Other",
]

customs_code_options = [NO_SELECTION] + [
    "TOTAL customs procedure codes",
    "Clearance for home use",
    "Reimportation in the same state",
    "Outright exportation",
    "Customs warehouses",
    "Free zone",
    "Inward processing",
    "Outward processing",
    "Drawback",
    "Processing of goods for home use",
    "Carriage of goods coastwise",
    "Customs offences",
    "Travellers",
    "Postal traffic",
    "Stores",
    "Relief consignments",
    "CPC N.E.S.",
]

# Commodity code lengths offered in the form; 0 stands for the TOTAL row
digit_level_options = [0, 2, 4, 6]

# Optional columns a user can request on top of the default output columns
additional_data_columns = [
    # record metadata
    "typeCode",
    "freqCode",
    "mosCode",
    # quantities
    "qtyUnitCode",
    "isQtyEstimated",
    "altQtyUnitCode",
    "altQty",
    "isAltQtyEstimated",
    # weights
    "netWgt",
    "isNetWgtEstimated",
    "grossWgt",
    "isGrossWgtEstimated",
    # values
    "CIFValue",
    "FOBValue",
    # flags
    "legacyEstimationFlag",
    "isReported",
    "isAggregate",
]

# Output file formats offered in the form
data_format_options = ["csv", "dta", "parquet"]


# Build the input widgets of the data request form
def _wide_label():
    """Return a fresh style dict that sizes a widget's description to its text."""
    return {"description_width": "initial"}


# Requested period
start_year_widget = widgets.IntText(description="Start year:", style=_wide_label())
end_year_widget = widgets.IntText(description="End year:", style=_wide_label())

# Classification and countries ("All" is pre-selected for both country lists)
classification_code_widget = widgets.Dropdown(
    description="Classification Code:",
    options=classification_code_options,
    style=_wide_label(),
)
reporter_iso3_widget = widgets.SelectMultiple(
    description="ISO3 Code for Reporting Country:",
    options=iso3_reporter_options,
    value=["All"],
    style=_wide_label(),
)
partner_iso3_widget = widgets.SelectMultiple(
    description="ISO3 Code for Partner Country:",
    options=iso3_partner_options,
    value=["All"],
    style=_wide_label(),
)
reexports_widget = widgets.Dropdown(
    description="Show Partner2Code country level. \n Selecting No shows the aggregated data for the World:",
    options=reexports_options,
    style=_wide_label(),
)

# Commodity codes are typed in as free text
commodity_code_widget = widgets.Text(
    description="Commodity Code(s):", style=_wide_label()
)

# Optional filters
mot_widget = widgets.SelectMultiple(
    description="Mode of Transport:",
    options=mode_of_transport_options,
    style=_wide_label(),
)
customs_widget = widgets.SelectMultiple(
    description="Customs Codes",
    options=customs_code_options,
    style=_wide_label(),
)
flow_codes_widget = widgets.SelectMultiple(
    description="Flow Codes:",
    options=flow_codes_options,
    style=_wide_label(),
)
digit_level_widget = widgets.SelectMultiple(
    description="Commodity Digit Level(s):",
    options=digit_level_options,
    value=[0],
    style=_wide_label(),
)

# Output shape
additional_cols_widget = widgets.SelectMultiple(
    description="Requested additional columns:",
    options=additional_data_columns,
    style=_wide_label(),
)
data_format_widget = widgets.Dropdown(
    description="Requested data format:",
    options=data_format_options,
    style=_wide_label(),
)

# Button that submits the request
submit_button = widgets.Button(description="Submit Request")

In [6]:
# DISPLAYS FORM - FrontEnd
# Widgets in the order they are stacked in the form, ending with the submit button
form_fields = [
    start_year_widget,
    end_year_widget,
    classification_code_widget,
    reporter_iso3_widget,
    partner_iso3_widget,
    commodity_code_widget,
    reexports_widget,
    mot_widget,
    customs_widget,
    flow_codes_widget,
    digit_level_widget,
    additional_cols_widget,
    data_format_widget,
    submit_button,
]
form = widgets.VBox(children=form_fields)

# Area the country-selection messages are written into
output_widget = widgets.Output()


# Refreshes the message area whenever the reporter/partner selection changes
def on_value_change(change):
    """
    Observer callback: clears the output widget and logs the multi-select hints
    followed by the currently selected reporters and partners.

    Input:
        change: the change notification passed by the widget (not used)
    """
    messages = (
        "For Windows hold down Control to select multiple options",
        "For Mac users hold down Command to select multiple options",
        " ",
        "Requested Countries: ",
        f"Reporters: {reporter_iso3_widget.value}",
        f"Partners: {partner_iso3_widget.value}",
    )
    with output_widget:
        clear_output()
        for message in messages:
            logging.info(message)


# Re-run the callback whenever either country selection changes
for country_selector in (reporter_iso3_widget, partner_iso3_widget):
    country_selector.observe(on_value_change, names="value")

# Show the message area first, then the form beneath it
display(output_widget)
display(form)

Output()

VBox(children=(IntText(value=0, description='Start year:', style=DescriptionStyle(description_width='initial')…

In [7]:
# Translate UserInput into Comtrade API language
# Each dictionary maps a label shown in the request form to the code used in the data.
CLASSIFICATION_CODES = dict(
    [
        ("H0 (HS-92)", "H0"),
        ("H4 (HS-12)", "H4"),
        ("SITC", "SITC"),
    ]
)

_FLOW_CODE_PAIRS = (
    ("Import", "M"),
    ("Export", "X"),
    ("Domestic Export", "DX"),
    ("Foreign Import", "FM"),
    ("Import of goods for inward processing", "MIP"),
    ("Import of goods after outward processing", "MOP"),
    ("Re-import", "RM"),
    ("Re-export", "RX"),
    ("Export of goods after inward processing", "XIP"),
    ("Export of goods for outward processing", "XOP"),
)
FLOW_CODE_OPTIONS = dict(_FLOW_CODE_PAIRS)

_MOT_PAIRS = (
    ("TOTAL modes of transport", "0"),
    ("Air", "1000"),
    ("Water", "2000"),
    ("Sea", "2100"),
    ("Inland waterway", "2200"),
    ("Water, not else classified", "2900"),
    ("Land", "3000"),
    ("Railway", "3100"),
    ("Road", "3200"),
    ("Land, not else classified", "3900"),
    ("Not elsewhere classified", "9000"),
    ("Pipelines and cables", "9100"),
    ("Pipelines", "9110"),
    ("Cables", "9120"),
    ("Pipelines and cables, not else classified", "9190"),
    ("Postal consignments, mail or courier shipment", "9200"),
    ("Self propelled goods", "9300"),
    ("Other", "9900"),
)
MOT_OPTIONS = dict(_MOT_PAIRS)

_CUSTOMS_PAIRS = (
    ("TOTAL customs procedure codes", "C00"),
    ("Clearance for home use", "C01"),
    ("Reimportation in the same state", "C02"),
    ("Outright exportation", "C03"),
    ("Customs warehouses", "C04"),
    ("Free zone", "C05"),
    ("Inward processing", "C06"),
    ("Outward processing", "C07"),
    ("Drawback", "C08"),
    ("Processing of goods for home use", "C09"),
    ("Carriage of goods coastwise", "C10"),
    ("Customs offences", "C11"),
    ("Travellers", "C12"),
    ("Postal traffic", "C13"),
    ("Stores", "C14"),
    ("Relief consignments", "C15"),
    ("CPC N.E.S.", "C20"),
)
CUSTOMS_OPTIONS = dict(_CUSTOMS_PAIRS)

In [None]:
# Data Comtrade Compactor Object
logging.basicConfig(level=logging.INFO)


class ComtradeCompactor(object):
    """
    Extracts a user-specified subset of the raw Comtrade files stored on the cluster
    and writes it to a single output file.

    A request is described by the constructor arguments (the values of the data
    request form). `compact()` then reads the raw gzip files year by year, keeps the
    rows matching the request, adds ISO3 columns and writes the result in the
    requested data format.
    """

    # Reporter codes, as they appear in raw file names, of country groups to skip
    GROUP_REPORTERS = {"EU": '097', "ASEAN": '975'}
    # Directory holding one sub-directory of raw files per classification code
    RAW_DATA_DIR = "/n/hausmann_lab/lab/atlas/data/raw"
    # Code of the "World" aggregate in the partnerCode / partner2Code columns
    WORLD_CODE = 0
    # Filter fields that are read as strings, so their values are quoted in queries
    STRING_FILTER_FIELDS = ("cmdCode", "flowCode", "motCode", "customsCode")
    # Data formats compact() can write directly
    SUPPORTED_FORMATS = ("csv", "dta", "parquet")

    def __init__(
        self,
        classification_code,
        start_year,
        end_year,
        reporter_iso3_codes=[],
        partner_iso3_codes=[],
        is_show_reexport=[],
        commodity_codes=[],
        modes_of_transport=[],
        customs_codes=[],
        flow_codes=[],
        digit_level=0,
        additional_requested_cols=[],
        data_format="csv",
    ):
        """
        Translates the values of a data request into Comtrade codes and row filters.

        Input:
            classification_code (str): form label, a key of CLASSIFICATION_CODES
            start_year, end_year (int): first and last requested year (inclusive)
            reporter_iso3_codes: ISO3 codes of the reporters, or a collection
                containing "All"
            partner_iso3_codes: ISO3 codes of the partners; may contain "All"
                and/or "World"
            is_show_reexport: "Yes" keeps the country-level partner2Code rows,
                anything else keeps only the World aggregate (partner2Code 0)
            commodity_codes: comma separated string (or a collection) of commodity
                codes; empty means no commodity filter
            modes_of_transport, customs_codes, flow_codes: form labels, keys of
                MOT_OPTIONS / CUSTOMS_OPTIONS / FLOW_CODE_OPTIONS; blank (" ")
                entries are ignored
            digit_level: a single digit level or a collection of them
            additional_requested_cols: extra raw columns to include in the output
            data_format: "csv", "dta" or "parquet"

        The list defaults are never mutated. Creating an instance downloads the
        reporter and partner reference tables and creates the output directory.
        """
        additional_requested_cols = list(additional_requested_cols)
        self.columns = [
            "period",
            "reporterCode",
            "partnerCode",
            "partner2Code",
            "flowCode",
            "classificationCode",
            "cmdCode",
            "motCode",
            "customsCode",
            "qty",
            "primaryValue",
        ] + additional_requested_cols

        additional_col_dtypes = {
            "typeCode": "str",
            "freqCode": "str",
            "mosCode": "str",
            "qtyUnitCode": "str",
            "isQtyEstimated": "bool",
            # key previously misspelled "atlQtyUnitCode", so it never matched the column
            "altQtyUnitCode": "str",
            "altQty": "float",
            "isAltQtyEstimated": "bool",
            "netWgt": "float",
            "isNetWgtEstimated": "bool",
            "grossWgt": "float",
            "isGrossWgtEstimated": "bool",
            "CIFValue": "float",
            "FOBValue": "float",
            "legacyEstimationFlag": "bool",
            "isReported": "bool",
            "isAggregate": "bool",
        }

        self.dtypes_dict = {
            "period": "int",
            "reporterCode": "int",
            "flowCode": "str",
            "partnerCode": "int",
            "classificationCode": "str",
            "cmdCode": "str",
            "motCode": "str",
            "customsCode": "str",
            "qty": "float",
            "primaryValue": "float",
        }
        self.dtypes_dict.update(
            {
                col: additional_col_dtypes[col]
                for col in additional_requested_cols
                if col in additional_col_dtypes
            }
        )

        self.classification_code = CLASSIFICATION_CODES[classification_code]
        # TODO: ADD CASES BASED ON CLASSIFICATION CODE SELECTED FOR EACH YEAR
        self.start_year = start_year
        self.end_year = end_year
        self.years = range(self.start_year, self.end_year + 1)
        if self.end_year < self.start_year:
            logging.warning(
                f"End year {self.end_year} is before start year {self.start_year}; "
                "no data will be returned."
            )

        self.src_dir = os.path.join(self.RAW_DATA_DIR, self.classification_code)

        # os.path.join cannot take None, so fall back to a placeholder user name
        self.user = os.environ.get("USER") or "unknown_user"
        self.output_dir = os.path.join(
            "/n/hausmann_lab/lab/*data_tools_for_GL/compactor_output",
            self.user,
            self.classification_code,
        )
        os.makedirs(self.output_dir, exist_ok=True)

        self.run_time = time.strftime("%Y-%m-%d_%H_%M_%S", time.gmtime())

        # transform iso3_codes into reporterCodes
        reporter_reference = comtradeapicall.getReference("reporter").rename(
            columns={"reporterCodeIsoAlpha3": "reporterISO3"}
        )
        # code -> ISO3 lookup (groups included) reused by clean() for every year
        self._reporter_iso_by_code = dict(
            zip(reporter_reference["reporterCode"], reporter_reference["reporterISO3"])
        )
        reporters_df = reporter_reference[reporter_reference["isGroup"] == False][
            ["reporterCode", "reporterISO3"]
        ]
        self.requests_all_reporters = "All" in reporter_iso3_codes
        if not self.requests_all_reporters:
            reporters_df = reporters_df[
                reporters_df["reporterISO3"].isin(reporter_iso3_codes)
            ]
        # flat lists in both cases (the "All" case used to wrap them in another list)
        self.reporter_iso3s = reporters_df["reporterISO3"].tolist()
        self.reporter_codes = reporters_df["reporterCode"].tolist()

        # transform given iso3_codes into partnerCodes
        partner_reference = comtradeapicall.getReference("partner").rename(
            columns={
                "PartnerCode": "partnerCode",
                "PartnerCodeIsoAlpha3": "partnerISO3",
            }
        )
        self._partner_iso_by_code = dict(
            zip(partner_reference["partnerCode"], partner_reference["partnerISO3"])
        )
        self._partner_iso_by_code[self.WORLD_CODE] = "WLD"
        partners_df = partner_reference[partner_reference["isGroup"] == False][
            ["partnerCode", "partnerISO3"]
        ]
        # partnerCode is numeric, so World is excluded by its code, not by its name
        country_partner_codes = [
            code
            for code in partners_df["partnerCode"].tolist()
            if code != self.WORLD_CODE
        ]

        wants_world = "World" in partner_iso3_codes
        self.requests_all_partners = "All" in partner_iso3_codes
        self.requests_all_and_world_partners = (
            self.requests_all_partners and wants_world
        )
        if self.requests_all_and_world_partners:
            logging.info(
                "Requested All partners and world. World duplicates the total primary value"
            )
            self.partner_iso3s = partners_df["partnerISO3"].tolist()
            self.partner_codes = partners_df["partnerCode"].tolist()
        elif self.requests_all_partners:
            logging.info("requested All partners")
            self.partner_iso3s = partners_df["partnerISO3"].tolist()
            self.partner_codes = country_partner_codes
        else:
            selected_partners = partners_df[
                partners_df["partnerISO3"].isin(partner_iso3_codes)
            ]
            self.partner_iso3s = selected_partners["partnerISO3"].tolist()
            self.partner_codes = selected_partners["partnerCode"].tolist()
            # "World" is a form option rather than an ISO3 code, so it never
            # matches the reference table and is translated to its code here
            if wants_world and self.WORLD_CODE not in self.partner_codes:
                self.partner_iso3s.append("WLD")
                self.partner_codes.append(self.WORLD_CODE)

        # setup filter parameters (insertion order is the order used in the query)
        self.filters = {}
        # if partner2Code detail is not requested filter to world
        if is_show_reexport == "Yes":
            self.filters["partner2Code"] = country_partner_codes
        else:
            self.filters["partner2Code"] = [self.WORLD_CODE]
        self.filters["partnerCode"] = self.partner_codes
        self.filters["cmdCode"] = self._parse_commodity_codes(commodity_codes)
        # default to filter for total
        self.filters["motCode"] = self._translate_labels(
            modes_of_transport, MOT_OPTIONS
        ) or ["0"]
        # default to filter for total
        self.filters["customsCode"] = self._translate_labels(
            customs_codes, CUSTOMS_OPTIONS
        ) or ["C00"]
        self.filters["flowCode"] = self._translate_labels(
            flow_codes, FLOW_CODE_OPTIONS
        )
        # a bare int (the signature's default) is treated as a single level
        if isinstance(digit_level, int):
            digit_level = [digit_level]
        self.filters["digitLevel"] = list(digit_level)

        # accept a one-element collection such as ("csv",) as well as a plain string
        if isinstance(data_format, (list, tuple)) and len(data_format) == 1:
            data_format = data_format[0]
        self.data_format = data_format

    @staticmethod
    def _parse_commodity_codes(commodity_codes):
        """Returns the stripped, non-empty codes of a comma separated string or a collection."""
        if isinstance(commodity_codes, str):
            pieces = commodity_codes.split(",")
        else:
            pieces = list(commodity_codes or [])
        stripped = [str(piece).strip() for piece in pieces]
        return [code for code in stripped if code]

    @staticmethod
    def _translate_labels(labels, label_to_code):
        """Maps the selected form labels to codes, ignoring the blank (" ") option."""
        return [label_to_code[label] for label in labels if label != " "]

    def _source_dir_for_year(self, year):
        """Returns the raw data directory for a year; SITC is S1 up to 1975 and S2 after."""
        if self.classification_code == 'SITC':
            revision = 'S1' if year <= 1975 else 'S2'
            return os.path.join(self.RAW_DATA_DIR, revision)
        return self.src_dir

    def compact(self):
        """
        Runs steps to extract user requested data from the raw Comtrade data files
        on the cluster. Writes output file to a user output file in requested data format

        Input:
            ComtradeCompactor (obj)
        """
        logging.info(f"Requested data at {datetime.now()}")
        logging.info("Querying the data for")
        if self.requests_all_reporters:
            logging.info("all Reporters")
        else:
            logging.info(f"Reporters: {self.reporter_iso3s}")

        if self.requests_all_partners and self.requests_all_and_world_partners:
            logging.info("all Partners and the World")
        elif self.requests_all_partners:
            logging.info("all Partners")
        else:
            logging.info(f"Partners: {self.partner_iso3s}")
        show_filters = {
            key: value for key, value in self.filters.items() if key != "partnerCode"
        }
        logging.info(f"Filtering for {show_filters}")
        logging.info(f"In the following years {self.start_year} - {self.end_year}.")

        query_statement = self.generate_filter()

        # collect the yearly frames and concatenate once instead of growing a frame
        yearly_frames = []
        for year in self.years:
            df = self.get_df_by_year(year, query_statement)
            if df.empty:
                logging.info(
                    f"No requested {self.classification_code} data for {year}."
                )
                continue
            yearly_frames.append(self.clean(df))
        if yearly_frames:
            final_df = pd.concat(yearly_frames, axis=0, ignore_index=True)
        else:
            final_df = pd.DataFrame(columns=self.columns)
        final_df = final_df.astype(self.dtypes_dict)

        # decide the format before naming the file so the extension matches the content
        output_format = self.data_format
        compress = False
        if output_format not in self.SUPPORTED_FORMATS:
            logging.info(
                f"Selected data format {self.data_format} is not yet supported."
            )
            logging.info("Defaulting to a csv file")
            output_format = "csv"
            # large fallback requests are written gzip-compressed
            compress = self.requests_all_reporters or len(self.reporter_codes) > 5
        extension = "csv.gz" if compress else output_format
        outpath = os.path.join(
            self.output_dir,
            f"{self.user}_{self.classification_code}_"
            f"{self.start_year}-{self.end_year}_{self.run_time}.{extension}",
        )

        if output_format == "parquet":
            final_df.to_parquet(outpath, index=False)
            logging.info(f"Request file downloaded to {outpath} as a parquet file")
        elif output_format == "dta":
            # cast ints to floats for stata files
            cast_fields = [
                "period",
                "reporterCode",
                "partnerCode",
                "motCode",
                "altQtyUnitCode",
            ]
            for cast_field in cast_fields:
                if cast_field in final_df.columns:
                    final_df[cast_field] = final_df[cast_field].astype(float)
            final_df.to_stata(outpath)
            logging.info(f"Request file downloaded to {outpath} as a stata file")
        else:
            final_df.to_csv(
                outpath, index=False, compression="gzip" if compress else None
            )
            logging.info(f"Request file downloaded to {outpath} as a csv file")
        logging.info(f"Requested file available at {datetime.now()}")

    def read_and_filter_files(self, year, query_statement) -> "Iterator[pd.DataFrame]":
        """
        Generator function iterates through requested classification code's year directory
        for requested reporter codes and then filters the reporter's dataframe

        Input:
            year (int): year directory to read
            query_statement (str): pandas query; an empty string applies no filter

        Yields one dataframe per reporter file, with a digitLevel column added
        """
        # handle case when years requested cover both S1 and S2
        src_dir = self._source_dir_for_year(year)
        group_codes = set(self.GROUP_REPORTERS.values())
        for src_name in glob.glob(os.path.join(src_dir, str(year), "*.gz")):
            # extracts reporter code based on outputted file naming convention
            reporter_code = os.path.basename(src_name)[17:20]
            # do not include group reporters
            if reporter_code in group_codes:
                continue
            if not (
                self.requests_all_reporters
                or int(reporter_code) in self.reporter_codes
            ):
                continue
            reporter_df = pd.read_csv(
                src_name,
                compression="gzip",
                sep="\t",
                usecols=self.columns,
                dtype=self.dtypes_dict,
            )
            try:
                # product digit level is the length of the commodity code, except
                # for the TOTAL row which is level 0. Only TOTAL is relabelled, so
                # genuine 5-digit codes keep level 5.
                code_lengths = reporter_df["cmdCode"].map(len)
                reporter_df["digitLevel"] = code_lengths.mask(
                    reporter_df["cmdCode"] == "TOTAL", 0
                )
            except Exception:
                logging.info(
                    f"{self.classification_code}-{year} reports are not in available range"
                )
                logging.info(f"Skipping data files for {year}.")
                continue

            if not query_statement:
                # nothing to filter on
                yield reporter_df
                continue
            try:
                filtered_df = reporter_df.query(query_statement)
            except Exception as e:
                # best effort: a query that cannot be evaluated returns the file unfiltered
                logging.warning(f"Could not apply filter to {src_name}: {e}")
                yield reporter_df
            else:
                yield filtered_df

    def get_df_by_year(self, year, query_statement) -> pd.DataFrame:
        """
        Calls generator function and concatenates returned filtered dataframes

        Returns a filtered data frame for one year; the requested columns come first,
        followed by any column added while reading (digitLevel). When no file
        matches, an empty frame with the requested columns and dtypes is returned.
        """
        frames = list(self.read_and_filter_files(year, query_statement))
        if not frames:
            return pd.DataFrame(columns=self.columns).astype(self.dtypes_dict)
        year_df = pd.concat(frames, axis=0, ignore_index=True)
        added_columns = [col for col in year_df.columns if col not in self.columns]
        return year_df[self.columns + added_columns]

    def generate_filter(self) -> str:
        """
        Returns query statement based on requested filters

        Each filter with values becomes a parenthesised group of "field == value"
        terms joined by "or"; the groups are joined by "and". An empty string is
        returned when there is nothing to filter on.
        """
        conditions = []
        for field, values in self.filters.items():
            # no filtering is applied if value is empty
            if not values:
                continue
            if field == "partnerCode" and self.requests_all_partners:
                # all partners: keep everything, dropping World unless it was requested too
                if not self.requests_all_and_world_partners:
                    conditions.append(f"({field} != 0)")
                continue
            if field in self.STRING_FILTER_FIELDS:
                terms = [f"{field} == '{value}'" for value in values]
            else:
                terms = [f"{field} == {value}" for value in values]
            conditions.append("(" + " or ".join(terms) + ")")
        return " and ".join(conditions)

    def clean(self, df):
        """
        Adds ISOCode columns for reporter and partner countries

        Returns a copy of df with reporterISO3 and partnerISO3 columns, looked up in
        the reference tables downloaded by the constructor; partner code 0 maps to "WLD".
        """
        df_copy = df.copy()
        # reporterCodes => ISO3Codes
        df_copy["reporterISO3"] = df_copy["reporterCode"].map(
            self._reporter_iso_by_code
        )
        # partnerCodes => ISO3Codes
        df_copy["partnerISO3"] = df_copy["partnerCode"].map(self._partner_iso_by_code)
        return df_copy

NameError: name 'logging' is not defined

In [None]:
# Waits to run Compactor until User has Submitted their data request
clicked = False


def on_button_clicked(b):
    """Records that the submit button was pressed so the wait loop below can proceed."""
    global clicked
    clicked = True


# Register the handler once. Registering inside the loop added another handler to
# the button for every request that was served.
submit_button.on_click(on_button_clicked)

while True:
    clicked = False

    with ui_events() as poll:
        while not clicked:
            poll(1000)  # poll queued UI events including button
            time.sleep(1)  # wait for 1 second before checking again

    # initiate code run
    logging.info("------------------------")
    logging.info("Data request initiated..")
    try:
        compactor = ComtradeCompactor(
            start_year=start_year_widget.value,
            end_year=end_year_widget.value,
            classification_code=classification_code_widget.value,
            reporter_iso3_codes=reporter_iso3_widget.value,
            partner_iso3_codes=partner_iso3_widget.value,
            is_show_reexport=reexports_widget.value,
            commodity_codes=commodity_code_widget.value,
            modes_of_transport=mot_widget.value,
            customs_codes=customs_widget.value,
            flow_codes=flow_codes_widget.value,
            digit_level=digit_level_widget.value,
            additional_requested_cols=additional_cols_widget.value,
            data_format=data_format_widget.value,
        )

        # %lprun -f ComtradeCompactor.compact -T profile_output.txt compactor.compact()
        compactor.compact()
    except Exception:
        # Log the traceback and keep waiting, so one failed request does not stop
        # the form from accepting a corrected one.
        logging.exception("Data request failed. Adjust the form and submit again.")

In [None]:
# Ad-hoc request for inspecting the generated filter without going through the form
debug_request = dict(
    classification_code="H0 (HS-92)",
    start_year=2023,
    end_year=2023,
    reporter_iso3_codes=("SUR",),
    partner_iso3_codes=("All", "World"),
    commodity_codes="",
    modes_of_transport=("TOTAL modes of transport",),
    flow_codes=("Import",),
    digit_level=(0,),
    additional_requested_cols="",
    data_format=("csv",),
)
c = ComtradeCompactor(**debug_request)

# Last expression of the cell: shows the query string built from the request
c.generate_filter()

In [None]:
# motCode is read as a string column, so its value must be quoted; the unquoted
# comparison `motCode == 0` matches no rows.
c.get_df_by_year(2023, "(motCode == '0') and (flowCode == 'M') and (digitLevel == 0)")