In [15]:
from dash import dcc, html, Dash, dash_table
from dash.dependencies import Output, Input
from dash.exceptions import PreventUpdate
import plotly.express as px
import pandas as pd
import numpy as np
from dash_bootstrap_templates import load_figure_template
import dash_bootstrap_components as dbc
import country_converter as coco
import logging
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

In [46]:
# Read the raw records first, then the country-code lookup table. Both paths
# are relative to the notebook's working directory.
data, ref_table = (
    pd.read_csv(csv_path)
    for csv_path in ("data/data_raw.csv", "data/country_code_conversion.csv")
)

def _clean_numeric_column(column, dtype):
    """Strip stray double quotes and whitespace from ``column`` and cast it.

    Parameters
    ----------
    column : pd.Series
        Values as read from CSV; may already be numeric or may be text that
        still carries literal double quotes / padding.
    dtype : str
        Nullable pandas dtype to cast to ("Int64" or "Float64" here).

    Returns
    -------
    pd.Series
        A new Series of ``dtype`` on the same index. Missing inputs stay
        missing instead of aborting the cast.

    Raises
    ------
    ValueError or TypeError
        If a value is not numeric after cleaning, or cannot be represented
        in ``dtype`` (for example a fractional value cast to "Int64").
    """
    text = column.astype(str).str.replace('"', '', regex=False).str.strip()
    # astype(str) renders missing values as the literal text "nan"; mask them
    # back to missing so the integer cast does not choke on that text.
    text = text.where(column.notna())
    # Parse to a plain numeric dtype first so that float-looking integers
    # such as "4.0" still reach Int64, then apply the nullable dtype.
    return pd.to_numeric(text, errors="raise").astype(dtype)


# Join keys on both sides must share one integer dtype for the merge to match.
data["Area Code (M49)"] = _clean_numeric_column(data["Area Code (M49)"], "Int64")
ref_table["Numeric code"] = _clean_numeric_column(ref_table["Numeric code"], "Int64")

# Average coordinates of each country, as nullable floats.
ref_table["Latitude (average)"] = _clean_numeric_column(
    ref_table["Latitude (average)"], "Float64"
)
ref_table["Longitude (average)"] = _clean_numeric_column(
    ref_table["Longitude (average)"], "Float64"
)

# Left-join the lookup table onto the records by numeric country code, so
# every record is kept even when the lookup has no matching code.
data = pd.merge(
    data,
    ref_table,
    left_on="Area Code (M49)",
    right_on="Numeric code",
    how="left",
)

# Derive a continent label for each row from its numeric code. The
# country_converter logger is raised to ERROR so lower-severity messages
# are not emitted during the conversion.
cc = coco.CountryConverter()
logging.getLogger("country_converter").setLevel(logging.ERROR)
data["Continent"] = cc.convert(
    src="UNnumeric",
    to="continent",
    names=data["Area Code (M49)"],
)
# Entries equal to the string "not found" (presumably the converter's
# placeholder for codes it could not resolve) become missing values.
data["Continent"] = data["Continent"].replace(to_replace="not found", value=pd.NA)

# Keep only rows that have a continent, narrow the frame to the ten columns
# listed, and tidy the labels (the raw 'Export ' header has a trailing space).
data = data.loc[
    data['Continent'].notna(),
    ['Area', 'Continent', 'Latitude (average)', 'Longitude (average)',
     'Year', 'Import', 'Export ', 'Production', 'Consumption', 'Unit'],
].rename(columns={
    'Export ': 'Export',
    'Latitude (average)': 'Latitude',
    'Longitude (average)': 'Longitude',
})

# Preview the first rows of the result.
data.head()

Unnamed: 0,Area,Continent,Latitude,Longitude,Year,Import,Export,Production,Consumption,Unit
0,Afghanistan,Asia,33.0,65.0,2014,283.85,21099.0,21500.0,684.85,t
1,Afghanistan,Asia,33.0,65.0,2015,1000.16,17340.0,18000.0,1660.16,t
2,Afghanistan,Asia,33.0,65.0,2016,814.88,8353.0,17333.33,9795.21,t
3,Afghanistan,Asia,33.0,65.0,2017,3.67,8500.0,18234.12,9737.79,t
4,Afghanistan,Asia,33.0,65.0,2018,112.3,9456.0,17855.82,8512.12,t


In [45]:
# Show the column labels currently held by `data`.
# NOTE(review): the saved output below still lists pre-selection labels such
# as 'Domain Code' and 'Export ', and this cell's execution count (45) is
# lower than that of the cell above (46). It appears to have last run before
# the column selection and rename; re-run top to bottom to refresh it.
data.columns

Index(['Domain Code', 'Domain', 'Area Code (M49)', 'Area', 'Element Code',
       'Item Code (CPC)', 'Item', 'Year', 'Unit', 'Import', 'Export ',
       'Production', 'Consumption', 'Country', 'Alpha-2 code', 'Alpha-3 code',
       'Numeric code', 'Latitude (average)', 'Longitude (average)',
       'Continent'],
      dtype='object')