In [None]:
import pandas as pd

In [None]:
# Read the chicken/turkey import-export quantity table from the data folder
# (path is relative to the notebook's working directory).
trade_csv = 'data/Chicken-Turkey Import-Export quant.csv'
df = pd.read_csv(trade_csv)
df  # bare last expression -> rich preview of the loaded frame

Unnamed: 0,Domain Code,Domain,Area Code (M49),Area,Element Code,Element,Item Code (CPC),Item,Year Code,Year,Unit,Value,Flag,Flag Description,Note
0,TCL,Crops and livestock products,4,Afghanistan,5609,Import quantity,2151,Chickens,2015,2015,1000 An,2111,X,Figure from external organization,Estimated data using trading partners database
1,TCL,Crops and livestock products,4,Afghanistan,5909,Export quantity,2151,Chickens,2015,2015,1000 An,8,X,Figure from external organization,Estimated data using trading partners database
2,TCL,Crops and livestock products,4,Afghanistan,5609,Import quantity,2151,Chickens,2016,2016,1000 An,15005,X,Figure from external organization,Estimated data using trading partners database
3,TCL,Crops and livestock products,4,Afghanistan,5609,Import quantity,2151,Chickens,2017,2017,1000 An,18970,A,Official figure,
4,TCL,Crops and livestock products,4,Afghanistan,5609,Import quantity,2151,Chickens,2018,2018,1000 An,12297,A,Official figure,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3629,TCL,Crops and livestock products,716,Zimbabwe,5909,Export quantity,2151,Chickens,2021,2021,1000 An,269,I,Value imputed by a receiving agency,
3630,TCL,Crops and livestock products,716,Zimbabwe,5609,Import quantity,2151,Chickens,2022,2022,1000 An,363,I,Value imputed by a receiving agency,
3631,TCL,Crops and livestock products,716,Zimbabwe,5909,Export quantity,2151,Chickens,2022,2022,1000 An,308,I,Value imputed by a receiving agency,
3632,TCL,Crops and livestock products,716,Zimbabwe,5609,Import quantity,2151,Chickens,2023,2023,1000 An,348,I,Value imputed by a receiving agency,


### Normalization Step: Add ISO-alpha3 Code

In [1]:
# --- Normalization: Add ISO-alpha3 column ---

# 1. Read the semicolon-separated UNSD methodology table used for the code mapping.
#    The file is looked up under data/ first; if that path does not exist, the same
#    relative path one directory up is tried instead (presumably so the notebook
#    also works when launched from a subdirectory -- confirm).
unsd_rel_path = 'data/UNSD — Methodology.csv'
try:
    unsd_df = pd.read_csv(unsd_rel_path, sep=';')
except FileNotFoundError:
    unsd_df = pd.read_csv('../' + unsd_rel_path, sep=';')

# 2. Create mappings
# Country names are converted to stripped strings so they can serve as lookup keys.
unsd_df['Country or Area'] = unsd_df['Country or Area'].astype(str).str.strip()
name_to_iso3 = unsd_df.set_index('Country or Area')['ISO-alpha3 Code'].to_dict()

# Build {integer M49 code -> ISO-alpha3 code}. Only the two needed columns are
# iterated (instead of DataFrame.iterrows(), which materialises a full Series
# for every row). If an M49 code occurs more than once, the last row wins.
m49_to_iso3 = {}
for raw_m49, iso3 in zip(unsd_df['M49 Code'], unsd_df['ISO-alpha3 Code']):
    try:
        # int() raises for NaN or otherwise unconvertible codes; such rows are skipped.
        m49_to_iso3[int(raw_m49)] = iso3
    except (ValueError, TypeError):
        continue

# 3. Apply mapping
print("Applying ISO-alpha3 normalization...")
if 'Area Code (M49)' in df.columns:
    def get_iso(val):
        """Return the ISO-alpha3 code for an M49 area code, or None if unmapped.

        Args:
            val: Value from the 'Area Code (M49)' column; anything int() accepts.

        Returns:
            The entry of ``m49_to_iso3`` for ``int(val)``, or None when the code
            is not in the mapping or ``val`` cannot be converted to an integer.
        """
        try:
            return m49_to_iso3.get(int(val), None)
        except (ValueError, TypeError, OverflowError):
            # Only conversion failures (NaN, None, non-numeric text, infinity) are
            # treated as "no mapping"; KeyboardInterrupt and genuine programming
            # errors are no longer swallowed as a bare `except:` would.
            return None
    df['iso_alpha3'] = df['Area Code (M49)'].apply(get_iso)
    print("  Mapped M49 codes to 'iso_alpha3'.")
else:
    print("  Warning: Source column 'Area Code (M49)' not found.")

# Check results
# The 'iso_alpha3' column only exists if the mapping step found its source column,
# so guard the lookup instead of failing with a KeyError.
if 'iso_alpha3' in df.columns:
    unmapped_rows = df['iso_alpha3'].isna()
    missing_iso = unmapped_rows.sum()
    if missing_iso > 0:
        print(f"  Warning: {missing_iso} rows have missing ISO-alpha3 codes.")
        # Show a few of the area codes that could not be mapped.
        print(df.loc[unmapped_rows, ['Area Code (M49)']].head())
else:
    # No ISO column was created at all, so every row counts as unmapped.
    missing_iso = len(df)


NameError: name 'pd' is not defined

In [None]:
# Remove columns that hold at most one distinct value, since they carry no
# information that varies between rows. nunique() does not count NaN by default,
# so a column with a single value plus blanks is removed as well.
uniques = df.nunique()
cols_to_drop = uniques.loc[uniques.le(1)].index
df_filtered = df = df.drop(columns=cols_to_drop)
df

Unnamed: 0,Area Code (M49),Area,Element Code,Element,Item Code (CPC),Item,Year Code,Year,Value,Flag,Flag Description
0,4,Afghanistan,5609,Import quantity,2151,Chickens,2015,2015,2111,X,Figure from external organization
1,4,Afghanistan,5909,Export quantity,2151,Chickens,2015,2015,8,X,Figure from external organization
2,4,Afghanistan,5609,Import quantity,2151,Chickens,2016,2016,15005,X,Figure from external organization
3,4,Afghanistan,5609,Import quantity,2151,Chickens,2017,2017,18970,A,Official figure
4,4,Afghanistan,5609,Import quantity,2151,Chickens,2018,2018,12297,A,Official figure
...,...,...,...,...,...,...,...,...,...,...,...
3629,716,Zimbabwe,5909,Export quantity,2151,Chickens,2021,2021,269,I,Value imputed by a receiving agency
3630,716,Zimbabwe,5609,Import quantity,2151,Chickens,2022,2022,363,I,Value imputed by a receiving agency
3631,716,Zimbabwe,5909,Export quantity,2151,Chickens,2022,2022,308,I,Value imputed by a receiving agency
3632,716,Zimbabwe,5609,Import quantity,2151,Chickens,2023,2023,348,I,Value imputed by a receiving agency


In [None]:
# Remove the code columns and the flag columns; the descriptive
# Element / Item / Year columns are kept.
redundant_cols = ['Element Code', 'Item Code (CPC)', 'Year Code', 'Flag', 'Flag Description']
df = df.drop(columns=redundant_cols)
df

Unnamed: 0,Area Code (M49),Area,Element,Item,Year,Value
0,4,Afghanistan,Import quantity,Chickens,2015,2111
1,4,Afghanistan,Export quantity,Chickens,2015,8
2,4,Afghanistan,Import quantity,Chickens,2016,15005
3,4,Afghanistan,Import quantity,Chickens,2017,18970
4,4,Afghanistan,Import quantity,Chickens,2018,12297
...,...,...,...,...,...,...
3629,716,Zimbabwe,Export quantity,Chickens,2021,269
3630,716,Zimbabwe,Import quantity,Chickens,2022,363
3631,716,Zimbabwe,Export quantity,Chickens,2022,308
3632,716,Zimbabwe,Import quantity,Chickens,2023,348


In [None]:
# Keep only the chicken records, discard the now-constant Item column and
# renumber the rows from zero.
is_chicken = df['Item'] == 'Chickens'
df = df.loc[is_chicken].drop(columns=['Item']).reset_index(drop=True)
df

Unnamed: 0,Area Code (M49),Area,Element,Year,Value
0,4,Afghanistan,Import quantity,2015,2111
1,4,Afghanistan,Export quantity,2015,8
2,4,Afghanistan,Import quantity,2016,15005
3,4,Afghanistan,Import quantity,2017,18970
4,4,Afghanistan,Import quantity,2018,12297
...,...,...,...,...,...
2536,716,Zimbabwe,Export quantity,2021,269
2537,716,Zimbabwe,Import quantity,2022,363
2538,716,Zimbabwe,Export quantity,2022,308
2539,716,Zimbabwe,Import quantity,2023,348


In [None]:
# Rename 'Area' to 'Country Name'. The result is reassigned rather than using
# inplace=True, so the frame displayed by the previous cell is not mutated
# behind the reader's back and the cell can be re-run safely.
df = df.rename(columns={'Area': 'Country Name'})
df

Unnamed: 0,Area Code (M49),Country Name,Element,Year,Value
0,4,Afghanistan,Import quantity,2015,2111
1,4,Afghanistan,Export quantity,2015,8
2,4,Afghanistan,Import quantity,2016,15005
3,4,Afghanistan,Import quantity,2017,18970
4,4,Afghanistan,Import quantity,2018,12297
...,...,...,...,...,...
2536,716,Zimbabwe,Export quantity,2021,269
2537,716,Zimbabwe,Import quantity,2022,363
2538,716,Zimbabwe,Export quantity,2022,308
2539,716,Zimbabwe,Import quantity,2023,348
