In [None]:
import csv
import pandas

In [None]:
# Inventory years to process; the later cells read one
# "data/{year}_Sub2021_GNFR.csv" input file per entry.
years = [
    1990,
    1995,
    2000,
    2005,
    2010,
    2015,
    2019,
]

# Sector key as it appears inside the input column names ("E_{key}_{pollutant}")
# -> label written to the "GNFR" column of the output. Insertion order is the
# order in which the sectors are stacked in the output file.
gnfrs = {
    "GNFR_A": "A_PublicPower",
    "GNFR_B": "B_Industry",
    "GNFR_C": "C_OtherStationaryComb",
    "GNFR_D": "D_Fugitive",
    "GNFR_E": "E_Solvents",
    "GNFR_F": "F_RoadTransport",
    "GNFR_G": "G_Shipping",
    "GNFR_H": "H_Aviation",
    "GNFR_I": "I_Offroad",
    "GNFR_J": "J_Waste",
    "GNFR_K": "K_AgriLivestock",
    "GNFR_L": "L_AgriOther",
    "GNFR_M": "M_Other",
    "GNFR_N": "N_Natural",
    "GNFR_O": "O_AviCruise",
    "GNFR_P": "P_IntShipping",
    "GNFR_Z": "z_Memo",
}

# Pollutant identifiers: used both as the suffix of the input column names and
# as the column names of the output file.
pollutants = (
    "NOX", "NMVOC", "SO2", "NH3",
    "PM2_5", "PM10", "BC", "CO",
    "PB", "CD", "HG",
    "PCDD_F", "PAHtotal", "HCB", "PCB",
)

In [None]:
# Convert each year's wide input table (one "E_{sector}_{pollutant}" column per
# sector/pollutant pair) into a long table with one block of rows per sector and
# one column per pollutant, written to "output/{year}.csv".

# Sectors whose missing pollutant columns are flagged "NE"; every other sector
# gets "NA". (Presumably inventory notation keys -- confirm with the data owner.)
ne_sectors = ("GNFR_M", "GNFR_N", "GNFR_O", "GNFR_P", "GNFR_Z")

for year in years:
    # Read the main file and, when it exists, the POPs file for the same year.
    # NOTE(review): these reads do not pass decimal=',' although the cross-check
    # cell reads the same files with it -- confirm the values are meant to pass
    # through to the output unparsed.
    main = pandas.read_csv(f"data/{year}_Sub2021_GNFR.csv", sep=';', index_col=0)
    try:
        pops = pandas.read_csv(f"data/{year}_POPs_Sub2021_GNFR.csv", sep=';', index_col=0)
    except FileNotFoundError:
        # No POPs file for this year: continue with the main file only.
        greta = main
    else:
        # merge() without explicit keys is an inner join on the columns that
        # both frames have in common.
        greta = main.merge(pops)

    # Build one frame per sector and concatenate once at the end.
    # (DataFrame.append was removed in pandas 2.0 and copied the whole
    # accumulated frame on every iteration.)
    frames = []
    for gnfr_key, gnfr_name in gnfrs.items():
        gnfr_data = greta[["LON", "LAT"]].copy().assign(GNFR=gnfr_name)

        for pollutant in pollutants:
            source_column = f"E_{gnfr_key}_{pollutant}"
            if source_column in greta.columns:
                gnfr_data[pollutant] = greta[source_column]
            elif year < 1995 and pollutant in ("PM2_5", "PM10"):
                gnfr_data[pollutant] = "NR"
            elif year < 2000 and pollutant == "BC":
                # Equality, not `in ("BC")`: the latter is a substring test
                # against the string "BC" rather than tuple membership.
                gnfr_data[pollutant] = "NR"
            else:
                gnfr_data[pollutant] = "NE" if gnfr_key in ne_sectors else "NA"

        frames.append(gnfr_data)

    data = pandas.concat(frames, ignore_index=True)

    # Write out the result
    data.to_csv(f"output/{year}.csv", index=False, sep=';')

In [None]:
# Cross-check every generated output file against its inputs: for each
# pollutant, the sum over all output rows is compared with the sum of the
# input's "E_SUM_{pollutant}" column.
for year in years:
    # Parser options shared by all three reads.
    csv_options = {"sep": ';', "decimal": ','}

    # The placeholder strings in the output are read back as missing values.
    data = pandas.read_csv(f"output/{year}.csv", na_values=('NE', 'NA', 'NR'), **csv_options)
    main = pandas.read_csv(f"data/{year}_Sub2021_GNFR.csv", index_col=0, **csv_options)
    try:
        pops = pandas.read_csv(f"data/{year}_POPs_Sub2021_GNFR.csv", index_col=0, **csv_options)
    except FileNotFoundError:
        # No POPs file for this year: check against the main file only.
        greta = main
    else:
        greta = main.merge(pops)

    # Cross check results
    print(year)
    for pollutant in pollutants:
        total_column = f"E_SUM_{pollutant}"
        # Pollutants without a column on either side are skipped silently.
        if pollutant not in data.columns or total_column not in greta.columns:
            continue

        sum_data = data[pollutant].sum()
        sum_greta = greta[total_column].sum()

        # Sums further apart than this absolute tolerance are reported as a difference.
        if abs(sum_data - sum_greta) > 10**-10:
            print(f"Total check {pollutant}: {sum_data:.6} vs. {sum_greta:.6} results in difference of {sum_data - sum_greta:.2}")
        else:
            print(f"Total check {pollutant}: {sum_data:.6} vs. {sum_greta:.6} results no difference")