### AR6 CSV Fixer

In [62]:
import pandas as pd
import numpy as np

# Load the long-format table and discard its 'Unnamed: 0' column
# (presumably a row index saved by an earlier to_csv call -- confirm).
df = pd.read_csv("/home/h02/hadcam/github/IllusPathwaysAR6/AR6Ilusplus_RCMIPhist.csv")
df = df.drop(columns='Unnamed: 0')

# Pivot to wide format: 'Year' is the innermost index level, so unstack() turns
# each year into its own column holding 'value'. Rows are then ordered by model
# and given a fresh 0..n-1 index; rename_axis clears the leftover 'Year' label
# from the column axis.
df_p = (
    df.set_index(['Model', 'Scenario', 'Region', 'Variable', 'Unit', 'Year'])['value']
    .unstack()
    .reset_index()
    .sort_values('Model')
    .rename_axis(None, axis=1)
    .reset_index(drop=True)
)

### Removing Historical Fields that don't match up to AR6 

In [63]:
# Fragments stripped out of every variable name, applied in list order.
# 'Montreal Gases|CFC|' has to come before the shorter 'Montreal Gases|',
# otherwise the shorter fragment would match first and leave 'CFC|' behind.
removals = ['AR6 climate diagnostics|Infilled|', 'F-Gases|', 'Montreal Gases|CFC|', 'Montreal Gases|']

variable_names = df_p['Variable'].astype(str)
for fragment in removals:
    # regex=False: the fragments contain '|', which must be matched literally.
    variable_names = variable_names.str.replace(fragment, '', regex=False)
df_p['Variable'] = variable_names

# Rows of the model used as the reference list of AR6 variables.
df_AR6 = df_p[df_p['Model'] == 'AIM/CGE 2.2']

In [64]:
# Split off the rows whose variable does not appear in the df_AR6 reference
# list, keep everything else, then restrict the result to the 'World' region.
in_reference = df_p['Variable'].isin(df_AR6['Variable'])
df_badvars = df_p.loc[~in_reference]
df_new = df_p.loc[~df_p['Variable'].isin(df_badvars['Variable'])]
df_new = df_new.loc[df_new['Region'] == 'World'].copy()

In [65]:
# Sanity check: the filtering above should have removed at least one row.
n_dropped = abs(len(df_new) - len(df_p))
if n_dropped == 0:
    print("Didn't work")
else:
    print(f"Amount dropped = {n_dropped}")

Amount dropped = 754


In [66]:
# Number of remaining rows for each model.
print(df_new.Model.value_counts())

COFFEE 1.1                   104
AIM/CGE 2.2                   52
MESSAGEix-GLOBIOM 1.0         52
MESSAGEix-GLOBIOM_GEI 1.0     52
REMIND-MAgPIE 2.1-4.2         52
REMIND-MAgPIE 2.1-4.3         52
WITCH 5.0                     52
CEDS/UVA/GCP/PRIMAP           48
Name: Model, dtype: int64


In [67]:
# Rows belonging to the 'CEDS/UVA/GCP/PRIMAP' model (the historical record).
hist = df_new[df_new['Model'] == 'CEDS/UVA/GCP/PRIMAP']

# Plain lists of variable names: x from the historical rows,
# y from the df_AR6 reference rows.
x = list(hist['Variable'])
y = list(df_AR6['Variable'])

##### Checking for missing variables

In [68]:
# Reference variable names (y) that have no entry in the historical list (x),
# kept in the order they occur in y.
unique = list(filter(lambda name: name not in x, y))

print(f'Number of historical vars missing: {len(unique)}')
print(f'\nVars missing: \n{unique}')

Number of historical vars missing: 4

Vars missing: 
['Emissions|HFC|HFC43-10', 'Emissions|HFC|HFC245ca', 'Emissions|CO2|AFOLU', 'Emissions|CO2|Energy and Industrial Processes']


##### Dropping missing variables from the rest of the dataframe (CHECK)

In [69]:
# Remove every row whose variable has no historical counterpart. A single
# vectorised filter replaces one DataFrame.drop call per variable and avoids
# reusing the name `x` as a loop variable. `.copy()` keeps df_new an
# independent frame so later column assignments do not warn.
df_new = df_new[~df_new['Variable'].isin(unique)].copy()

# Re-run the missing-variable check against the trimmed frame.
hist = df_new.loc[df_new['Model'] == 'CEDS/UVA/GCP/PRIMAP']
x = list(hist.Variable)
y = list(df_new.Variable)

# Set membership keeps the check linear in the number of rows.
hist_vars = set(x)
unique = [k for k in y if k not in hist_vars]

print(f'Number of historical vars missing: {len(unique)}')
print(f'\nVars missing: \n{unique}')

Number of historical vars missing: 0

Vars missing: 
[]


Check that every scenario has the same number of variables:

In [70]:
# Number of rows (one per variable) for each scenario.
counts = df_new['Scenario'].value_counts()

# All scenarios match exactly when the counts hold a single distinct value.
# This avoids integer-key lookups such as counts[i] on a string-labelled
# Series, which rely on a deprecated positional fallback in pandas.
if counts.nunique() > 1:
    raise ValueError('Scenarios have differing amounts of variables.')

print(f'All scenarios have equal amounts of variables: \n\n{counts}')

All scenarios have equal amounts of variables: 

EN_NPi2020_900f                  48
historical                       48
EN_NPi2020_400f_lowBECCS         48
EN_INDCi2030_500f                48
LowEnergyDemand_1.3_IPCC         48
SSP2_openres_lc_50               48
SusDev_SDP-PkBudg1000            48
DeepElec_SSP2_ HighRE_Budg900    48
CO_Bridge                        48
Name: Scenario, dtype: int64


Now we need to fill in the historical data for 1750–2014 on the rows of every scenario.

In [71]:
# Copy the historical values for 1750-2014 onto every row of df_new.
#
# Values are aligned on the 'Variable' name rather than tiled by position:
# positional tiling is only correct if every scenario lists its variables in
# exactly the same row order as `hist`, which nothing upstream guarantees.
hist_by_variable = hist.set_index('Variable')

# Validate before writing anything: each variable must have exactly one
# historical row, and every row of df_new must have a historical match.
# (The original built a ValueError here without raising it.)
if (not hist_by_variable.index.is_unique
        or not df_new['Variable'].isin(hist_by_variable.index).all()):
    raise ValueError('Tiling did not work properly.')

for yr in range(1750, 2015):
    # Look up each row's variable in the historical column for this year.
    df_new[yr] = df_new['Variable'].map(hist_by_variable[yr])


Now delete historical-data-only rows:

In [72]:
# Remove the rows that belong to the 'historical' scenario itself.
df_new = df_new.drop(df_new.index[df_new['Scenario'] == 'historical'])

# `in` on a Series tests the index labels, not the data, so the check has to
# look at .values to actually confirm no 'historical' scenario is left.
if 'historical' not in df_new['Scenario'].values:
    print('Great success!')

Great success!


##### Finally, write to CSV

In [73]:
# Write the combined table to the working directory (the row index is written too).
df_new.to_csv(path_or_buf='AR6Illus_RCMIP.csv')