# Concatenate ssp245 data

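This notebook joins each model run's historical and ssp245 anomaly files into one time series per variable. It replaces the original download of ssp245 data using `netcdf-scm`. Hege-Beate's data contains many more models.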

In [None]:
import pandas as pd
import glob
import os

In [None]:
# Find every ssp245 anomaly file, one directory level per model.
# glob returns matches in arbitrary (filesystem-dependent) order, so sort them
# to make the row order of the resulting table reproducible between runs.
available_files = sorted(
    glob.glob('../data/cmip6-hbf/cmip_data/*/ssp245/*_ssp245_*_anomalies.txt')
)

In [None]:
# Build one row per (model, run, variable): nine metadata fields followed by
# the historical values and then the ssp245 values.

# Number of leading rows taken from each experiment. Historical values come
# first and ssp245 values second, giving 165 + 86 = 251 values per row.
n_hist = 165
n_ssp245 = 86
base_vars = ['tas', 'rlut', 'rsut', 'rsdt']

lines = []
for file in available_files:
    # Expected layout: .../<model>/ssp245/<model>_ssp245_<run>_anomalies.txt
    # os.path is used instead of splitting on '/' at fixed positions so that
    # this also works when glob returns backslash-separated paths.
    scenario_dir, filename = os.path.split(file)
    model = os.path.basename(os.path.dirname(scenario_dir))
    run = filename.split('_')[2]
    # Does historical exist?
    file_hist = f'../data/cmip6-hbf/cmip_data/{model}/historical/{model}_historical_{run}_anomalies.txt'
    if not os.path.exists(file_hist):
        print(model, run, "doesn't have corresponding historical")
        continue
    df_ssp245 = pd.read_csv(file, index_col=0)
    df_hist = pd.read_csv(file_hist, index_col=0)
    # A short file would produce a row with fewer than 251 values in which the
    # ssp245 values are shifted into the wrong positions; report and skip it.
    if len(df_hist) < n_hist or len(df_ssp245) < n_ssp245:
        print(model, run, "has too few years of data")
        continue
    vars_245 = {var: df_ssp245[var].values[:n_ssp245].squeeze() for var in base_vars}
    vars_hist = {var: df_hist[var].values[:n_hist].squeeze() for var in base_vars}
    # Net top-of-atmosphere flux: incoming shortwave minus outgoing shortwave
    # and outgoing longwave.
    for series in (vars_hist, vars_245):
        series['rndt'] = series['rsdt'] - series['rsut'] - series['rlut']
    for var in base_vars + ['rndt']:
        # tas is a temperature and is labelled in K; every other variable here
        # is a radiative flux in W m^-2.
        unit = 'K' if var == 'tas' else 'W m^-2'
        line = ['CMIP', model, run, 'CMIP6', 'unspecified', 'World', 'ssp245', unit, var]
        line.extend(vars_hist[var])
        line.extend(vars_245[var])
        lines.append(line)

In [None]:
# Assemble the collected rows into a table: nine metadata columns followed by
# one column per year from 1850 to 2100 inclusive. Rows that contain any
# missing value are discarded.
metadata_columns = [
    'activity_id', 'climate_model', 'member_id', 'mip_era', 'model',
    'region', 'scenario', 'unit', 'variable',
]
year_columns = ["X%d" % year for year in range(1850, 2101)]
df = pd.DataFrame(lines, columns=metadata_columns + year_columns).dropna()

In [None]:
# Write the combined table to disk as CSV, leaving out the row index.
output_path = '../data/cmip6-hbf/ssp245.csv'
df.to_csv(output_path, index=False)

In [None]:
# Show the combined table as the cell output for a quick visual check.
df