# Combine Hege-Beate's 4xCO2 text data into one file

This replaces the original step, which downloaded the 4xCO2 data using `netcdf-scm`. Hege-Beate's dataset covers many more models.

In [None]:
import pandas as pd
import glob
from pathlib import PurePath
import os
from dotenv import dotenv_values
from fair import __version__

In [None]:
# Read the calibration and FaIR version strings from the repository's .env
# file, parsing the file once and picking both keys from the result.
env_config = dotenv_values("../../.env")
cal_v = env_config['CALIBRATION_VERSION']
fair_v = env_config['FAIR_VERSION']

# The installed fair package must match the version pinned in .env.
assert fair_v == __version__

In [None]:
# Locate every per-model abrupt-4xCO2 anomaly text file. glob returns matches
# in an order that depends on the file system, so sort them to keep the row
# order of the combined output reproducible from one machine to the next.
available_files = sorted(
    glob.glob('../../data/cmip6-hbf/cmip_data/*/abrupt-4xCO2/*_abrupt-4xCO2_*_anomalies.txt')
)

In [None]:
# Build one output row per (model, run, variable) from the anomaly files.
# Each row is nine metadata fields followed by up to N_YEARS annual values.
N_YEARS = 150  # keep only the first 150 entries of each series
FLUX_UNIT = 'W m^-2'
# tas is a temperature, so it must not carry the radiative-flux unit.
UNITS = {
    'tas': 'K',
    'rlut': FLUX_UNIT,
    'rsut': FLUX_UNIT,
    'rsdt': FLUX_UNIT,
    'rndt': FLUX_UNIT,
}

models = []  # not unique
runs = []
lines = []
for file in available_files:
    path = PurePath(file)
    # The files sit at <model>/abrupt-4xCO2/<name>_abrupt-4xCO2_<run>_anomalies.txt.
    # Index from the end of the path so the result does not depend on how
    # deep the data root is relative to this notebook.
    model = path.parts[-3]
    # NOTE(review): assumes the leading filename field contains no underscore.
    run = path.name.split('_')[2]
    models.append(model)
    runs.append(run)
    df = pd.read_csv(file, index_col=0)
    # No squeeze here: a column is already 1-D, and squeezing a one-row file
    # would yield a 0-d array that cannot be unpacked into the row.
    series = {
        var: df[var].to_numpy()[:N_YEARS]
        for var in ('tas', 'rlut', 'rsut', 'rsdt')
    }
    # Derived variable: rsdt minus rsut minus rlut (presumably the net
    # top-of-atmosphere flux, going by the CMIP variable names).
    series['rndt'] = series['rsdt'] - series['rsut'] - series['rlut']
    # dict order is insertion order: tas, rlut, rsut, rsdt, then rndt.
    for var, values in series.items():
        lines.append(
            ['CMIP', model, run, 'CMIP6', 'unspecified', 'World', 'abrupt-4xCO2', UNITS[var], var, *values]
        )

In [None]:
# Assemble the collected rows into a single table: nine metadata columns
# followed by 150 value columns labelled X1850 ... X1999.
metadata_columns = [
    'activity_id',
    'climate_model',
    'member_id',
    'mip_era',
    'model',
    'region',
    'scenario',
    'unit',
    'variable',
]
year_columns = [f"X{year}" for year in range(1850, 2000)]
df = pd.DataFrame(lines, columns=metadata_columns + year_columns)

# Discard every row that has a missing value in any column (presumably this
# removes runs that supplied fewer than 150 values -- confirm against the data).
df = df.dropna()

In [None]:
# Make sure the versioned calibration output directory exists; an already
# existing directory is not an error.
calibration_dir = f'../../output/fair-{fair_v}/v{cal_v}/calibrations/'
os.makedirs(calibration_dir, exist_ok=True)

In [None]:
# Write the combined table as CSV, leaving out the integer row index.
combined_csv = f'../../output/fair-{fair_v}/v{cal_v}/calibrations/4xCO2_cmip6.csv'
df.to_csv(combined_csv, index=False)