# Combine Haozhe's 4xCO2 netcdf data into one file

In [None]:
import pandas as pd
import glob
from pathlib import PurePath
import os
from netCDF4 import Dataset
from dotenv import dotenv_values
from fair import __version__

In [None]:
# Parse the .env file once and pull out the pinned calibration and FaIR
# versions (the original parsed the same file twice).
env = dotenv_values("../../.env")
cal_v = env['CALIBRATION_VERSION']
fair_v = env['FAIR_VERSION']

# Stop early if the installed fair package does not match the version pinned
# in .env; fair_v is used to name the output directory further down.
assert fair_v == __version__, (
    f"FAIR_VERSION in .env is {fair_v!r} but the installed fair is {__version__!r}"
)

In [None]:
# Collect every netCDF file in the longrunmip data directory. glob returns
# matches in arbitrary, filesystem-dependent order, so sort them to keep the
# row order of the combined table reproducible between runs and machines.
available_files = sorted(glob.glob('../../data/longrunmip/longrunmip_data/*.nc'))

In [None]:
# Build one row per netCDF file: nine metadata fields followed by the
# "<var>_glbmean" series read from that file.
lines = []
maxlen = 0
for file in available_files:
    # File names are underscore-separated: field 0 is the variable name and
    # field 2 the model name. Take the base name instead of a fixed index
    # into the path parts so this keeps working if the directory depth changes.
    fields = PurePath(file).name.split('_')
    var = fields[0]
    model = fields[2]
    # Read the series inside a context manager so each file handle is closed
    # rather than leaking one open Dataset per input file. The [:] slice pulls
    # the values into memory before the file is closed.
    with Dataset(file) as nc:
        data = nc.variables[f"{var}_glbmean"][:]
    # Track the longest series; it determines how many data columns are needed.
    maxlen = max(maxlen, len(data))
    # NOTE(review): the unit is hard-coded to 'W m^-2' for every variable --
    # confirm this is intended for all variables present in the directory.
    line = ['CMIP', model, 'r1i1p1f1', 'longrunmip', 'unspecified', 'World', 'abrupt-4xCO2', 'W m^-2', var]
    line.extend(data)
    lines.append(line)
# Bare expression so the notebook cell displays the longest series length.
maxlen

In [None]:
# Column layout: the nine metadata fields at the front of every row, then one
# column per entry of the longest series, labelled X1 .. X<maxlen>.
metadata_columns = [
    'activity_id',
    'climate_model',
    'member_id',
    'mip_era',
    'model',
    'region',
    'scenario',
    'unit',
    'variable',
]
year_columns = [f"X{year}" for year in range(1, maxlen + 1)]

# Assemble all rows into a single table.
df = pd.DataFrame(lines, columns=metadata_columns + year_columns)
#df.dropna(inplace=True)

In [None]:
# Make sure the versioned calibration output directory exists; an already
# existing directory is not an error.
calibration_dir = f'../../output/fair-{fair_v}/v{cal_v}/calibrations/'
os.makedirs(calibration_dir, exist_ok=True)

In [None]:
# Write the combined table as CSV, without the DataFrame index column.
calibration_csv = f'../../output/fair-{fair_v}/v{cal_v}/calibrations/4xCO2_longrunmip.csv'
df.to_csv(calibration_csv, index=False)