# Convert from xarray DataSet to CSV

This is for backwards compatability with the code in notebook 2.0, which was developed before the authors knew xarray. This code is very slow; there's no doubt a much faster way to do this. At present, it takes over an hour for each experiment. 

In [1]:
import xarray as xr
import pandas as pd
import numpy as np
from tqdm import tqdm

Suppress warnings based on pandas depreciated function, since we're using a frozen version

In [2]:
import warnings
warnings.filterwarnings(action='ignore', category=FutureWarning) # setting ignore as a parameter and further adding category

## Define function to carry out the conversion

In [3]:
def rearrange_o3_data(ds, fname='o3_rearranged.csv'):
    
    ### part one: rearrange and convert to pandas dataframe
    
    # each x, y, season represents a single profile
    ds = ds.stack(profile=("y","x","season"))

    # convert to a single DataArray for simplicity
    o3 = ds.o3

    # reset the index, we only need profile number 
    o3 = o3.reset_index('profile')

    # conver to a pandas dataframe for ease of writing csv
    o3_df = o3.to_dataframe()

    # the x, y columns duplicate lon, lat, so we can drop them safely
    o3_df.drop(["x","y"],axis=1,inplace=True)
    o3_df = o3_df.reset_index()

    ### part two: loop through profiles for reformatting
    
    for profile_number in tqdm(range(o3_df.profile.max())):
    #for profile_number in range(5):
        
        # select single profile
        oneProf = o3_df.loc[o3_df['profile']==profile_number]
        
        # manually rearrange data
        x = oneProf.loc[oneProf['plev']==100.0].lon.values[0]
        y = oneProf.loc[oneProf['plev']==100.0].lat.values[0]
        season = oneProf.loc[oneProf['plev']==100.0].season.values[0]
        v1000 = oneProf.loc[oneProf['plev']==100000.0].o3.values[0]
        v0925 = oneProf.loc[oneProf['plev']==92500.0].o3.values[0]
        v0850 = oneProf.loc[oneProf['plev']==85000.0].o3.values[0]
        v0700 = oneProf.loc[oneProf['plev']==70000.0].o3.values[0]
        v0600 = oneProf.loc[oneProf['plev']==60000.0].o3.values[0]
        v0500 = oneProf.loc[oneProf['plev']==50000.0].o3.values[0]
        v0400 = oneProf.loc[oneProf['plev']==40000.0].o3.values[0]
        v0300 = oneProf.loc[oneProf['plev']==30000.0].o3.values[0]
        v0250 = oneProf.loc[oneProf['plev']==25000.0].o3.values[0]
        v0200 = oneProf.loc[oneProf['plev']==20000.0].o3.values[0]
        v0150 = oneProf.loc[oneProf['plev']==15000.0].o3.values[0]
        v0100 = oneProf.loc[oneProf['plev']==10000.0].o3.values[0]
        v0070 = oneProf.loc[oneProf['plev']==7000.0].o3.values[0]
        v0050 = oneProf.loc[oneProf['plev']==5000.0].o3.values[0]
        v0030 = oneProf.loc[oneProf['plev']==3000.0].o3.values[0]
        v0020 = oneProf.loc[oneProf['plev']==2000.0].o3.values[0]
        v0010 = oneProf.loc[oneProf['plev']==1000.0].o3.values[0]
        v0005 = oneProf.loc[oneProf['plev']==500.0].o3.values[0]
        v0001 = oneProf.loc[oneProf['plev']==100.0].o3.values[0]
        
        # put the above values into a dict
        data = {
                'x': x, 
                'y': y, 
                'season' : season, 
                '1000'   : v1000,
                '925'    : v0925,
                '850'    : v0850,
                '700'    : v0700,
                '600'    : v0600,
                '500'    : v0500,
                '400'    : v0400,
                '300'    : v0300,
                '250'    : v0250,
                '200'    : v0200,
                '150'    : v0150,
                '100'    : v0100,
                '70'     : v0070,
                '50'     : v0050,
                '30'     : v0030,
                '20'     : v0020,
                '10'     : v0010,
                '5'      : v0005,
                '1'      : v0001,
               }
    
        # either create or append to dataframe
        if profile_number==0:
            newdf = pd.DataFrame(data, index=[profile_number])
        else:
            newdf = newdf.append(data, ignore_index=True)
            
    ### part three: write to CSV
    newdf.to_csv(fname, na_rep=np.nan, index=False)
    

## Convert from NetCDF to rearranged CSV

In [None]:
# historical case
print("Historical data")
ds = xr.open_dataset("UKESM_O3_historical_seasonal.nc")
rearrange_o3_data(ds, fname="data_in/historical_seasonal.csv")
                  
# ssp126
print("SSP 126 data")
ds = xr.open_dataset("UKESM_O3_ssp126_seasonal.nc")
rearrange_o3_data(ds, fname="data_in/ssp126_seasonal.csv")
                  
# ssp585
print("SSP 585 data")
ds = xr.open_dataset("UKESM_O3_ssp585_seasonal.nc")
rearrange_o3_data(ds, fname="data_in/ssp585_seasonal.csv")

Historical data


100%|██████████| 110591/110591 [1:13:43<00:00, 25.00it/s]
 11%|█▏        | 12713/110591 [07:34<57:46, 28.23it/s]  IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 37%|███▋      | 41082/110591 [24:20<43:14, 26.79it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 59%|█████▉    | 65715/110591 [39:12<31:44, 23.57it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing i