# ESM vs WOA

#### Dask Import

In [1]:
from dask.distributed import Client

# Connect to a Dask scheduler that is already running at this address.
# NOTE(review): the address is hard-coded; it has to match the scheduler of the
# current session, otherwise this cell cannot connect.
schedulerAddress = "tcp://127.0.0.1:33383"
client = Client(schedulerAddress)
client

0,1
Client  Scheduler: tcp://127.0.0.1:33383  Dashboard: http://127.0.0.1:8787/status,Cluster  Workers: 7  Cores: 7  Memory: 45.10 GB


<br>

#### Choices for data

In [2]:
# Identifiers of the experiment data to analyse: variable, experiment and source model.
dataVariableId = "thetao"
dataExperimentId = "historical"
dataSourceID = "E3SM-1-0"

# Custom selections
# Latitude band to investigate, expressed as a slice(lower, upper) of latitude values.
latSel = slice(-89.5, -29.5)

<br>

#### Importing Libraries

In [3]:
import cartopy.crs as ccrs
import dask.dataframe as dd
import fsspec
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import xarray as xr
import zarr

from dask import delayed
from matplotlib.pyplot import cm
print("Imports complete")

Imports complete


<br>

#### Data Loading

<b>WOA Data</b>

In [4]:
# Load the WOA table from the local CSV file. The first line of the file is skipped,
# and the first two columns are used together as the row index.
dsWOA = pd.read_csv(
    "woa18_decav_t00mn01.csv",
    skiprows=1,
    index_col=[0, 1],
)
dsWOA

Unnamed: 0_level_0,Unnamed: 1_level_0,AND VALUES AT DEPTHS (M):0,5,10,15,20,25,30,35,40,45,...,4600,4700,4800,4900,5000,5100,5200,5300,5400,5500
COMMA SEPARATED LATITUDE,LONGITUDE,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
-77.5,-178.5,-0.896,-0.922,-0.942,-0.976,-1.001,-1.046,-1.094,-1.142,-1.202,-1.277,...,,,,,,,,,,
-77.5,-177.5,-0.720,-0.748,-0.763,-0.804,-0.854,-0.905,-0.990,-1.054,-1.111,-1.154,...,,,,,,,,,,
-77.5,-176.5,-0.901,-0.925,-0.996,-1.088,-1.132,-1.154,-1.215,-1.287,-1.345,-1.382,...,,,,,,,,,,
-77.5,-175.5,-0.802,-0.820,-0.839,-0.873,-0.926,-0.962,-0.965,-1.024,-1.057,-1.065,...,,,,,,,,,,
-77.5,-174.5,-1.046,-1.084,-1.108,-1.204,-1.287,-1.332,-1.346,-1.374,-1.447,-1.467,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89.5,174.5,,,-1.695,-1.695,-1.696,-1.696,-1.697,-1.688,-1.638,-1.618,...,,,,,,,,,,
89.5,175.5,,,-1.695,-1.696,-1.699,-1.699,-1.699,-1.699,-1.663,-1.638,...,,,,,,,,,,
89.5,176.5,,,-1.702,-1.702,-1.702,-1.702,-1.699,-1.674,-1.613,-1.554,...,,,,,,,,,,
89.5,177.5,,,-1.719,-1.718,-1.718,-1.718,-1.718,-1.718,-1.708,-1.701,...,,,,,,,,,,


In [5]:
# Convert the indexed WOA table into an xarray object and rename the
# 'COMMA SEPARATED LATITUDE' entry (taken from the CSV header) to 'LATITUDE'.
dfWOA = (
    dsWOA
    .to_xarray()
    .rename({'COMMA SEPARATED LATITUDE': 'LATITUDE'})
)
dfWOA

<br>
<b>ESM Data</b>

In [6]:
# --- Locate the requested data in the CMIP6 zarr-store catalogue -----------------
# The filter uses dataVariableId / dataExperimentId / dataSourceID so that editing
# those choices actually changes which stores are loaded.
df = pd.read_csv('https://storage.googleapis.com/cmip6/cmip6-zarr-consolidated-stores.csv')
dfFilt = df[
    df.variable_id.eq(dataVariableId)
    & df.experiment_id.eq(dataExperimentId)
    & df.source_id.eq(dataSourceID)
]
# NOTE(review): the cell-area field is taken from CESM2, not from dataSourceID --
# confirm its grid matches the loaded data before combining the two.
dfArea = df[df.variable_id.eq('areacello') & df.source_id.eq('CESM2')]

# Fail early with a readable message instead of an obscure error further down.
if dfFilt.empty:
    raise ValueError(
        "No catalogue entries for variable_id={!r}, experiment_id={!r}, source_id={!r}".format(
            dataVariableId, dataExperimentId, dataSourceID
        )
    )

# --- Open every matching store (one dataset per catalogue row) -------------------
fileSetList = []
for zstore in dfFilt.zstore.values:
    mapper = fsspec.get_mapper(zstore)
    fileRaw = xr.open_zarr(mapper, consolidated=True)
    fileSetList.append(fileRaw)
fileCount = len(fileSetList)

# NOTE(review): index [1] picks the second matching areacello store; the choice is
# hard-coded and depends on the catalogue's row order -- confirm it is the intended one.
dsArea = xr.open_zarr(fsspec.get_mapper(dfArea.zstore.values[1]), consolidated=True)
dataAreaRaw = dsArea.areacello

# --- Put every dataset on a monthly np.datetime64 time axis ----------------------
# The axis runs from the month of the first time value to the month of the last one
# (inclusive), so the assignment only fits if the store has one step per month.
for i in range(fileCount):
    startDateIterate = np.datetime64(fileSetList[i]['time'].values[0], 'M')
    endDateIterate = np.datetime64(fileSetList[i]['time'].values[-1], 'M') + np.timedelta64(1, 'M')
    monthAxis = np.arange(startDateIterate, endDateIterate, dtype='datetime64[M]')
    fileSetList[i]['time'] = ('time', monthAxis)
    # NOTE(review): this replaces time_bnds with a 1-D copy of the time axis on its own
    # 'time_bnds' dimension, so it no longer holds interval bounds -- confirm nothing
    # downstream relies on real bounds.
    fileSetList[i]['time_bnds'] = ('time_bnds', monthAxis)

# Stack the datasets along a new 'SetId' dimension (one entry per opened store).
fileSet = xr.combine_nested(fileSetList, concat_dim='SetId')

# Keep only the chosen latitude band of the chosen variable.
dataFiltLatLev = fileSet[dataVariableId].sel(lat=latSel)
# Drop the non-index coordinates attached to the selection.
dataFiltFull = dataFiltLatLev.reset_coords(drop=True)

# --- Time span of the loaded data ------------------------------------------------
globalStartDate = dataFiltFull["time"][0].values
# Spacing between the first two time steps, used as the length of one step.
globalDateInc = dataFiltFull["time"][1].values - globalStartDate
globalEndDateIn = dataFiltFull["time"][-1].values  # last time step present in the data
globalEndDateOut = globalEndDateIn + globalDateInc  # one step past the last time step

# First 7 characters of each date string (the "YYYY-MM" part of an ISO-formatted date).
globalStartDateStr = str(globalStartDate)[:7]
globalEndDateInStr = str(globalEndDateIn)[:7]
globalEndDateOutStr = str(globalEndDateOut)[:7]

print("Data loaded")

Data loaded
