In [1]:
import xarray as xr
import numpy as np

# TMQ time-series files for the two runs being compared (CTL vs. CLOCK, per the paths).
f1 = "/Volumes/Jedha/cloud_locking/B1850_c201_CTL/B1850_c201_CTL.cam.h0.0001-01.ncrcat.TMQ.nc"
f2 = "/Volumes/Jedha/cloud_locking/B1850_c201_CLOCK/B1850_c201_CLOCK.cam.h0.0001-01.ncrcat.TMQ.nc"

# Open both with decode_times=False: the time axis is left undecoded here
# because cesm_correct_time() asks for datasets loaded that way.
ds1, ds2 = (xr.open_dataset(path, decode_times=False) for path in (f1, f2))


In [2]:
def cesm_correct_time(ds, time_name="time", bounds_name="time_bnds"):
    """Replace the time coordinate with the midpoint of its time bounds.

    The purpose is to center each timestamp on its averaging interval.
    The midpoint is the mean of the bounds variable over all of its
    non-time dimensions (for the default CESM layout that is ``nbnd``).

    NOTE: ds should have been loaded using `decode_times=False`; decoding
    to datetime objects happens here, after the coordinate is replaced.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset containing both `time_name` and `bounds_name`.
    time_name : str, optional
        Name of the time coordinate to replace (default ``"time"``).
    bounds_name : str, optional
        Name of the time-bounds variable (default ``"time_bnds"``).

    Returns
    -------
    xarray.Dataset
        A CF-decoded dataset whose `time_name` coordinate holds the
        interval midpoints.

    Raises
    ------
    AssertionError
        If `time_name` or `bounds_name` is missing from `ds`, or if the
        bounds variable has no dimension other than `time_name`.
    """
    assert bounds_name in ds, f"dataset has no '{bounds_name}' variable"
    assert time_name in ds, f"dataset has no '{time_name}' coordinate"
    bounds = ds[bounds_name]
    # Average over whatever the bounds dimension is called instead of
    # hard-coding 'nbnd', so files that name it differently also work.
    bounds_dims = [d for d in bounds.dims if d != time_name]
    assert bounds_dims, f"'{bounds_name}' has no bounds dimension to average over"
    correct_time_values = bounds.mean(dim=bounds_dims)
    # copy any metadata (units, calendar, ...) so decode_cf can interpret the new values:
    correct_time_values.attrs = ds[time_name].attrs
    ds = ds.assign_coords({time_name: correct_time_values})
    ds = xr.decode_cf(ds)  # decode to datetime objects
    return ds

In [3]:
# Swap each dataset for its time-corrected, decoded counterpart.
# NOTE(review): ds1/ds2 are rebound here, so re-running this cell would feed
# already-decoded datasets back into cesm_correct_time(), which its docstring
# says it does not expect — re-run the load cell first.
ds1, ds2 = (cesm_correct_time(raw) for raw in (ds1, ds2))

# Pull the TMQ field out of each run.
prw1, prw2 = ds1['TMQ'], ds2['TMQ']

In [4]:
# Quick look at the array size before running any statistics on it.
prw1.shape

(361, 192, 288)

In [6]:
# Alternative: a two-sided t-test; equal_var=False selects Welch's
# unequal-variance form.
from scipy import stats

# Step 1: pool every value of each full field into one 1-D sample and check
# whether the two sample means differ.
# No NaN filtering is applied at this step.
# NOTE(review): every grid point and time step is treated as an independent,
# equally weighted sample here — confirm that is intended.
v1, v2 = (field.values.flatten() for field in (prw1, prw2))

print("get test stats")
tstat, pval = stats.ttest_ind(v1, v2, equal_var=False)
print(f"The t-statistic is {tstat}, the p-value is {pval}; If pvalue is small, the means are different. Check {pval < 0.05}")

get test stats
The t-statistic is 4.850194709683481, the p-value is 1.23340845646681e-06; If pvalue is small, the means are different. Check True


In [7]:
# LAND
# Land-fraction field, used by the later cells to mask out land points.
land_file = xr.open_dataset("/Users/brianpm/Dropbox/Data/cesm2_f09_land.nc")
# squeeze() drops any length-1 dimensions (presumably a singleton time axis — TODO confirm).
LAND = land_file['LANDFRAC'].squeeze()



In [8]:
# Step 2: restrict to the tropical ocean.
# where(LAND <= 0) blanks (NaN) every point with a non-zero land fraction;
# the lat slice then keeps 30S-30N.
prw1to, prw2to = (
    field.where(LAND <= 0).sel(lat=slice(-30,30)) for field in (prw1, prw2)
)

# Flatten each masked field, then drop the NaNs left behind by the land mask.
v1, v2 = (masked.values.flatten() for masked in (prw1to, prw2to))
vs1, vs2 = (sample[~np.isnan(sample)] for sample in (v1, v2))

print("get test stats")
tstat, pval = stats.ttest_ind(vs1, vs2, equal_var=False)
print(f"The t-statistic is {tstat}, the p-value is {pval}; If pvalue is small, the means are different. Check {pval < 0.05}")

get test stats
The t-statistic is -0.598293224327227, the p-value is 0.5496443126958364; If pvalue is small, the means are different. Check False


In [12]:
# Cross-check using annual means: keep records stamped on or before
# 0030-12-31, then average the records that fall in each calendar year.
# NOTE(review): this is a plain mean of the records in each year, with no
# weighting by record length — confirm that is acceptable.
prw1_ann = (
    prw1
    .sel(time=slice(None, '0030-12-31'))
    .groupby('time.year')
    .mean(dim='time')
)
prw1_ann

In [13]:
# Same annual averaging for the second run: records up to 0030-12-31,
# grouped by calendar year and averaged over time within each group.
prw2_ann = (
    prw2
    .sel(time=slice(None, '0030-12-31'))
    .groupby('time.year')
    .mean(dim='time')
)
prw2_ann

In [14]:
# Step 3: repeat the tropical-ocean test on the annual means.
# Land points are blanked with where(LAND <= 0); the lat slice keeps 30S-30N.
# NOTE: this rebinds prw1to/prw2to, v1/v2 and vs1/vs2 from the earlier cells.
prw1to, prw2to = (
    annual.where(LAND <= 0).sel(lat=slice(-30,30)) for annual in (prw1_ann, prw2_ann)
)

# Flatten each masked field, then drop the NaNs left behind by the land mask.
v1, v2 = (masked.values.flatten() for masked in (prw1to, prw2to))
vs1, vs2 = (sample[~np.isnan(sample)] for sample in (v1, v2))

print("get test stats")
tstat, pval = stats.ttest_ind(vs1, vs2, equal_var=False)
print(f"The t-statistic is {tstat}, the p-value is {pval}; If pvalue is small, the means are different. Check {pval < 0.05}")

get test stats
The t-statistic is -0.17301690673118816, the p-value is 0.8626381875698816; If pvalue is small, the means are different. Check False
