In [2]:
def hcast_or_cross(matrix_type, region):
    """Pick the observed anomaly dataset for a model type and mask it to a region.

    Besides returning data, this function has a deliberate side effect: it
    changes the process working directory to the output folder of the chosen
    model type, so that files opened by relative name afterwards are looked
    up in that folder.

    Parameters
    ----------
    matrix_type : str
        Either 'hcast' or 'cross'; selects both the dataset and the folder.
    region
        Condition handed straight to ``.where()`` on the observed dataset
        (presumably a boolean mask over the grid -- TODO confirm with callers).

    Returns
    -------
    sic_obsr
        ``sic_obs_h.where(region)`` for 'hcast', ``sic_obs_c.where(region)``
        for 'cross'.

    Raises
    ------
    ValueError
        If ``matrix_type`` is neither 'hcast' nor 'cross'. ``ValueError`` is a
        subclass of ``Exception``, so existing ``except Exception`` handlers
        keep working. The working directory is left untouched in this case.

    Notes
    -----
    ``sic_obs_h`` and ``sic_obs_c`` are module-level names defined outside
    this function. According to the original author's notes they already
    cover the correct time period and only valid points; that is not
    verifiable from this function.
    """
    if matrix_type == 'hcast':
        # Change directory first so the zz matrices of this model type can be
        # opened by bare file name afterwards.
        os.chdir("/d6/bxw2101/model_files/hcast_output")
        return sic_obs_h.where(region)
    if matrix_type == 'cross':
        os.chdir("/d6/bxw2101/model_files/cross_output")
        return sic_obs_c.where(region)
    # A bad argument value is a ValueError, not a bare Exception.
    raise ValueError('You dummy. first parameter needs be \'hcast\' or \'cross\'.'+
                     '\nSo we know what model we are comparing.')

### Basic correlation/RMSE plotting function: given correlation and RMSE matrices, plot them against lead time.

In [1]:
# Basic Function used to plot the corr and rmse matrices.
def plot_corrs_rmses(flen, matrix_type, avg_corrs, avg_rmse, persistence_c, persistence_r, clegend = []):
    """Draw correlation-vs-lead-time and RMSE-vs-lead-time figures.

    Two separate figures are created and shown one after the other. Each one
    plots the persistence curve first (black, dashed) and then one curve per
    model, against lead times 1..12.

    Parameters
    ----------
    flen : int
        Number of model curves to draw; rows ``0 .. flen-1`` of ``avg_corrs``
        and ``avg_rmse`` are plotted.
    matrix_type : str
        Appended to the title of the correlation figure (callers in this file
        pass 'hcast' or 'cross').
    avg_corrs
        ``avg_corrs[i]`` is the correlation curve of model ``i`` (12 values).
    avg_rmse
        ``avg_rmse[i]`` is the RMSE curve of model ``i``; it is multiplied by
        100 for display, so each row must support ``* 100`` (e.g. a NumPy row).
    persistence_c
        Persistence correlation curve (12 values).
    persistence_r
        Persistence RMSE curve; multiplied by 100 for display.
    clegend : sequence of str, optional
        Legend labels for the model curves. The default list is never mutated.
        When empty, labels are derived as described below.

    Notes
    -----
    The original fallback read a name ``zz_files`` that is neither a parameter
    nor a local, which raises ``NameError`` unless such a module-level variable
    happens to exist. That legacy lookup is kept for notebooks that relied on
    it (label = file name up to the first '-'); when no such variable exists,
    generic labels 'model 1', 'model 2', ... are used instead.
    """
    # defining the graph's legend:
    if len(clegend) == 0:
        legacy_files = globals().get('zz_files')
        if legacy_files is not None:
            model_labels = [name.split('-')[0] for name in legacy_files]
        else:
            model_labels = ['model ' + str(i + 1) for i in range(flen)]
    else:
        # Copy to a list so tuples (or other sequences) can be concatenated below.
        model_labels = list(clegend)
    legend_entries = ['persistence'] + model_labels

    xticks = np.arange(1, 13)

    #building out the correlation plot: first graph the persistence correlation, then the model correlations.
    plt.figure(figsize=(14,10))
    # suptitle applies to the current figure, i.e. the correlation figure only.
    plt.suptitle('Correlation and RMSE compared to lead time: ' + matrix_type, fontsize=30)
    plt.plot(xticks, persistence_c, linewidth = 3, color = 'k', linestyle='dashed')
    for i in range(flen):
        plt.plot(xticks, avg_corrs[i], linewidth=3)

    #format and name the correlation plot.
    plt.yticks(fontsize=20)
    plt.xticks(xticks, fontsize=20)
    plt.ylabel('Correlation Coefficient', fontsize=30)
    plt.xlabel('Lead Time (Months)', fontsize=30)
    plt.legend(legend_entries)
    plt.show()

    #building out the rmse plot: first graph persistence rmse, then the model rmses.
    plt.figure(figsize=(14,10))
    plt.plot(xticks, persistence_r*100, linewidth = 3, color = 'k', linestyle='dashed')
    for i in range(flen):
        plt.plot(xticks, avg_rmse[i]*100, linewidth = 3)

    #format and name the rmse plot.
    plt.yticks(fontsize=20)
    plt.xticks(xticks, fontsize=20)
    plt.ylabel('RMS Error (%)', fontsize=30)
    plt.xlabel('Lead Time (Months)', fontsize=30)
    plt.legend(legend_entries)
    plt.show()

## Defining a function that computes and plots the correlation/RMSE for a list of input zz files.

In [9]:
# Manually enter the filenames as a list.
# can return a lot of info, or nothing. u can change it.
def leadtime_plot(matrix_type, zz_files, region, clegend = []):
    """Compute and plot correlation/RMSE versus lead time for a list of zz files.

    For each file in ``zz_files`` the regional mean of the model anomalies is
    compared against the regional mean of the observed anomalies at leads
    1..12, and the results are plotted together with the persistence curves
    via ``plot_corrs_rmses``.

    Parameters
    ----------
    matrix_type : str
        'hcast' or 'cross'; forwarded to ``hcast_or_cross``.
    zz_files : list of str
        File names of the zz matrices to evaluate. They are opened by name, so
        they are resolved against the working directory that
        ``hcast_or_cross`` switches to.
    region
        Condition passed to ``.where()`` to restrict both the observed and the
        model fields (presumably a boolean grid mask -- TODO confirm).
    clegend : list of str, optional
        Legend labels; when empty, the file names themselves are used. The
        default list is never mutated.

    Returns
    -------
    (avg_corrs, avg_rmse)
        Two NumPy arrays of shape ``(len(zz_files), 12)``; column ``k`` holds
        the value for lead ``k + 1``.

    Notes
    -----
    Relies on module-level names defined outside this function: ``xr``,
    ``xs`` (presumably xarray and xskillscore -- confirm against the imports),
    ``validpoints``, ``hcast_or_cross`` and ``plot_corrs_rmses``. Calling this
    function changes the process working directory (through
    ``hcast_or_cross``).
    """
    n_leads = 12

    # Define the legend here: Just the file name. Includes: variables, weights, mode #.
    if len(clegend) == 0:
        clegend = zz_files

    # Calculate UNweighted mean of the observed region SIC anomaly.
    sic_obsr = hcast_or_cross(matrix_type, region)
    sic_obsr_mean = sic_obsr.mean(dim=['x','y'])

    # Calculate persistence of the observed anomalies: compare the series
    # shifted by -lag along 'tdim' with the unshifted series.
    persistence_c = np.zeros(n_leads)
    persistence_r = np.zeros(n_leads)
    for lag in range(1, n_leads + 1):
        shifted_obs = sic_obsr_mean.shift(tdim=-lag)
        persistence_c[lag-1] = xr.corr(shifted_obs, sic_obsr_mean, dim='tdim').data
        persistence_r[lag-1] = xs.rmse(shifted_obs, sic_obsr_mean, dim='tdim', skipna=True).data

    # Now, for every zz file: calculate the correlation between observed and model anomalies.
    flen = len(zz_files)
    print("# of zz matrices to plot: " + str(flen))

    avg_corrs = np.zeros([flen, n_leads])
    avg_rmse = np.zeros([flen, n_leads])
    for i, zz_name in enumerate(zz_files):
        print('Loaded in ' + zz_name)

        # Open the model file in a context manager so its file handle is
        # released once this file has been processed. Everything that reads
        # from the dataset stays inside the block, because the data may be
        # loaded lazily.
        with xr.open_dataset(zz_name) as zz_dataset:
            sic_zz = zz_dataset.__xarray_dataarray_variable__

            # Calculate UNweighted mean of the zz matrix' region.
            sic_zzr = sic_zz.where(region).where(validpoints)
            sic_zzr_mean = sic_zzr.mean(dim=['x','y'])

            # Calculate correlation and rmse btwn observed and model anomalies.
            for lead in range(1, n_leads + 1):
                model_at_lead = sic_zzr_mean.sel(lead=lead)
                avg_corrs[i, lead-1] = xr.corr(sic_obsr_mean, model_at_lead, dim='tdim').data
                avg_rmse[i, lead-1] = xs.rmse(sic_obsr_mean, model_at_lead, dim='tdim', skipna=True).mean().data

    # Toss the avg_corrs matrix and avg_rmse matrix into the plot_corrs_rmses() function.
    plot_corrs_rmses(flen, matrix_type, avg_corrs, avg_rmse, persistence_c, persistence_r, clegend)

    return avg_corrs, avg_rmse



In [1]:
def ayo(x):
    """Print a fixed message; used as a smoke test that this module imported.

    The argument ``x`` is accepted but not used. Returns None.
    """
    message = 'we just testing to see if utils was imported properly.'
    print(message)

# TODO: eventually build the model to take in only the 1979-2000 data, i.e. recreate the entire thing. It will take a long time, but it might be worth it to figure out whether the model is running well or not. For now, I think it is safe to assume the model is running well.

## Testing to try and recreate the 1979-2000 data:
- In the future, when you run the model with the 1979-2000 data to make sure our model works, create a new file and save the output under new file names. Note that this will take a while.

In [None]:
# # Give the output a full file name in the format:
# # variables_separated_by_underscores - weights*separated*by*asterisks - #ofmodes
# complete_fname = f_name+'-'+ str(modes) + '.nc'
# print('Building hcast and cross for: '+ complete_fname)

# # run the solver here.
# solver = MultivariateEof(model_vars,
#                      weights = model_wgts, center=False)
# nvars = nv-1 # nvars: number of variables NOT sic. ex. sst, sat: nvars = 2. i=0, 1.

# #____________________Solve for MEOFs, PCs
# # define the lat, lon coordinates for the variables.
# sic_lat_bins = np.arange(-89.5, -49.5, 0.5)
# sic_lon_bins = np.arange(0., 360, 1)
# vars_lat_bins = np.arange(-88., -49, 2)
# vars_lon_bins = np.arange(0., 359, 2)

# # get the eofs, pcs, eigenvalues here.
# np_eofs_list = solver.eofs(neofs=modes) # scale by something here?
# np_eofscov_list = solver.eofsAsCovariance(neofs=modes)
# np_pcs = solver.pcs(npcs=modes)
# np_eigs = solver.eigenvalues(neigs=modes) 
# sum_eigs = solver.eigenvalues().sum()
# percent_var = np_eigs.sum() / solver.eigenvalues().sum()
# tav = solver.totalAnomalyVariance()
# vf = solver.varianceFraction()

# # turn the list of eofs for each variable used in the model into SIC_EOFs, and the rest of the eofs.
# sic_eofs = xr.DataArray(np_eofs_list[0],
#                        coords = [np.arange(0, modes), sic_lat_bins, sic_lon_bins],
#                         dims=['mode', 'y', 'x']) * stds[0]
# all_vars_eofs = [sic_eofs] # all_vars_eofs is eventually what has all the eofs to be plotted.
# for i in range(nvars):
#     all_vars_eofs.append(xr.DataArray(np_eofs_list[i + 1],
#                        coords = [np.arange(0, modes), vars_lat_bins, vars_lon_bins],
#                         dims=['mode', 'y', 'x']) * stds[i + 1])

# # for each mode, get the pc timeseries for all the eofs.
# pcs = xr.DataArray(np_pcs, coords=[sic_anom.tdim, np.arange(0, modes)],
#                   dims=['tdim', 'mode'])