# getStats

Function takes a list of files (.nc) and returns statistical quantities of interest to tune our forcing. We need to compute the: 

- radiation anomaly (standard deviation?)
- maximum, average, and minimum shortwave radiation limits
- temperature mean
- temperature anomaly (standard deviation?)
- humidity mean 
- humidity anomaly (standard deviation?)
- precipitation mean
- precipitation anomaly (standard deviation?)
- the mean precipitation in any one event

In [3]:
import numpy as np
import xarray as xr
import glob

In [None]:
def getStats(rad_file_array, precip_file_array):
    
    rad_mean, rad_std, rad_max, rad_min = getMeanStdMaxMin(rad_file_array, "F_solar") 
    
    #temp_mean, temp_std, temp_max, temp_min = getMeanStdMaxMin(temp_file_array, "temperature")
    
    #hum_mean, hum_std, hum_max, hum_min = getMeanStdMaxMin(hum_file_array, "humidity")
    
    precip_mean, precip_std, precip_max, precip_min = getMeanStdMaxMin(precip_file_array, "precip")
    
    indiv_precip_event_mean = getPrecipEventMeansXARRAY(precip_file_array, threshold) # threshold = precip to consider an event
    
    ###
    stats_array = np.zeros(7) # initialize stats array
    
    # [rad_anom_norm, sw_limit, sw_average, sw_min, temp_mean, temp_anom_norm, hum_mean, hum_anom_mean, mean_precip, std_precip, indiv_event_mean] in getForcingFunctions
    
    stats_array[0] = rad_std
    stats_array[1] = rad_max
    stats_array[2] = rad_mean
    stats_array[3] = rad_min
    
    #stats_array[4] = temp_mean
    #stats_array[5] = temp_std
    
    #stats_array[6] = hum_mean
    #stats_array[7] = hum_std
    
    stats_array[8] = precip_mean
    stats_array[9] = precip_std
    
    stats_array[10] = indiv_precip_event_mean
    
    return stats_array

In [40]:
def getMeanStdMaxMin(filearray, data_var):
    
    N = len(filearray) 
    param_all_array = []
    param_max_array = []
    param_min_array = []
    
    for i in range(0, N):
        tmp_dataset = xr.open_dataset(filearray[i]) # Open ith summer's dataset
        
        tmp_param = tmp_dataset[data_var].values # open the "data_var" values for all summers
        
        tmp_param_max = np.amax(tmp_param) # take the file max
        tmp_param_min = np.amin(tmp_param) # take the file min
        
        param_max_array.append(tmp_param_max) # add file max to list
        param_min_array.append(tmp_param_min) # add file min to list
        param_all_array.append(tmp_param) # add values to total array
    
    param_all_array = np.concatenate(param_all_array) # concatenate arrays
    
    param_max = np.amax(param_max_array) # determine max of all files
    param_min = np.amin(param_min_array) # determine min of all files
    
    param_mean = np.mean(param_all_array) # determine mean of all files
    param_std = np.std(param_all_array) # determine standard deviation of all files
    
    return param_mean, param_std, param_max, param_min

In [None]:
def getPrecipEventMeansXARRAY(filearray, threshold):
    
    N_files = len(filearray) # number of files
    
    precip_all_array = []
    event_array = []
    tmp_event = []
    
    # loop through all files and find the following things:
    
    for i in range(0,N_files):
        
        tmp_dataset = xr.open_dataset(filearray[i]) # open a file's worth of data
        
        tmp_precip = tmp_dataset["precip"].values # extract precip values as a numpy array EDIT KEYWORD WHEN YOU KNOW MORE
    
        N_sum = len(tmp_precip.shape[1]) # number of summers
        
        for k in range(0, N_sum): # loop through the summers
            
            index_array = getEventIndicies(tmp_precip[:,k], threshold) # find within this numpy array where an event has occured relative to some threshold
    
            N_mins = len(tmp_precip.shape[0])
        
            for j in range(0, N_mins): 
                
                # loop through minute indices. if index j is in index array, then precip event happened at
                # that index. add those precip values to a temporary event array. 

                if j in index_array:
                    tmp_event.append(float(tmp_precip[j,k])) # append precipitation amount for an event

                    if j+1 in index_array: # if the next member of the array is also part of the event, keep looping
                        continue

                    else:
                        event_array.append(tmp_event) # if the next member is not part of the event, append the event to a list of all events
                        tmp_event = [] # clear temp event array

                else:
                    continue 
                    
    
    N_events = len(event_array)
    mean_array = []
    
    for i in range(0, N_events):
        tmp_mean = np.mean(event_array[i])
        mean_array.append(tmp_mean) # take mean of individual events and add them to an array
    
    return np.mean(mean_array) # mean of all events ("mean of the mean of individual events")

Everything below this is either:
- old pieces of code used as toy models for the above
- testing objects for code

In [5]:
def getEventIndicies(precip, threshold):
    
    return np.where(precip > threshold)[0]

In [11]:
def getPrecipEventMeans(precip):
    
    precip_all_array = []

    index_array = getEventIndicies(precip)
    
    N = len(precip)
    event_array = []
    tmp_event = []
    
    for i in range(0, N):
        
        if i in index_array:
            tmp_event.append(float(precip[i]))
            
            if i+1 in index_array:
                continue
            
            else:
                event_array.append(tmp_event)
                tmp_event = []
            
        else:
            continue
            
    N_events = len(event_array)
    mean_array = []
    
    for i in range(0, N_events):
        tmp_mean = np.mean(event_array[i])
        mean_array.append(tmp_mean)
    
            
    return np.mean(mean_array)

In [4]:
def getTestStats(filearray):
    
    N = len(filearray)
    Tup_all_array = []
    Tup_max_array = []
    
    for i in range(0, N):
        tmp_dataset = xr.open_dataset(filearray[i])
        
        tmp_Tup = tmp_dataset["c_f"].values
        tmp_Tup_max = np.amax(tmp_Tup)
        
        Tup_max_array.append(tmp_Tup_max)
        Tup_all_array.append(tmp_Tup)
    
    Tup_all_array = np.concatenate(Tup_all_array)
    
    Tup_max = np.amax(Tup_max_array)
    
    Tup_mean = np.mean(Tup_all_array)
    Tup_std = np.std(Tup_all_array)
    
    return Tup_mean, Tup_std, Tup_max

In [44]:
path = "/data/keeling/a/adammb4/SGP_proj_2021/DATA/"
files = glob.glob(path+"JCli_*.nc")

test = xr.open_dataset(files[0])

getMeanStdMaxMin(files, "F_solar")

(260.41965, 273.0409, 1000.0, 0.0)

In [48]:
precip = np.array([[0, 0, 0, 2, 3, 4, 2, 1, 0, 0, 1, 2, 0], [0, 1, 2, 0, 0,1,0,0,1,2,3,0,0]])

getPrecipEventMeans(precip)

TypeError: only size-1 arrays can be converted to Python scalars

In [46]:
a = np.array([[1, 2], [3, 4]])

a.shape[1]

2

In [None]:
np.mean(a, axis=1)