# Calculate monthly average


In [1]:
import glob
import os
import re
import time
from calendar import monthrange
from datetime import datetime

import netCDF4 as nc
import numpy as np
import pandas as pd
import xarray as xr

%matplotlib inline

#### Parameters:

In [2]:
# Index bounds of the sub-domain (i-range and j-range):
imin, imax = 1139, 2179
jmin, jmax = 159, 799

# Extent of the sub-domain along each index direction:
isize, jsize = imax - imin, jmax - jmin

# Month lengths are taken from 2002, a non-leap year, so February is always 28 days.
# NOTE(review): presumably intentional (model output on a 365-day calendar) -- confirm.
days_in_month = [monthrange(2002, month_number)[1] for month_number in range(1, 13)]

#### Files:

In [3]:
# Modified ANHA12 mesh mask file (http://knossos.eas.ualberta.ca/anha/anhatable.php)
mfile = xr.open_dataset('/ocean/brogalla/GEOTRACES/data/ANHA12/ANHA12_mask_Pb-20230213.nc')

# Coordinate fields read from the mask file as plain numpy arrays:
mesh_lon, mesh_lat = mfile['nav_lon'].values, mfile['nav_lat'].values
mesh_depth = mfile['nav_lev'].values

# First record of tmask, cut down to the imin:imax / jmin:jmax sub-domain:
tmask = mfile['tmask'].values[0, :, imin:imax, jmin:jmax]

# Masked copy of tmask that hides every cell whose value exceeds 0.1:
bathy_masked = np.ma.masked_where(tmask > 0.1, tmask)
# mesh_bathy = np.array(mesh.variables['hdept'][0])

#### Functions

In [4]:
def write_to_file(votemper, vosaline, year, month):
    """
    Save monthly temperature and salinity fields to a NetCDF file.

    Parameters
    ----------
    votemper, vosaline : arrays stored with dimensions (deptht, y, x)
    year, month : used to build the output file name gridT_{year}_{month:02}.nc

    The depth coordinate is mesh_depth; the y and x coordinates are zero-filled
    placeholders sized from mesh_lat and mesh_lon. Returns None.

    NOTE(review): a function with this same name is defined again in the
    sea-ice section of this notebook; whichever cell ran last is the one in use.
    """
    dims = ("deptht", "y", "x")
    data_vars = {
        'vosaline': (dims, vosaline),
        'votemper': (dims, votemper),
    }
    coords = {
        "time_counter": np.zeros(1),
        "deptht": mesh_depth,
        "y": np.zeros(mesh_lat.shape[0]),
        "x": np.zeros(mesh_lon.shape[1]),
    }

    out_path = f'/data/brogalla/ANHA12/new_averages/gridT_{year}_{month:02}.nc'
    xr.Dataset(data_vars, coords=coords).to_netcdf(out_path, unlimited_dims='time_counter')

In [11]:
def calculate_monthly_average(folder_ref, year, month):
    """
    Compute the day-weighted monthly mean temperature and salinity from the
    five-day gridT files and write the result with write_to_file.

    Parameters
    ----------
    folder_ref : str, path prefix placed directly in front of the file name
    year, month : int, the month to average (month is 1-12)

    Each file is weighted by the number of its days that fall in the month:
      * stamped in the month on day >= 5      -> 5 days
      * stamped in the month on day < 5       -> that day number
      * first file of the following month     -> days left after the last
                                                 in-month stamp
    The weighted sum is divided by days_in_month[month-1].

    Returns
    -------
    (monthly_T_ave, monthly_S_ave)

    Raises
    ------
    FileNotFoundError if no file matches the month, or if the following
    month's file is needed but missing.
    ValueError if a file name carries no yYYYYmMMdDD stamp.

    NOTE(review): a function with this same name is defined again in the
    sea-ice section of this notebook; whichever cell ran last is the one in use.
    """
    month_len = days_in_month[month-1]

    def file_date(path):
        # Read the yYYYYmMMdDD stamp from the file name itself, so the result
        # does not depend on the length of folder_ref (the old fixed slice did).
        stamp = re.search(r'y\d{4}m\d{2}d\d{2}', os.path.basename(path))
        if stamp is None:
            raise ValueError(f'Cannot read a date stamp from file name: {path}')
        return datetime.strptime(stamp.group(0), 'y%Ym%md%d')

    file_list = sorted(glob.glob(f'{folder_ref}ANHA12-EXH006_5d_gridT_y{year}m{month:02}d??.nc'))
    if not file_list:
        raise FileNotFoundError(f'No gridT files found for y{year}m{month:02} under {folder_ref}')
    max_day = file_date(file_list[-1]).day

    # if the last file in the list does not contain the last day in the month, also load the first file of the next month
    if max_day < month_len:
        # December rolls over to January of the following year.
        next_year, next_month = (year + 1, 1) if month == 12 else (year, month + 1)
        files_next_month = sorted(glob.glob(
            f'{folder_ref}ANHA12-EXH006_5d_gridT_y{next_year}m{next_month:02}d??.nc'))
        if not files_next_month:
            raise FileNotFoundError(
                f'No gridT files found for y{next_year}m{next_month:02} under {folder_ref}; '
                f'needed to cover the end of y{year}m{month:02}')
        file_list.append(files_next_month[0])

    # read in each data file
    monthly_sum_T = 0
    monthly_sum_S = 0
    sum_days = 0
    for file in file_list:
        # .values loads the data into memory, so the file can be closed right away.
        with xr.open_dataset(file) as ds:
            sal  = ds['vosaline'].values[0,:,:,:]
            temp = ds['votemper'].values[0,:,:,:]

        date = file_date(file)

        if date.month != month:
            label, weight = 'End of month', month_len - max_day
        elif date.day >= 5:
            label, weight = 'Normal case', 5  # since these are five-day files
        else:
            label, weight = 'Start of month', date.day

        monthly_sum_T += temp*weight
        monthly_sum_S += sal *weight
        sum_days += weight
        print(f'{label}: {date}, multiply by {weight}')

    monthly_T_ave = monthly_sum_T / month_len
    monthly_S_ave = monthly_sum_S / month_len

    # The two numbers should match if every day of the month was covered.
    print(sum_days, month_len)

    write_to_file(monthly_T_ave, monthly_S_ave, year, month)

    return monthly_T_ave, monthly_S_ave

In [None]:
# Compute and write the monthly averages for each month of 2021.
# NOTE(review): the result names say "200401" although the year passed is 2021,
# and only the arrays from the last month survive the loop.
# NOTE(review): calculate_monthly_average is redefined in the sea-ice section
# below, so this cell must run while the gridT definition is the active one.
for month in range(1,13):
    T_ave_200401, S_ave_200401 = calculate_monthly_average('/data/brogalla/ANHA12/', 2021, month)

#### Sea ice cover

In [29]:
def write_to_file(iocesflx, year, month):
    """
    Save a monthly iocesflx field to a NetCDF file.

    Parameters
    ----------
    iocesflx : array stored with dimensions (time_counter, y, x)
    year, month : used to build the output file name
                  ANHA12-EXH006_icemod_y{year}m{month:02}.nc

    The time_counter coordinate has a single zero entry; the y and x
    coordinates are zero-filled placeholders sized from mesh_lat and mesh_lon.
    Returns None.

    NOTE(review): this reuses the name of the gridT writer defined earlier in
    this notebook and replaces it once this cell has run.
    """
    data_vars = {'iocesflx': (("time_counter", "y", "x"), iocesflx)}
    coords = {
        "time_counter": np.zeros(1),
        "y": np.zeros(mesh_lat.shape[0]),
        "x": np.zeros(mesh_lon.shape[1]),
    }

    out_path = f'/data/brogalla/ANHA12/icemod_ave/ANHA12-EXH006_icemod_y{year}m{month:02}.nc'
    xr.Dataset(data_vars, coords=coords).to_netcdf(out_path, unlimited_dims='time_counter')

In [30]:
def calculate_monthly_average(folder_ref, year, month):
    """
    Compute the day-weighted monthly mean of iocesflx from the five-day icemod
    files and write the result with write_to_file.

    Parameters
    ----------
    folder_ref : str, path prefix placed directly in front of the file name
    year, month : int, the month to average (month is 1-12)

    Each file is weighted by the number of its days that fall in the month:
      * stamped in the month on day >= 5      -> 5 days
      * stamped in the month on day < 5       -> that day number
      * first file of the following month     -> days left after the last
                                                 in-month stamp
    The weighted sum is divided by days_in_month[month-1].

    Returns
    -------
    The monthly mean array; all three axes of the variable as read are kept.

    Raises
    ------
    FileNotFoundError if no file matches the month, or if the following
    month's file is needed but missing.
    ValueError if a file name carries no yYYYYmMMdDD stamp.

    NOTE(review): this reuses the name of the gridT function defined earlier in
    this notebook and replaces it once this cell has run.
    """
    month_len = days_in_month[month-1]

    def file_date(path):
        # Read the yYYYYmMMdDD stamp from the file name itself, so the result
        # does not depend on the length of folder_ref (the old fixed slice did).
        stamp = re.search(r'y\d{4}m\d{2}d\d{2}', os.path.basename(path))
        if stamp is None:
            raise ValueError(f'Cannot read a date stamp from file name: {path}')
        return datetime.strptime(stamp.group(0), 'y%Ym%md%d')

    file_list = sorted(glob.glob(f'{folder_ref}ANHA12-EXH006_y{year}m{month:02}d??_icemod.nc'))
    if not file_list:
        raise FileNotFoundError(f'No icemod files found for y{year}m{month:02} under {folder_ref}')
    max_day = file_date(file_list[-1]).day

    # if the last file in the list does not contain the last day in the month, also load the first file of the next month
    if max_day < month_len:
        # December rolls over to January of the following year.
        next_year, next_month = (year + 1, 1) if month == 12 else (year, month + 1)
        files_next_month = sorted(glob.glob(
            f'{folder_ref}ANHA12-EXH006_y{next_year}m{next_month:02}d??_icemod.nc'))
        if not files_next_month:
            raise FileNotFoundError(
                f'No icemod files found for y{next_year}m{next_month:02} under {folder_ref}; '
                f'needed to cover the end of y{year}m{month:02}')
        file_list.append(files_next_month[0])

    # read in each data file
    monthly_sum = 0
    sum_days = 0
    for file in file_list:
        # .values loads the data into memory, so the file can be closed right away.
        with xr.open_dataset(file) as ds:
            ice_flux = ds['iocesflx'].values[:,:,:]

        date = file_date(file)

        if date.month != month:
            label, weight = 'End of month', month_len - max_day
        elif date.day >= 5:
            label, weight = 'Normal case', 5  # since these are five-day files
        else:
            label, weight = 'Start of month', date.day

        monthly_sum += ice_flux*weight
        sum_days += weight
        print(f'{label}: {date}, multiply by {weight}')

    monthly_S_ave = monthly_sum / month_len

    # The two numbers should match if every day of the month was covered.
    print(sum_days, month_len)

    write_to_file(monthly_S_ave, year, month)

    return monthly_S_ave

In [None]:
# Compute and write the monthly averages for each month of 2021.
# NOTE(review): the result name says "200401" although the year passed is 2021,
# and only the array from the last month survives the loop.
# NOTE(review): calculate_monthly_average is also defined in the gridT section
# above; this cell relies on the sea-ice definition being the active one.
for month in range(1,13):
    S_ave_200401 = calculate_monthly_average('/data/brogalla/ANHA12/', 2021, month)

#### Mn extended domain run:

In [4]:
def write_Mn_to_file(Mn, year, month):
    """
    Save a monthly oxidismn field to a NetCDF file.

    Parameters
    ----------
    Mn : array stored with dimensions (deptht, y, x)
    year, month : used to build the output file name oMn_y{year}m{month:02}.nc

    The depth coordinate is mesh_depth; the y and x coordinates are zero-filled
    placeholders sized from the imin:imax, jmin:jmax cut of mesh_lat and
    mesh_lon. Returns None.
    """
    n_y = mesh_lat[imin:imax,jmin:jmax].shape[0]
    n_x = mesh_lon[imin:imax,jmin:jmax].shape[1]

    data_vars = {'oxidismn': (("deptht", "y", "x"), Mn)}
    coords = {
        "time_counter": np.zeros(1),
        "deptht": mesh_depth,
        "y": np.zeros(n_y),
        "x": np.zeros(n_x),
    }

    out_path = f'/data/brogalla/run_storage/Mn-extended-domain-202210/oMn_y{year}m{month:02}.nc'
    xr.Dataset(data_vars, coords=coords).to_netcdf(out_path, unlimited_dims='time_counter')

In [5]:
def calculate_monthly_oMn_average(folder_ref, year, month):
    """
    Compute the day-weighted monthly mean of oxidismn from the five-day ptrc_T
    files and write the result with write_Mn_to_file.

    Parameters
    ----------
    folder_ref : str, path prefix placed directly in front of the file name
    year, month : int, the month to average (month is 1-12)

    File names end in {start}-{end}.nc with eight-digit dates; the end date is
    the one used. Each file is weighted by the number of its days that fall in
    the month:
      * end date in the month on day >= 5     -> 5 days
      * end date in the month on day < 5      -> that day number
      * file ending in the following month    -> days left after the last
                                                 in-month end date
    The weighted sum is divided by days_in_month[month-1].

    Returns
    -------
    The monthly mean array.

    Raises
    ------
    FileNotFoundError if no file matches the month, or if a file running into
    the following month is needed but missing. The year is part of the file
    name prefix, so a December that needs a January file cannot be completed
    and ends up here.
    ValueError if a file name carries no end date.
    """
    month_len   = days_in_month[month-1]
    name_prefix = f'{folder_ref}ANHA12_EXH006_5d_{year}0101_{year}1231_ptrc_T_'

    def end_date(path):
        # Read the end date from the file name itself, so the result does not
        # depend on the length of folder_ref (the old fixed slice did).
        stamp = re.search(r'-(\d{8})\.nc$', os.path.basename(path))
        if stamp is None:
            raise ValueError(f'Cannot read an end date from file name: {path}')
        return datetime.strptime(stamp.group(1), '%Y%m%d')

    file_list = sorted(glob.glob(f'{name_prefix}{year}????-{year}{month:02}??.nc'))
    if not file_list:
        raise FileNotFoundError(f'No ptrc_T files ending in {year}{month:02} under {folder_ref}')
    max_day = end_date(file_list[-1]).day

    # if the last file in the list does not contain the last day in the month, also load the first file of the next month
    if max_day < month_len:
        files_next_month = sorted(glob.glob(
            f'{name_prefix}{year}{month:02}??-{year}{month+1:02}??.nc'))
        if not files_next_month:
            raise FileNotFoundError(
                f'No ptrc_T file running from {year}{month:02} into the following month '
                f'under {folder_ref}; needed to cover the end of {year}{month:02}')
        file_list.append(files_next_month[0])

    # read in each data file
    monthly_sum_oMn = 0
    sum_days = 0
    for file in file_list:
        # .values loads the data into memory, so the file can be closed right away.
        with xr.open_dataset(file) as ds:
            omn = ds['oxidismn'].values[0,:,:,:]

        date = end_date(file)

        if date.month != month:
            label, weight = 'End of month', month_len - max_day
        elif date.day >= 5:
            label, weight = 'Normal case', 5  # since these are five-day files
        else:
            label, weight = 'Start of month', date.day

        monthly_sum_oMn += omn*weight
        sum_days += weight
        print(f'{label}: {date}, multiply by {weight}')

    monthly_oMn_ave = monthly_sum_oMn / month_len

    # The two numbers should match if every day of the month was covered.
    print(sum_days, month_len)

    write_Mn_to_file(monthly_oMn_ave, year, month)

    return monthly_oMn_ave

In [6]:
# Compute and write the monthly oMn averages for every month of 2020 and 2021.
# The input folder name contains the year, so it is rebuilt for each year.
# Only the array from the last month processed is left in oMn_ave.
for year in range(2020,2022):
    for month in range(1,13):
        oMn_ave = calculate_monthly_oMn_average(f'/data/brogalla/run_storage/Mn-extended-domain-202210/Mn-extended-{year}-202311/', \
                                                year, month)

Normal case: 2020-01-05 00:00:00, multiply by 5
Normal case: 2020-01-10 00:00:00, multiply by 5
Normal case: 2020-01-15 00:00:00, multiply by 5
Normal case: 2020-01-20 00:00:00, multiply by 5
Normal case: 2020-01-25 00:00:00, multiply by 5
Normal case: 2020-01-30 00:00:00, multiply by 5
End of month: 2020-02-04 00:00:00, multiply by 1
31 31
Start of month: 2020-02-04 00:00:00, multiply by 4
Normal case: 2020-02-09 00:00:00, multiply by 5
Normal case: 2020-02-14 00:00:00, multiply by 5
Normal case: 2020-02-19 00:00:00, multiply by 5
Normal case: 2020-02-24 00:00:00, multiply by 5
End of month: 2020-03-01 00:00:00, multiply by 4
28 28
Start of month: 2020-03-01 00:00:00, multiply by 1
Normal case: 2020-03-06 00:00:00, multiply by 5
Normal case: 2020-03-11 00:00:00, multiply by 5
Normal case: 2020-03-16 00:00:00, multiply by 5
Normal case: 2020-03-21 00:00:00, multiply by 5
Normal case: 2020-03-26 00:00:00, multiply by 5
Normal case: 2020-03-31 00:00:00, multiply by 5
31 31
Normal case: 2

#### Phosphate biomass from BLING:

In [4]:
def write_biomass_to_file(biomass, fpop, jp_pop, year, month):
    """
    Save monthly biomass_p, fpop and jp_pop fields to a NetCDF file.

    Parameters
    ----------
    biomass, fpop, jp_pop : arrays stored with dimensions (deptht, y, x).
        Note the argument order: fpop comes before jp_pop.
    year, month : used to build the output file name
                  ANHA4-EPM151_y{year}m{month:02}.nc

    The depth coordinate is mesh_depth. The y and x coordinates are zero-filled
    placeholders whose lengths are taken from the last two axes of biomass
    (previously hard-coded as 800 and 544). Returns None.
    """
    # Size the placeholder coordinates from the data rather than fixed numbers.
    n_y, n_x = np.shape(biomass)[-2:]

    dims = ("deptht", "y", "x")
    file_write = xr.Dataset(
        {'biomass_p' : (dims, biomass),
         'fpop'      : (dims, fpop),
         'jp_pop'    : (dims, jp_pop)},
        coords = {
            "time_counter": np.zeros(1),
            "deptht": mesh_depth,
            "y": np.zeros(n_y),
            "x": np.zeros(n_x)},
    )

    file_write.to_netcdf(f'/data/brogalla/ANHA4/BLING-EPM151/ANHA4-EPM151_y{year}m{month:02}.nc', unlimited_dims='time_counter')

    return

In [6]:
# Similar function but for BLING:
# folder = '/ocean/brogalla/GEOTRACES/BLING-EMP101/'
# file = 'ANHA4-EPM101_y2002m01d05_gridB.nc'

def calculate_monthly_biomass_average(folder_ref, year, month):
    """
    Compute the day-weighted monthly means of biomass_p, jp_pop and fpop from
    the five-day gridB files and write them with write_biomass_to_file.

    Parameters
    ----------
    folder_ref : str, path prefix placed directly in front of the file name
    year, month : int, the month to average (month is 1-12)

    Each file is weighted by the number of its days that fall in the month:
      * stamped in the month on day >= 5      -> 5 days
      * stamped in the month on day < 5       -> that day number
      * first file of the following month     -> days left after the last
                                                 in-month stamp
    The weighted sums are divided by days_in_month[month-1].

    Returns
    -------
    The monthly mean biomass_p array only (jp_pop and fpop are written to file
    but not returned).

    Raises
    ------
    FileNotFoundError if no file matches the month, or if the following
    month's file is needed but missing.
    ValueError if a file name carries no yYYYYmMMdDD stamp.
    """
    month_len = days_in_month[month-1]

    def file_date(path):
        # Read the yYYYYmMMdDD stamp from the file name itself, so the result
        # does not depend on the length of folder_ref (the old fixed slice did).
        stamp = re.search(r'y\d{4}m\d{2}d\d{2}', os.path.basename(path))
        if stamp is None:
            raise ValueError(f'Cannot read a date stamp from file name: {path}')
        return datetime.strptime(stamp.group(0), 'y%Ym%md%d')

    file_list = sorted(glob.glob(f'{folder_ref}ANHA4-EPM151_y{year}m{month:02}d??_gridB.nc'))
    if not file_list:
        raise FileNotFoundError(f'No gridB files found for y{year}m{month:02} under {folder_ref}')
    print(file_list[-1])
    max_day = file_date(file_list[-1]).day

    # if the last file in the list does not contain the last day in the month, also load the first file of the next month
    if max_day < month_len:
        # December rolls over to January of the following year.
        next_year, next_month = (year + 1, 1) if month == 12 else (year, month + 1)
        files_next_month = sorted(glob.glob(
            f'{folder_ref}ANHA4-EPM151_y{next_year}m{next_month:02}d??_gridB.nc'))
        if not files_next_month:
            raise FileNotFoundError(
                f'No gridB files found for y{next_year}m{next_month:02} under {folder_ref}; '
                f'needed to cover the end of y{year}m{month:02}')
        file_list.append(files_next_month[0])

    # read in each data file
    monthly_sum_biomass = 0
    monthly_sum_jppop   = 0
    monthly_sum_fpop    = 0
    sum_days = 0
    for file in file_list:
        # .values loads the data into memory, so the file can be closed right away.
        with xr.open_dataset(file) as ds:
            biomass = ds['biomass_p'].values[0,:,:,:]
            jp_pop  = ds['jp_pop'].values[0,:,:,:]
            fpop    = ds['fpop'].values[0,:,:,:]

        date = file_date(file)

        if date.month != month:
            label, weight = 'End of month', month_len - max_day
        elif date.day >= 5:
            label, weight = 'Normal case', 5  # since these are five-day files
        else:
            label, weight = 'Start of month', date.day

        monthly_sum_biomass += biomass*weight
        monthly_sum_jppop   += jp_pop*weight
        monthly_sum_fpop    += fpop*weight
        sum_days += weight
        print(f'{label}: {date}, multiply by {weight}')

    monthly_biomass_ave = monthly_sum_biomass / month_len
    monthly_jppop_ave   = monthly_sum_jppop   / month_len
    monthly_fpop_ave    = monthly_sum_fpop    / month_len

    # The two numbers should match if every day of the month was covered.
    print(sum_days, month_len)

    # Passed by keyword: the writer takes fpop BEFORE jp_pop, and the earlier
    # positional call handed them over in the opposite order, swapping the two
    # variables in the output file.
    write_biomass_to_file(monthly_biomass_ave,
                          fpop=monthly_fpop_ave,
                          jp_pop=monthly_jppop_ave,
                          year=year, month=month)

    return monthly_biomass_ave

In [None]:
# Compute and write the monthly biomass averages for every month of 2002-2004.
# NOTE(review): the result name says "200201" but it is overwritten on every
# iteration, so it ends up holding the last month processed.
for year in range(2002,2005):
    for month in range(1,13):
        bio_ave_200201 = calculate_monthly_biomass_average('/data/brogalla/ANHA4/BLING-EPM151/', year, month)

In [None]:
# Compute and write the monthly biomass averages for every month of 2019-2021.
# NOTE(review): the result name says "200201" but it is overwritten on every
# iteration, so it ends up holding the last month processed.
for year in range(2019,2022):
    for month in range(1,13):
        bio_ave_200201 = calculate_monthly_biomass_average('/data/brogalla/ANHA4/BLING-EPM151/', year, month)