In [37]:
import pandas as pd
import numpy as np
import xarray as xr
from configparser import ConfigParser
import sys
import os
import netCDF4 as nc
import matplotlib.pyplot as plt
import seaborn as sns

sys.path.append('../application')
from BMNP import BMNP

In [38]:
def ncExists(lat, lon):
    """Create the monthly-mean template NetCDF file if it is not already on disk.

    The file is written to './templates/mur_mmm.nc' with three dimensions
    ('time', 'lat', 'lon'), one coordinate variable per dimension and a
    'mmm_sst' variable indexed by (time, lat, lon). The 'time' dimension is
    created with size ``None`` so that further time steps can be appended.

    Parameters
    ----------
    lat : sequence of float
        Latitude values; its length sets the size of the 'lat' dimension.
    lon : sequence of float
        Longitude values; its length sets the size of the 'lon' dimension.

    Returns
    -------
    None
        An existing file is left untouched.
    """
    # Location of the template file
    ncloc = './templates/mur_mmm.nc'

    # Nothing to do when the file is already there
    if BMNP.fileExists(ncloc):
        return

    # The context manager closes the dataset even if one of the calls below
    # raises, so the file handle is never leaked.
    with nc.Dataset(ncloc, 'w', format='NETCDF4') as newnc:
        # Create dimensions ('time' has no fixed size so it can grow)
        newnc.createDimension('time', None)
        newnc.createDimension('lat', len(lat))
        newnc.createDimension('lon', len(lon))

        # Create variables (all stored as 64-bit floats)
        newnc.createVariable('time', 'f8', ('time',))
        newnc.createVariable('lat', 'f8', ('lat',))
        newnc.createVariable('lon', 'f8', ('lon',))
        newnc.createVariable('mmm_sst', 'f8', ('time', 'lat', 'lon'))

        # Add lat and lon data to variables
        newnc.variables['lat'][:] = lat
        newnc.variables['lon'][:] = lon

        # Describe what the values stored in 'time' represent
        newnc.variables['time'].units = 'Month and Year Value'

In [39]:
# Zero-padded month labels '01' .. '12', used to build the per-month data paths
months = [f'{month_number:02d}' for month_number in range(1, 13)]

def after2020data(location):
    """Average one year of daily SST files into monthly means and append them to the template file.

    For every month label in ``months`` the daily NetCDF files found in
    '../data/refined/MUR/<year>/<month>' are summed and divided by the number
    of files; the result is appended to './templates/mur_mmm.nc' as a new
    time step labelled ``<year><month>``.

    Parameters
    ----------
    location : str
        Path of the yearly CSV file. Only the path text is used: the four
        characters ``location[-12:-8]`` are taken as the year. The file
        content itself is not read.

    Returns
    -------
    None
    """
    # The year sits at a fixed offset from the end of the path
    year = location[-12:-8]

    # Reference file that supplies the lat/lon grid of the subsetted region
    latlon_path = '../data/refined/MUR/2020/01/20200101090000-JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04_subsetted.nc4'
    with nc.Dataset(latlon_path, 'r') as latlon_nc:
        # Slicing with [:] copies the values into memory, so they stay usable
        # after the dataset is closed
        lat = latlon_nc.variables['lat'][:]
        lon = latlon_nc.variables['lon'][:]

    # Make sure the template file exists before appending to it
    ncExists(lat, lon)

    for month in months:
        monthyear = f'{year}{month}'
        month_dir = f'../data/refined/MUR/{year}/{month}'
        days = os.listdir(month_dir)

        # Without daily files there is nothing to average (and dividing by
        # len(days) == 0 would fail), so the month is skipped
        if not days:
            print(f'No daily files found in {month_dir}; skipping {monthyear}')
            continue

        # Running sum of the daily grids; None until the first file is read
        mmm = None
        for day in days:
            # Close every daily file as soon as its data has been copied out
            with nc.Dataset(f'{month_dir}/{day}') as daily_nc:
                # NOTE(review): np.array() on a masked array keeps the stored
                # values of masked cells, so they would enter the mean.
                # Confirm whether 'sst' can contain masked cells.
                sst = np.array(daily_nc.variables['sst'][:])
            mmm = sst if mmm is None else mmm + sst

        # Divide the sum by the number of daily files to get the mean
        mmm = mmm / len(days)

        # Diagnostic output, printed as long as 'mmm1.csv' does not exist
        if not BMNP.fileExists('mmm1.csv'):
            print("- Test 1 for Post-2020 Data")
            print(mmm.shape)

        # Append the monthly mean as a new time step
        with nc.Dataset('./templates/mur_mmm.nc', 'a') as ncfile:
            # The current length of 'time' is the index of the next free slot
            num_time_stamps = len(ncfile.dimensions['time'])

            # 'time' is a float variable, so convert the label explicitly
            ncfile.variables['time'][num_time_stamps] = float(monthyear)
            ncfile.variables['mmm_sst'][num_time_stamps, :, :] = mmm

In [40]:
def addDataToArray(prev, new, day):
    """Fold ``new`` into the running total ``prev`` and bump the day counter.

    An empty ``prev`` (length 0) is replaced by ``new``; otherwise the result
    is ``prev + new``. Returns the tuple ``(total, day + 1)``.
    """
    # An empty accumulator means this is the first contribution
    total = new if len(prev) == 0 else prev + new
    return total, day + 1

In [41]:
def addDataToNC(prev, monthyear, lat, lon):
    """Append one monthly-mean grid to './templates/mur_mmm.nc' as a new time step.

    Parameters
    ----------
    prev : array-like
        Monthly-mean values. Must be assignable to one (lat, lon) slab of the
        file, whose size is read from the file's 'lat' and 'lon' dimensions.
    monthyear : str or int
        Label of the month (e.g. '201912'); stored in the float 'time'
        variable.
    lat, lon :
        Not used by this function; kept so existing callers keep working.

    Returns
    -------
    None
    """
    # File path to nc file
    ncfile_path = './templates/mur_mmm.nc'

    # The context manager closes the file even if a write below fails
    with nc.Dataset(ncfile_path, 'a') as ncfile:
        # The current length of 'time' is the index of the next free slot
        num_time_stamps = len(ncfile.dimensions['time'])

        # Take the grid size from the file instead of assuming a fixed shape
        grid_shape = (len(ncfile.dimensions['lat']), len(ncfile.dimensions['lon']))

        # Build the (1, lat, lon) slab first: a shape mismatch raises here,
        # before anything has been written to the file
        new_array = np.zeros((1,) + grid_shape)
        new_array[0, :, :] = prev

        # Diagnostic output, printed as long as './new_array.csv' does not exist
        if not BMNP.fileExists('./new_array.csv'):
            print("- Test 2 for Pre-2020 Data")
            print(new_array.shape)

        # 'time' is a float variable, so convert the label explicitly
        ncfile.variables['time'][num_time_stamps] = float(monthyear)

        # Add the new array of mmm values to the new time stamp
        ncfile.variables['mmm_sst'][num_time_stamps, :, :] = new_array


In [45]:
import datetime
def before2020data(nc):
    """Reduce a daily SST dataset to monthly means over a fixed bounding box.

    Walks the 'time' axis of the dataset, cuts every 'analysed_sst' slice to
    the latitude/longitude box below, subtracts 273.15 and accumulates the
    result per calendar month. Each completed month, including the last one
    on the time axis, is averaged and handed to ``addDataToNC``.

    Parameters
    ----------
    nc : open NetCDF dataset
        Must provide the variables 'analysed_sst' (time, lat, lon), 'lat',
        'lon' and 'time'. Note that inside this function the parameter name
        hides the module imported as ``nc``.

    Returns
    -------
    None
    """
    # Bounding box of the region to keep (label-based, both ends inclusive)
    minlat = 12.009
    maxlat = 12.321
    minlon = -68.431
    maxlon = -68.189

    # The coordinates do not depend on the time step, so read them once
    lat = nc.variables['lat'][:]
    lon = nc.variables['lon'][:]

    # 'time' holds days since 2002-06-01
    epoch = datetime.datetime(2002, 6, 1)

    currentym = None   # label ('YYYYMM') of the month being accumulated
    daysinmonth = 0    # number of daily grids summed into currmonth
    currmonth = []     # running sum; empty until the first grid is added

    for i in range(len(nc.variables['time'])):
        # Load the grid of this time step and label it with its coordinates
        data = nc.variables['analysed_sst'][i, :, :]
        data = pd.DataFrame(data, index=lat, columns=lon)

        # Cut to the bounding box and subtract 273.15
        # (presumably Kelvin -> degrees Celsius; confirm the source units)
        data = (data.loc[minlat:maxlat, minlon:maxlon] - 273.15).to_numpy()

        # Turn the day offset into a 'YYYYMM' label
        dayssince = nc.variables['time'][i]
        date = epoch + datetime.timedelta(days=int(dayssince))
        yearmonth = date.strftime('%Y%m')

        if currentym is None:
            currentym = yearmonth

        # A new month has started: store the finished one and reset
        if currentym != yearmonth:
            addDataToNC(currmonth / daysinmonth, currentym, lat, lon)
            currentym = yearmonth
            daysinmonth = 0
            currmonth = []

        currmonth, daysinmonth = addDataToArray(currmonth, data, daysinmonth)

    # The loop only writes a month when the next one begins, so the final
    # month still has to be written here
    if daysinmonth > 0:
        addDataToNC(currmonth / daysinmonth, currentym, lat, lon)

In [43]:
# Make sure the 'templates/' folder exists; the NetCDF helpers in this
# notebook read and write files under './templates/'.
# NOTE(review): BMNP.fileExists / BMNP.createFolders are project helpers that
# are not visible here; presumably they test for and create the given path.
file = 'templates/'


if not BMNP.fileExists(file):
    BMNP.createFolders(file)

In [46]:
# Folder Locations for MUR Data years 2020-22
murloc = '../data/csv/MUR/'

# os.listdir returns names in arbitrary order; sorting makes the order in
# which the yearly files are processed (and their time steps appended)
# deterministic, and chronological when the names differ only by year.
files = sorted(os.listdir(murloc))
delete = True
delete2 = True

# If delete = True, remove the existing template so it is rebuilt from scratch
if delete and BMNP.fileExists('./templates/mur_mmm.nc'):
    os.remove('./templates/mur_mmm.nc')

if delete:
    print("Starting to add data after 2020")
    for filename in files:
        after2020data(f'{murloc}{filename}')

# NOTE(review): the post-2020 months are appended before the pre-2020 months,
# so the 'time' axis of mur_mmm.nc is not in chronological order. The pre-2020
# step also relies on the template file already existing. Confirm that
# downstream readers sort by time.

# The context manager closes the pre-2020 dataset even if processing fails
with nc.Dataset('../data/refined/MUR_SST-Pre2020.nc') as ncfile:
    if delete2 and BMNP.fileExists('./templates/before2020.nc'):
        os.remove('./templates/before2020.nc')

    # Before 2020
    print("Starting to add data before 2020")
    before2020data(ncfile)

Starting to add data after 2020
- Test 1 for Post-2020 Data
(1, 32, 25)
- Test 1 for Post-2020 Data
(1, 32, 25)
- Test 1 for Post-2020 Data
(1, 32, 25)
- Test 1 for Post-2020 Data
(1, 32, 25)
- Test 1 for Post-2020 Data
(1, 32, 25)
- Test 1 for Post-2020 Data
(1, 32, 25)
- Test 1 for Post-2020 Data
(1, 32, 25)
- Test 1 for Post-2020 Data
(1, 32, 25)
- Test 1 for Post-2020 Data
(1, 32, 25)
- Test 1 for Post-2020 Data
(1, 32, 25)
- Test 1 for Post-2020 Data
(1, 32, 25)
- Test 1 for Post-2020 Data
(1, 32, 25)
- Test 1 for Post-2020 Data
(1, 32, 25)
- Test 1 for Post-2020 Data
(1, 32, 25)
- Test 1 for Post-2020 Data
(1, 32, 25)
- Test 1 for Post-2020 Data
(1, 32, 25)
- Test 1 for Post-2020 Data
(1, 32, 25)
- Test 1 for Post-2020 Data
(1, 32, 25)
- Test 1 for Post-2020 Data
(1, 32, 25)
- Test 1 for Post-2020 Data
(1, 32, 25)
- Test 1 for Post-2020 Data
(1, 32, 25)
- Test 1 for Post-2020 Data
(1, 32, 25)
- Test 1 for Post-2020 Data
(1, 32, 25)
- Test 1 for Post-2020 Data
(1, 32, 25)
- Test 1