In [None]:
import os
import pandas as pd
import re
import numpy as np
import netCDF4 as nc
from PIL import Image
import datetime 
import imageio
import os 
import matplotlib.pyplot as plt
import math
import xarray as xr

# Load Site Data

In [None]:
sites_df = pd.read_csv('site.csv')
sites = [[row['label'], row['lat'], row['long']] for index, row in sites_df.iterrows()]
sites[:10]

# Load single NetCDF dataset for the lat and lon grid, and snap sites to grid

In [None]:
ds = nc.Dataset("cmems_obs_oc_nws_bgc_tur-spm-chl_nrt_l4-hr-mosaic_P1D-m_1677243373097.nc")
lats = ds['lat'][:].data
lons = ds['lon'][:].data

In [None]:
lat = sites[2][1]
lon = sites[2][2]
print(lat)
print(lats[np.argmin(np.abs(lats - lat))])
print(lon)
print(lons[np.argmin(np.abs(lons - lon))])
lat_grid_left = lats[np.argmin(np.abs(lats - lat))-j]
lon_grid_left = lons[np.argmin(np.abs(lons - lon))-j]
lat_grid_right = lats[np.argmin(np.abs(lats - lat))+j]
lon_grid_right = lons[np.argmin(np.abs(lons - lon))+j]
print(lat_grid_left)
print(lon_grid_left)
print(lat_grid_right)
print(lon_grid_right)

In [None]:
%%time
sites_grid = []
for i in range(len(sites)):
    site = sites[i]
    lat_orig = site[1]
    lon_orig = site[2]
    
    j = 5 # this returns 11 x 11 grid with site at the center
    
    lat_grid = lats[np.argmin(np.abs(lats - lat_orig))]
    lon_grid = lons[np.argmin(np.abs(lons - lon_orig))]
    
    lat_grid_left = lats[np.argmin(np.abs(lats - lat_orig))-j]
    lon_grid_left = lons[np.argmin(np.abs(lons - lon_orig))-j]
    
    lat_grid_right = lats[np.argmin(np.abs(lats - lat_orig))+j]
    lon_grid_right = lons[np.argmin(np.abs(lons - lon_orig))+j]
    
    sites_grid.append([site[0], lat_orig, lon_orig, lat_grid, lon_grid, lat_grid_left, lon_grid_left, lat_grid_right, lon_grid_right])
    
sites_grid[:10]

# Loop through multiple sites

## Trial with one site

In [None]:
site = sites_grid[0]
lat = site[3]
lon = site[4]

print(site)

In [None]:
# general
username = "slai"
password = "Capstone2023"
output_directory = "/Users/SallyLai/Desktop/LSE/Masters/Capstone/capstone-project-sewage-pollution/copernicus_code"
product_id = "OCEANCOLOUR_NWS_BGC_HR_L4_NRT_009_209-TDS"
dataset_id = "cmems_obs_oc_nws_bgc_tur-spm-chl_nrt_l4-hr-mosaic_P1D-m"

# site info
i = 0
site = sites_grid[i]
site_name = site[0]
lat_grid_max = site[5]
lat_grid_min = site[7]
lon_grid_max = site[8]
lon_grid_min = site[6]

# query specifications
output_filename = f"{site_name}.nc".replace(" ", "_")
date_min = "2022-03-01 00:00:00"
date_max = "2022-10-31 23:59:59"

# query
query = ('python -m motuclient --motu https://nrt.cmems-du.eu/motu-web/Motu --service-id ' + product_id + ' --product-id ' + dataset_id + 
         f' --longitude-min {lon_grid_min} --longitude-max {lon_grid_max} --latitude-min {lat_grid_min} --latitude-max {lat_grid_max} ' +
         f'--date-min {date_min} --date-max {date_max} ' +
         '--variable CHL --variable SPM --variable TUR ' +
         '--out-dir ' + output_directory + ' --out-name ' + output_filename +
         ' --user ' + username + ' --pwd ' + password)

os.system(query)

In [None]:
trial_xr = xr.open_dataset(output_filename)
dates_list = [str(pd.Timestamp(trial_xr['time'][:].data[i]).date()) for i in range(237)]

In [None]:
def site_local_plot_location(ds, time, variable):
    '''
    ds: .nc Dataset for a single location with data in numpy array of shape (11,11)  \
    of date range 01/03/2022 to 31/10/2022 (string)
    
    time: index from 0 to 236 (integer)
    
    variable: 'CHL' or 'TUR' or 'SPM' (string)
    '''
    lats = ds['lat'][:].data
    lons = ds['lon'][:].data

    arr = trial[variable][time,:,:].data

    # Basic plot
    plt.imshow(arr, cmap='viridis', interpolation='nearest')

    # Add highlighted point of Site Location
    plt.scatter(5, 5, s=100, marker='o', facecolors='r')

    # Plot colorbar
    plt.colorbar()

    # Changing Ticks to match latitude and longitude
    new_xticks = [round(i, 3) for i in lons[::5]]
    plt.xticks([0,5,10], new_xticks)
    new_yticks = [round(i, 5) for i in lats[::5]]
    plt.yticks([0,5,10], new_yticks)
    
    # Add Title
    plt.title(f'Time = {dates_list[time]}, {variable}')

In [None]:
trial = nc.Dataset(output_filename)
site_local_plot_location(trial, 1, 'CHL')

In [None]:
fig, axs = plt.subplots(nrows=60, ncols=4, figsize=(25, 300))

for i, ax in enumerate(axs.flatten()):
    if i < 237:
        site_local_plot_location(trial, i, 'CHL')
        plt.sca(ax)
        plt.xticks([])
        plt.yticks([])
        ax.set_facecolor('black')
    else:
        ax.axis('off')
        
plt.subplots_adjust(wspace=0.2, hspace=0.6)
plt.show()

In [None]:
df = trial_xr.to_dataframe()
df['Site'] = site_name
df

# Loop through all sites

## Download .nc files

In [None]:
%%time
# general
username = "slai"
password = "Capstone2023"
output_directory = "/Users/SallyLai/Desktop/LSE/Masters/Capstone/capstone-project-sewage-pollution/copernicus_code/data_by_site"
product_id = "OCEANCOLOUR_NWS_BGC_HR_L4_NRT_009_209-TDS"
dataset_id = "cmems_obs_oc_nws_bgc_tur-spm-chl_nrt_l4-hr-mosaic_P1D-m"

# Loop through sites_grid (lists of list, with each list corresponding to a site and its corresponding lon/lat markings)
for site in sites_grid[250:]:
    # site info
    site_name = site[0].replace("(","").replace(")","").replace("`","")
    lat_grid_max = site[5]
    lat_grid_min = site[7]
    lon_grid_max = site[8]
    lon_grid_min = site[6]

    # query specifications
    output_filename = f"{site_name}.nc".replace(" ", "_")
    date_min = "2022-03-01 00:00:00"
    date_max = "2022-10-31 23:59:59"

    # query
    query = ('python -m motuclient --motu https://nrt.cmems-du.eu/motu-web/Motu --service-id ' + product_id + ' --product-id ' + dataset_id + 
             f' --longitude-min {lon_grid_min} --longitude-max {lon_grid_max} --latitude-min {lat_grid_min} --latitude-max {lat_grid_max} ' +
             f'--date-min {date_min} --date-max {date_max} ' +
             '--variable CHL --variable SPM --variable TUR ' +
             '--out-dir ' + output_directory + ' --out-name ' + output_filename +
             ' --user ' + username + ' --pwd ' + password)

    os.system(query)  

In [None]:
[site[0] for site in sites_grid[:30]]

In [None]:
# Check missing sites
path = '/Users/SallyLai/Desktop/LSE/Masters/Capstone/capstone-project-sewage-pollution/copernicus_code/data_by_site'
files_now = [i.replace(".nc", "").replace("_", " ") for i in os.listdir(path)]
files_all = [site[0].replace("(","").replace(")","").replace("`","") for site in sites_grid]
diff = list(set(files_all) - set(files_now))
diff

Mother Ivey`s Bay
St Mary's Bay (Kent)
Anstey's Cove (Torquay)
St Mary's Bay (Devon)

In [None]:
pd.set_option('display.max_rows', 500)

In [None]:
[site.replace("(","").replace(")","").replace("`","") for site in pd.DataFrame(sites_grid)[0]][:500]

In [None]:
pd.DataFrame(sites_grid)[0].str.replace("(","").str.replace(")","").str.replace("`","").isin(diff).sum()

In [None]:
np.array(pd.DataFrame(sites_grid)[mask])

In [None]:
sites_grid_missing = np.array(pd.DataFrame(sites_grid)[pd.DataFrame(sites_grid)[0].str.replace("(","").str.replace(")","").str.replace("`","").isin(diff)])

In [None]:
sites_grid_missing

In [None]:
sites_grid_missing[1:]

In [None]:
%%time
# general
username = "slai"
password = "Capstone2023"
output_directory = "/Users/SallyLai/Desktop/LSE/Masters/Capstone/capstone-project-sewage-pollution/copernicus_code/data_by_site"
product_id = "OCEANCOLOUR_NWS_BGC_HR_L4_NRT_009_209-TDS"
dataset_id = "cmems_obs_oc_nws_bgc_tur-spm-chl_nrt_l4-hr-mosaic_P1D-m"

# Change the following number each time
start_index = 10
end_index = 30

# Loop
for site in sites_grid_missing[1:]:
    # site info
    site_name = site[0].replace("(","").replace(")","").replace("`","").replace("'","")
    lat_grid_max = site[5]
    lat_grid_min = site[7]
    lon_grid_max = site[8]
    lon_grid_min = site[6]

    # query specifications
    output_filename = f"{site_name}.nc".replace(" ", "_")
    date_min = "2022-03-01 00:00:00"
    date_max = "2022-10-31 23:59:59"

    # query
    query = ('python -m motuclient --motu https://nrt.cmems-du.eu/motu-web/Motu --service-id ' + product_id + ' --product-id ' + dataset_id + 
             f' --longitude-min {lon_grid_min} --longitude-max {lon_grid_max} --latitude-min {lat_grid_min} --latitude-max {lat_grid_max} ' +
             f'--date-min {date_min} --date-max {date_max} ' +
             '--variable CHL --variable SPM --variable TUR ' +
             '--out-dir ' + output_directory + ' --out-name ' + output_filename +
             ' --user ' + username + ' --pwd ' + password)

    os.system(query)  

## Convert to CSV and merge

In [2]:
import xarray as xr
import pandas as pd
import glob

In [9]:
nc_files = glob.glob('/Users/SallyLai/Desktop/LSE/Masters/Capstone/capstone-project-sewage-pollution/copernicus_code/data_by_site/*.nc')
ds_list = [xr.open_dataset(file) for file in nc_files]

In [18]:
nc_files

['/Users/SallyLai/Desktop/LSE/Masters/Capstone/capstone-project-sewage-pollution/copernicus_code/data_by_site/Newbiggin_North.nc',
 '/Users/SallyLai/Desktop/LSE/Masters/Capstone/capstone-project-sewage-pollution/copernicus_code/data_by_site/Goodrington.nc',
 '/Users/SallyLai/Desktop/LSE/Masters/Capstone/capstone-project-sewage-pollution/copernicus_code/data_by_site/Watergate_Bay.nc',
 '/Users/SallyLai/Desktop/LSE/Masters/Capstone/capstone-project-sewage-pollution/copernicus_code/data_by_site/Tunstall.nc',
 '/Users/SallyLai/Desktop/LSE/Masters/Capstone/capstone-project-sewage-pollution/copernicus_code/data_by_site/Ansteys_Cove_Torquay.nc',
 '/Users/SallyLai/Desktop/LSE/Masters/Capstone/capstone-project-sewage-pollution/copernicus_code/data_by_site/Frensham_Great_Pond.nc',
 '/Users/SallyLai/Desktop/LSE/Masters/Capstone/capstone-project-sewage-pollution/copernicus_code/data_by_site/Marske_Sands.nc',
 '/Users/SallyLai/Desktop/LSE/Masters/Capstone/capstone-project-sewage-pollution/copernicu

In [21]:
file_names = [file.split('/')[-1].replace(".nc", "") for file in nc_files]

In [22]:
ds_list_converted = []
for i, ds in enumerate(ds_list):
    df = ds.to_dataframe()
    df['site'] = file_names[i]
    ds_list_converted.append(df)

In [24]:
final_ds = pd.concat(ds_list_converted)
final_ds

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,TUR,SPM,CHL,site
time,lat,lon,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-03-01,55.189352,-1.519043,,,,Newbiggin_North
2022-03-01,55.189352,-1.517296,,,,Newbiggin_North
2022-03-01,55.189352,-1.515549,,,,Newbiggin_North
2022-03-01,55.189352,-1.513802,,,,Newbiggin_North
2022-03-01,55.189352,-1.512055,,,,Newbiggin_North
...,...,...,...,...,...,...
2022-10-31,50.277315,-3.893256,2.006134,1.186131,1.828091,Bigbury-on-Sea_South
2022-10-31,50.277315,-3.891509,0.708182,0.406647,1.046769,Bigbury-on-Sea_South
2022-10-31,50.277315,-3.889762,1.258156,0.729469,1.327342,Bigbury-on-Sea_South
2022-10-31,50.277315,-3.888015,1.037095,0.598882,1.254974,Bigbury-on-Sea_South


In [25]:
final_ds.to_csv("sites_data_11x11.csv")