In [2]:
import netCDF4
import xarray as xr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import gsw
import os

In [14]:
# blank final df
df_final = pd.DataFrame(columns=['time', 'depth', 'temperature', 'salinity'])

In [None]:
# path = "./../../ukmet/EN.4.2.2.analyses.l09.2000/EN.4.2.2.f.analysis.l09.200001.nc"

In [None]:
# ds = xr.open_dataset(path)

# # Find profiles near 6S 10W
# mask = (abs(ds['lat'] + 6) < 0.5) & (abs((ds['lon'] % 360) - 350) < 0.5)
# nearby_profiles = ds.where(mask, drop=True)

# print(nearby_profiles['lat'])
# print(nearby_profiles['lon'])


<xarray.DataArray 'lat' (lat: 1)> Size: 4B
array([-6.], dtype=float32)
Coordinates:
  * lat      (lat) float32 4B -6.0
Attributes:
    long_name:      latitude
    units:          degrees_north
    standard_name:  latitude
<xarray.DataArray 'lon' (lon: 1)> Size: 4B
array([350.], dtype=float32)
Coordinates:
  * lon      (lon) float32 4B 350.0
Attributes:
    long_name:      longitude
    units:          degrees_east
    standard_name:  longitude


So we have exact 6S10W, i.e. -6.0 lat, 350.0 lon. We choose this profile.

In [None]:
# # convert to pandas dataframe
# df = nearby_profiles.to_dataframe().reset_index()
# df.head()


Unnamed: 0,time,depth,lat,lon,bnds,temperature,salinity,temperature_uncertainty,salinity_uncertainty,temperature_observation_weights,salinity_observation_weights,time_bnds,depth_bnds
0,2000-01-16 12:00:00,5.02159,-6.0,350.0,0,299.313027,35.870613,0.621887,0.084526,1.007349,0.988412,2000-01-01,0.0
1,2000-01-16 12:00:00,5.02159,-6.0,350.0,1,299.313027,35.870613,0.621887,0.084526,1.007349,0.988412,2000-02-01,10.0475
2,2000-01-16 12:00:00,15.07854,-6.0,350.0,0,299.336867,35.918091,0.66275,0.095002,1.003931,0.929268,2000-01-01,10.0475
3,2000-01-16 12:00:00,15.07854,-6.0,350.0,1,299.336867,35.918091,0.66275,0.095002,1.003931,0.929268,2000-02-01,20.115801
4,2000-01-16 12:00:00,25.16046,-6.0,350.0,0,299.290673,36.04855,0.609222,0.106646,1.005579,0.915094,2000-01-01,20.115801


In [None]:
# # keep only date in 'time' column
# df['time'] = pd.to_datetime(df['time'].dt.strftime('%Y-%m-%d'))
# # drop unneeded columns
# lat = -6.0
# lon = 350.0
# df = df.drop(columns=['lat', 'lon'])

# # drop NaN values
# df = df.dropna()

# # we only need columns 'time', 'depth', 'temperature', 'salinity'
# # remove rows with bnds col value 1 first
# df = df[df['bnds'] == 0]
# df = df[['time', 'depth', 'temperature', 'salinity']]

# # convert temperature to celsius
# df['temperature'] = df['temperature'] - 273.15

In [None]:
# df.head()

Unnamed: 0,time,depth,temperature,salinity
0,2000-01-16,5.02159,26.163027,35.870613
2,2000-01-16,15.07854,26.186867,35.918091
4,2000-01-16,25.16046,26.140673,36.04855
6,2000-01-16,35.27829,26.199018,36.022953
8,2000-01-16,45.447762,25.729147,35.99292


### Processing all datafiles and creating a final df to store the data for 6S10W


In [9]:
folder_path = "./../../ukmet/"
# loop through folders in this folder, each folder contains some netcdf files open each of those
for root, dirs, files in os.walk(folder_path):
    for file in files:
        if file.endswith(".nc"):
            file_path = os.path.join(root, file)
            ds = xr.open_dataset(file_path)
            mask = (abs(ds['lat'] + 6) < 0.5) & (abs((ds['lon'] % 360) - 350) < 0.5)
            nearby_profiles = ds.where(mask, drop=True)

            # convert to pandas dataframe
            df = nearby_profiles.to_dataframe().reset_index()

            # keep only date in 'time' column
            df['time'] = pd.to_datetime(df['time'].dt.strftime('%Y-%m-%d'))
            # drop unneeded columns
            lat = -6.0
            lon = 350.0
            df = df.drop(columns=['lat', 'lon'])

            # drop NaN values
            df = df.dropna()

            # we only need columns 'time', 'depth', 'temperature', 'salinity'
            # remove rows with bnds col value 1 first
            df = df[df['bnds'] == 0]
            df = df[['time', 'depth', 'temperature', 'salinity']]

            # convert temperature to celsius
            df['temperature'] = df['temperature'] - 273.15

            # ignore all depths above 200m
            # df = df[df['depth'] < 200]

            # append to final df
            df_final = pd.concat([df_final, df], ignore_index=True)
            

  df_final = pd.concat([df_final, df], ignore_index=True)


In [10]:
df_final.to_csv('ukmet_6S_10W.csv', index=False)

### Final Density Calculation from refined csv

In [16]:
df = pd.read_csv('ukmet_6S_10W.csv')
df['time'] = pd.to_datetime(df['time'])

In [17]:
lat = -6.0
lon = 350.0

# p = gsw.p_from_z(-df['depth'].values, lat=0)
# SA = gsw.SA_from_SP(df['S_41'].values, p, lon=0, lat=0)
# CT = gsw.CT_from_t(SA, df['T_21'].values, p)
# sigma_theta = gsw.sigma0(SA, CT)

p = gsw.p_from_z(-df['depth'].values, lat=lat)
df['SA'] = gsw.SA_from_SP(df['salinity'].values, p, lon=lon, lat=lat).round(8)
df['CT'] = gsw.CT_from_t(df['SA'].values, df['temperature'].values, p).round(8)
df['STH_pred'] = gsw.sigma0(df['SA'].values, df['CT'].values).round(8)
df['rho_pred'] = gsw.rho(df['SA'].values, df['CT'].values, p).round(8) - 1000.0

In [18]:
df.to_csv(f'ukmet_results.csv', index=False)