In [90]:
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
# Import modules

import numpy as np
import xarray as xr
import pandas as pd
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
from scipy.interpolate import interp1d
import datetime
import gsw
import os
import glob
import sys
import re
from setdir import *

# Local import
from SIO_wrap import dir_tree, fnames
from SIO_wrap.lowess import LatLonLocalWess
from SIO_wrap import jlab_python as jlab



In [4]:
# Path for the output data
data_dir = dir_tree.dir_out

#--------------
# Time formats
tstamp_strftime = '%Y%m%d'  # Filename timestamp
timcol_strftime = '%Y-%m-%d %H:%M:%S'  # Convert text to datetime format 


# Extract a list with the names of existing raw data files.
existing_files = glob.glob(os.path.join(data_dir, fnames.fname_rawdata + '*'))

# ~ ~ print update ~ ~ 
if len(existing_files) > 0:
    print("Existing raw data files: \n%s\n" % existing_files)
else:
    sys.exit("No previous raw data files.\n")

# ~ ~ filenaming convention ~ ~
# If there are multiple files with raw data (i.e. non-updated datasets), select 
# the latest one updated.
# The file names are distinguished by the timestamp appended to the filename 
# and has <tstamp_strftime> format (see 'user edits' section).
# The data are cropped such that the last day is fully sampled (spans 0h-23h).
# The timestamp in the filename is the latest downloaded fully sampled day.

# Extract the timestamp part of the filename(s) in a list
tstamp = [date for file in existing_files 
            for date in re.findall("(\d{8})", file)]

# Convert to datetime and pick the most recent timestamp
tstamp_date = pd.to_datetime(tstamp, format=tstamp_strftime)
fname_timestamp = tstamp[tstamp_date.argmax()]

# Load the raw file with the latest timestamp
ds_fname = f"{fnames.fname_rawdata}{fname_timestamp}.nc"
ds_fpath = os.path.join(data_dir, ds_fname)

print("Opening file: %s\n" % ds_fpath)
ds_raw = xr.open_dataset(ds_fpath)

# Total number of points 
total_points = int(ds_raw.time.size)

Existing raw data files: 
['/Users/eddifying/Python/drifters/01-data/02-intermediate/drifter_data_raw_20220107.nc']

Opening file: /Users/eddifying/Python/drifters/01-data/02-intermediate/drifter_data_raw_20220107.nc



In [85]:
# Extract all the PIDs
ds_raw
PID = list(set(ds_raw.Platform_ID.values.astype('int')))

df = pd.DataFrame(PID, columns = ['Platform_ID'])

In [84]:
#original_stdout = sys.stdout

#with open('PID_list.txt', 'w') as f:
#    sys.stdout = f
#    print(str(PID)[1:-1])
#    sys.stdout = original_stdout

In [86]:
df

Unnamed: 0,PID
0,300234066519040
1,300234068243970
2,300234068243460
3,300234068342280
4,300234066519050
...,...
195,300234068343280
196,300234068245490
197,300234068244980
198,300234068244470


In [91]:
PID_list = df.to_csv(cat_proc_path('PID_list.txt'))
