In [1]:
import numpy as np
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt
from bs4 import BeautifulSoup
import requests
import os
import glob
import re
import sys
from datetime import timedelta
import datetime
from io import StringIO



In [2]:
# Local import 
# > Make sure SIO_wrap dir is on the same path as this script.
%load_ext autoreload
%autoreload 2
from SIO_wrap import dir_tree, fnames

from setdir import *

/Users/eddifying/Python/drifters/ already exists
/Users/eddifying/Python/drifters/02-code/ already exists
/Users/eddifying/Python/drifters/02-code/SIO_wrap/ already exists
/Users/eddifying/Python/drifters/01-data/04-aux/ already exists
/Users/eddifying/Python/drifters/01-data/02-intermediate/ already exists


In [3]:
def load_one_drifter(PID, base_url, username, password, 
                     download_start_date): 
    
    from SIO_wrap import fnames
    
    tstr = f'start_date={download_start_date}'
    pidstr = '&platform_id='

    full_url = base_url+tstr+pidstr+PID

    # Request file
    resp = requests.get(full_url, auth=(username, password))
    print(resp.status_code)

    # Print the response code (200 is good. If you get something else, may be
    # a password problem)

    # To print content: resp.content
    aa = resp.content.decode("utf-8")
    data_df = pd.read_csv(StringIO(aa))
    
    # Clean up column names
    tmp = data_df.columns.str.strip().str.replace(" ","").str.replace("-","_")
    tmp = tmp.str.replace("(","_").str.replace(')','')
    data_df.columns = tmp
    
    # Remove the </br> column
    data_df = data_df.drop(columns='</br>')
    data_df.dtypes
    
    # Convert the time column to a timestamp
    time_colname = 'Timestamp_UTC'
    data_df[time_colname] = pd.to_datetime(data_df[time_colname],
                                       format=timcol_strftime) 

    print("Sorting rows by time ..\n")
    data_df = data_df.sort_values(by=time_colname)
    
    # Prep to convert xarray
    data_df2 = data_df
    data_df2["time"] = data_df2["Timestamp_UTC"].values
    data_df2 = data_df2.set_index("time")
    data_df2 = data_df2.drop(columns='Timestamp_UTC')
    # Convert to xarray
    ds = data_df2.to_xarray()

    return ds


In [4]:
####################-----------   USER EDITS    ------------####################

# Path where data are saved. Can be changed in file SIO_wrap/dir_tree.py
data_dir = dir_tree.dir_out

# Download start date must have format yyyy-mm-dd. Default is set to the
# beginning of the TERIFIC project, i.e 2019-12-04.
download_start_date = "2019-12-04"
print("\nDefault download start date: %s\n" % download_start_date)


# SIO username and password
username = "uk-noc"
password = "noc-drifter"

# URL for data
base_url = "https://gdp.ucsd.edu/cgi-bin/projects/uk-noc/drifter.py?" 

# Options

#full_url = base_url+tstr+pidstr
#download_url = ("https://gdp.ucsd.edu/cgi-bin/projects/uk-noc/"
#                "drifter.py?start_date=") 


# String formatting for time for:
#   - the download url, 
#   - appending to the filename
# 	- the data time column, respectively.
url_strftime = '%Y-%m-%d'
tstamp_strftime = '%Y%m%d'
timcol_strftime = '%Y-%m-%d %H:%M:%S'




Default download start date: 2019-12-04



In [6]:
# Get the list of Platform IDs
PID = pd.read_csv(cat_proc_path('PID_list.txt'), header='infer', index_col=0)


In [7]:
# Load a single drifter on the web
for i in range(len(PID))
    pid1 = (PID["PID"].values)[i].astype('str')

    # Load data into an xarray
    ds = load_one_drifter(pid1, base_url, username, password,
                               download_start_date)
    
    
    # Get values for attributes
    PID1 = pid1.values
    dstr = datetime.datetime.today()
    dstr = dstr.replace(hour=0, minute=0, second=0, 
                        microsecond=0).strftime('%Y-%m-%d')

    maxtime = ds.time.max().values
    maxtimestr = pd.to_datetime(maxtime).strftime('%Y-%m-%dT%H:%M:%S')


    project_name = 'TERIFIC'
    operator_name = "EFW"
    institution_name = 'National Oceanography Centre, UK'


    # Create a dictionary of attributes
    attr_dict = {"Platform_ID": PID1,
                 "End Time": maxtimestr,
                 "Project": project_name,
                 "Originator": operator_name,
                 "Institution": institution_name,
                 "Date created": dstr,
                }


    ds = ds.assign_attrs(attr_dict)

    ds = ds.drop('Platform_ID')
    
    # Save file to raw
    enddate = pd.to_datetime(maxtime).strftime('%Y%m%d')
    fname = 'pid'+str(PID1)+'_'+enddate+'.nc'
    ds.to_netcdf(cat_raw_path(fname))



200
Sorting rows by time ..



  tmp = tmp.str.replace("(","_").str.replace(')','')


AttributeError: 'numpy.str_' object has no attribute 'values'

In [215]:
ds

In [None]:
ds

In [164]:
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
# convert pandas dataset to xarray dataset 
# (easier to save as netcdf file)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
# Define a dictionary and first populate it with the float variables.
# Treat the time variable separately because it has type datetime64[ns].
#
# Dictionary uses an ordinate (n) as a coordinate; decided not to use the time 
# because it does not have unique values although this can be changed.

#enddate = maxtime.to_datetime().strftime('%Y%m%d')




In [165]:
ds

In [122]:



#fname_data = "sio"+str(PID[i].astype("int"))

# Filename and path of dataset
#data_fname = f"{fname_data}_{fname_timestamp}.nc"
#data_fpath = os.path.join(data_dir, data_fname)

# Save dataset to netcdf file
#drift_ii.to_netcdf(data_fpath)



Converting DataFrame to xarray Dataset..



In [154]:
# xarray dataset
ds = xr.Dataset(dd)



In [155]:
ds