# TEST getLTSPfileName


to see if the function that uses pandas, truncates the name

In [2]:
import pandas as pd
import xarray as xr

In [3]:
## this is the getName function
def getLTSPfileName(site, product="gridded", QC=True, param="TEMP", webURL="opendap"):
    '''
    get the url of the LTSP files
    
    require: pandas
    site: the site_code
    product: product type )aggregated, hourly or gridded)
    QC: for the hourly, include only good data (default True)
    param: for aggregated product, parameter code as IMOS standard (e.g. TEMP)
    webURL: web source of the file (S3: Amazon AWS (fastest), wget (AODN THREDDS, to download),
            opendap (AODN THREDDS to open remotely)
    E. Klein. eklein at ocean-analytics dot com dot au
    '''
    
    if webURL == "opendap": 
        WEBROOT = 'http://thredds.aodn.org.au/thredds/dodsC/'
    elif webURL == "wget":
        WEBROOT = 'http://thredds.aodn.org.au/thredds/fileServer/'
    elif webURL == "S3":
        WEBROOT = 'https://s3-ap-southeast-2.amazon.com/imos-data/'
    else:
        print("ERROR: wrong webURL: it must be one of S3, opendap or wget")

  
    urlGeoServer = "http://geoserver-123.aodn.org.au/geoserver/ows?typeName=moorings_all_map&SERVICE=WFS&REQUEST=GetFeature&VERSION=1.0.0&outputFormat=csv&CQL_FILTER=(realtime='FALSE')and(site_code='" + site + "')"
    df = pd.read_csv(urlGeoServer)
    url = df['url']
    
    #fileName = df$url[grepl(paste0(product,"-timeseries"), df$url)]
    fileName = "TEST"
    
    
    if product == "gridded": 
        fileName = url[url.str.contains("gridded")]
    elif product=="velocity-hourly":
        fileName = url[url.str.contains("velocity-hourly")]
    elif product=="hourly":
        if QC:
            fileName = url[url.str.contains("(?<!velocity-)hourly-timeseries(?!-including)", regex=True)]
        else:
            fileName = url[url.str.contains("including-non")]
    elif product=="aggregated":
        fileName = url[url.str.contains(param) & url.str.contains("aggregated")]
    else:
        print("ERROR: invalid combination of arguments or wrong names")

    
    return WEBROOT + fileName.to_string(index=False, header=False).strip()



## Now check is we get the full name


In [10]:
fileName = getLTSPfileName(site='NRSYON', product="hourly", QC=True, param="TEMP", webURL="opendap")
print(fileName)

http://thredds.aodn.org.au/thredds/dodsC/IMOS/ANMN/NRS/NRSYON/hourly_timeseries/IMOS_ANMN-NRS_BOSTUZ_20080623_NRSYON_FV02_hourly-timeseries_END-20200804_C-20210428.nc


we can check what is the last character of the string: 

In [12]:
print(fileName[-1])

c


Now add a pandas option to avoid truncation:

In [7]:
pd.set_option('display.max_colwidth', None) 

and then try it again:

In [8]:
fileName = getLTSPfileName(site='NRSYON', product="hourly", QC=True, param="TEMP", webURL="opendap")
print(fileName)

http://thredds.aodn.org.au/thredds/dodsC/IMOS/ANMN/NRS/NRSYON/hourly_timeseries/IMOS_ANMN-NRS_BOSTUZ_20080623_NRSYON_FV02_hourly-timeseries_END-20200804_C-20210428.nc


## Conclusion: 
Is a pandas characteristic inside the notebooks that could be fixed using the above option