# This notebook pulls in prepared .csv files that contain CORA time series data and runs the data through the NOAA Tidal Analysis Datum Calculator (TADC).

In [16]:
import requests
import shutil
import os
import subprocess
import pandas as pd
import glob
from natsort import natsorted
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
from scipy.signal import detrend

**Create a DataFrame of NWLON station IDs and coordinates, pulled from the CO-OPS API, for the stations where you want to use CORA data.**

In [17]:
# Query the CO-OPS Metadata API for every water-level station, then keep only
# the id / latitude / longitude of the stations where CORA data will be used.
units = 'metric'

station_type = 'waterlevels'

server = 'https://api.tidesandcurrents.noaa.gov/mdapi/prod/webapi/stations/'

myurl = (server + '.json?type=' + station_type + '&units=' + units)

# A timeout keeps the cell from hanging forever if the service is unreachable,
# and raise_for_status() surfaces an HTTP error here rather than as a confusing
# JSON-decoding or KeyError failure a few lines further down.
urlResponse = requests.get(myurl, timeout=60)
urlResponse.raise_for_status()
content = urlResponse.json()

stations = content['stations']
stations_df = pd.DataFrame(stations)

stations_df = stations_df[['id', 'lat', 'lng']]
station_id = ['8651370', '8656483']

# The boolean mask keeps rows in the order the API returned them, which is not
# necessarily the order of `station_id`. The TADC cell below pairs these rows
# with the .csv files by position, so check that the two orders agree.
stations_df = stations_df[stations_df['id'].isin(station_id)]
stations_df

Unnamed: 0,id,lat,lng
71,8651370,36.1833,-75.746696
74,8656483,34.7175,-76.671111


**Use glob to loop through the .csv files and run the data through the TADC using subprocess with SDC.py.**

In [21]:
# Run every prepared CORA .csv file through the TADC: for each file, point
# config.cfg at it, fill in the matching station coordinates, then launch SDC.py.
dirname = 'path\\to\\csvfiles\\'
csv_path = dirname + '*.csv'
configpath = 'path\\to\\TADC_config_file_directory\\'

# 0-based positions of the config.cfg lines that are overwritten for each run.
FNAME_LINE = 22
SUBORDINATE_LON_LINE = 47
SUBORDINATE_LAT_LINE = 51

# natsorted gives the files their natural (human) ordering.
# Files named 'Datums_*' are skipped: the datum-extraction cell further down
# writes 'Datums_<name>.csv' into this same folder, and without this filter a
# re-run of this cell would feed those result tables to the TADC and shift the
# file/station pairing.
csv_files = [
    fname for fname in natsorted(glob.glob(csv_path))
    if not os.path.split(fname)[-1].startswith('Datums_')
]

# Files are paired with the rows of stations_df purely by position, so fail
# before touching config.cfg if there are not enough station rows.
# NOTE(review): the pairing assumes the sorted file order matches the row order
# of stations_df -- confirm against the actual file names.
if len(csv_files) > len(stations_df):
    raise ValueError(
        'Found ' + str(len(csv_files)) + ' .csv files but only '
        + str(len(stations_df)) + ' stations in stations_df'
    )

for i, fname in enumerate(csv_files):

    file_name = os.path.split(fname)[-1]  # Extract the filename from the path

    with open(configpath + 'config.cfg', "r") as file:
        lines = file.readlines()

    lines[FNAME_LINE] = 'fname = ' + dirname + file_name + '\n'
    lines[SUBORDINATE_LON_LINE] = 'subordinate_lon = ' + str(stations_df.lng.iloc[i]) + '\n'
    lines[SUBORDINATE_LAT_LINE] = 'subordinate_lat = ' + str(stations_df.lat.iloc[i]) + '\n'

    with open(configpath + 'config.cfg', "w") as file:
        file.writelines(lines)

    proc = subprocess.run(['python', configpath + 'SDC.py'], capture_output=True, text=True)

    # The output is captured, so a failing run would otherwise go unnoticed.
    # Report it and carry on with the remaining files.
    if proc.returncode != 0:
        print('SDC.py failed for ' + file_name + ' (exit code ' + str(proc.returncode) + '):')
        print(proc.stderr)

In [22]:
# Scan the TADC .out files in `dirname`. For each file:
#   1. record its leading 7-digit id in `fulldata` when the output reports the
#      expected data start date, and
#   2. extract the datum table that begins at the first "HWL" line and write it
#      to 'Datums_<file name>.csv' in the same folder.
# Step 2 runs for every .out file, whether or not it was added to `fulldata`.
datastart = "Data Start:  2018-09-01 00:00:00"
folder_path = dirname + '*.out'

# Number of file lines kept for the datum table, counting the "HWL" line itself.
DATUM_TABLE_ROWS = 13

fulldata = []

for fname in natsorted(glob.glob(folder_path)): # natsorted makes sure of the natural sorting of files

    file_name = os.path.split(fname)[-1]  # Extract the filename from the path
    out_path = dirname + file_name

    # Read the file once; both searches below work on this list of lines.
    with open(out_path, "r") as file:
        out_lines = file.readlines()

    if any(datastart in line for line in out_lines):
        fulldata.append(int(file_name[0:7])) # these are the points that included a full 19 years of data

    # 1-based number of the first line mentioning "HWL", or None if there is none.
    hwl_line_number = next(
        (number for number, line in enumerate(out_lines, start=1) if "HWL" in line),
        None,
    )
    if hwl_line_number is None:
        continue  # no datum table in this file, nothing to write

    # Drop everything after the table so that only the DATUM_TABLE_ROWS lines
    # starting at the HWL line remain. pandas rejects a negative skipfooter, so
    # clamp it for files that end before the table is complete.
    skipfooter = len(out_lines) - hwl_line_number - (DATUM_TABLE_ROWS - 1)
    if skipfooter < 0:
        print(file_name + ' ends before the full datum table; writing the rows that are present')
        skipfooter = 0

    colspecs = [(0, 5), (7, 15)]
    df_fwf = pd.read_fwf(
        out_path,
        colspecs=colspecs,
        skiprows=hwl_line_number - 1,
        skipfooter=skipfooter,
        names=['Datums', 'meters'],
    )

    grid_fname = dirname + 'Datums_' + file_name + '.csv'
    df_fwf.to_csv(grid_fname, index=False) # write the files to .csv