# Getting structured tides from hydrographic data

<hr>

## Requirement

Starting with hydrographic html for a given location <sub>see [OneNote 08.2024 tide times]</sub>, build structured tide data for a set of dates

## Entities

<hr>

### TideForDay

#### Definition
Gathers data for a tide on a given calendar day

#### Key 
t_date

#### Cardinalities 
1-M TideMark

#### Data attributes

 
| Name | aka | data type | Definition |
|:-------------|----|-------|:-------------|
| t_date | Tide Date | int | UID of the tide for a given day. Strictly an alternate key , but no need for surrogates here. Not \"date\" due to keyword conflict. This is the date, but does not include the month and year. That is added during the output to csv stage |
| tidal_range | Tidal Range | float | The distance between the minimum low water mark, and the maximum high water mark, for that day |


<hr>

In [115]:
# pip install regex
import re
from pprint import pprint as pp
from dataclasses import dataclass
from datetime import datetime

@dataclass
class TideMark:
    t_date: int
    t_seq: int
    t_type: str # High or Low
    t_height: float
    t_time: datetime

@dataclass
class TideForDay:
    t_date: int
    tidal_range: float # distance between min and max tide_marks


tide_marks = []
tides = []

with open("./hydro_2024.09.09.txt","r") as infile:
    lines = infile.readlines()



In [116]:
# Data example:
# Sun 04 AugNew moon on this day
# High Water of 3.7 metres, at 19:59.

# Remove any blank lines, then I only want lines that start with the values in ok_search_terms
# Where a value is Sun, Mon, etc, subsequent records refer to the low or high-water times for
# that day... until you hit a new day. Etc.
ok_search_terms = ("Low","High","Fri","Sat","Sun","Mon","Tue","Wed","Thur")
#remove non-printing chars - typically tab etc
cleaned_lines = [line.strip() for line in lines]
high_lows = [line for line in cleaned_lines if line.startswith(ok_search_terms)]
print(high_lows)

['Mon 09 Sep', 'Low Water of 0.7 metres, at 04:14.', 'High Water of 3.3 metres, at 10:34.', 'Low Water of 0.8 metres, at 16:25.', 'High Water of 3.2 metres, at 22:48.\t-', 'Tues 10 Sep', 'Low Water of 0.9 metres, at 04:40.', 'High Water of 3.1 metres, at 11:04.', 'Low Water of 1.0 metres, at 16:56.', 'High Water of 3.0 metres, at 23:20.\t-', 'Weds 11 SepFirst quarter on this day', 'Low Water of 1.1 metres, at 05:12.', 'High Water of 2.9 metres, at 11:38.', 'Low Water of 1.2 metres, at 17:34.\t-\t-', 'Thurs 12 Sep\t-', 'High Water of 2.7 metres, at 00:01.', 'Low Water of 1.3 metres, at 05:52.', 'High Water of 2.8 metres, at 12:25.', 'Low Water of 1.4 metres, at 18:28.', 'Fri 13 Sep\t-', 'High Water of 2.6 metres, at 01:13.', 'Low Water of 1.5 metres, at 07:03.', 'High Water of 2.8 metres, at 14:29.', 'Low Water of 1.5 metres, at 20:53.', 'Sat 14 Sep\t-', 'High Water of 2.7 metres, at 03:17.', 'Low Water of 1.4 metres, at 09:27.', 'High Water of 3.0 metres, at 15:57.', 'Low Water of 1.1 

In [117]:
for line in high_lows:
    words = line.split()

    # If the sentence starts with Sun, Mon, etc, then subsequent records are
    # high/low water times. Keep words 1 (base 0)
    # (date, but not day). Month and year will be entered downstream
    if words[0] in ("Sun","Mon","Tues","Weds","Thurs","Fri","Sat"):
        curr_date = int(words[1])
        curr_seq = 0
        # new record required
        
        continue
    # After previous block, 7 or more words means a High or Low water sentence. 
    # Words 0 (base zero), 3 and 6 are respectively High/Low, tide-height, and 
    # time. Other words and characters are discarded
    if len(words) >= 7:
       
        curr_type = words[0]
        curr_time = words[6].rstrip('.')
        curr_height = words[3]
        # dataclass populates here:
        tide_mark = TideMark(curr_date, curr_seq, curr_type, curr_height, curr_time)
        tide_marks.append(tide_mark)
        #print(tide_mark)
        curr_seq += 1
        continue     


pp(tide_marks)


[TideMark(t_date=9, t_seq=0, t_type='Low', t_height='0.7', t_time='04:14'),
 TideMark(t_date=9, t_seq=1, t_type='High', t_height='3.3', t_time='10:34'),
 TideMark(t_date=9, t_seq=2, t_type='Low', t_height='0.8', t_time='16:25'),
 TideMark(t_date=9, t_seq=3, t_type='High', t_height='3.2', t_time='22:48'),
 TideMark(t_date=10, t_seq=0, t_type='Low', t_height='0.9', t_time='04:40'),
 TideMark(t_date=10, t_seq=1, t_type='High', t_height='3.1', t_time='11:04'),
 TideMark(t_date=10, t_seq=2, t_type='Low', t_height='1.0', t_time='16:56'),
 TideMark(t_date=10, t_seq=3, t_type='High', t_height='3.0', t_time='23:20'),
 TideMark(t_date=11, t_seq=0, t_type='Low', t_height='1.1', t_time='05:12'),
 TideMark(t_date=11, t_seq=1, t_type='High', t_height='2.9', t_time='11:38'),
 TideMark(t_date=11, t_seq=2, t_type='Low', t_height='1.2', t_time='17:34'),
 TideMark(t_date=12, t_seq=0, t_type='High', t_height='2.7', t_time='00:01'),
 TideMark(t_date=12, t_seq=1, t_type='Low', t_height='1.3', t_time='05:52'

In [118]:
# Initialize a dictionary to store the highest and lowest heights for each date
heights_by_date = {}

# Get a sorted list of the unique dates for this set of tides. 
# Example: [4, 5, 6, 7]
unique_dates = list(set(int(mark.t_date) for mark in tide_marks))
unique_dates.sort()
pp(unique_dates)
water_marks = {}
for d in unique_dates:
    water_marks[d] = {'Low':100,'High':-100}

pp(tide_marks)

tide_heights = [(int(mark.t_date), mark.t_seq, mark.t_type, float(mark.t_height), mark.t_time) for mark in tide_marks]

pp(tide_heights)

for test_tide in tide_heights:
    test_date = test_tide[0]
    test_type = test_tide[2]
    new_height = test_tide[3]
    
    if  test_type == 'Low':
        if  new_height < water_marks[test_date][test_type]:
            water_marks[test_date][test_type] = new_height
    else: # High
        if  new_height > water_marks[test_date][test_type]:
            water_marks[test_date][test_type] = new_height

pp(water_marks)
    

[9, 10, 11, 12, 13, 14, 15]
[TideMark(t_date=9, t_seq=0, t_type='Low', t_height='0.7', t_time='04:14'),
 TideMark(t_date=9, t_seq=1, t_type='High', t_height='3.3', t_time='10:34'),
 TideMark(t_date=9, t_seq=2, t_type='Low', t_height='0.8', t_time='16:25'),
 TideMark(t_date=9, t_seq=3, t_type='High', t_height='3.2', t_time='22:48'),
 TideMark(t_date=10, t_seq=0, t_type='Low', t_height='0.9', t_time='04:40'),
 TideMark(t_date=10, t_seq=1, t_type='High', t_height='3.1', t_time='11:04'),
 TideMark(t_date=10, t_seq=2, t_type='Low', t_height='1.0', t_time='16:56'),
 TideMark(t_date=10, t_seq=3, t_type='High', t_height='3.0', t_time='23:20'),
 TideMark(t_date=11, t_seq=0, t_type='Low', t_height='1.1', t_time='05:12'),
 TideMark(t_date=11, t_seq=1, t_type='High', t_height='2.9', t_time='11:38'),
 TideMark(t_date=11, t_seq=2, t_type='Low', t_height='1.2', t_time='17:34'),
 TideMark(t_date=12, t_seq=0, t_type='High', t_height='2.7', t_time='00:01'),
 TideMark(t_date=12, t_seq=1, t_type='Low', t_

In [119]:
# get the tidal range
for i in water_marks:
    t_high = water_marks[i]['High']
    t_low = water_marks[i]['Low']
    water_marks[i]['TidalRange'] = round(t_high - t_low,2)
pp(water_marks)

{9: {'High': 3.3, 'Low': 0.7, 'TidalRange': 2.6},
 10: {'High': 3.1, 'Low': 0.9, 'TidalRange': 2.2},
 11: {'High': 2.9, 'Low': 1.1, 'TidalRange': 1.8},
 12: {'High': 2.8, 'Low': 1.3, 'TidalRange': 1.5},
 13: {'High': 2.8, 'Low': 1.5, 'TidalRange': 1.3},
 14: {'High': 3.0, 'Low': 1.1, 'TidalRange': 1.9},
 15: {'High': 3.4, 'Low': 0.8, 'TidalRange': 2.6}}


In [120]:
# populate the tide_day class from the dictionary
tidal_ranges_by_date = [TideForDay(t_date=key, tidal_range=value['TidalRange']) for key, value in water_marks.items()]
pp(tidal_ranges_by_date)
pp(tide_marks)


[TideForDay(t_date=9, tidal_range=2.6),
 TideForDay(t_date=10, tidal_range=2.2),
 TideForDay(t_date=11, tidal_range=1.8),
 TideForDay(t_date=12, tidal_range=1.5),
 TideForDay(t_date=13, tidal_range=1.3),
 TideForDay(t_date=14, tidal_range=1.9),
 TideForDay(t_date=15, tidal_range=2.6)]
[TideMark(t_date=9, t_seq=0, t_type='Low', t_height='0.7', t_time='04:14'),
 TideMark(t_date=9, t_seq=1, t_type='High', t_height='3.3', t_time='10:34'),
 TideMark(t_date=9, t_seq=2, t_type='Low', t_height='0.8', t_time='16:25'),
 TideMark(t_date=9, t_seq=3, t_type='High', t_height='3.2', t_time='22:48'),
 TideMark(t_date=10, t_seq=0, t_type='Low', t_height='0.9', t_time='04:40'),
 TideMark(t_date=10, t_seq=1, t_type='High', t_height='3.1', t_time='11:04'),
 TideMark(t_date=10, t_seq=2, t_type='Low', t_height='1.0', t_time='16:56'),
 TideMark(t_date=10, t_seq=3, t_type='High', t_height='3.0', t_time='23:20'),
 TideMark(t_date=11, t_seq=0, t_type='Low', t_height='1.1', t_time='05:12'),
 TideMark(t_date=11, 

In [121]:
# Given a date, add the month and year to which this applies
def add_month_year(t_date):
    month = "09"
    year = "2024"
    return f"{t_date}/{month}/{year}"
    
    

In [122]:
# Now that the objects are populated, we can loop over these to
# create the csv for importing into a spreadsheet
# I want 2 tides per day, the "usable" tides, when I have a chance of
# getting down to the sea. There are 3 or 4 water marks in a calendar
# day. Take the first 2 that occur after say 0600.
# Drive the loop from the parent class

tides_as_csv = ""
x = ""
for i in tidal_ranges_by_date:
    
    #print(i)
    x += f"{add_month_year(i.t_date)},{i.tidal_range}"
    m = [mark for mark in tide_marks if mark.t_date == i.t_date and mark.t_time > "06:00"]
    sorted_marks = sorted(m, key=lambda x: x.t_seq, reverse=False)
    tide_count = 0
    for sorted_mark in sorted_marks:
        if tide_count < 2:
            x += f",{sorted_mark.t_type},{sorted_mark.t_time},{sorted_mark.t_height}"
        tide_count += 1
    x += "\n"
    
    # print("m------------")
    # print(m)
    # print("------------")
    # print(sorted_marks)
    
pp(x)    
# 02,High,2.5,05:53,3.1,Low,11:31,0.9,High,18:18,3.4,
# 03,Low,2.8,00:15,0.8,High,06:56,3.3,Low,12:32,0.8,High,19:13,3.6,
# 04,Low,3.0,01:08,0.7,High,07:46,3.4,Low,13:21,0.7,High,19:59,3.7,
# 05,Low,3.2,01:53,0.6,High,08:28,3.5,Low,14:03,0.6,High,20:37,3.8,
# 06,Low,3.2,02:33,0.6,High,09:01,3.5,Low,14:39,0.6,High,21:08,3.8,
# 07,Low,3.1,03:06,0.6,High,09:28,3.5,Low,15:10,0.6,High,21:33,3.7,
# 08,Low,2.9,03:33,0.6,High,09:49,3.5,Low,15:36,0.6,

('9/09/2024,2.6,High,10:34,3.3,Low,16:25,0.8\n'
 '10/09/2024,2.2,High,11:04,3.1,Low,16:56,1.0\n'
 '11/09/2024,1.8,High,11:38,2.9,Low,17:34,1.2\n'
 '12/09/2024,1.5,High,12:25,2.8,Low,18:28,1.4\n'
 '13/09/2024,1.3,Low,07:03,1.5,High,14:29,2.8\n'
 '14/09/2024,1.9,Low,09:27,1.4,High,15:57,3.0\n'
 '15/09/2024,2.6,Low,10:39,1.1,High,17:17,3.4\n')
