### nbdc data

In [94]:
import requests
import pandas as pd
import numpy as np

### get all relevant buoys

In [95]:
text_data = requests.get('https://www.ndbc.noaa.gov/data/stations/station_table.txt').text
rows = text_data.strip().split('\n')

# extracting headers
headers = map(lambda x: x.strip(), rows[0].strip().split('|'))

# remaining data. (skipping with only `#`)
data = [row.strip().split('|') for row in rows[2:]]

df_buoys = pd.DataFrame(data, columns = headers)

In [111]:
df_buoys[1:10]

Unnamed: 0,# STATION_ID,OWNER,TTYPE,HULL,NAME,PAYLOAD,LOCATION,TIMEZONE,FORECAST,NOTE
1,13001,PR,Atlas Buoy,PM-595,NE Extension,,"12.000 N 23.000 W (12&#176;0'0"" N 23&#176;0'0"" W)",,,
2,13002,PR,Atlas Buoy,,NE Extension,,"21.000 N 23.000 W (21&#176;0'0"" N 23&#176;0'0"" W)",,,
3,13008,PR,Atlas Buoy,PM-531,Reggae,,"15.000 N 38.000 W (15&#176;0'0"" N 38&#176;0'0"" W)",,,
4,13009,PR,Atlas Buoy,PM-533,Lambada,,"8.000 N 38.000 W (8&#176;0'0"" N 38&#176;0'0"" W)",,,
5,13010,PR,Atlas Buoy,PM-590,Soul,,"0.000 N 0.000 E (0&#176;0'0"" N 0&#176;0'0"" W)",,,
6,14040,RM,Atlas Buoy,,,,"8.000 S 67.000 E (8&#176;0'0"" S 67&#176;0'0"" E)",,,
7,14041,RM,Atlas Buoy,,,,"8.000 S 55.000 E (8&#176;0'0"" S 55&#176;0'0"" E)",,,
8,14043,RM,Atlas Buoy,,,,"12.000 S 67.000 E (12&#176;0'0"" S 67&#176;0'0"" E)",,,
9,14047,RM,Atlas Buoy,,,,"4.000 S 57.000 E (4&#176;0'0"" S 57&#176;0'0"" E)",,,


### Get buoy data

In [144]:
def getBuoyData(buoyid): 
    # Split the text into rows using newline characters
    # trying to get new data
    rows = requests.get(f'https://www.ndbc.noaa.gov/data/realtime2/{buoyid}.spec')
    rows.raise_for_status()
    
    # handling data if request is successful
    rows = rows.text.strip().split('\n')
    headers = rows[0].strip().split()
    data = [row.split() for row in rows[1:]]

    df_buoy_data = pd.DataFrame(data, columns=headers)
    return df_buoy_data

In [209]:
getBuoyData(46001)

Unnamed: 0,#YY,MM,DD,hh,mm,WVHT,SwH,SwP,WWH,WWP,SwD,WWD,STEEPNESS,APD,MWD
0,#yr,mo,dy,hr,mn,m,m,sec,m,sec,-,degT,-,sec,degT
1,2023,09,05,20,10,1.6,1.5,8.3,0.3,3.7,WSW,NW,AVERAGE,6.9,247
2,2023,09,05,19,40,1.6,1.6,16.0,0.3,3.8,SSE,WNW,SWELL,7.0,164
3,2023,09,05,19,10,1.6,1.6,16.0,0.3,3.3,SSE,NW,SWELL,7.3,155
4,2023,09,05,18,40,1.5,1.5,9.1,0.3,3.7,WSW,NW,AVERAGE,7.2,254
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2195,2023,07,22,02,10,0.6,0.5,12.1,0.4,3.3,SSE,NNW,,4.3,167
2196,2023,07,22,01,40,0.6,0.4,11.4,0.5,3.6,SSE,NNW,,4.1,165
2197,2023,07,22,01,10,0.6,0.4,10.8,0.5,3.6,SSE,NNW,,4.1,164
2198,2023,07,22,00,40,0.6,0.4,11.4,0.5,3.6,SE,NNW,,4.3,144


### Calculate that buoy's angle to Los Angeles

Swell direction is measured with north at 0 degrees, east at 90, etc. 

In [4]:
import math
import re

In [21]:
LA_lat = 33.8847
LA_long = -118.4109

def calculate_angle(coords):
    buoylat, buoylong = parse_coordinates(coords) if isinstance(coords, str) else coords
    LA_lat = 33.8847
    LA_long = -118.4109
    x = buoylong - LA_long
    y = buoylat - LA_lat
    
    if x >= 0:
        return 90 - math.atan(y / x) * 180 / math.pi 
    elif x < 0: 
        return 270 - math.atan(y / x) * 180 / math.pi
    
    #return math.atan(y / x) * 180 / math.pi
    
    
def parse_coordinates(input_string):
    pattern = r'(\d+\.\d+)\s*([NS])\s*(\d+\.\d+)\s*([EW])'
    match = re.search(pattern, input_string)
    if match:
        latitude = float(match.group(1))
        if match.group(2) == 'S':
            latitude = -latitude
        
        longitude = float(match.group(3))
        if match.group(4) == 'W':
            longitude = -longitude
        
        return latitude, longitude
    else:
        return None
    

In [22]:
df_buoys['angletoLA'] = df_buoys.apply(lambda row: calculate_angle(row.LOCATION), axis = 1)

### Get all valid buoys -- request all the data for those buoys!!

Buoys in between angle 160 and less than 360. longitude less than 0. 

In [211]:
def isValidBuoy(deg, long): 
    if deg >= 160 and deg <= 360 and long < 0: 
        return True
    return False


def builddata(df_buoys):
    df_main = pd.DataFrame(columns = df_buoy_data.columns.tolist())
    #for i in range(len(df_buoys)): 
    for i in range(len(df_buoys)):
        lat, long = parse_coordinates(df_buoys.LOCATION.iloc[i])
        angle = calculate_angle((lat, long))
        buoy_id = str(df_buoys.iloc[i, 0])
        if isValidBuoy(angle, long): 
            try: 
                df_main = pd.concat([df_main, getBuoyData(buoy_id)])
                # df_main.append(getBuoyData(buoy_id))
                print(f"got valid data for {buoy_id}")
            except: 
                pass
                # print(f"couldn't get data for buoy {buoy_id}")


    return df_main

In [213]:
df_main

Unnamed: 0,#YY,MM,DD,hh,mm,WVHT,SwH,SwP,WWH,WWP,SwD,WWD,STEEPNESS,APD,MWD
0,#yr,mo,dy,hr,mn,m,m,sec,m,sec,-,degT,-,sec,degT
1,2023,09,05,20,10,1.6,1.5,8.3,0.3,3.7,WSW,NW,AVERAGE,6.9,247
2,2023,09,05,19,40,1.6,1.6,16.0,0.3,3.8,SSE,WNW,SWELL,7.0,164
3,2023,09,05,19,10,1.6,1.6,16.0,0.3,3.3,SSE,NW,SWELL,7.3,155
4,2023,09,05,18,40,1.5,1.5,9.1,0.3,3.7,WSW,NW,AVERAGE,7.2,254
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2196,2023,07,22,02,28,0.8,0.4,13.3,0.7,8.3,S,SSE,,5.2,183
2197,2023,07,22,01,58,0.8,0.4,13.3,0.6,6.7,S,SSE,,5.2,183
2198,2023,07,22,01,28,0.7,0.4,12.5,0.6,6.7,S,S,,5.6,169
2199,2023,07,22,00,58,0.8,0.4,13.3,0.7,5.9,S,S,AVERAGE,5.3,186


In [182]:
df_buoys.dtypes

# STATION_ID    object
OWNER           object
TTYPE           object
HULL            object
NAME            object
PAYLOAD         object
LOCATION        object
TIMEZONE        object
FORECAST        object
NOTE            object
dtype: object

### Modeling surf in Los Angeles

- need to update database every night with the latest information... can delete the data from ~5 days ago. 
- regression from previous days. 
- classify how well old buoys predicted closer buoys