North East Pacific Significant Wave Height Analysis

'''
MONTEREY BAY BUOY DATA

Small coastal towns this winter in California like Santa Cruz were heavily affected by large swells 
combined with record-breaking precipitaion. 2/3 of the piers in Santa Cruz county were destroyed this last 
winter. Because of the revenue coastal areas generate, it's important for city planners and 
government agencies to understand the implications that may come from extreme ocean weather events 
and to predict the frequency of these events.

The question is: Could we have anticipated this?

Climate change predicts an increase in the frequency of extreme events. Using historical buoy data nearshore 
in Central California are we...
1. Observing an increase in wave height?
2. Observing an increase in the number of days that swell exceeds 15ft (high surf advisory)?
3. What direction are these damaging swells coming from, what other properties are associated with them? Wind?
4. What's going on with SST at this location? Is it matching with the global increase average air temperature?
5. Predicting frequency of swell events above 15ft into future?
6. Predicting water temperature into the future?
7. Cartopy map of the buoy location using lat and lon. 

Are we seeing a change in sea surface temperature that matches the increase in air temperature?

7.cartopy map of the buoy location using the lat,lons. !


'''

In [1]:
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import cartopy
import cartopy.crs as ccrs 
import cartopy.feature as cfeature
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
from shapely.geometry import Point, LineString
import geopandas as gpd
from matplotlib.ticker import AutoMinorLocator

In [4]:
url_template = 'https://www.ndbc.noaa.gov/view_text_file.php?filename=46042h{year}.txt.gz&dir=data/historical/stdmet/'
    
# create an empty list to store the data frames
dfs = []

# loop through the years
# loop through the stations 

# 46092 = Monterey Bay # 2005 - 2023

# Station 46042 (LLNR 297) - MONTEREY - 27NM WNW of Monterey, CA
# Owned and maintained by National Data Buoy Center
## time_period = range(1987, 2022)
# 36.785 N 122.396 W (36°47'5" N 122°23'46" W)
#99.0 = error for wvht and 999.0 = error for wtmp
# filter out rows with those


for year in range(1988, 2023):

    # construct the file path
    url = url_template.format(year=year)
    
    # parse the data and create a data frame
    df = pd.read_csv(url, delim_whitespace=True, skiprows=1, header=None, names=['year', 'month', 'day', 'hour', 'minute', 'wdir', 'wspd', 'gst', 'wvht', 'dpd', 'apd', 'mwd', 'pres', 'atmp', 'wtmp', 'dewp', 'vis', 'ptdy', 'tide'])
    
    # add the data frame to the list
    dfs.append(df)

# concatenate all the data frames into a single data frame
df = pd.concat(dfs, ignore_index=True)

# print the resulting data frame
df

  exec(code_obj, self.user_global_ns, self.user_ns)


Unnamed: 0,year,month,day,hour,minute,wdir,wspd,gst,wvht,dpd,apd,mwd,pres,atmp,wtmp,dewp,vis,ptdy,tide
0,88,1,1,1,10,3.0,4.0,1.7,10.0,7.4,999,1025.3,9.5,11.8,999.0,99.0,,,
1,88,1,1,2,360,3.0,4.0,1.5,9.1,7.2,999,1025.5,9.4,11.8,999.0,99.0,,,
2,88,1,1,3,50,6.0,7.0,1.6,10.0,7.0,999,1025.5,9.5,11.8,999.0,99.0,,,
3,88,1,1,4,50,7.0,8.0,1.5,16.7,6.8,999,1025.3,9.3,11.7,999.0,99.0,,,
4,88,1,1,5,60,8.0,10.0,1.6,16.7,5.9,999,1025.1,9.3,11.7,999.0,99.0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
430195,2022,12,31,23,10,297,10.4,15.2,99.0,99.0,99.0,999,1001.6,12.2,999.0,999.0,99.0,99.0,
430196,2022,12,31,23,20,296,11.9,15.5,99.0,99.0,99.0,999,1001.5,12.2,999.0,999.0,99.0,99.0,
430197,2022,12,31,23,30,302,13.0,17.9,99.0,99.0,99.0,999,1001.3,12.3,999.0,999.0,99.0,99.0,
430198,2022,12,31,23,40,307,13.5,18.4,3.86,12.9,7.17,272,1001.2,12.3,999.0,999.0,99.0,99.0,


In [11]:
df_filtered = df[df['wvht'] <= 99.0]
df_filtered.to_csv('your_filtered_file.csv', index=False)

TypeError: '<=' not supported between instances of 'str' and 'float'

In [8]:
df['wvht'].isnull().sum()

0

In [None]:
9/5 * np.average(df['wtmp']) + 32

In [None]:
avg_wave_height_by_year = df.groupby('year')['wvht'].mean()
avg_wave_height_by_month = df.groupby('month')['wvht'].mean()
print(avg_wave_height_by_year)
print(avg_wave_height_by_month)

In [None]:
plt.plot(range(1,13,1),avg_wave_height_by_month)

In [None]:
avg_water_temp_by_month = df.groupby('month')['wtmp'].mean()

In [None]:
plt.plot(range(1,13,1), avg_water_temp_by_month)

In [None]:
import pandas as pd

# define the station ID
station_id = 46225

# create an empty list to store the data frames
dfs = []

# loop through the years
for year in range(1987, 2024):

    # construct the file path
    url = 'https://www.ndbc.noaa.gov/view_text_file.php?filename={station_id}h{year}.txt.gz&dir=data/historical/stdmet/'

    # download the file and extract the data
    with pd.read_csv(url, delim_whitespace=True, skiprows=1, header=None, names=['year', 'month', 'day', 'hour', 'minute', 'wdir', 'wspd', 'gst', 'wvht', 'dpd', 'apd', 'mwd', 'pres', 'atmp', 'wtmp', 'dewp', 'vis', 'ptdy', 'tide'], index_col=['year', 'month', 'day', 'hour']) as data:

        # filter out rows with 99.0 for wvht or 999.0 for wtmp
        data = data.loc[(data['wvht'] != 99.0) & (data['wtmp'] != 999.0)]

        # add the station ID as a new column
        data['station_id'] = station_id

        # add the data frame to the list
        dfs.append(data)

# concatenate all the data frames into a single data frame
df = pd.concat(dfs, ignore_index=False)

# calculate the average wvht for each year and station ID
avg_by_year_station = df.groupby(['station_id', pd.Grouper(freq='Y')])['wvht'].mean()

# print the result
print(avg_by_year_station)


In [None]:
import pandas as pd
#https://www.ndbc.noaa.gov/view_text_file.php?filename=46042h2005.txt.gz&dir=data/historical/stdmet/
station = '46042'
url_prefix = 'https://www.ndbc.noaa.gov/view_text_file.php?filename='+station+'h'

url_suffix = '.txt.gz&dir=data/historical/stdmet/'

# List to store data from all files
all_data = []

# Loop through years from 1987 to 2023
for year in range(1987, 2024):
    # Construct URL for this year's file
    url = url_prefix + str(year) + url_suffix
    
    # Read data from URL into a Pandas DataFrame
    data = pd.read_csv(url, delim_whitespace=True, header=None, names=['year', 'month', 'day', 'hour', 'minute', 'wind_dir', 'wind_spd', 'gust', 'wave_height', 'dominant_wave_period', 'average_wave_period', 'mean_wave_dir', 'atmos_pressure', 'air_temp', 'water_temp', 'dew_point', 'rel_humidity', 'station_visibility', 'pressure_tendency'])
    
    # Add year column to DataFrame
    data['year'] = year
    
    # Add data to list
    all_data.append(data)

# Concatenate all DataFrames into one
all_data = pd.concat(all_data)


In [None]:
url_template = 'https://www.ndbc.noaa.gov/view_text_file.php?filename=46042h{year}.txt.gz&dir=data/historical/stdmet/'
    
# create an empty list to store the data frames
dfs = []

# loop through the years
# loop through the stations 

# 46092 = Monterey Bay # 2005 - 2022

# Station 46042 (LLNR 297) - MONTEREY - 27NM WNW of Monterey, CA
# Owned and maintained by National Data Buoy Center
## time_period = range(1987, 2022)
# 36.785 N 122.396 W (36°47'5" N 122°23'46" W)
#99.0 = error for wvht and 999.0 = error for wtmp
# filter out rows with those


for year in range(1987, 2023):

    # construct the file path
    url = url_template.format(year=year)
    
    # parse the data and create a data frame
    df = pd.read_csv(url, delim_whitespace=True, skiprows=1, header=None, names=['year', 'month', 'day', 'hour', 'minute', 'wdir', 'wspd', 'gst', 'wvht', 'dpd', 'apd', 'mwd', 'pres', 'atmp', 'wtmp', 'dewp', 'vis', 'ptdy', 'tide'])
    
    df['year'] = year
    # Filter out rows with wvht = 99.0 or wtmp = 999.0
    data = data.loc[~(data['wave_height'] == 99.0) & (data['water_temp'] == 999.0)]
    
    # add the data frame to the list
    dfs.append(df)

# concatenate all the data frames into a single data frame
df = pd.concat(dfs, ignore_index=True)

# print the resulting data frame
df

Intro

Method

Code

Conclusion

In [None]:
years = np.arange(2004,2020)
print(years)
url = 'https://www.ndbc.noaa.gov/view_text_file.php?filename=46225h2007.txt.gz&dir=data/historical/stdmet/'
read = pd.read_csv(url,delim_whitespace=True)
read