In [1]:
import pandas as pd
import warnings
import xarray as xr
import ephem
from datetime import datetime, timedelta
import math
import numpy as np
warnings.filterwarnings('ignore')
%matplotlib inline
import matplotlib.pyplot as plt

# Project-local settings. This notebook later uses `station`, `buoy_lat`,
# `buoy_lon`, `output_path_nrt` and `output_path_buoy_data` without defining
# them, so they presumably come from this wildcard import — TODO confirm
# against myconfig.
from myconfig import *
# NOTE(review): `output_path` is not referenced again in the visible cells;
# the CSV at the end is written under `output_path_nrt` instead.
output_path = output_path_buoy_data

def calculate_sunlight(lat, lon, timestamps):
    """Compute the sun's elevation above the horizon for each timestamp.

    Parameters
    ----------
    lat, lon : number or str
        Observer position. Both are converted with ``str()`` before being
        handed to PyEphem, which interprets string coordinates as degrees.
    timestamps : iterable of datetime-like
        Moments at which to evaluate the sun. Each value is assigned to
        ``observer.date`` unchanged.

    Returns
    -------
    pandas.DataFrame
        Two columns: ``'timestamp'`` (the input, as given) and
        ``'sunlight_fraction'``.

    Notes
    -----
    Despite the column name, the value is NOT scaled to 0..1: it is the solar
    altitude converted to degrees (``sun.alt / ephem.degree``) and floored at
    0, so it ranges from 0 (sun at or below the horizon) up to 90 (zenith).
    NOTE(review): confirm whether downstream consumers expect degrees or a
    true 0..1 fraction before changing this.
    """
    site = ephem.Observer()
    site.lat = str(lat)
    site.lon = str(lon)

    solar_body = ephem.Sun()

    def elevation_at(moment):
        # Point the shared observer at this moment and recompute the sun.
        site.date = moment
        solar_body.compute(site)
        # Convert the altitude to degrees, then clip night-time
        # (negative) values to 0.
        degrees_above_horizon = solar_body.alt / ephem.degree
        return max(0, degrees_above_horizon)

    elevations = [elevation_at(moment) for moment in timestamps]

    return pd.DataFrame({'timestamp': timestamps, 'sunlight_fraction': elevations})



In [2]:
# URL of the NDBC "realtime2" text file for the configured station.
# `station` is not defined in this notebook — presumably it is supplied by
# the wildcard import from myconfig (TODO confirm).
fname = 'https://www.ndbc.noaa.gov/data/realtime2/'+station+'.txt'
# More info (example station page): https://www.ndbc.noaa.gov/station_page.php?station=41043 — per the original note, observations arrive at 10-minute intervals.

In [3]:
# Load the station's whitespace-delimited realtime file; 'MM' marks a missing
# measurement and is read as NaN.
# `sep=r'\s+'` is the documented replacement for the deprecated
# `delim_whitespace=True` (same parsing, no FutureWarning / future removal).
df_buoy = pd.read_csv(fname, sep=r'\s+', header=0, parse_dates=True, na_values='MM')

# The first line after the header holds the units, not data: drop it.
# The original integer labels are kept, so the remaining rows start at label 1.
df_buoy = df_buoy.drop(index=0)

In [4]:
# pd.to_datetime can assemble a timestamp from a frame whose columns are
# named year/month/day/hour/minute, so map the file's date headers to those.
date_part_names = {"#YY": "year", "MM": "month", "DD": "day", "hh": "hour", "mm": "minute"}
df_buoy = df_buoy.rename(columns=date_part_names)

# Collapse the five date/time parts into one 'Timestamp' column.
date_parts = list(date_part_names.values())
df_buoy['Timestamp'] = pd.to_datetime(df_buoy[date_parts])

# Drop the now-redundant date parts plus the measurements this notebook
# does not use.
unused_measurements = ['DPD', 'MWD', 'PRES', 'DEWP', 'VIS', 'PTDY', 'TIDE']
# NOTE(review): .drop(index=1) removes the row labelled 1, i.e. the first row
# left after the units row (label 0) was dropped in the previous cell —
# presumably the most recent observation; confirm this is intentional.
df_clean_buoy = df_buoy.drop(columns=date_parts + unused_measurements).drop(index=1)


In [5]:
# Threshold for "the last 7 days".
# The buoy timestamps are timezone-naive values that NDBC publishes in UTC,
# so "now" must also be naive UTC. The previous datetime.now() returned local
# wall-clock time, which shifted the window by the machine's UTC offset.
seven_days_ago = pd.Timestamp.now(tz='UTC').tz_localize(None) - timedelta(days=7)

# Keep only the rows observed at or after the threshold.
df_last_7_days = df_clean_buoy[df_clean_buoy['Timestamp'] >= seven_days_ago]

In [6]:
# Measurement columns that must be numeric before gaps can be filled.
columns_to_convert = ['WDIR', 'WSPD', 'GST', 'WVHT', 'APD', 'ATMP', 'WTMP']

# Flip the row order and renumber the index from 0.
df_reversed_clean_buoy = df_last_7_days.iloc[::-1].reset_index(drop=True)

# Parse each measurement column as a number; anything unparseable becomes NaN.
numeric_readings = df_reversed_clean_buoy[columns_to_convert].apply(pd.to_numeric, errors='coerce')
df_reversed_clean_buoy[columns_to_convert] = numeric_readings

# Fill NaN gaps by linear interpolation, extending to both ends of the record.
df_reversed_clean_buoy = df_reversed_clean_buoy.interpolate(method='linear', limit_direction='both')

In [7]:
#df_reversed_clean_buoy.shape

In [8]:
# Buoy position: `buoy_lat` / `buoy_lon` are not defined in this notebook —
# presumably they come from myconfig (TODO confirm). Example values kept from
# an earlier hard-coded version:
#lat, lon =  19.71361111111111, -67.31083333333333

# Observation times of the cleaned, re-indexed 7-day record.
timestamps = df_reversed_clean_buoy['Timestamp']

# Evaluate the sun's elevation at every observation time. The result has
# columns 'timestamp' and 'sunlight_fraction'.
sunlight_df = calculate_sunlight(buoy_lat, buoy_lon, timestamps)

In [9]:
# Attach the sun values as a new column. Assignment aligns on the index:
# sunlight_df was built from this frame's own 'Timestamp' Series, so both
# share the same 0..n-1 labels. The values are solar altitude in degrees
# floored at 0 (see calculate_sunlight), not a 0..1 fraction.
df_reversed_clean_buoy["sun_exposure"] = sunlight_df["sunlight_fraction"]

In [10]:
#plt.plot(df_reversed_clean_buoy.sun_exposure)

In [11]:
# Resample the record onto 7200 evenly spaced fractional row positions by
# linear interpolation, so the output always has the same length no matter
# how many observations were available.
df = df_reversed_clean_buoy
if df.empty:
    # np.interp would otherwise fail with an opaque "array of sample points
    # is empty" error.
    raise ValueError("No buoy observations available to interpolate (empty input frame).")

source_index = np.arange(len(df))
new_index = np.linspace(0, len(df) - 1, 7200)

# Interpolate every measurement column in one pass. Column order matches the
# source frame, with 'Timestamp' appended last.
df_interpolated = pd.DataFrame({
    var: np.interp(new_index, source_index, df[var])
    for var in df.columns
    if var != 'Timestamp'
})

# Timestamps are interpolated through their integer representation.
# 'int64' is requested explicitly because astype(int) on datetime data is
# platform/version dependent. pd.to_datetime already yields datetimes, so no
# second conversion is needed.
timestamp_as_int = df['Timestamp'].astype('int64')
df_interpolated['Timestamp'] = pd.to_datetime(np.interp(new_index, source_index, timestamp_as_int))

In [12]:
#plt.plot(df_interpolated.sun_exposure)
#df_expanded.sun_exposure[60:120]

In [13]:
# Min-max normalise each measurement column to the range 0..1:
#     (x - min) / (max - min)
# 'Timestamp' and 'sun_exposure' are left untouched.
df = df_interpolated
# NOTE: `vars` shadows the Python builtin of the same name; the name is kept
# only so any later code that reads it keeps working.
vars = ['WDIR','WSPD','GST','WVHT','APD','ATMP','WTMP']

# Work on a copy so the un-normalised df_interpolated is not silently
# overwritten (previously df_normalized was just another name for it).
df_normalized = df.copy()
for var in vars:
    lowest = df[var].min()
    span = df[var].max() - lowest
    if span == 0:
        # Constant column: 0/0 would fill the output with NaN, so map it to 0.
        df_normalized[var] = 0.0
    else:
        # An all-NaN column gives a NaN span and stays NaN, as before.
        df_normalized[var] = (df[var] - lowest) / span

In [14]:
#df_normalized.head(10)

In [15]:
# Column layout of the exported file: timestamp first, then the normalised
# measurements, then the sun column.
new_order = [
    'Timestamp',
    'WDIR', 'WSPD', 'GST', 'WVHT', 'APD', 'ATMP', 'WTMP',
    'sun_exposure',
]
new_df = df_normalized.loc[:, new_order]

# NOTE(review): `fname` previously held the download URL; after this cell it
# points at the output file, so re-running the load cell out of order would
# read the CSV instead.
fname = output_path_nrt + 'buoy_nrt.csv'
new_df.to_csv(fname, index=False)

In [16]:
#new_df.head(10)