# Goal: 
### Merge swell, period, wind, and tide data into a single dataframe
### Convert UTC to PST and filter for daylight hours
#### This file is the clean version of ```the_big_merge.ipynb```

In [1]:
import pandas as pd
import numpy as np
import random

from datetime import datetime, timezone, timedelta
from zoneinfo import ZoneInfo
from astral import LocationInfo
from astral.sun import sun

In [2]:
# Load the four cleaned interim datasets; each is later joined on its 'UTC' column.
swell, tide, wind, period = (
    pd.read_csv(csv_path)
    for csv_path in (
        'data/interim/01-swell.csv',
        'data/interim/00-tide.csv',
        'data/interim/00-wind.csv',
        'data/interim/00-period.csv',
    )
)

In [3]:
# Left-join everything onto the tide table by 'UTC', one dataset at a time:
# tide -> + swell -> + period -> + wind. Rows of `tide` are always kept;
# timestamps missing from the other tables yield NaN in their columns.
ts = pd.merge(tide, swell, on='UTC', how='left')
tsp = pd.merge(ts, period, on='UTC', how='left')
tspw = pd.merge(tsp, wind, on='UTC', how='left')

In [4]:
# Manual fix for one duplicated row in the merged table.
# NOTE(review): the positions below are hard-coded and only valid for the
# current input CSVs and merge order -- re-check them if the inputs change.
# Presumably row 21453 repeats row 21452's timestamp; verify against the data.
kept_row_pos = 21452       # positional row that is kept and corrected
fixed_col_pos = 6          # positional column whose value is overwritten
duplicate_row_label = 21453  # index label of the row that is removed

tspw.iloc[kept_row_pos, fixed_col_pos] = 10
tspw = tspw.drop(index=duplicate_row_label)

In [5]:
# The 'UTC' column comes back from the CSVs as plain text, so parse it into
# timezone-aware (UTC) datetimes before doing any time arithmetic.
utc_parsed = pd.to_datetime(tspw['UTC'], utc=True)
tspw['UTC'] = utc_parsed

In [6]:
# Create the local-time column from the UTC timestamps.
# NOTE: the column is called 'PST', but the 'America/Los_Angeles' zone is
# DST-aware, so values carry whichever Pacific offset applies on each date.
PST = ZoneInfo('America/Los_Angeles')
# Vectorized conversion: same instants as 'UTC', expressed in the local zone.
tspw['PST'] = tspw['UTC'].dt.tz_convert(PST)

In [7]:
# Create the Dawn and Dusk columns: for every row, the dawn/dusk time at
# Isla Vista on that row's local calendar date, expressed in the PST zone.
iv = LocationInfo("Isla Vista", "California", "America/Los_Angeles", 34.41302853802114, -119.8615254859206)

# Sun times are looked up by calendar date, so compute them once per
# distinct local date instead of twice per row.
local_dates = [ts.date() for ts in tspw['PST']]
sun_by_date = {
    d: sun(iv.observer, date=d, tzinfo=PST)
    for d in set(local_dates)
}

dawn = [sun_by_date[d]['dawn'] for d in local_dates]
dusk = [sun_by_date[d]['dusk'] for d in local_dates]
tspw['Dawn'] = dawn
tspw['Dusk'] = dusk

In [8]:
# Build the daylight mask: True where Dawn <= PST <= Dusk (both ends inclusive).
# Computed column-wise rather than row by row; `day` stays a plain list of
# booleans, one per row of `tspw`, in row order.
in_daylight = (tspw['Dawn'] <= tspw['PST']) & (tspw['PST'] <= tspw['Dusk'])
day = in_daylight.to_list()

In [10]:
# Write the daylight-only rows to the processed dataset, keeping just the
# local timestamp and the measurement columns. No `index=False` is passed,
# so the row index is written as the first CSV column as well.
output_columns = ['PST','Tide','Height','Deg','Period','Wind Speed', 'Wind Direction']
daylight_rows = tspw.loc[day, output_columns]
daylight_rows.to_csv('data/processed/final.csv')