In [1]:
from __future__ import print_function, division
import requests
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

pd.options.display.max_columns = 25
%matplotlib inline

## Scraping NOAA for currents
The new beta predictions from NOAA allow requests for .csv, .txt, and .xml files.  A requesting URL looks like the following:

http://tidesandcurrents.noaa.gov/noaacurrents/DownloadPredictions?fmt=txt&i=&d=2015-07-12&r=1&tz=LST%2fLDT&u=1&id=ACT3431_1&t=24hr&i=&threshold=leEq&thresholdvalue=

where:
- __?fmt=txt__ could be any of the three permitted formats (__csv__, __txt__, __xml__)
- __?id=ACT3431__ is the ID for the station of interest
- __?d=2015-07-12__ is the date for the currents requested

http://tidesandcurrents.noaa.gov/noaacurrents/CreateAnnual

http://tidesandcurrents.noaa.gov/noaacurrents/CreateAnnual?id=ACT3431_1&fmt=xml&year=2016&tz=LST/LDT&u=1&t=24hr

get the whole year with CreateAnnual



In [2]:
#def getCsvData(url):
    #  Takes as input a url to http://tidesandcurrents.noaa.gov/noaacurrents/CreateAnnual 
#website = requests.get(url)
#print website.text
#thisStation = np.loadtxt(website.text,comments="Date",delimiter=",",unpack=True)
url = 'http://tidesandcurrents.noaa.gov/noaacurrents/CreateAnnual?id=ACT3431_1&fmt=csv&year=2016&tz=LST/LDT&u=1&t=24hr'
data = pd.read_csv(url,parse_dates=[1],infer_datetime_format=True)
data.head()

Unnamed: 0,Date_Time (LST/LDT),Event,Speed (knots)
0,2016-01-01 02:40,slack,-
1,2016-01-01 06:06,ebb,-2.1
2,2016-01-01 09:27,slack,-
3,2016-01-01 11:55,flood,1.1
4,2016-01-01 15:04,slack,-


In [5]:
data[data == '-'] = 0 #change slack values to zero knots
data = data.rename(columns={'Date_Time (LST/LDT)': 'date_time',' Event': 'event', ' Speed (knots)': 'knots'})
print('data keys=', data.keys())
data.head()

data keys= Index([u'date_time', u'event', u'knots'], dtype='object')


Unnamed: 0,date_time,event,knots
0,2016-01-01 02:40,slack,0.0
1,2016-01-01 06:06,ebb,-2.1
2,2016-01-01 09:27,slack,0.0
3,2016-01-01 11:55,flood,1.1
4,2016-01-01 15:04,slack,0.0


In [6]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2829 entries, 0 to 2828
Data columns (total 3 columns):
date_time    2829 non-null object
event        2829 non-null object
knots        2829 non-null object
dtypes: object(3)
memory usage: 88.4+ KB


In [7]:
#Create hourly version of data
trng = pd.date_range('1/1/2016','01/01/2017',freq='30min')
#print trng
print(trng[0])
print(trng.size)
speedKnots = np.zeros(trng.size)
event = (('interp',)*trng.size)
data_interp = {'date_time': trng,
        'event': event,
        'knots': speedKnots}
data_interp = pd.DataFrame(data_interp)
print('\ndata_interp data types\n', data_interp.dtypes,'\n')
data_interp.head()

2016-01-01 00:00:00
17569

data_interp data types
 date_time    datetime64[ns]
event                object
knots               float64
dtype: object 



Unnamed: 0,date_time,event,knots
0,2016-01-01 00:00:00,interp,0
1,2016-01-01 00:30:00,interp,0
2,2016-01-01 01:00:00,interp,0
3,2016-01-01 01:30:00,interp,0
4,2016-01-01 02:00:00,interp,0


In [8]:
#convert the data to the appropriate dtypes
print('data data types\n', data.dtypes,'\n')
data['knots'] = pd.to_numeric(data['knots'])
data['date_time'] = pd.to_datetime(data['date_time'])
print('\ndata data types\n', data.dtypes,'\n')

concatenated = pd.concat([data,data_interp])
concatenated = concatenated.sort_values(by='date_time',ascending=True)
concatenated

data data types
 date_time    object
event        object
knots        object
dtype: object 


data data types
 date_time    datetime64[ns]
event                object
knots               float64
dtype: object 





Unnamed: 0,date_time,event,knots
0,2016-01-01 00:00:00,interp,0.0
1,2016-01-01 00:30:00,interp,0.0
2,2016-01-01 01:00:00,interp,0.0
3,2016-01-01 01:30:00,interp,0.0
4,2016-01-01 02:00:00,interp,0.0
5,2016-01-01 02:30:00,interp,0.0
0,2016-01-01 02:40:00,slack,0.0
6,2016-01-01 03:00:00,interp,0.0
7,2016-01-01 03:30:00,interp,0.0
8,2016-01-01 04:00:00,interp,0.0


In [17]:
def CosineInterpolate(y1, y2, mu):
   mu2 = (1-np.cos(mu*np.pi))/2
   return (y1*(1-mu2)+y2*mu2)

print(CosineInterpolate(1.2,-2.18,200/401))
print(CosineInterpolate(-1.2,2.18,.5))
print(CosineInterpolate(-2.1,1.1,200/401))

-0.48337995261
0.49
-0.506267500487


In [None]:
trng = pd.date_range('1/1/2015','01/01/2016',freq='H')
print trng
print trng[0]
print trng.size
speedKnots = np.zeros(trng.size)
event = (('interp',)*trng.size)
#print event
data_interp = {'date_time': trng,
        'event': event,
        'knots': speedKnots}
data_interp = pd.DataFrame(data_interp)
data_interp

#df.dtypes
print data.dtypes
data = data.convert_objects(convert_dates=True,convert_numeric=True)
data['data_time'] = pd.to_datetime(data.date_time)
print data.dtypes
print data_interp.dtypes
concatenated = pd.concat([data,data_interp])

In [None]:
#df.dtypes
print data.dtypes
data = data.convert_objects(convert_dates=True,convert_numeric=True)
data['data_time'] = pd.to_datetime(data.date_time)
print data.dtypes
print data_interp.dtypes
concatenated = pd.concat([data,data_interp])
#concatenated = concatenated.sort('date_time',ascending=1)
#concatenated
#frame.dtypes
#print df['date_time'].describe()

In [None]:
newdf = df.convert_objects(convert_dates=True,convert_numeric=True)
newdf['date_time']=pd.to_datetime(df['date_time'])
newdf.dtypes
newdf

In [None]:
print newdf['date_time'][1]-frame['date_time'][1]

In [None]:
df.convert_objects(convert_dates=True,convert_numeric=True)
tdiff = df[pd.to_datetime(df['date_time'])>trng[4000]]
print tdiff

In [None]:
tdiff.abs()

In [3]:
#get day before and day after
url_dp_root = 'http://tidesandcurrents.noaa.gov/noaacurrents/DownloadPredictions?fmt=csv&tz=LST/LDT&u=1&t=24hr'
urlpre = url_dp_root+'&id=ACT3431_1&d=2015-12-31'
pre_data = pd.read_csv(urlpre,parse_dates=[1],infer_datetime_format=True)
print(pre_data.head(10))
print(pre_data.info())
urlpost = url_dp_root+'&id=ACT3431_1&d=2017-01-01'
post_data = pd.read_csv(urlpost,parse_dates=True,infer_datetime_format=True)
print(post_data.head())
print(post_data.info())

data_cat = pd.concat([pre_data,data,post_data])
data_cat.head(15)

  Date_Time (LST/LDT)   Event  Speed (knots)
0    2015-12-31 01:52   slack              -
1    2015-12-31 05:12     ebb          -2.20
2    2015-12-31 08:33   slack              -
3    2015-12-31 11:01   flood           1.20
4    2015-12-31 14:10   slack              -
5    2015-12-31 17:36     ebb          -2.18
6    2015-12-31 21:03   slack              -
7    2015-12-31 23:25   flood           1.16
8    2016-01-01 02:40   slack              -
9    2016-01-01 06:06     ebb          -2.12
<class 'pandas.core.frame.DataFrame'>
Int64Index: 15 entries, 0 to 14
Data columns (total 3 columns):
Date_Time (LST/LDT)    15 non-null object
 Event                 15 non-null object
 Speed (knots)         15 non-null object
dtypes: object(3)
memory usage: 480.0+ bytes
None
  Date_Time (LST/LDT)   Event  Speed (knots)
0    2017-01-01 02:54     ebb          -2.44
1    2017-01-01 05:57   slack              -
2    2017-01-01 08:31   flood           1.42
3    2017-01-01 11:46   slack              -
4 