In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

pd.options.display.max_columns = 25
%matplotlib inline

## Scraping NOAA for currents
The new beta predictions from NOAA allow requests for .csv, .txt, and .xml files.  A requesting URL looks like the following:

http://tidesandcurrents.noaa.gov/noaacurrents/DownloadPredictions?fmt=txt&i=&d=2015-07-12&r=1&tz=LST%2fLDT&u=1&id=ACT3431_1&t=24hr&i=&threshold=leEq&thresholdvalue=

where:
- __?fmt=txt__ could be any of the three permitted formats (__csv__, __txt__, __xml__)
- __?id=ACT3431__ is the ID for the station of interest
- __?d=2015-07-12__ is the date for the currents requested

http://tidesandcurrents.noaa.gov/noaacurrents/CreateAnnual

http://tidesandcurrents.noaa.gov/noaacurrents/CreateAnnual?id=ACT3431_1&fmt=xml&year=2016&tz=LST/LDT&u=1&t=24hr

get the whole year with CreateAnnual



In [3]:
#def getCsvData(url):
    #  Takes as input a url to http://tidesandcurrents.noaa.gov/noaacurrents/CreateAnnual 
url = 'http://tidesandcurrents.noaa.gov/noaacurrents/CreateAnnual?id=ACT3431_1&fmt=csv&year=2016&tz=LST/LDT&u=1&t=24hr'
#website = requests.get(url)
#print website.text
#thisStation = np.loadtxt(website.text,comments="Date",delimiter=",",unpack=True)
data = pd.read_csv(url,parse_dates=[1],infer_datetime_format=True)
data.head()

Unnamed: 0,Date_Time (LST/LDT),Event,Speed (knots)
0,2016-01-01 02:40,slack,-
1,2016-01-01 06:06,ebb,-2.1
2,2016-01-01 09:27,slack,-
3,2016-01-01 11:55,flood,1.1
4,2016-01-01 15:04,slack,-


In [19]:
url_dp_root = 'http://tidesandcurrents.noaa.gov/noaacurrents/DownloadPredictions?fmt=csv&tz=LST/LDT&u=1&t=24hr'
urlpre = url_dp_root+'&id=ACT3431_1&d=2015-12-31'
urlpost = url_dp_root+'&id=ACT3431_1&d=2017-01-01'
post_data = pd.read_csv(urlpost,parse_dates=True,infer_datetime_format=True)
post_data.head()

Unnamed: 0,Date_Time (LST/LDT),Event,Speed (knots)
0,2017-01-01 02:54,ebb,-2.44
1,2017-01-01 05:57,slack,-
2,2017-01-01 08:31,flood,1.42
3,2017-01-01 11:46,slack,-
4,2017-01-01 15:18,ebb,-2.50


In [4]:
data[data == '-'] = 0 #change slack values to zero knots
data = data.rename(columns={'Date_Time (LST/LDT)': 'date_time',' Event': 'event', ' Speed (knots)': 'knots'})
print('data keys=', data.keys())
data.head()

data keys= Index(['date_time', 'event', 'knots'], dtype='object')


Unnamed: 0,date_time,event,knots
0,2016-01-01 02:40,slack,0.0
1,2016-01-01 06:06,ebb,-2.1
2,2016-01-01 09:27,slack,0.0
3,2016-01-01 11:55,flood,1.1
4,2016-01-01 15:04,slack,0.0


In [5]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2829 entries, 0 to 2828
Data columns (total 3 columns):
date_time    2829 non-null object
event        2829 non-null object
knots        2829 non-null object
dtypes: object(3)
memory usage: 88.4+ KB


In [17]:
#Create hourly version of data
trng = pd.date_range('1/1/2016','01/01/2017',freq='30min')
#print trng
print(trng[0])
print(trng.size)
speedKnots = np.zeros(trng.size)
event = (('interp',)*trng.size)
data_interp = {'date_time': trng,
        'event': event,
        'knots': speedKnots}
data_interp = pd.DataFrame(data_interp)
print('\ndata_interp data types\n', data_interp.dtypes,'\n')
data_interp.head()

2016-01-01 00:00:00
17569

data_interp data types
 date_time    datetime64[ns]
event                object
knots               float64
dtype: object 



Unnamed: 0,date_time,event,knots
0,2016-01-01 00:00:00,interp,0
1,2016-01-01 00:30:00,interp,0
2,2016-01-01 01:00:00,interp,0
3,2016-01-01 01:30:00,interp,0
4,2016-01-01 02:00:00,interp,0


In [18]:
#df.dtypes
print('data data types\n', data.dtypes,'\n')
data['knots'] = pd.to_numeric(data['knots'])
data['date_time'] = pd.to_datetime(data['date_time'])
print('\ndata data types\n', data.dtypes,'\n')
#print data_interp.dtypes
concatenated = pd.concat([data,data_interp])
concatenated = concatenated.sort('date_time',ascending=1)
concatenated

data data types
 date_time    object
event        object
knots        object
dtype: object 


data data types
 date_time    datetime64[ns]
event                object
knots               float64
dtype: object 



  app.launch_new_instance()


Unnamed: 0,date_time,event,knots
0,2016-01-01 00:00:00,interp,0.0
1,2016-01-01 00:30:00,interp,0.0
2,2016-01-01 01:00:00,interp,0.0
3,2016-01-01 01:30:00,interp,0.0
4,2016-01-01 02:00:00,interp,0.0
5,2016-01-01 02:30:00,interp,0.0
0,2016-01-01 02:40:00,slack,0.0
6,2016-01-01 03:00:00,interp,0.0
7,2016-01-01 03:30:00,interp,0.0
8,2016-01-01 04:00:00,interp,0.0


In [17]:
trng = pd.date_range('1/1/2015','01/01/2016',freq='H')
print trng
print trng[0]
print trng.size
speedKnots = np.zeros(trng.size)
event = (('interp',)*trng.size)
#print event
data_interp = {'date_time': trng,
        'event': event,
        'knots': speedKnots}
data_interp = pd.DataFrame(data_interp)
data_interp

#df.dtypes
print data.dtypes
data = data.convert_objects(convert_dates=True,convert_numeric=True)
data['data_time'] = pd.to_datetime(data.date_time)
print data.dtypes
print data_interp.dtypes
concatenated = pd.concat([data,data_interp])

<class 'pandas.tseries.index.DatetimeIndex'>
[2015-01-01 00:00:00, ..., 2016-01-01 00:00:00]
Length: 8761, Freq: H, Timezone: None
2015-01-01 00:00:00
8761


Unnamed: 0,date_time,event,knots
0,2015-01-01 00:00:00,interp,0
1,2015-01-01 01:00:00,interp,0
2,2015-01-01 02:00:00,interp,0
3,2015-01-01 03:00:00,interp,0
4,2015-01-01 04:00:00,interp,0
5,2015-01-01 05:00:00,interp,0
6,2015-01-01 06:00:00,interp,0
7,2015-01-01 07:00:00,interp,0
8,2015-01-01 08:00:00,interp,0
9,2015-01-01 09:00:00,interp,0


In [28]:
#df.dtypes
print data.dtypes
data = data.convert_objects(convert_dates=True,convert_numeric=True)
data['data_time'] = pd.to_datetime(data.date_time)
print data.dtypes
print data_interp.dtypes
concatenated = pd.concat([data,data_interp])
#concatenated = concatenated.sort('date_time',ascending=1)
#concatenated
#frame.dtypes
#print df['date_time'].describe()

date_time            object
event                object
knots               float64
data_time    datetime64[ns]
dtype: object
date_time            object
event                object
knots               float64
data_time    datetime64[ns]
dtype: object
date_time    datetime64[ns]
event                object
knots               float64
dtype: object


In [13]:
newdf = df.convert_objects(convert_dates=True,convert_numeric=True)
newdf['date_time']=pd.to_datetime(df['date_time'])
newdf.dtypes
newdf

Unnamed: 0,date_time,event,knots
0,2015-01-01 00:57:00,slack,0.0
1,2015-01-01 03:37:00,flood,1.3
2,2015-01-01 06:40:00,slack,0.0
3,2015-01-01 10:30:00,ebb,-2.3
4,2015-01-01 13:33:00,slack,0.0
5,2015-01-01 16:07:00,flood,1.3
6,2015-01-01 19:10:00,slack,0.0
7,2015-01-01 22:54:00,ebb,-2.3
8,2015-01-02 01:51:00,slack,0.0
9,2015-01-02 04:31:00,flood,1.4


In [6]:
print newdf['date_time'][1]-frame['date_time'][1]

0 days 02:37:00


In [14]:
df.convert_objects(convert_dates=True,convert_numeric=True)
tdiff = df[pd.to_datetime(df['date_time'])>trng[4000]]
print tdiff

             date_time   event knots
1288  2015-06-16 17:03   slack     0
1289  2015-06-16 19:43   flood   1.5
1290  2015-06-16 22:58   slack     0
1291  2015-06-17 02:36     ebb  -2.5
1292  2015-06-17 05:39   slack     0
1293  2015-06-17 08:13   flood   1.5
1294  2015-06-17 11:22   slack     0
1295  2015-06-17 14:54     ebb  -2.4
1296  2015-06-17 17:51   slack     0
1297  2015-06-17 20:31   flood   1.5
1298  2015-06-17 23:46   slack     0
1299  2015-06-18 03:18     ebb  -2.5
1300  2015-06-18 06:27   slack     0
1301  2015-06-18 09:01   flood   1.4
1302  2015-06-18 12:10   slack     0
1303  2015-06-18 15:36     ebb  -2.4
1304  2015-06-18 18:39   slack     0
1305  2015-06-18 21:19   flood   1.4
1306  2015-06-19 00:28   slack     0
1307  2015-06-19 04:00     ebb  -2.4
1308  2015-06-19 07:09   slack     0
1309  2015-06-19 09:43   flood   1.4
1310  2015-06-19 12:52   slack     0
1311  2015-06-19 16:18     ebb  -2.3
1312  2015-06-19 19:27   slack     0
1313  2015-06-19 22:01   flood   1.4
1

In [128]:
tdiff.abs()

TypeError: bad operand type for abs(): 'str'