<img src="http://dask.readthedocs.io/en/latest/_images/dask_horizontal.svg"
     align="right"
     width="30%"
     alt="Dask logo">

Time Series
=======================

<img src="http://pandas.pydata.org/_static/pandas_logo.png"
     align="left"
     width="50%"
     alt="Pandas logo">


### Connect to Cluster

In [None]:
from dask.distributed import Client, progress

# Connect to the Dask scheduler at 127.0.0.1:8786; this cell expects one to
# be reachable at that address.
c = Client(address='127.0.0.1:8786')
c  # last expression of the cell, so the client is displayed

### Read Data from S3

In [None]:
import dask.dataframe as dd

# Build a Dask dataframe over the 2015 NYC taxi CSV files on S3.
# The bucket is read anonymously, and the pickup/dropoff columns are parsed
# as datetimes so they can be used for time-based indexing.
df = dd.read_csv(
    's3://dask-data/nyc-taxi/2015/*.csv',
    storage_options={'anon': True},
    parse_dates=['tpep_pickup_datetime', 'tpep_dropoff_datetime'],
)

# Persist the dataframe through the client and show a progress display.
df = c.persist(df)
progress(df)

### Re-index by Datetime Column (shuffle)

In [None]:
# Re-index by pickup time, then persist the re-indexed dataframe through the
# client and show a progress display.
df = df.set_index('tpep_pickup_datetime')
df = c.persist(df)
progress(df)

### Inspect result

In [None]:
# Peek at the first five rows (five is the default row count).
df.head(n=5)

In [None]:
# Peek at the last five rows (five is the default row count).
df.tail(n=5)

In [None]:
# Select rows by a date label on the index and show the first five of them.
df.loc['2015-05-05'].head(n=5)

### Resample by day

In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
# Mean passenger count per calendar day; `.compute()` materializes the
# resampled result before it is plotted.
# 'D' is the canonical pandas alias for a calendar-day frequency. The
# lowercase 'd' spelling used previously is flagged as deprecated by recent
# pandas releases, while 'D' means the same thing on old and new versions.
(df.passenger_count
   .resample('1D')
   .mean()
   .compute()
   .plot())

### Group by day of week

In [None]:
# Mean passenger count for each day of the week, grouped on the index's
# day-of-week number (pandas convention: Monday=0 ... Sunday=6). The result
# is computed, ordered by day number, and plotted.
(
    df['passenger_count']
    .groupby(df.index.dayofweek)
    .mean()
    .compute()
    .sort_index()
    .plot()
)