In [None]:
import pandas
import numpy as np
%matplotlib inline

## Series

In [None]:
# Build a 10-element Series from standard-normal random draws. No seed is set
# in the visible cells, so the values differ from run to run.
series = pandas.Series(np.random.standard_normal(10))

In [None]:
# Display the whole Series (index on the left, values on the right).
series

In [None]:
# Look up the entry with key 4; the index has not been relabelled yet.
series[4]

In [None]:
# Slice with the integer bounds 1:4.
series[1:4]

In [None]:
# Inspect the index object that labels the entries.
series.index

In [None]:
# Replace the index with the ten single-letter labels 'a' .. 'j'.
series.index = list('abcdefghij')

In [None]:
# list() applied to a string yields one element per character.
list('abcdefghij')

In [None]:
# Look up a single entry by its label.
series['e']

In [None]:
# Slice by label; pandas label slices include the end label ('f').
series['a' : 'f']

In [None]:
# Build a Series with explicit labels and immediately select the 'foo' entry.
pandas.Series([1,2,3], index=['foo', 'bar', 'baz'])['foo']

In [None]:
# Build a Series from a dict; the dict keys become the index labels.
pandas.Series({'a': 12, 'b': 42})

In [None]:
# Display the relabelled Series again.
series

In [None]:
# Arithmetic with a scalar is applied to every element.
series + 5

In [None]:
# Element-wise square.
series**2

In [None]:
# Adding two Series combines the entries that share a label.
series + series

In [None]:
# Sum of squares via the vectorised Series.sum() rather than the Python
# builtin sum(), which would iterate over the Series one element at a time.
(series**2).sum()

In [None]:
# Add a Series that only has the label 'a'. pandas aligns the operands on
# their index labels, so labels missing from one side come out as NaN.
series + pandas.Series({'a': 2})

In [None]:
# Same idea with two labels present on the right-hand side.
series + pandas.Series({'a': 2, 'c': 2})

In [None]:
# dropna() discards the NaN entries, leaving only labels found in both operands.
(series + pandas.Series({'a': 2, 'c': 2})).dropna()

## Data Frames

In [None]:
# Small example frame: one row per fish species with a 'size' and a 'weight'
# column (units are not stated in this notebook). Written row by row.
fish = pandas.DataFrame(
    [[100, 20],
     [120, 30],
     [70, 25]],
    index=['Brown Trout', 'Atlantic Salmon', 'Chinook Salmon'],
    columns=['size', 'weight'],
)

In [None]:
# Display the example frame.
fish

In [None]:
# The row labels (species names).
fish.index

In [None]:
# A column can be read as an attribute when its name is a valid identifier...
fish.weight

In [None]:
# ...but 'size' is read with brackets: fish.size would return DataFrame's own
# `size` attribute (the element count) instead of the column.
fish['size']

In [None]:
# Comparing a column with a scalar yields a boolean Series, one flag per row.
fish['size'] > 100

In [None]:
# Indexing the frame with that boolean Series keeps only the rows flagged True.
fish[fish['size'] > 100]

In [None]:
# 'weight' of the rows whose 'size' exceeds 100, selected with a single
# .loc[row_mask, column] lookup instead of chaining two [] operations.
fish.loc[fish['size'] > 100, 'weight']

## Reading

In [None]:
# Print every attribute of the pandas module whose name starts with "read",
# i.e. the available reader functions. print() is called as a function so the
# cell runs on Python 3 as well as Python 2.
for name in dir(pandas):
    if name.startswith("read"):
        print(name)

In [None]:
# Load the trip data from a gzip-compressed CSV; the path is relative to the
# directory the notebook is run from.
df = pandas.read_csv('data/201508_trip_data.csv.gz')

In [None]:
# Column names, dtypes and non-null counts.
df.info()

In [None]:
# Summary statistics of the numeric columns.
df.describe()

In [None]:
# Histogram of the 'Duration' column over its full range.
# NOTE(review): Duration is presumably in seconds (a later cell filters on
# 60*45) -- confirm against the data description.
df['Duration'].plot.hist()

In [None]:
# Same histogram, with the x axis limited to 0..100.
df['Duration'].plot.hist(xlim=(0,100))

In [None]:
# Histogram (30 bins) of the durations below 60*45 -- i.e. 45 minutes if
# Duration is in seconds (TODO confirm the unit). Rows and column are selected
# in one .loc[row_mask, column] lookup instead of chaining two [] operations.
df.loc[df['Duration'] < 60*45, 'Duration'].plot.hist(bins=30)

In [None]:
# Peek at the raw 'Start Date' values before they are parsed in the next cell.
df['Start Date'].head()

In [None]:
import datetime


def mdy_hm(datetimestring):
    """Parse one 'month/day/year hour:minute' string into a datetime.

    Parameters: datetimestring -- text matching the format '%m/%d/%Y %H:%M'.
    Returns: the corresponding datetime.datetime.
    Raises: ValueError (from strptime) if the text does not match the format.
    """
    return datetime.datetime.strptime(datetimestring,
                            '%m/%d/%Y %H:%M')


# Parse the whole column in one vectorised call, using the same format as
# mdy_hm. Unlike applying mdy_hm element by element, this does not fail when
# the cell is re-run on a column that has already been converted.
df['Start Date'] = pandas.to_datetime(df['Start Date'], format='%m/%d/%Y %H:%M')

## Pivot / Stack

In [None]:
# Start again from the small wide-format example frame.
fish

In [None]:
# stack() moves the column labels into an inner index level (wide -> long).
fish.stack()

In [None]:
# Show the type of the stacked result.
type(fish.stack())

In [None]:
# reset_index() turns the index levels back into ordinary columns.
stacked = fish.stack().reset_index()

In [None]:
# Inspect the long-format frame with its auto-generated column names.
stacked

In [None]:
# Give the three columns meaningful names.
stacked.columns = ['name', 'info', 'value']

In [None]:
# Display the renamed long-format frame.
stacked

In [None]:
# pivot() reverses the reshaping: one row per 'name', one column per 'info'.
stacked.pivot(index='name', columns='info', values='value')

In [None]:
# Back to the trip data loaded in the Reading section.
df.head()

In [None]:
# Start stations to keep for the rest of the analysis.
stations = ['Embarcadero at Sansome',
 'Temporary Transbay Terminal (Howard at Beale)',
 'Harry Bridges Plaza (Ferry Building)',
 'San Francisco Caltrain 2 (330 Townsend)',
 'San Francisco Caltrain (Townsend at 4th)']
# Series.isin performs the membership test in one vectorised call instead of
# running a Python lambda for every row. df is narrowed in place: every later
# cell only sees trips that start at one of the stations above.
df = df[df['Start Station'].isin(stations)]

In [None]:
# Check the filtered trips.
df.head()

In [None]:
# Keep only the three columns needed for the pivot.
departures = df[['Start Station', 'Start Date', 'Duration']]

In [None]:
departures.head()

In [None]:
# One row per start timestamp, one column per start station, cells holding
# 'Duration'. pivot_table aggregates with the mean by default, so trips that
# share a timestamp and station are averaged.
pivoted = departures.pivot_table(index='Start Date', columns='Start Station', values='Duration')

In [None]:
pivoted.head()

## Time Series

In [None]:
# Resample into 1-day bins and take the mean of each bin. This relies on the
# 'Start Date' index being datetime-like (it is parsed in the Reading section).
daily_averages = pivoted.resample('1d').mean()

In [None]:
daily_averages.head()

In [None]:
# All rows from the year 2014. .loc makes this a row lookup on the datetime
# index; plain daily_averages['2014'] is treated as a column lookup by
# pandas >= 2.0 and raises KeyError.
daily_averages.loc['2014'].head()

In [None]:
# All rows from October 2014, selected by a partial date string. .loc is used
# because plain daily_averages['2014-10'] is treated as a column lookup by
# pandas >= 2.0 and raises KeyError.
daily_averages.loc['2014-10'].head()

In [None]:
# Rows in the date range 2014-10-02 .. 2014-10-07, written with .loc so the
# row selection is explicit and matches the other date lookups.
daily_averages.loc['2014-10-2':'2014-10-7'].head()

## Groupby

In [None]:
# Toy frame for the groupby demonstration: keys 'a' and 'b', each appearing
# twice. Written as one (key, value) record per row.
groupby_example = pandas.DataFrame(
    [('a', 1), ('b', 2), ('a', 1), ('b', 2)],
    columns=['key', 'value'],
)

In [None]:
# Display the toy frame.
groupby_example

In [None]:
# Split the rows by 'key' and sum the remaining column within each group.
groupby_example.groupby('key').sum()

In [None]:
# Tag every day with its weekday number taken from the datetime index
# (pandas counts Monday as 0).
daily_averages['Weekday'] = daily_averages.index.weekday

In [None]:
# Mean of the daily averages for each weekday number.
mean_weekday = daily_averages.groupby('Weekday').mean()

In [None]:
# Bar chart of the weekday means, with the y axis fixed to 0..5000.
mean_weekday.plot(kind='bar', ylim=(0, 5000))

In [None]:
import calendar

In [None]:
# Label every day with its abbreviated weekday name. The names are derived
# from the datetime index rather than from the existing 'Weekday' column, so
# re-running this cell gives the same result instead of failing on values
# that have already been converted to strings.
daily_averages['Weekday'] = [calendar.day_abbr[day] for day in daily_averages.index.weekday]

In [None]:
# Recompute the per-weekday means now that 'Weekday' holds name abbreviations.
# NOTE(review): groupby sorts its keys by default, so the groups are presumably
# in alphabetical rather than calendar order here -- confirm this is intended.
mean_weekday = daily_averages.groupby('Weekday').mean()
mean_weekday.plot(kind='bar', ylim=(0, 5000))

In [None]:
# Weekday means for a single station.
mean_weekday['Embarcadero at Sansome']

In [None]:
# Positional row selection: the rows at positions 1 and 3 of mean_weekday.
mean_weekday.iloc[[1,3]]

In [None]:
# Tag every day with its month number taken from the datetime index.
daily_averages['Month'] = daily_averages.index.month

In [None]:
# Mean of the daily averages per calendar month, as a bar chart with the y
# axis fixed to 0..4000. 'Weekday' holds strings by this point, and
# groupby(...).mean() raises TypeError on non-numeric columns in pandas >= 2.0,
# so that column is dropped before aggregating.
daily_averages.drop('Weekday', axis=1, errors='ignore').groupby('Month').mean().plot.bar(ylim=(0, 4000))