In [None]:
import random

import pandas as pd

In [None]:
# this allows plotting within the notebook
%pylab inline

# Series - Pandas Columnar Data

In [None]:
# [random.uniform(0, 9999) for _ in range(5)]

In [None]:
# Seed the generator so the five example values are identical on every
# Restart & Run All (unseeded, each run would show different numbers).
random.seed(42)
s = pd.Series([random.uniform(0, 9999) for _ in range(5)])
s

In [None]:
# Show the Python type of `s` to confirm what pd.Series(...) returned
type(s)

In [None]:
# Inspect the index; `s` was built without one, so this is the default pandas assigns
s.index

In [None]:
# Series objects support list-style [] indexing. `s` has the default integer
# index, so the key 0 selects the first element.
s[0]

In [None]:
# An integer slice selects by position and, like a list slice, excludes the stop
s[1:3]

In [None]:
# Unlike a plain list, a Series also accepts a list of index labels to pick several elements
s[[1,3]]

http://pandas.pydata.org/pandas-docs/stable/dsintro.html#series

http://pandas.pydata.org/pandas-docs/stable/api.html#series

## Simple Plotting

In [None]:
# Default plot kind is a line chart: index on the x-axis, values on the y-axis
s.plot.line()

In [None]:
# Same data as vertical bars, one bar per index entry
s.plot.bar()

In [None]:
# Same data as a pie chart, one wedge per index entry
s.plot.pie()

# Dataframes - Pandas Tabular Data

## Sales Data

In [None]:
# Load the sales records; header=0 takes the column names from the first line of the file
sales = pd.read_csv(
    'data/sales.csv',
    header=0,
)

In [None]:
# Print a summary of the frame: its index, columns, non-null counts and dtypes
sales.info()

In [None]:
# Inspect the row index; read_csv was called without index_col, so pandas assigned the default
sales.index

In [None]:
# Preview the first five rows of the freshly loaded frame
sales.head(n=5)

In [None]:
# sales.tail()

## Selecting Columns

In [None]:
# A single column name returns a Series; show its first ten entries
sales['AGENT'].head(10)

In [None]:
# A list of column names returns a DataFrame; show its first ten rows
sales[['AGENT', 'CUSTOMER']].head(10)

## Selecting Data By Index
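`.ix` accepts mixed label and position indexers, but it was deprecated in pandas 0.20 and removed in pandas 1.0. Use `.loc` for label-based selection and `.iloc` for integer-position-based selection.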

In [None]:
# single row, selected by index label
# (.loc replaces .ix, which was removed in pandas 1.0)
sales.loc[0]

In [None]:
# multiple rows, selected with a list of index labels
# (.loc replaces .ix, which was removed in pandas 1.0)
sales.loc[[1, 3]]

In [None]:
# range of rows; a label slice includes BOTH endpoints, so this returns rows 0, 1 and 2
# (.loc replaces .ix, which was removed in pandas 1.0)
sales.loc[0:2]

In [None]:
# single row and single column: returns the scalar stored in that cell
# (.loc replaces .ix, which was removed in pandas 1.0)
sales.loc[0, 'AGENT']

In [None]:
# range of rows and range of columns; label slices include both endpoints on each axis
# (.loc replaces .ix, which was removed in pandas 1.0)
sales.loc[1:3, 'AGENT':'CUSTOMER']

In [None]:
# multiple rows and multiple columns, each given as a list of labels
# (.loc replaces .ix, which was removed in pandas 1.0)
sales.loc[[1, 3], ['AGENT', 'CUSTOMER']]

http://pandas.pydata.org/pandas-docs/stable/indexing.html

## Calculating Total Sales

DataFrames are mutable, so a new column can be added to an existing frame in place.

In [None]:
# Add a derived column in place: element-wise product of COUNT and PRICE for every row
sales['TOTAL_SALE'] = sales['COUNT'].mul(sales['PRICE'])

In [None]:
# Preview the first five rows again to see the new TOTAL_SALE column
sales.head(n=5)

## Filtering by Year

SELL_DATE is actually a string.

In [None]:
# Look at one raw SELL_DATE value (.loc replaces .ix, which was removed in pandas 1.0)
sales.loc[0, 'SELL_DATE']

In [None]:
# Check the Python type of that value (.loc replaces .ix, which was removed in pandas 1.0)
type(sales.loc[0, 'SELL_DATE'])

We can set it to a Pandas Timestamp object.

In [None]:
# Parse the SELL_DATE column with pd.to_datetime and store the result back in the same column
sales['SELL_DATE'] = pd.to_datetime(sales.SELL_DATE)

In [None]:
# Preview the first five rows after the SELL_DATE conversion
sales.head(n=5)

In [None]:
# Look at the same cell after conversion (.loc replaces .ix, which was removed in pandas 1.0)
sales.loc[0, 'SELL_DATE']

In [None]:
# Check the type of the converted value (.loc replaces .ix, which was removed in pandas 1.0)
type(sales.loc[0, 'SELL_DATE'])

Now we can do things like grab the year.

In [None]:
# Read the year component of a single value (.loc replaces .ix, which was removed in pandas 1.0)
sales.loc[0, 'SELL_DATE'].year

Or, filter by year.

In [None]:
# sales.SELL_DATE.dt.year == 2015

In [None]:
# Build a boolean mask that is True where the sale year is 2015, keep only those rows,
# and preview the first five
sales.loc[sales['SELL_DATE'].dt.year.eq(2015)].head()

http://pandas.pydata.org/pandas-docs/stable/api.html#datetimelike-properties

## Grouping By Year and Quarter

In [None]:
# sales.SELL_DATE.dt.year

In [None]:
# sales.SELL_DATE.dt.quarter

In [None]:
# Group keys derived from SELL_DATE. Both would otherwise inherit the name
# 'SELL_DATE', giving the result two index levels with the same name, so each
# is renamed to say what it holds.
sale_year = sales['SELL_DATE'].dt.year.rename('YEAR')
sale_quarter = sales['SELL_DATE'].dt.quarter.rename('QUARTER')

# Sum TOTAL_SALE per (year, quarter); the result is a Series with a two-level index
grouped = sales.groupby([sale_year, sale_quarter])['TOTAL_SALE'].sum()
grouped

In [None]:
# Grouping by two keys gives the result a two-level index; inspect it
grouped.index

http://pandas.pydata.org/pandas-docs/stable/groupby.html

## Plotting

In [None]:
# Default line chart of the summed sales, in index order
grouped.plot.line()

In [None]:
# Each value is the total for one discrete (year, quarter) bucket,
# so bars represent the data better than a connected line
grouped.plot.bar()

In [None]:
# Show the first ten grouped totals
grouped.iloc[:10]

In [None]:
# Pivot the innermost index level (the second group key, quarter) into columns,
# leaving one row per year
yearly = grouped.unstack(level=-1)

In [None]:
# Display the unstacked table produced by grouped.unstack
yearly

In [None]:
# Grouped bars: one cluster per row of `yearly`, one bar per column
yearly.plot.bar()

In [None]:
# Same grouped bars with the legend hidden
yearly.plot.bar(legend=False)

In [None]:
# Stack the columns on top of each other so each bar's height is the row total
yearly.plot.bar(stacked=True, legend=False)

In [None]:
# Horizontal stacked bars, this time with a title so the figure stands on its own
yearly.plot.barh(stacked=True, title='Yearly Sales By Quarter')

http://pandas.pydata.org/pandas-docs/stable/index.html

http://pandas.pydata.org/pandas-docs/stable/visualization.html

http://matplotlib.org/users/pyplot_tutorial.html

http://matplotlib.org/api/pyplot_api.html#module-matplotlib.pyplot