#Pandas Basics

In [None]:
%pylab inline

In [None]:
from pandas import Series, DataFrame
import pandas as pd
import numpy as np

In [None]:
# Fix NumPy's global random seed so the np.random.randn frames built in later
# cells come out the same on every run of the notebook.
np.random.seed(525)

##Series

In [None]:
s1 = Series([1, 2, 3])
s1

In [None]:
s1.values

In [None]:
s1.index

In [None]:
s2 = Series([1, 2, 3], index=['a', 'b', 'c'])
s2

In [None]:
# s2 is labelled 'a', 'b', 'c', so an integer key can only mean a position.
# Plain s2[1] relies on the deprecated "integer key falls back to position"
# behaviour of Series.__getitem__; .iloc states the positional intent directly.
s2.iloc[1]

In [None]:
s2['b']

In [None]:
s2[s2 % 2 == 1]

In [None]:
s2 * 5

In [None]:
np.exp(s2)

In [None]:
'b' in s2

In [None]:
2 in s2.values

In [None]:
s3 = Series({'foo': 10, 'bar': 20, 'baz': 30})
s3

In [None]:
# The dict supplies 'b', 'c', 'd' while the explicit index asks for
# 'a', 'b', 'c': the index decides which labels end up in the Series, so 'a'
# has no value to take from the dict (missing -> NaN) and 'd' is not requested.
s4 = Series({'b': 2, 'c': 3,'d': 4}, index=['a', 'b', 'c'])
s4

In [None]:
s4.isnull()

In [None]:
# Arithmetic between Series aligns on index labels first. Only 'b' exists on
# both sides here; 'a' and 'c' each appear in just one operand.
Series({'a': 1, 'b': 2}) + Series({'b': 10, 'c': 20})

In [None]:
s4.index = ['x', 'y', 'z']
s4

##Data Frame

In [None]:
d = {'foo': [1, 2, 3], 'bar': [5, 10, 15], 'baz': [True, True, False]}
f1 = DataFrame(d)
f1

In [None]:
f2 = DataFrame(d, columns=['foo', 'bar', 'baz'])
f2

In [None]:
f3 = DataFrame(d, columns=['foo', 'bar', 'baz', 'hello'])
f3

In [None]:
f3['foo']

In [None]:
f3.bar

In [None]:
# Select the row labelled 2. The .ix indexer no longer exists in pandas;
# f3 carries the default integer index, where .ix was label-based, so .loc is
# the direct replacement.
f3.loc[2]

In [None]:
f3.hello = np.arange(3)
f3

In [None]:
f3['world'] = -4.5e-3
f3

In [None]:
del f3['foo']
f3

In [None]:
f3.columns

In [None]:
f3.T

In [None]:
f3.index.name = 'idx'
f3.columns.name = 'features'
f3

In [None]:
f3.values

##Index Objects

In [None]:
f4 = DataFrame({'a': range(3), 'b': 'asd'})
f4

In [None]:
f4.index

In [None]:
f4.index[2]

#Essential Functionality

##Reindexing

In [None]:
# f5 is indexed by 'a', 'b', 'c'. Reindexing onto 'b', 'c', 'd' drops 'a' and
# asks for a label ('d') that f5 does not have; method='ffill' requests that
# such rows be forward-filled from the preceding existing label.
f5 = DataFrame(np.arange(9).reshape((3, 3)), index=['a', 'b', 'c'])
f6 = f5.reindex(['b', 'c', 'd'], method='ffill')
f6

##Dropping Entries from an Axis

In [None]:
f6.drop('b')

In [None]:
f7 = DataFrame({'foo': range(5), 'bar': range(50, 100, 10), 'baz': 0})
f7

In [None]:
f7.drop(['bar', 'baz'], axis=1)

##Indexing, Selection and Filtering

In [None]:
f7.bar

In [None]:
f7[2:]

In [None]:
f7[f7.foo % 2 == 0]

In [None]:
f7[['bar', 'foo']]

In [None]:
(f7 / 10) % 2 == 0

In [None]:
# Rows labelled 2 and 4, column 'bar' only (kept as a one-column DataFrame
# because the column selector is a list). .ix has been removed from pandas;
# both selectors are labels, so .loc is the equivalent indexer.
f7.loc[[2, 4], ['bar']]

In [None]:
# Rows where `foo` is even, restricted to the first column.
# The removed .ix indexer mixed a boolean row mask with a positional column
# slice (`:1`). .loc is label-based, so the positional slice is translated to
# the matching column labels via f7.columns[:1].
f7.loc[f7.foo % 2 == 0, f7.columns[:1]]

In [None]:
f8 = f7[:3]
f8

##Arithmetic and Data Alignment

In [None]:
# The two frames share the column 'foo' but only partly share row labels:
# 'a' is in both, 'b' only in f9 and 'c' only in f10. Addition aligns on the
# labels, so the result is indexed by the union of both indexes.
f9 = DataFrame({'foo': [1.2, -3]}, index=['a', 'b'])
f10 = DataFrame({'foo': [2, 25]}, index=['a', 'c'])
f11 = f9 + f10
f11

In [None]:
f12 = DataFrame({'foo': [1.2, -3], 'bar': [0, 0]}, index=['a', 'b'])
f13 = DataFrame({'foo': [2, 25], 'baz': [5, 5]}, index=['a', 'b'])
f14 = f12 + f13
f14

In [None]:
# f15 is 4x4 with columns a-d; f16 is 3x3 with columns a-c, so f16 has no
# counterpart for row 3 or column 'd'. sub(..., fill_value=0) substitutes 0
# for a cell that is missing on one side before subtracting.
f15 = DataFrame(np.arange(16).reshape((4, 4)), columns=list('abcd'))
f16 = DataFrame(np.arange(9).reshape((3, 3)), columns=list('abc'))
f17 = f15.sub(f16, fill_value=0)
f17

In [None]:
# 3x3 frame with rows 'a'-'c' and columns 'x'-'z'.
f18 = DataFrame(np.arange(9).reshape((3, 3)), index=list('abc'), columns=list('xyz'))
# Subtract the first row from every row (the row Series is broadcast down the
# frame, matched on column labels). The index holds strings, so the removed
# .ix[0] was a positional lookup; .iloc[0] is its replacement.
f18 - f18.iloc[0]

In [None]:
f18.sub(f18['y'], axis=0)

##Function Application and Mapping

In [None]:
f19 = DataFrame(np.random.randn(9).reshape((3, 3)))
np.abs(f19)

In [None]:
f20 = DataFrame(np.random.randn(12).reshape((4, 3)))
f20

In [None]:
def f(x):
    """Return the spread of x: its largest value minus its smallest value."""
    return x.max() - x.min()
f20.apply(f)

In [None]:
f20.apply(f, axis=1)

In [None]:
def stats(x):
    """Summarise x as a Series with two entries, labelled 'min' and 'max'."""
    lowest = x.min()
    highest = x.max()
    return Series([lowest, highest], index=['min', 'max'])
f20.apply(stats)

In [None]:
f20.apply(stats, axis=1)

In [None]:
def myformat(x):
    """Render the number x as a string with exactly two decimal places."""
    return format(x, ".2f")
# Format every cell of f20 with myformat. DataFrame.applymap is deprecated in
# current pandas (superseded by DataFrame.map); mapping each column with
# Series.map gives the same element-wise result on old and new versions alike.
f20.apply(lambda col: col.map(myformat))

##Sorting and Ranking

In [None]:
f21 = DataFrame(np.arange(16).reshape((4, 4)), index=list('dacb'), columns=list('opnm'))
f22 = f21.sort_index()
f22

In [None]:
f23 = f22.sort_index(axis=1)
f23

In [None]:
f23.sort_index(ascending=False)

In [None]:
# 5x5 frame of random normals with columns 'a'-'e'.
f24 = DataFrame(np.random.randn(25).reshape((5, 5)), columns=list('abcde'))
# Order the rows by the values in column 'c'. Sorting by column values is
# done with sort_values; sort_index no longer accepts a `by` argument.
f24.sort_values(by='c')

In [None]:
# Overwrite column 'c' with a constant so every row ties on the first sort key
# and the second key ('a') decides the order.
f24['c'] = 0
# sort_values is the method for sorting by column values; sort_index no
# longer accepts a `by` argument.
f24.sort_values(by=['c', 'a'])

In [None]:
f24

In [None]:
f24.rank()

In [None]:
f24.rank(method='first')

##Axis Indexes with Duplicate Values

In [None]:
s25 = Series(range(5), index=list('abaca'))
s25.index.is_unique

In [None]:
s25['a']

In [None]:
f25 = DataFrame(np.arange(8).reshape((4, 2)), index=list('abca'))
f25

In [None]:
f25.index.is_unique

In [None]:
# Label lookup on an index that holds 'a' twice: every matching row is
# returned. .ix has been removed from pandas; 'a' is a label, so use .loc.
f25.loc['a']

##Summarizing and Computing Descriptive Statistics

In [None]:
f26 = DataFrame([[1, -4],[np.nan, 7.3],[0, np.nan]], index=list('abc'), columns=['one', 'two'])
f26

In [None]:
f26.sum()

In [None]:
f26.sum(axis=1)

In [None]:
f26.sum(axis=1, skipna=False)

In [None]:
f26.idxmax()

In [None]:
f26.cumsum()

In [None]:
f26.describe()

In [None]:
f27 = DataFrame([['foo', 'bar'], ['foo', 'baz'], ['foo', 'bar']], columns=['hello', 'world'])
f27.describe()

In [None]:
# NOTE(review): pandas.io.data was split out of pandas into the separate
# pandas-datareader package, so this import fails on current pandas and has to
# be pointed at that package. Left untouched here because the replacement is a
# third-party dependency this notebook does not yet declare.
import pandas.io.data as web
# Download one price-history frame per ticker (network access required).
all_data = {}
for ticker in ['AAPL', 'IBM', 'MSFT', 'GOOG']:
    all_data[ticker] = web.get_data_yahoo(ticker)
# Build one frame per measure with a column per ticker. dict.items() is used
# instead of the Python-2-only iteritems() so the cell runs on Python 2 and 3.
price = DataFrame({tic: data['Adj Close'] for tic, data in all_data.items()})
volume = DataFrame({tic: data['Volume'] for tic, data in all_data.items()})

In [None]:
returns = price.pct_change()
returns.tail()

In [None]:
returns.MSFT.corr(returns.IBM)

In [None]:
returns.MSFT.cov(returns.IBM)

In [None]:
returns.corr()

In [None]:
returns.cov()

In [None]:
returns.corrwith(returns.IBM)

##Unique Values, Value Counts and Membership

In [None]:
s28 = Series(list('abcbbacaacabacbaba'))
s28[:5]

In [None]:
s28.unique()

In [None]:
s28.value_counts()

In [None]:
s28.isin(list('ab'))[:5]

##Handling Missing Data

In [None]:
# Two different missing-value markers in one object Series: NumPy's NaN and
# Python's None. np.nan is the canonical spelling; the np.NaN alias was removed
# in NumPy 2.0.
s29 = Series(['a', 'b', np.nan, 'd', None, 'f'])
s29

In [None]:
s29.isnull()

In [None]:
from numpy import nan as NA

In [None]:
s30 = Series([1, NA, 3.5, NA, 7])
s30.dropna()

In [None]:
s30[s30.notnull()]

In [None]:
d31 = DataFrame([[1, NA, 3], [NA, NA, NA], [5, 5, 5]])
d31

In [None]:
d31.dropna()

In [None]:
d31.dropna(how='all')

In [None]:
# 7x3 frame of random normals with default integer row and column labels.
d32 = DataFrame(np.random.randn(7, 3))
# Blank out rows 0-4 of column 1 and rows 0-2 of column 2. .ix has been
# removed from pandas; on integer labels it was label-based with an inclusive
# end point, which is exactly what .loc does.
d32.loc[:4, 1] = NA
d32.loc[:2, 2] = NA
d32

In [None]:
d32.dropna(thresh=2)

In [None]:
d32.fillna(0)

In [None]:
d33 = d32.copy()
d33.fillna({1: 0, 2: 5}, inplace=True)

In [None]:
d32.fillna(d32.mean())

##Hierarchical Indexing

In [None]:
d33 = DataFrame(np.random.randn(10), index=[list('aaabbbbcdd'), [1, 2, 3, 1, 3, 2, 4, 1, 3, 2]])
d33

In [None]:
d33.index

In [None]:
# Partial indexing on the outer level of the hierarchical index: all rows
# whose first-level label is 'b'. .ix has been removed from pandas; use .loc.
d33.loc['b']

In [None]:
# Select the single row whose (outer, inner) labels are ('b', 2).
# .ix has been removed from pandas. With .loc the full key is passed as a
# tuple and the column selector is spelled out, so the 2 cannot be mistaken
# for a column label.
d33.loc[('b', 2), :]

In [None]:
d33.unstack()

In [None]:
d33.index.names = ['foo', 'bar']
d33

In [None]:
d34 = d33.swaplevel('foo', 'bar')
d34

In [None]:
d34.stack()

In [None]:
# Sort the rows by the outermost index level (remaining levels break ties).
# DataFrame.sortlevel has been removed from pandas; sort_index(level=...) is
# its replacement.
d34.sort_index(level=0)

In [None]:
# Sum the rows that share the same label on index level 'foo'. The `level`
# keyword of reductions such as sum() has been removed from pandas; grouping
# on the level and then summing is the equivalent spelling.
d34.groupby(level='foo').sum()