Permalink
Switch branches/tags
Nothing to show
Find file
a49cba7 Apr 27, 2015
164 lines (97 sloc) 5.07 KB
Welcome!
1. Install Pandas and the IPython Notebook
The Anaconda distribution comes with Pandas built in:
http://continuum.io/downloads
2. Download and unzip "Pandas-Tutorial.zip" from
https://github.com/brandon-rhodes/pycon-pandas-tutorial/releases
3. Start the IPython Notebook and navigate to the "pandas-tutorial"
folder extracted from the .zip
len(df)              series + value     df[df.c == value]
df.head()            series + series2   df[(df.c >= value) & (df.d < value)]
df.tail()            series.notnull()   df[(df.c < value) | (df.d != value)]
df.COLUMN            series.isnull()    df.sort('column')
df['COLUMN']         series.order()     df.sort(['column1', 'column2'])
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
http://continuum.io/downloads
https://github.com/brandon-rhodes/pycon-pandas-tutorial/releases
len(df)              series + value     df[df.c == value]
df.head()            series + series2   df[(df.c >= value) & (df.d < value)]
df.tail()            series.notnull()   df[(df.c < value) | (df.d != value)]
df.COLUMN            series.isnull()    df.sort('column')
df['COLUMN']         series.order()     df.sort(['column1', 'column2'])
s.str.len()          s.value_counts()   df[['column1', 'column2']]
s.str.contains()     s.sort_index()     df.plot(x='a', y='b', kind='scatter')
s.str.startswith()   s.plot(...)        df.plot(x='a', y='b', kind='bar')
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
len(df)              series + value     df[df.c == value]
df.head()            series + series2   df[(df.c >= value) & (df.d < value)]
df.tail()            series.notnull()   df[(df.c < value) | (df.d != value)]
df.COLUMN            series.isnull()    df.sort('column')
df['COLUMN']         series.order()     df.sort(['column1', 'column2'])
s.str.len()          s.value_counts()
s.str.contains()     s.sort_index()     df[['column1', 'column2']]
s.str.startswith()   s.plot(...)        df.plot(x='a', y='b', kind='bar')
df.set_index('a').sort_index()          df.loc['value']
df.set_index(['a', 'b']).sort_index()   df.loc[('v','u')]
df.groupby('column')                    .size() .mean() .min() .max()
df.groupby(['column1', 'column2'])      .agg(['min', 'max'])
"The Pink Panther"
brandon@rhodesmill.org
len(df)              series + value     df[df.c == value]
df.head()            series + series2   df[(df.c >= value) & (df.d < value)]
df.tail()            series.notnull()   df[(df.c < value) | (df.d != value)]
df.COLUMN            series.isnull()    df.sort('column')
df['COLUMN']         series.order()     df.sort(['column1', 'column2'])
s.str.len()          s.value_counts()
s.str.contains()     s.sort_index()     df[['column1', 'column2']]
s.str.startswith()   s.plot(...)        df.plot(x='a', y='b', kind='bar')
df.set_index('a').sort_index()          df.loc['value']
df.set_index(['a', 'b']).sort_index()   df.loc[('v','u')]
df.groupby('column')                    .size() .mean() .min() .max()
df.groupby(['column1', 'column2'])      .agg(['min', 'max'])
df.unstack()
df.stack()
df.fillna(value)
s.fillna(value)
len(df)              series + value     df[df.c == value]
df.head()            series + series2   df[(df.c >= value) & (df.d < value)]
df.tail()            series.notnull()   df[(df.c < value) | (df.d != value)]
df.COLUMN            series.isnull()    df.sort('column')
df['COLUMN']         series.order()     df.sort(['column1', 'column2'])
s.str.len()          s.value_counts()
s.str.contains()     s.sort_index()     df[['column1', 'column2']]
s.str.startswith()   s.plot(...)        df.plot(x='a', y='b', kind='bar')
df.set_index('a').sort_index()          df.loc['value']
df.set_index(['a', 'b']).sort_index()   df.loc[('v','u')]
df.groupby('column')                    .size() .mean() .min() .max()
df.groupby(['column1', 'column2'])      .agg(['min', 'max'])
df.unstack()         s.dt.year
df.stack()           s.dt.month
df.fillna(value)     s.dt.day
s.fillna(value)      s.dt.dayofweek
len(df)              series + value     df[df.c == value]
df.head()            series + series2   df[(df.c >= value) & (df.d < value)]
df.tail()            series.notnull()   df[(df.c < value) | (df.d != value)]
df.COLUMN            series.isnull()    df.sort('column')
df['COLUMN']         series.order()     df.sort(['column1', 'column2'])
s.str.len()          s.value_counts()
s.str.contains()     s.sort_index()     df[['column1', 'column2']]
s.str.startswith()   s.plot(...)        df.plot(x='a', y='b', kind='bar')
df.set_index('a').sort_index()          df.loc['value']
df.set_index(['a', 'b']).sort_index()   df.loc[('v','u')]
df.groupby('column')                    .size() .mean() .min() .max()
df.groupby(['column1', 'column2'])      .agg(['min', 'max'])
df.unstack()         s.dt.year          df.merge(df2, how='outer', ...)
df.stack()           s.dt.month         df.rename(columns={'a': 'y', 'b': 'z'})
df.fillna(value)     s.dt.day           pd.concat([df1, df2])
s.fillna(value)      s.dt.dayofweek
Thanks!
Any questions?
Local variables:
mode:text
mode:page
End: