In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt



Create a pandas **Series**

In [3]:
# A Series built from a plain list gets the default integer index 0..n-1.
s = pd.Series(data=[1, 2, 3, 6, 7])
s

0    1
1    2
2    3
3    6
4    7
dtype: int64

Create a pandas **DataFrame** by passing a NumPy array, together with an index and column labels

In [5]:
# Five consecutive calendar days starting 2016-06-24 (daily frequency is the default).
dates = pd.date_range(start='20160624', periods=5)
dates

DatetimeIndex(['2016-06-24', '2016-06-25', '2016-06-26', '2016-06-27',
               '2016-06-28'],
              dtype='datetime64[ns]', freq='D')

In [10]:
# 5x4 frame of standard-normal draws: one row per entry in `dates`,
# one column per label. Values differ on every run (no seed is set).
df = pd.DataFrame(
    data=np.random.randn(5, 4),
    index=dates,
    columns=['one', 'two', 'three', 'four'],
)
df

Unnamed: 0,one,two,three,four
2016-06-24,-1.355018,-0.321554,1.079393,0.486469
2016-06-25,-0.252058,-1.381902,-1.778516,-0.67647
2016-06-26,-1.410848,-0.311351,1.310599,0.136171
2016-06-27,1.04785,0.321707,0.318735,0.429821
2016-06-28,-1.895402,-0.935007,0.307506,0.795169


Create a Pandas **DataFrame** by passing a dictionary of objects

In [11]:
# Each key becomes a column label and each list becomes that column's values.
# The mapping is deliberately NOT named `dict`: rebinding that name would
# shadow the builtin for every later cell in the kernel session.
columns_by_name = {'x': [1, 2, 3], 'y': [2, 3, 1], 'z': [0, 4, 5]}
df2 = pd.DataFrame(columns_by_name)
df2

Unnamed: 0,x,y,z
0,1,2,0
1,2,3,4
2,3,1,5


In [17]:
# Build a frame whose columns have different dtypes. Scalar values (A, B, F)
# are broadcast to the four rows defined by the index of the Series in C.
df2 = pd.DataFrame({
    'A': 1.,
    'B': pd.Timestamp('20130102'),
    'C': pd.Series(1, index=list(range(4)), dtype='float32'),
    'D': np.array([3] * 4, dtype='int32'),
    'E': pd.Categorical(["test", "train", "test", "train"]),
    'F': 'foo',
})
# print() with a single argument behaves identically on Python 2 and Python 3,
# whereas the bare `print x` statement is a SyntaxError on Python 3.
print(df2.dtypes)
df2

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object


Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [18]:
# Inspect the three building blocks of the frame: row labels, column labels,
# and the underlying values as an array.
# Single-argument print() calls run on both Python 2 and Python 3.
print(df2.index)
print(df2.columns)
print(df2.values)

Int64Index([0, 1, 2, 3], dtype='int64')
Index([u'A', u'B', u'C', u'D', u'E', u'F'], dtype='object')
[[1.0 Timestamp('2013-01-02 00:00:00') 1.0 3 'test' 'foo']
 [1.0 Timestamp('2013-01-02 00:00:00') 1.0 3 'train' 'foo']
 [1.0 Timestamp('2013-01-02 00:00:00') 1.0 3 'test' 'foo']
 [1.0 Timestamp('2013-01-02 00:00:00') 1.0 3 'train' 'foo']]


**Transposing** your data

In [19]:
# Swap rows and columns: the dates become column labels.
df.transpose()

Unnamed: 0,2016-06-24 00:00:00,2016-06-25 00:00:00,2016-06-26 00:00:00,2016-06-27 00:00:00,2016-06-28 00:00:00
one,-1.355018,-0.252058,-1.410848,1.04785,-1.895402
two,-0.321554,-1.381902,-0.311351,0.321707,-0.935007
three,1.079393,-1.778516,1.310599,0.318735,0.307506
four,0.486469,-0.67647,0.136171,0.429821,0.795169


**Sorting** by an axis

In [23]:
# Reorder the columns alphabetically by label; returns a new frame, `df` is unchanged.
df.sort_index(axis='columns', ascending=True)

Unnamed: 0,four,one,three,two
2016-06-24,0.486469,-1.355018,1.079393,-0.321554
2016-06-25,-0.67647,-0.252058,-1.778516,-1.381902
2016-06-26,0.136171,-1.410848,1.310599,-0.311351
2016-06-27,0.429821,1.04785,0.318735,0.321707
2016-06-28,0.795169,-1.895402,0.307506,-0.935007


In [26]:
# Reorder the rows by index label, latest date first; returns a new frame.
df.sort_index(axis='index', ascending=False)

Unnamed: 0,one,two,three,four
2016-06-28,-1.895402,-0.935007,0.307506,0.795169
2016-06-27,1.04785,0.321707,0.318735,0.429821
2016-06-26,-1.410848,-0.311351,1.310599,0.136171
2016-06-25,-0.252058,-1.381902,-1.778516,-0.67647
2016-06-24,-1.355018,-0.321554,1.079393,0.486469


In [27]:
# Order the rows by the values in column 'one' (ascending by default).
df.sort_values('one')

Unnamed: 0,one,two,three,four
2016-06-28,-1.895402,-0.935007,0.307506,0.795169
2016-06-26,-1.410848,-0.311351,1.310599,0.136171
2016-06-24,-1.355018,-0.321554,1.079393,0.486469
2016-06-25,-0.252058,-1.381902,-1.778516,-0.67647
2016-06-27,1.04785,0.321707,0.318735,0.429821
