In [21]:
#There are numerous ways to construct a DataFrame, though one of the 
#most common is from a dict of equal-length lists or NumPy arrays

In [1]:
import pandas as pd
from pandas import Series, DataFrame
import numpy as np

In [2]:
# Build a DataFrame from a dict of equal-length lists: every key becomes a
# column and the rows get the default integer index.
data = {
    'state': ['ohio', 'ohio', 'ohio', 'nevada', 'nevada'],
    'year': [2000, 2001, 2002, 2001, 2002],
    'popul': [1.5, 1.7, 3.6, 2.4, 2.9],
}
frame = DataFrame(data)

In [3]:
frame

Unnamed: 0,popul,state,year
0,1.5,ohio,2000
1,1.7,ohio,2001
2,3.6,ohio,2002
3,2.4,nevada,2001
4,2.9,nevada,2002


In [5]:
# Slice rows by index label: keep every row from label 3 to the end.
# .loc is used instead of .ix, which was deprecated in pandas 0.20 and
# removed in pandas 1.0.
df2 = frame.loc[3:]
df2

Unnamed: 0,popul,state,year
3,2.4,nevada,2001
4,2.9,nevada,2002


In [25]:
# Passing columns= fixes the order in which the columns are arranged.
DataFrame(
    data,
    columns=['year', 'state', 'popul']
)

Unnamed: 0,year,state,popul
0,2000,ohio,1.5
1,2001,ohio,1.7
2,2002,ohio,3.6
3,2001,nevada,2.4
4,2002,nevada,2.9


In [26]:
# Ask for a column that is not present in data ('debt') and replace the
# default integer index with explicit string labels.
frame2 = DataFrame(
    data,
    columns=['year', 'state', 'popul', 'debt'],
    index=list('12345')
)

In [27]:
frame2

Unnamed: 0,year,state,popul,debt
1,2000,ohio,1.5,
2,2001,ohio,1.7,
3,2002,ohio,3.6,
4,2001,nevada,2.4,
5,2002,nevada,2.9,


In [28]:
frame2.columns

Index([u'year', u'state', u'popul', u'debt'], dtype='object')

In [29]:
frame2.year # retrieve a column as a Series using attribute-style access

1    2000
2    2001
3    2002
4    2001
5    2002
Name: year, dtype: int64

In [30]:
frame2['year'] [0:] # same column via dict-style key lookup; the [0:] slice keeps every row

1    2000
2    2001
3    2002
4    2001
5    2002
Name: year, dtype: int64

In [31]:
# Rows can also be retrieved by index label: select row '1' with .loc
# (.ix was deprecated in pandas 0.20 and removed in pandas 1.0), then keep
# the first three fields of that row.
frame2.loc['1'][:3]

year     2000
state    ohio
popul     1.5
Name: 1, dtype: object

In [32]:
frame2['debt']=16.5 # assign one scalar to the whole column, replacing the missing values in 'debt'

In [33]:
frame2

Unnamed: 0,year,state,popul,debt
1,2000,ohio,1.5,16.5
2,2001,ohio,1.7,16.5
3,2002,ohio,3.6,16.5
4,2001,nevada,2.4,16.5
5,2002,nevada,2.9,16.5


In [34]:
# Assign an array to the column; np.arange(5.) yields five floats, one per row.
frame2['debt']=np.arange(5.)

In [35]:
frame2

Unnamed: 0,year,state,popul,debt
1,2000,ohio,1.5,0.0
2,2001,ohio,1.7,1.0
3,2002,ohio,3.6,2.0
4,2001,nevada,2.4,3.0
5,2002,nevada,2.9,4.0


In [36]:
# A Series labelled with only some of frame2's index labels ('2', '4', '5').
val = Series({'2': -1.2, '4': -1.5, '5': -1.7})

In [37]:
val

2   -1.2
4   -1.5
5   -1.7
dtype: float64

In [38]:
# Assigning a Series aligns it on the index labels: rows whose label is not
# present in val ('1' and '3') end up with a missing value.
frame2['debt']=val

In [39]:
frame2

Unnamed: 0,year,state,popul,debt
1,2000,ohio,1.5,
2,2001,ohio,1.7,-1.2
3,2002,ohio,3.6,
4,2001,nevada,2.4,-1.5
5,2002,nevada,2.9,-1.7


In [40]:
# Assigning to a column name that does not exist yet creates the column.
# The comparison produces booleans: True where state is 'ohio', else False.
frame2['eastern'] = (frame2['state'] == 'ohio')

In [41]:
frame2

Unnamed: 0,year,state,popul,debt,eastern
1,2000,ohio,1.5,,True
2,2001,ohio,1.7,-1.2,True
3,2002,ohio,3.6,,True
4,2001,nevada,2.4,-1.5,False
5,2002,nevada,2.9,-1.7,False


In [42]:
del frame2['eastern'] # remove the 'eastern' column from frame2

In [43]:
frame2

Unnamed: 0,year,state,popul,debt
1,2000,ohio,1.5,
2,2001,ohio,1.7,-1.2
3,2002,ohio,3.6,
4,2001,nevada,2.4,-1.5
5,2002,nevada,2.9,-1.7


In [44]:
frame2.columns

Index([u'year', u'state', u'popul', u'debt'], dtype='object')

In [6]:
# Nested dict: outer keys are states, inner keys are years, values are populations.
popul = {
    'nevada': {2001: 2.4, 2002: 2.9},
    'ohio': {2000: 1.5, 2001: 1.7, 2002: 3.6},
}

In [8]:
# From a nested dict, the outer keys become the columns and the inner keys
# become the row index; combinations with no value are left missing.
frame3=DataFrame(popul)

In [9]:
frame3

Unnamed: 0,nevada,ohio
2000,,1.5
2001,2.4,1.7
2002,2.9,3.6


In [48]:
frame3.T # transpose: rows become columns and columns become rows

Unnamed: 0,2000,2001,2002
nevada,,2.4,2.9
ohio,1.5,1.7,3.6


In [49]:
# An explicit index selects which row labels appear; 2003 is not a key in
# either inner dict, so its row is entirely missing values.
DataFrame(popul, index=[2001, 2002, 2003])

Unnamed: 0,nevada,ohio
2001,2.4,1.7
2002,2.9,3.6
2003,,


In [52]:
frame3

Unnamed: 0,nevada,ohio
2000,,1.5
2001,2.4,1.7
2002,2.9,3.6


In [18]:
# Name both axes: the column axis is called 'state', the row index 'year'.
frame3.columns.name = 'state'
frame3.index.name = 'year'

In [54]:
frame3.index.name

'year'

In [19]:
frame3.columns.name

'state'

In [55]:
frame3

state,nevada,ohio
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2000,,1.5
2001,2.4,1.7
2002,2.9,3.6


In [56]:
frame3.values

array([[ nan,  1.5],
       [ 2.4,  1.7],
       [ 2.9,  3.6]])

In [57]:
frame2

Unnamed: 0,year,state,popul,debt
1,2000,ohio,1.5,
2,2001,ohio,1.7,-1.2
3,2002,ohio,3.6,
4,2001,nevada,2.4,-1.5
5,2002,nevada,2.9,-1.7


In [58]:
frame2.values

array([[2000L, 'ohio', 1.5, nan],
       [2001L, 'ohio', 1.7, -1.2],
       [2002L, 'ohio', 3.6, nan],
       [2001L, 'nevada', 2.4, -1.5],
       [2002L, 'nevada', 2.9, -1.7]], dtype=object)

Index Objects

In [59]:
# A small Series holding 0, 1, 2 under the labels 'a', 'b', 'c'.
obj = Series(range(3), index=list('abc'))

In [60]:
obj

a    0
b    1
c    2
dtype: int64

In [61]:
index=obj.index

In [62]:
index

Index([u'a', u'b', u'c'], dtype='object')

In [63]:
index[1:]

Index([u'b', u'c'], dtype='object')

In [20]:
# Build an Index object directly from the integers 0, 1, 2.
index = pd.Index(np.arange(0, 3))

In [21]:
# Use the existing Index object as this Series' index; the integer 0 is stored
# alongside the floats, so the result is a float64 Series.
obj2=Series([1.5, -2.5,0], index=index)

In [22]:
obj2

0    1.5
1   -2.5
2    0.0
dtype: float64

In [8]:
# Identity check: is the Series' index the very same object that was passed in?
# NOTE(review): the recorded output is True; other pandas versions may copy the
# index on construction and give False -- confirm against the installed version.
obj2.index is index

True

In [9]:
obj2

0    1.5
1   -2.5
2    0.0
dtype: float64

In [23]:
frame3

state,nevada,ohio
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2000,,1.5
2001,2.4,1.7
2002,2.9,3.6


In [24]:
'ohio' in frame3.columns # membership test against the column labels (the row index supports the same test)

True

In [25]:
2003 in frame3.index # membership test against the row labels; frame3 only has 2000-2002

False