In [88]:
from pandas import Series, DataFrame
import pandas as pd

### Series

In [2]:
obj = Series([4, 7, -5, 3])
obj

0    4
1    7
2   -5
3    3
dtype: int64

In [4]:
print(obj.values)
print(obj.index)
print(list(obj.index))

[ 4  7 -5  3]
RangeIndex(start=0, stop=4, step=1)
[0, 1, 2, 3]


In [5]:
obj2 = Series([4, 7, -5, 3], index=['d', 'b', 'a', 'c'])
obj2

d    4
b    7
a   -5
c    3
dtype: int64

In [6]:
print(obj2['a'])
obj2['d'] = 6
print(obj2[['c', 'a', 'd']])

-5
c    3
a   -5
d    6
dtype: int64


In [7]:
obj2[obj2 > 0]

d    6
b    7
c    3
dtype: int64

In [8]:
obj2 * 2.k

d    12
b    14
a   -10
c     6
dtype: int64

In [9]:
'b' in obj2

True

In [46]:
sdata = {'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000}
obj3 = Series(sdata)
obj3

Ohio      35000
Oregon    16000
Texas     71000
Utah       5000
dtype: int64

In [47]:
states = ['California', 'Ohio', 'Oregon', 'Texas']
obj4 = Series(sdata, index=states)
obj4

California        NaN
Ohio          35000.0
Oregon        16000.0
Texas         71000.0
dtype: float64

In [12]:
pd.isnull(obj4)   # pd.notnull(obj4)    # obj4.isnull()

California     True
Ohio          False
Oregon        False
Texas         False
dtype: bool

In [58]:
obj5 = obj3 + obj4

In [83]:
list(obj5[obj5.notnull()].index)

['Ohio', 'Oregon', 'Texas']

In [None]:
# create a series with odd number from 50 to 100 

In [13]:
ser = Series(range(51, 100, 2))

In [11]:
Series([x for x in range(51, 100) if x % 2 == 1])

0     51
1     53
2     55
3     57
4     59
5     61
6     63
7     65
8     67
9     69
10    71
11    73
12    75
13    77
14    79
15    81
16    83
17    85
18    87
19    89
20    91
21    93
22    95
23    97
24    99
dtype: int64

In [None]:
# get first 5 elements for the series

In [14]:
ser[:5]

0    51
1    53
2    55
3    57
4    59
dtype: int64

In [15]:
ser[:5].values

array([51, 53, 55, 57, 59])

In [None]:
# get numbers between 70 and 80

In [19]:
ser[(ser > 70) & (ser < 80)]

10    71
11    73
12    75
13    77
14    79
dtype: int64

In [20]:
ser[ser > 70][ser < 80]

10    71
11    73
12    75
13    77
14    79
dtype: int64

In [None]:
can you check if [0,46,88,89] are present in series

In [27]:
[(x in ser.values) or (x in ser) for x in [0, 96, 89, 6]]

[True, False, True, True]

In [32]:
ser.values

array([51, 53, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 75, 77, 79, 81, 83,
       85, 87, 89, 91, 93, 95, 97, 99])

In [33]:
ser

0     51
1     53
2     55
3     57
4     59
5     61
6     63
7     65
8     67
9     69
10    71
11    73
12    75
13    77
14    79
15    81
16    83
17    85
18    87
19    89
20    91
21    93
22    95
23    97
24    99
dtype: int64

In [43]:
25 in ser

False

In [44]:
ser < 80

0      True
1      True
2      True
3      True
4      True
5      True
6      True
7      True
8      True
9      True
10     True
11     True
12     True
13     True
14     True
15    False
16    False
17    False
18    False
19    False
20    False
21    False
22    False
23    False
24    False
dtype: bool

### DataFrame

In [14]:
data = {'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'], 'year': [2000, 2001, 2002, 2001, 2002],
'pop': [1.5, 1.7, 3.6, 2.4, 2.9]}
frame = DataFrame(data)
frame

Unnamed: 0,pop,state,year
0,1.5,Ohio,2000
1,1.7,Ohio,2001
2,3.6,Ohio,2002
3,2.4,Nevada,2001
4,2.9,Nevada,2002


In [15]:
DataFrame(data, columns=['year', 'state', 'pop'])

Unnamed: 0,year,state,pop
0,2000,Ohio,1.5
1,2001,Ohio,1.7
2,2002,Ohio,3.6
3,2001,Nevada,2.4
4,2002,Nevada,2.9


In [17]:
frame2 = DataFrame(data, columns=['year', 'state', 'pop', 'debt'],
                   index=['one', 'two', 'three', 'four', 'five'])
frame2

Unnamed: 0,year,state,pop,debt
one,2000,Ohio,1.5,
two,2001,Ohio,1.7,
three,2002,Ohio,3.6,
four,2001,Nevada,2.4,
five,2002,Nevada,2.9,


In [18]:
frame2['state']    # frame2.state

one        Ohio
two        Ohio
three      Ohio
four     Nevada
five     Nevada
Name: state, dtype: object

In [19]:
frame2.ix['three']     # To get the row

year     2002
state    Ohio
pop       3.6
debt      NaN
Name: three, dtype: object

In [20]:
frame2['debt'] = 16.5
frame2

Unnamed: 0,year,state,pop,debt
one,2000,Ohio,1.5,16.5
two,2001,Ohio,1.7,16.5
three,2002,Ohio,3.6,16.5
four,2001,Nevada,2.4,16.5
five,2002,Nevada,2.9,16.5


In [22]:
import numpy as np
frame2['debt'] = np.arange(5.)
frame2

Unnamed: 0,year,state,pop,debt
one,2000,Ohio,1.5,0.0
two,2001,Ohio,1.7,1.0
three,2002,Ohio,3.6,2.0
four,2001,Nevada,2.4,3.0
five,2002,Nevada,2.9,4.0


In [23]:
val = Series([-1.2, -1.5, -1.7], index=['two', 'four', 'five'])
frame2['debt'] = val
frame2

Unnamed: 0,year,state,pop,debt
one,2000,Ohio,1.5,
two,2001,Ohio,1.7,-1.2
three,2002,Ohio,3.6,
four,2001,Nevada,2.4,-1.5
five,2002,Nevada,2.9,-1.7


In [25]:
frame2['eastern'] = frame2.state == 'Ohio'
del frame2['eastern']
frame2.columns

Index(['year', 'state', 'pop', 'debt'], dtype='object')

In [26]:
pop = {'Nevada': {2001: 2.4, 2002: 2.9},
    'Ohio': {2000: 1.5, 2001: 1.7, 2002: 3.6}}
frame3 = DataFrame(pop)
frame3

Unnamed: 0,Nevada,Ohio
2000,,1.5
2001,2.4,1.7
2002,2.9,3.6


In [27]:
frame3.T

Unnamed: 0,2000,2001,2002
Nevada,,2.4,2.9
Ohio,1.5,1.7,3.6


In [28]:
frame3.index.name = 'year'; frame3.columns.name = 'state'
frame3

state,Nevada,Ohio
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2000,,1.5
2001,2.4,1.7
2002,2.9,3.6


In [90]:
df = pd.read_csv('/Users/wuyan/Documents/courses/2016 fall/python/DataAnalysisPython/Chicago_crime.csv', index_col=0).reset_index(drop=True)

In [91]:
df.head()

Unnamed: 0,ID,Case Number,Date,Block,IUCR,Primary Type,Description,Location Description,Arrest,Domestic,...,Ward,Community Area,FBI Code,X Coordinate,Y Coordinate,Year,Updated On,Latitude,Longitude,Location
0,3415797,HK472635,07/04/2004 01:30:00 AM,005XX E 105TH ST,1310,CRIMINAL DAMAGE,TO PROPERTY,RESIDENCE,False,False,...,9.0,49.0,14,1182020.0,1835463.0,2004,04/15/2016 08:55:02 AM,41.703732,-87.609089,"(41.703731533, -87.609088836)"
1,5623921,HN433010,06/28/2007 05:15:00 AM,063XX S INGLESIDE AVE,4387,OTHER OFFENSE,VIOLATE ORDER OF PROTECTION,RESIDENCE,False,True,...,20.0,42.0,26,1183816.0,1863135.0,2007,04/15/2016 08:55:02 AM,41.779625,-87.601651,"(41.77962486, -87.601650748)"
2,8117703,HT352098,05/09/2011 11:30:00 AM,064XX S TROY ST,460,BATTERY,SIMPLE,APARTMENT,True,False,...,15.0,66.0,08B,1156497.0,1861673.0,2011,02/04/2016 06:33:39 AM,41.776207,-87.701845,"(41.776207139, -87.701845312)"
3,7626397,HS428765,07/25/2010 08:00:00 AM,026XX S CHRISTIANA AVE,910,MOTOR VEHICLE THEFT,AUTOMOBILE,PARKING LOT/GARAGE(NON.RESID.),False,False,...,22.0,30.0,07,1154454.0,1886367.0,2010,02/04/2016 06:33:39 AM,41.844012,-87.708677,"(41.84401176, -87.708676885)"
4,3897311,HL274719,04/04/2005 10:10:00 PM,054XX N WINTHROP AVE,420,BATTERY,AGGRAVATED:KNIFE/CUTTING INSTR,APARTMENT,True,False,...,48.0,77.0,04B,1167890.0,1936228.0,2005,04/15/2016 08:55:02 AM,41.980555,-87.657928,"(41.980554579, -87.657928425)"


In [92]:
df[df['Case Number'] == 'HL274719']['Location Description'].values[0]

'APARTMENT'

In [94]:
df[df['Case Number'] == 'HL274719']

Unnamed: 0,ID,Case Number,Date,Block,IUCR,Primary Type,Description,Location Description,Arrest,Domestic,...,Ward,Community Area,FBI Code,X Coordinate,Y Coordinate,Year,Updated On,Latitude,Longitude,Location
4,3897311,HL274719,04/04/2005 10:10:00 PM,054XX N WINTHROP AVE,420,BATTERY,AGGRAVATED:KNIFE/CUTTING INSTR,APARTMENT,True,False,...,48.0,77.0,04B,1167890.0,1936228.0,2005,04/15/2016 08:55:02 AM,41.980555,-87.657928,"(41.980554579, -87.657928425)"


In [None]:
# ratio of arrested

In [97]:
100*len(df[df['Arrest'] == True])/len(df)

28.28701917411035

In [None]:
for each year, find out how many people were arrested

In [102]:
years = list(range(2001, 2016))

In [104]:
data = []
for year in years:
    data.append(len(df[df['Year'] == year]))

[48647,
 48747,
 47696,
 46846,
 45299,
 44786,
 44033,
 42554,
 39027,
 36843,
 34941,
 33766,
 30542,
 27601,
 26167]

In [105]:
temp = {"number": data, "year": years}
DataFrame(temp, columns=['number', 'year'])

Unnamed: 0,number,year
0,48647,2001
1,48747,2002
2,47696,2003
3,46846,2004
4,45299,2005
5,44786,2006
6,44033,2007
7,42554,2008
8,39027,2009
9,36843,2010


In [108]:
df.groupby(['Year']).size()

Year
2001    48647
2002    48747
2003    47696
2004    46846
2005    45299
2006    44786
2007    44033
2008    42554
2009    39027
2010    36843
2011    34941
2012    33766
2013    30542
2014    27601
2015    26167
2016    21934
dtype: int64

In [113]:
df[df['Arrest']==True]['Year'].value_counts()

2001    14337
2004    14308
2003    14181
2002    14074
2005    13964
2006    13529
2007    13148
2009    10878
2008    10821
2010     9941
2011     9527
2012     9136
2013     8493
2014     7913
2015     6780
2016     4188
Name: Year, dtype: int64

In [114]:
df[df.Arrest==True]['Year'].value_counts()

2001    14337
2004    14308
2003    14181
2002    14074
2005    13964
2006    13529
2007    13148
2009    10878
2008    10821
2010     9941
2011     9527
2012     9136
2013     8493
2014     7913
2015     6780
2016     4188
Name: Year, dtype: int64

In [112]:
df.groupby(['Year', 'Arrest']).size()

Year  Arrest
2001  False     34310
      True      14337
2002  False     34673
      True      14074
2003  False     33515
      True      14181
2004  False     32538
      True      14308
2005  False     31335
      True      13964
2006  False     31257
      True      13529
2007  False     30885
      True      13148
2008  False     31733
      True      10821
2009  False     28149
      True      10878
2010  False     26902
      True       9941
2011  False     25414
      True       9527
2012  False     24630
      True       9136
2013  False     22049
      True       8493
2014  False     19688
      True       7913
2015  False     19387
      True       6780
2016  False     17746
      True       4188
dtype: int64

### Reindexing

In [29]:
obj = Series([4.5, 7.2, -5.3, 3.6], index=['d', 'b', 'a', 'c'])
obj

d    4.5
b    7.2
a   -5.3
c    3.6
dtype: float64

In [30]:
obj2 = obj.reindex(['a', 'b', 'c', 'd', 'e'])
obj2

a   -5.3
b    7.2
c    3.6
d    4.5
e    NaN
dtype: float64

In [31]:
obj.reindex(['a', 'b', 'c', 'd', 'e'], fill_value=0)

a   -5.3
b    7.2
c    3.6
d    4.5
e    0.0
dtype: float64

In [32]:
obj3 = Series(['blue', 'purple', 'yellow'], index=[0, 2, 4])
obj3.reindex(range(6), method='ffill')

0      blue
1      blue
2    purple
3    purple
4    yellow
5    yellow
dtype: object

In [33]:
frame = DataFrame(np.arange(9).reshape((3, 3)), index=['a', 'c', 'd'], 
                  columns=['Ohio', 'Texas', 'California'])
frame

Unnamed: 0,Ohio,Texas,California
a,0,1,2
c,3,4,5
d,6,7,8


In [34]:
frame2 = frame.reindex(['a', 'b', 'c', 'd'])
frame2

Unnamed: 0,Ohio,Texas,California
a,0.0,1.0,2.0
b,,,
c,3.0,4.0,5.0
d,6.0,7.0,8.0


In [35]:
states = ['Texas', 'Utah', 'California']
frame.reindex(columns=states)

Unnamed: 0,Texas,Utah,California
a,1,,2
c,4,,5
d,7,,8


In [36]:
frame.reindex(index=['a', 'b', 'c', 'd'], method='ffill', 
              columns=states)

Unnamed: 0,Texas,Utah,California
a,1,,2
b,1,,2
c,4,,5
d,7,,8


In [37]:
frame.ix[['a', 'b', 'c', 'd'], states]

Unnamed: 0,Texas,Utah,California
a,1.0,,2.0
b,,,
c,4.0,,5.0
d,7.0,,8.0


### Dropping entries from an axis

In [38]:
obj = Series(np.arange(5.), index=['a', 'b', 'c', 'd', 'e'])
new_obj = obj.drop('c')
new_obj

a    0.0
b    1.0
d    3.0
e    4.0
dtype: float64

In [39]:
obj.drop(['d', 'c'])

a    0.0
b    1.0
e    4.0
dtype: float64

In [40]:
data = DataFrame(np.arange(16).reshape((4, 4)),
    index=['Ohio', 'Colorado', 'Utah', 'New York'],
    columns=['one', 'two', 'three', 'four'])
data.drop(['Colorado', 'Ohio'])

Unnamed: 0,one,two,three,four
Utah,8,9,10,11
New York,12,13,14,15


In [41]:
data.drop('two',axis=1)

Unnamed: 0,one,three,four
Ohio,0,2,3
Colorado,4,6,7
Utah,8,10,11
New York,12,14,15


### Indexing, selection, and filtering

In [42]:
obj = Series(np.arange(4.),index=['a', 'b', 'c', 'd'])
obj

a    0.0
b    1.0
c    2.0
d    3.0
dtype: float64

In [44]:
print(obj['b'])
print('-'*10)
print(obj[2:4])
print('-'*10)
print(obj[['b', 'a', 'd']])
print('-'*10)
print(obj[obj < 2])

1.0
----------
c    2.0
d    3.0
dtype: float64
----------
b    1.0
a    0.0
d    3.0
dtype: float64
----------
a    0.0
b    1.0
dtype: float64


In [45]:
data = DataFrame(np.arange(16).reshape((4, 4)),
    index=['Ohio', 'Colorado', 'Utah', 'New York'],
    columns=['one', 'two', 'three', 'four'])
data

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [46]:
data[['three', 'one']]

Unnamed: 0,three,one
Ohio,2,0
Colorado,6,4
Utah,10,8
New York,14,12


In [47]:
data[:2]

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7


In [48]:
data[data['three'] > 5]

Unnamed: 0,one,two,three,four
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [49]:
data.ix['Colorado', ['two', 'three']]

two      5
three    6
Name: Colorado, dtype: int64

In [50]:
data.ix[['Colorado', 'Utah'], [3, 0, 1]]

Unnamed: 0,four,one,two
Colorado,7,4,5
Utah,11,8,9


### Arithmetic and data alignment

In [52]:
df1 = DataFrame(np.arange(9.).reshape((3, 3)), 
                columns=list('bcd'), index=['Ohio', 'Texas', 'Colorado'])
df2 = DataFrame(np.arange(12.).reshape((4, 3)), 
                  columns=list('bde'), index=['Utah', 'Ohio', 'Texas', 'Oregon'])
df1

Unnamed: 0,b,c,d
Ohio,0.0,1.0,2.0
Texas,3.0,4.0,5.0
Colorado,6.0,7.0,8.0


In [53]:
df2

Unnamed: 0,b,d,e
Utah,0.0,1.0,2.0
Ohio,3.0,4.0,5.0
Texas,6.0,7.0,8.0
Oregon,9.0,10.0,11.0


In [54]:
df1 + df2

Unnamed: 0,b,c,d,e
Colorado,,,,
Ohio,3.0,,6.0,
Oregon,,,,
Texas,9.0,,12.0,
Utah,,,,


In [55]:
df1.add(df2, fill_value=0)

Unnamed: 0,b,c,d,e
Colorado,6.0,7.0,8.0,
Ohio,3.0,1.0,6.0,5.0
Oregon,9.0,,10.0,11.0
Texas,9.0,4.0,12.0,8.0
Utah,0.0,,1.0,2.0


In [56]:
arr = np.arange(12.).reshape((3, 4))
arr

array([[  0.,   1.,   2.,   3.],
       [  4.,   5.,   6.,   7.],
       [  8.,   9.,  10.,  11.]])

In [57]:
arr - arr[0]

array([[ 0.,  0.,  0.,  0.],
       [ 4.,  4.,  4.,  4.],
       [ 8.,  8.,  8.,  8.]])

In [58]:
frame = DataFrame(np.arange(12.).reshape((4, 3)), 
                  columns=list('bde'), index=['Utah', 'Ohio', 'Texas', 'Oregon'])
series = frame.ix[0]
frame

Unnamed: 0,b,d,e
Utah,0.0,1.0,2.0
Ohio,3.0,4.0,5.0
Texas,6.0,7.0,8.0
Oregon,9.0,10.0,11.0


In [59]:
series

b    0.0
d    1.0
e    2.0
Name: Utah, dtype: float64

In [60]:
frame - series

Unnamed: 0,b,d,e
Utah,0.0,0.0,0.0
Ohio,3.0,3.0,3.0
Texas,6.0,6.0,6.0
Oregon,9.0,9.0,9.0


In [61]:
series2 = Series(range(3), index=['b', 'e', 'f'])
frame + series2

Unnamed: 0,b,d,e,f
Utah,0.0,,3.0,
Ohio,3.0,,6.0,
Texas,6.0,,9.0,
Oregon,9.0,,12.0,


In [62]:
series3 = frame['d']
series3

Utah       1.0
Ohio       4.0
Texas      7.0
Oregon    10.0
Name: d, dtype: float64

In [63]:
frame.sub(series3, axis=0)

Unnamed: 0,b,d,e
Utah,-1.0,0.0,1.0
Ohio,-1.0,0.0,1.0
Texas,-1.0,0.0,1.0
Oregon,-1.0,0.0,1.0


### Sorting and ranking

In [64]:
obj = Series(range(4), index=['d', 'a', 'b', 'c'])
obj.sort_index()

a    1
b    2
c    3
d    0
dtype: int64

In [65]:
frame = DataFrame(np.arange(8).reshape((2, 4)),
                  index=['three', 'one'], columns=['d', 'a', 'b', 'c'])
frame

Unnamed: 0,d,a,b,c
three,0,1,2,3
one,4,5,6,7


In [66]:
frame.sort_index()

Unnamed: 0,d,a,b,c
one,4,5,6,7
three,0,1,2,3


In [67]:
frame.sort_index(axis=1)

Unnamed: 0,a,b,c,d
three,1,2,3,0
one,5,6,7,4


In [68]:
frame.sort_index(axis=1, ascending=False)

Unnamed: 0,d,c,b,a
three,0,3,2,1
one,4,7,6,5


In [69]:
frame = DataFrame({'b': [4, 7, -3, 2], 'a': [0, 1, 0, 1]})
frame

Unnamed: 0,a,b
0,0,4
1,1,7
2,0,-3
3,1,2


In [71]:
frame.sort_values(by='b')

Unnamed: 0,a,b
2,0,-3
3,1,2
0,0,4
1,1,7


### Axis indexes with duplicate values

In [72]:
obj = Series(range(5), index=['a', 'a', 'b', 'b', 'c'])
obj

a    0
a    1
b    2
b    3
c    4
dtype: int64

In [73]:
obj['a']

a    0
a    1
dtype: int64

### Summarizing and Computing Descriptive Statistics

In [75]:
df = DataFrame([[1.4, np.nan], [7.1, -4.5], [np.nan, np.nan], [0.75, -1.3]],
index=['a', 'b', 'c', 'd'], columns=['one', 'two'])
df

Unnamed: 0,one,two
a,1.4,
b,7.1,-4.5
c,,
d,0.75,-1.3


In [76]:
df.sum()

one    9.25
two   -5.80
dtype: float64

In [77]:
df.sum(axis=1)

a    1.40
b    2.60
c     NaN
d   -0.55
dtype: float64

!['Common dictionary literals and operations'](img/p1.png)

### Unique Values, Value Counts, and Membership

In [79]:
obj = Series(['c', 'a', 'd', 'a', 'a', 'b', 'b', 'c', 'c'])
obj.unique()

array(['c', 'a', 'd', 'b'], dtype=object)

In [80]:
obj.value_counts()

a    3
c    3
b    2
d    1
dtype: int64

### Handling Missing Data

In [81]:
from numpy import nan as NA
data = Series([1, NA, 3.5, NA, 7])
data.dropna()

0    1.0
2    3.5
4    7.0
dtype: float64

In [82]:
data[data.notnull()]

0    1.0
2    3.5
4    7.0
dtype: float64

In [83]:
data = DataFrame([[1., 6.5, 3.], [1., NA, NA], [NA, NA, NA], [NA, 6.5, 3.]])
data

Unnamed: 0,0,1,2
0,1.0,6.5,3.0
1,1.0,,
2,,,
3,,6.5,3.0


In [85]:
data.dropna()

Unnamed: 0,0,1,2
0,1.0,6.5,3.0


In [86]:
data[4] = NA
data.dropna(axis=1, how='all')

Unnamed: 0,0,1,2
0,1.0,6.5,3.0
1,1.0,,
2,,,
3,,6.5,3.0


In [88]:
df.fillna(0)

Unnamed: 0,one,two
a,1.4,0.0
b,7.1,-4.5
c,0.0,0.0
d,0.75,-1.3


In [90]:
df.fillna({'one': 0.5, 'two': -1})

Unnamed: 0,one,two
a,1.4,-1.0
b,7.1,-4.5
c,0.5,-1.0
d,0.75,-1.3


In [91]:
df.fillna(0, inplace=True)
df.fillna(method='ffill', limit=2)

In [92]:
data = Series([1., NA, 3.5, NA, 7])
data.fillna(data.mean())

0    1.000000
1    3.833333
2    3.500000
3    3.833333
4    7.000000
dtype: float64

## Hierarchical Indexing

In [93]:
data = Series(np.random.randn(10),
    index=[['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'd', 'd'], 
           [1, 2, 3, 1, 2, 3, 1, 2, 2, 3]])
data

a  1    0.897879
   2    2.932146
   3   -1.264780
b  1    0.974741
   2   -1.487217
   3    0.690542
c  1   -0.984598
   2    0.302868
d  2    0.943715
   3   -0.098596
dtype: float64

In [94]:
data['b']

1    0.974741
2   -1.487217
3    0.690542
dtype: float64

In [95]:
data['b':'c']         #  data.ix[['b', 'c']]

b  1    0.974741
   2   -1.487217
   3    0.690542
c  1   -0.984598
   2    0.302868
dtype: float64

In [96]:
data[:, 2]

a    2.932146
b   -1.487217
c    0.302868
d    0.943715
dtype: float64

In [97]:
frame = DataFrame(np.arange(12).reshape((4, 3)), 
                  index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]],
                  columns=[['Ohio', 'Ohio', 'Colorado'], 
                           ['Green', 'Red', 'Green']])
frame

Unnamed: 0_level_0,Unnamed: 1_level_0,Ohio,Ohio,Colorado
Unnamed: 0_level_1,Unnamed: 1_level_1,Green,Red,Green
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [98]:
frame.index.names = ['key1', 'key2']
frame.columns.names = ['state', 'color']
frame

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [99]:
frame['Ohio']

Unnamed: 0_level_0,color,Green,Red
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,0,1
a,2,3,4
b,1,6,7
b,2,9,10


In [86]:
df = pd.read_csv('/Users/wuyan/Documents/courses/2016\ fall/python/DataAnalysisPython/Chicago_crime.csv', index_col=0).reset_index(drop=True)

FileNotFoundError: File b'/Users/wuyan/Documents/courses/2016\\ fall/python/DataAnalysisPython/Chicago_crime.csv' does not exist

In [None]:
df.head()