In [1]:
## Label, Integer and Mixed Indexing
# Label-oriented indexing

import pandas as pd

# Column-oriented sample data: each key is a column name and each list holds
# one value per month, aligned with the 'Months' list.
snow_data = {
    'Months': ['January', 'February', 'March', 'April', 'May', 'June'],
    'Avg SnowDays': [5.0, 4.2, 0.7, 1.2, 3.4, 2.7],
    'Avg Precip(cm)': [17.8, 22.4, 9.1, 1.5, 0.8, 12.2],
    'Avg Low Temp(F)': [27, 29, 35, 45, 42, 32],
}
snow_data


{'Avg Low Temp(F)': [27, 29, 35, 45, 42, 32],
 'Avg Precip(cm)': [17.8, 22.4, 9.1, 1.5, 0.8, 12.2],
 'Avg SnowDays': [5.0, 4.2, 0.7, 1.2, 3.4, 2.7],
 'Months': ['January', 'February', 'March', 'April', 'May', 'June']}

In [5]:
# Use the month names as row labels; 'Months' is omitted from `columns`, so it
# appears only as the index and not as a data column.
snowdata_df = pd.DataFrame(
    snow_data,
    index=snow_data['Months'],
    columns=['Avg SnowDays', 'Avg Precip(cm)', 'Avg Low Temp(F)'],
)
snowdata_df

Unnamed: 0,Avg SnowDays,Avg Precip(cm),Avg Low Temp(F)
January,5.0,17.8,27
February,4.2,22.4,29
March,0.7,9.1,35
April,1.2,1.5,45
May,3.4,0.8,42
June,2.7,12.2,32


In [6]:
##Access using single label: .loc with one row label returns that row as a
##Series keyed by column name. The row is shown with a single float64 dtype,
##so the integer temperature is displayed as 27.0.

snowdata_df.loc['January']

Avg SnowDays        5.0
Avg Precip(cm)     17.8
Avg Low Temp(F)    27.0
Name: January, dtype: float64

In [7]:
##Access using list of labels: passing a list of row labels to .loc returns a
##DataFrame containing just those rows, in the order requested.

snowdata_df.loc[['January','April']]

Unnamed: 0,Avg SnowDays,Avg Precip(cm),Avg Low Temp(F)
January,5.0,17.8,27
April,1.2,1.5,45


In [8]:
##A single label given to .loc is looked up in the row index, not in the columns
snowdata_df.loc['Avg SnowDays']

##Raises KeyError: 'Avg SnowDays' is a column name, not a row label

KeyError: 'the label [Avg SnowDays] is not in the [index]'

In [12]:
##Select a column through .loc: ':' keeps every row, the second indexer is the column label
snowdata_df.loc[:,'Avg SnowDays']

January     5.0
February    4.2
March       0.7
April       1.2
May         3.4
June        2.7
Name: Avg SnowDays, dtype: float64

In [13]:
##select specific value: row label first, then column label
##(the output is the full float repr of the stored value 0.7)

snowdata_df.loc['March','Avg SnowDays']

0.69999999999999996

In [14]:
##Alternate way: chained lookup - .loc['March'] first returns the row as a
##Series, then ['Avg SnowDays'] picks the entry for that column label

snowdata_df.loc['March']['Avg SnowDays']

0.69999999999999996

In [15]:
##The same value can be read with the [] operator, but the order is reversed:
##[] on a DataFrame selects the column first, then the row label within it

snowdata_df['Avg SnowDays']['March'] 

0.69999999999999996

In [16]:
##Row-first order fails with []: 'March' is looked up among the columns and raises KeyError
snowdata_df['March']['Avg SnowDays']

KeyError: 'March'

In [17]:
##[] alone cannot fetch a row by label: 'March' is not a column, hence KeyError
snowdata_df['March']

KeyError: 'March'

In [18]:
##Use .loc to fetch a row by its label
snowdata_df.loc['March']

Avg SnowDays        0.7
Avg Precip(cm)      9.1
Avg Low Temp(F)    35.0
Name: March, dtype: float64

In [19]:
##Selection using Boolean Array
##select months which have less than one snow day on average:
##the comparison gives one True/False per row, .loc keeps the True rows,
##and ':' keeps all columns
snowdata_df.loc[snowdata_df['Avg SnowDays']<1,:]

Unnamed: 0,Avg SnowDays,Avg Precip(cm),Avg Low Temp(F)
March,0.7,9.1,35


In [9]:
# Nested dict of 2015 quarterly values: outer keys are countries, inner keys
# are quarter labels.
crudeoil_data = {
    'France': {'2015-Q1': 100.1, '2015-Q2': 104.0, '2015-Q3': 112.5, '2015-Q4': 108.4},
    'Germany': {'2015-Q1': 115.3, '2015-Q2': 103.2, '2015-Q3': 106.1, '2015-Q4': 120.4},
    'Belgium': {'2015-Q1': 92.5, '2015-Q2': 98.4, '2015-Q3': 94.5, '2015-Q4': 91.2},
}


In [10]:
##Nested dict -> DataFrame: the outer keys (countries) become the columns and
##the inner keys (quarters) become the row index
crudeoil_df = pd.DataFrame.from_dict(crudeoil_data)
crudeoil_df

Unnamed: 0,Belgium,France,Germany
2015-Q1,92.5,100.1,115.3
2015-Q2,98.4,104.0,103.2
2015-Q3,94.5,112.5,106.1
2015-Q4,91.2,108.4,120.4


In [27]:
##Boolean mask on the column axis: keep all rows, but only the countries whose
##2015-Q1 value is greater than 110
crudeoil_df.loc[:,crudeoil_df.loc['2015-Q1']>110]

Unnamed: 0,Germany
2015-Q1,115.3
2015-Q2,103.2
2015-Q3,106.1
2015-Q4,120.4


In [28]:
##evaluating using boolean arrays: comparing the 2015-Q1 row with 110 yields
##one True/False per country, which is what .loc uses as a column mask
crudeoil_df.loc['2015-Q1']>110

Belgium    False
France     False
Germany     True
Name: 2015-Q1, dtype: bool

In [11]:
## Integer-oriented indexing

sample_data = {
    'x': [1, 2, 3, 4, 5],
    'y': [11, 12, 13, 14, 15],
    'z': [21, 22, 23, 24, 25],
}

# Row labels are the even numbers 0, 2, 4, 6, 8, so a row's label differs from
# its position everywhere except the first row.
x1 = pd.DataFrame(
    sample_data,
    index=list(range(0, 10, 2)),
)
x1

Unnamed: 0,x,y,z
0,1,11,21
2,2,12,22
4,3,13,23
6,4,14,24
8,5,15,25


In [48]:
##First two rows by position (their labels are 0 and 2)
x1.iloc[:2]

Unnamed: 0,x,y,z
0,1,11,21
2,2,12,22


In [49]:
##Row at position 2 (label 4), columns at positions 0 and 1; the stop position 2 is excluded
x1.iloc[2,0:2]

x     3
y    13
Name: 4, dtype: int64

In [12]:
##position-based indexing: .iloc[4] is the row at position 4 (the last row),
##whose label is 8 - not the row labelled 4

x1.iloc[4]

x     5
y    15
z    25
Name: 8, dtype: int64

In [51]:
##Row at position 1, whose label is 2
x1.iloc[1]

x     2
y    12
z    22
Name: 2, dtype: int64

In [52]:
##slice out specific row: the one-row slice 2:3 returns a DataFrame containing
##the row at position 2 (label 4)

x1.iloc[2:3,:]

Unnamed: 0,x,y,z
4,3,13,23


In [53]:
##index past the end of the frame: x1 has only 5 rows (positions 0-4), so the
##single integer position 6 raises IndexError. Note that 6 here is a single
##positional indexer, not a slice.

x1.iloc[6,:]

IndexError: single positional indexer is out-of-bounds

In [54]:
##.iat and .at operators (scalar access)
##First, the scalar at row position 3, column position 0 via .iloc, for comparison

x1.iloc[3,0]

4

In [55]:
##.iat with the same (row position, column position) returns the same scalar
x1.iat[3,0]

4

In [56]:
##Time scalar access through .iloc
%timeit x1.iloc[3,0]

The slowest run took 11.33 times longer than the fastest. This could mean that an intermediate result is being cached.
100000 loops, best of 3: 11.1 µs per loop


In [58]:
##Time the same scalar access through .iat
%timeit x1.iat[3,0]

The slowest run took 7.86 times longer than the fastest. This could mean that an intermediate result is being cached.
100000 loops, best of 3: 6.84 µs per loop


In [None]:
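##For scalar access, .iat is faster than .iloc (6.84 µs vs 11.1 µs per loop in the timings above). The same applies to .at versus .loc. Note: the old mixed indexer .ix was deprecated in pandas 0.20 and removed in pandas 1.0, so it is not available in current versions.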