In [1]:
import pandas

In [2]:
pandas.Series(['a','b','c','d'])

0    a
1    b
2    c
3    d
dtype: object

In [3]:
pandas.Series(['a','b','c','d'], index=["apple", "banana", "cat", "dog"])

apple     a
banana    b
cat       c
dog       d
dtype: object

In [5]:
# Label -> first-letter mapping; the Series below takes its index from the
# dict's keys and its values from the dict's values.
d = {
    'apple': 'a',
    'banana': 'b',
    'cat': 'c',
    'dog': 'd',
}
pandas.Series(data=d.values(), index=d.keys())

dog       d
cat       c
apple     a
banana    b
dtype: object

In [6]:
# Series built from two parallel lists: the values and their index labels.
list_created = pandas.Series(
    data=['a', 'b', 'c', 'd'],
    index=['apple', 'banana', 'cat', 'dog'],
)

In [7]:
# Same construction as the dict example above, kept in a variable this time:
# values come from d.values(), index labels from d.keys().
dictionary_created = pandas.Series(data=d.values(), index=d.keys())

In [8]:
# Positional access goes through .iloc: this Series has string labels, and a
# bare integer in [] only worked through a deprecated positional fallback.
dictionary_created.iloc[0]

'd'

In [9]:
# Positional access goes through .iloc: this Series has string labels, and a
# bare integer in [] only worked through a deprecated positional fallback.
list_created.iloc[0]

'a'

In [10]:
pandas.Series(range(100))

0      0
1      1
2      2
3      3
4      4
5      5
6      6
7      7
8      8
9      9
10    10
11    11
12    12
13    13
14    14
15    15
16    16
17    17
18    18
19    19
20    20
21    21
22    22
23    23
24    24
25    25
26    26
27    27
28    28
29    29
      ..
70    70
71    71
72    72
73    73
74    74
75    75
76    76
77    77
78    78
79    79
80    80
81    81
82    82
83    83
84    84
85    85
86    86
87    87
88    88
89    89
90    90
91    91
92    92
93    93
94    94
95    95
96    96
97    97
98    98
99    99
dtype: int64

In [12]:
# Label-indexed Series of colours; the bare name on the last line displays it.
colouring = pandas.Series(
    ['red', 'yellow', 'black', 'blue', 'purple'],
    index=['apple', 'banana', 'cat', 'dog', 'elephant'],
)
colouring

apple          red
banana      yellow
cat          black
dog           blue
elephant    purple
dtype: object

In [13]:
# Combine the two Series column-wise. Rows are aligned on index labels, so a
# label present in only one Series gets NaN in the other column.
living_things = pandas.DataFrame(
    dict(colour=colouring, first_letter=dictionary_created)
)
living_things

Unnamed: 0,colour,first_letter
apple,red,a
banana,yellow,b
cat,black,c
dog,blue,d
elephant,purple,


In [14]:
living_things.first_letter.isnull()

apple       False
banana      False
cat         False
dog         False
elephant     True
Name: first_letter, dtype: bool

In [16]:
# Pick the rows whose first_letter is missing and the first_letter column in
# one .loc call (no chained indexing), then take the first match by position
# with .iloc rather than a bare [0] on a string-labelled Series.
living_things.loc[living_things.first_letter.isnull(), 'first_letter'].iloc[0]

nan

In [17]:
# Keep the first missing first_letter value for the NaN experiments below.
# A single .loc call avoids chained indexing; .iloc[0] is explicit positional
# access, since the index here is made of string labels.
elephant_first_letter = (
    living_things
    .loc[living_things.first_letter.isnull(), 'first_letter']
    .iloc[0]
)

In [18]:
elephant_first_letter == elephant_first_letter

False

In [21]:
import numpy

In [22]:
numpy.nan

nan

NaN never compares equal to itself, so `==` cannot detect it. This is how you check for NaN instead:

In [24]:
numpy.isnan(elephant_first_letter)

True

In [26]:
# Upper-case every colour by mapping a function over the column's values.
living_things['colour'].map(lambda colour: colour.upper())

apple          RED
banana      YELLOW
cat          BLACK
dog           BLUE
elephant    PURPLE
Name: colour, dtype: object

In [28]:
def safe_uppercase(x):
    """Return ``x`` upper-cased if it is a string, otherwise return it unchanged.

    Non-string values (for example the float NaN that marks a missing entry)
    are passed through untouched, which makes this safe to use with ``map`` /
    ``applymap`` on columns that contain missing data.
    """
    # isinstance is the idiomatic type check and also accepts str subclasses,
    # which the exact ``type(x) != type('')`` comparison silently skipped.
    if isinstance(x, str):
        return x.upper()
    return x
living_things.first_letter.map(safe_uppercase)

apple         A
banana        B
cat           C
dog           D
elephant    NaN
Name: first_letter, dtype: object

In [29]:
living_things.applymap(safe_uppercase)

Unnamed: 0,colour,first_letter
apple,RED,A
banana,YELLOW,B
cat,BLACK,C
dog,BLUE,D
elephant,PURPLE,


# Sydney Trains
Now let's work with a more complex dataset

In [30]:
# Load the survey data. index_col=1 makes the CSV's second column the row
# index, and thousands=',' lets values written like "10,390" parse as numbers.
trains = pandas.read_csv(
    'sydtrains.csv',
    index_col=1,
    thousands=',',
)
trains

Unnamed: 0_level_0,LINE,YEAR,SURVEY_DATE_USED,WHETHER_SURVEYED,IN_0200_0600,OUT_0200_0600,IN_0600_0930,OUT_0600_0930,IN_0930_1500,OUT_0930_1500,IN_1500_1830,OUT_1500_1830,IN_1830_0200,OUT_1830_0200,IN_24_HOURS,OUT_24_HOURS,RANK,STATION_SORT_ID
STATION,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
Central,CBD,2014,6/05/2014,Yes,940,920,10390,42070,21400,30150,45370,16270,19010,7700,97110,97110,1,101
Town Hall,CBD,2014,7/05/2014,Yes,880,880,6380,43210,16930,23550,43670,15270,21760,6720,89620,89620,2,102
Wynyard,CBD,2014,20/05/2014,Yes,590,570,4710,39620,9200,11180,35010,6510,10690,2330,60200,60200,3,103
Parramatta,Western,2014,19/11/2014,Yes,660,330,7790,13700,7400,7020,14960,8660,4150,5260,34960,34960,4,905
North Sydney,North Shore,2014,19/08/2014,Yes,270,270,2340,18340,4450,5590,17760,3050,3790,1360,28610,28610,5,1402
Redfern,CBD,2014,1/05/2014,Yes,250,250,3910,10690,5350,8170,12510,4250,3660,2320,25680,25680,6,108
Chatswood,North Shore,2014,19/08/2014,Yes,210,200,5030,8200,4710,4930,9430,6000,2820,2860,22200,22200,7,1407
Bondi Junction,Eastern Suburbs,2014,2/09/2014,Yes,210,210,10150,4350,4370,4600,5240,8330,1910,4380,21880,21880,8,203
Strathfield,Inner West,2014,9/09/2014,Yes,190,190,7900,4050,4980,3950,5540,7230,2100,5280,20710,20710,9,1110
Circular Quay,CBD,2014,22/05/2014,Yes,200,200,2110,8750,4340,6710,9910,3380,4070,1600,20630,20630,10,104


In [31]:
trains.columns

Index([u'LINE', u'YEAR', u'SURVEY_DATE_USED', u'WHETHER_SURVEYED',
       u'IN_0200_0600', u'OUT_0200_0600', u'IN_0600_0930', u'OUT_0600_0930',
       u'IN_0930_1500', u'OUT_0930_1500', u'IN_1500_1830', u'OUT_1500_1830',
       u'IN_1830_0200', u'OUT_1830_0200', u'IN_24_HOURS', u'OUT_24_HOURS',
       u'RANK', u'STATION_SORT_ID'],
      dtype='object')

In [33]:
# Keep only the columns whose names start with 'IN' or 'OUT'
# (str.startswith accepts a tuple of prefixes).
numeric_columns = [
    column for column in trains.columns
    if column.startswith(('IN', 'OUT'))
]
numeric_columns

['IN_0200_0600',
 'OUT_0200_0600',
 'IN_0600_0930',
 'OUT_0600_0930',
 'IN_0930_1500',
 'OUT_0930_1500',
 'IN_1500_1830',
 'OUT_1500_1830',
 'IN_1830_0200',
 'OUT_1830_0200',
 'IN_24_HOURS',
 'OUT_24_HOURS']

In [36]:
# Express every count in thousands. One vectorised division gives the same
# values as applymap(lambda x: x/1000.0) without calling a Python function
# once per cell.
trains[numeric_columns] / 1000.0

Unnamed: 0_level_0,IN_0200_0600,OUT_0200_0600,IN_0600_0930,OUT_0600_0930,IN_0930_1500,OUT_0930_1500,IN_1500_1830,OUT_1500_1830,IN_1830_0200,OUT_1830_0200,IN_24_HOURS,OUT_24_HOURS
STATION,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Central,0.94,0.92,10.39,42.07,21.40,30.15,45.37,16.27,19.01,7.70,97.11,97.11
Town Hall,0.88,0.88,6.38,43.21,16.93,23.55,43.67,15.27,21.76,6.72,89.62,89.62
Wynyard,0.59,0.57,4.71,39.62,9.20,11.18,35.01,6.51,10.69,2.33,60.20,60.20
Parramatta,0.66,0.33,7.79,13.70,7.40,7.02,14.96,8.66,4.15,5.26,34.96,34.96
North Sydney,0.27,0.27,2.34,18.34,4.45,5.59,17.76,3.05,3.79,1.36,28.61,28.61
Redfern,0.25,0.25,3.91,10.69,5.35,8.17,12.51,4.25,3.66,2.32,25.68,25.68
Chatswood,0.21,0.20,5.03,8.20,4.71,4.93,9.43,6.00,2.82,2.86,22.20,22.20
Bondi Junction,0.21,0.21,10.15,4.35,4.37,4.60,5.24,8.33,1.91,4.38,21.88,21.88
Strathfield,0.19,0.19,7.90,4.05,4.98,3.95,5.54,7.23,2.10,5.28,20.71,20.71
Circular Quay,0.20,0.20,2.11,8.75,4.34,6.71,9.91,3.38,4.07,1.60,20.63,20.63


In [38]:
def threshold_count(xs, threshold=100):
    """Count the values in ``xs`` that are strictly greater than ``threshold``.

    Parameters
    ----------
    xs : iterable
        Values to test, e.g. one column or one row of a DataFrame.
    threshold : number, optional
        Exclusive cut-off a value must exceed to be counted. Defaults to 100,
        the value that used to be hard-coded, so existing callers that pass
        only ``xs`` (``apply``, ``aggfunc=``) behave exactly as before.

    Returns
    -------
    int
        How many values exceed ``threshold``; 0 for an empty input.
    """
    return sum(1 for x in xs if x > threshold)

trains[numeric_columns].apply(threshold_count)

IN_0200_0600      382
OUT_0200_0600     191
IN_0600_0930     2463
OUT_0600_0930    1855
IN_0930_1500     2071
OUT_0930_1500    2023
IN_1500_1830     1936
OUT_1500_1830    2429
IN_1830_0200      910
OUT_1830_0200    1876
IN_24_HOURS      2702
OUT_24_HOURS     2702
dtype: int64

In [39]:
trains[numeric_columns].apply(threshold_count, axis=1)

STATION
Central                   12
Town Hall                 12
Wynyard                   12
Parramatta                12
North Sydney              12
Redfern                   12
Chatswood                 12
Bondi Junction            12
Strathfield               12
Circular Quay             12
Martin Place              12
Hurstville                12
St Leonards               12
Blacktown                 12
Burwood                   12
Kings Cross               12
Hornsby                   11
Ashfield                  10
Kogarah                   10
Lidcombe                  10
Auburn                    11
Museum                    10
Epping                    11
Macquarie University      10
St James                  10
Rockdale                  10
Cabramatta                11
Bankstown                 10
Liverpool                 11
Sutherland                11
                          ..
Newcastle                  9
Waratah                    6
Warabrook (University)     6
Sandga

# Pivot Tables

In [41]:
# Every row whose index label (the station) is "Central": one row per survey year.
trains.loc[trains.index == 'Central']

Unnamed: 0_level_0,LINE,YEAR,SURVEY_DATE_USED,WHETHER_SURVEYED,IN_0200_0600,OUT_0200_0600,IN_0600_0930,OUT_0600_0930,IN_0930_1500,OUT_0930_1500,IN_1500_1830,OUT_1500_1830,IN_1830_0200,OUT_1830_0200,IN_24_HOURS,OUT_24_HOURS,RANK,STATION_SORT_ID
STATION,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
Central,CBD,2014,6/05/2014,Yes,940,920,10390,42070,21400,30150,45370,16270,19010,7700,97110,97110,1,101
Central,CBD,2013,1/05/2013,Yes,910,910,10630,40280,19420,30940,44660,15130,18680,7050,94300,94300,1,101
Central,CBD,2012,2/05/2012,Yes,890,880,10270,38450,20080,30160,41990,14550,17820,7010,91050,91050,1,101
Central,CBD,2011,26/05/2011,Yes,840,830,8800,35390,19760,29110,40040,14540,17340,6910,86780,86780,1,101
Central,CBD,2010,11/05/2010,Yes,810,820,8750,35220,19140,28200,39240,14040,16220,5890,84160,84160,1,101
Central,CBD,2009,19/05/2009,Yes,830,820,8260,37720,19120,27680,41000,13480,16050,5570,85260,85260,1,101
Central,CBD,2008,20/05/2008,Yes,820,810,7460,38860,19140,26140,41280,12890,15460,5460,84160,84160,1,101
Central,CBD,2007,22/05/2007,Yes,780,770,7940,36610,18540,24930,38600,12650,14190,5090,80050,80050,1,101
Central,CBD,2006,16/05/2006,Yes,740,720,7510,34550,17460,24440,37650,11660,13300,5300,76660,76660,1,101
Central,CBD,2005,31/05/2005,Yes,700,690,7540,33030,16450,23550,35710,11260,12620,4480,73020,73020,1,101


You can't chain two `groupby` calls, because a grouped object has no `groupby` method of its own:

In [43]:
# Deliberate failure: a GroupBy object has no .groupby of its own, so chaining
# raises AttributeError. Catch it and print the message so the demonstration
# is still visible but the notebook survives "Restart & Run All".
# (Grouping by both keys at once works: trains.groupby(['YEAR', 'LINE']).)
try:
    trains.groupby('YEAR').groupby('LINE').IN_24_HOURS.sum()
except AttributeError as error:
    print('%s: %s' % (type(error).__name__, error))

AttributeError: Cannot access callable attribute 'groupby' of 'DataFrameGroupBy' objects, try using the 'apply' method

In [46]:
# Per-year average of the 24-hour totals (pivot_table aggregates with the
# mean when no aggfunc is given).
trains.pivot_table(
    index='YEAR',
    values=['IN_24_HOURS', 'OUT_24_HOURS'],
)

Unnamed: 0_level_0,IN_24_HOURS,OUT_24_HOURS
YEAR,Unnamed: 1_level_1,Unnamed: 2_level_1
2004,2958.366013,2958.366013
2005,2928.092105,2928.092105
2006,2974.967105,2974.967105
2007,3110.855263,3110.855263
2008,3256.25,3256.25
2009,3253.485342,3253.485342
2010,3265.635179,3265.635179
2011,3390.749186,3390.749186
2012,3411.038961,3411.038961
2013,3461.623377,3461.623377


In [47]:
# Same per-year average, now over every IN_/OUT_ column.
trains.pivot_table(index='YEAR', values=numeric_columns)

Unnamed: 0_level_0,IN_0200_0600,IN_0600_0930,IN_0930_1500,IN_1500_1830,IN_1830_0200,IN_24_HOURS,OUT_0200_0600,OUT_0600_0930,OUT_0930_1500,OUT_1500_1830,OUT_1830_0200,OUT_24_HOURS
YEAR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2004,40.522876,987.48366,669.411765,1000.784314,260.130719,2958.366013,25.588235,912.581699,685.784314,973.398693,362.222222,2958.366013
2005,43.223684,974.177632,652.138158,998.519737,259.967105,2928.092105,25.559211,917.105263,671.282895,951.447368,362.631579,2928.092105
2006,43.717105,974.901316,658.717105,1019.934211,277.631579,2974.967105,26.348684,944.276316,673.289474,953.388158,377.335526,2974.967105
2007,45.361842,1021.940789,680.559211,1066.381579,296.546053,3110.855263,28.223684,987.927632,694.243421,993.552632,406.809211,3110.855263
2008,48.618421,1071.085526,718.980263,1100.098684,317.401316,3256.25,30.0,1033.190789,723.421053,1039.835526,429.835526,3256.25
2009,48.241042,1061.270358,734.267101,1079.120521,330.390879,3253.485342,29.967427,1003.127036,732.117264,1051.368078,436.872964,3253.485342
2010,48.436482,1062.28013,727.13355,1093.159609,334.462541,3265.635179,29.934853,1005.504886,728.469055,1051.726384,449.967427,3265.635179
2011,51.302932,1105.179153,747.52443,1129.087948,357.491857,3390.749186,30.944625,1048.078176,751.726384,1087.52443,472.86645,3390.749186
2012,50.844156,1116.493506,737.75974,1145.194805,360.584416,3411.038961,31.331169,1059.188312,757.24026,1082.75974,480.38961,3411.038961
2013,50.25974,1125.162338,734.512987,1165.487013,386.006494,3461.623377,31.915584,1076.785714,774.772727,1082.824675,495.811688,3461.623377


In [48]:
# Per-year totals. The aggregation is named with the string 'sum' instead of
# the builtin sum: pandas maps both to its own group-wise sum, but passing
# the builtin callable is deprecated in recent pandas releases.
pandas.pivot_table(trains, index='YEAR', values=numeric_columns, aggfunc='sum')

Unnamed: 0_level_0,IN_0200_0600,IN_0600_0930,IN_0930_1500,IN_1500_1830,IN_1830_0200,IN_24_HOURS,OUT_0200_0600,OUT_0600_0930,OUT_0930_1500,OUT_1500_1830,OUT_1830_0200,OUT_24_HOURS
YEAR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2004,12400,302170,204840,306240,79600,905260,7830,279250,209850,297860,110840,905260
2005,13140,296150,198250,303550,79030,890140,7770,278800,204070,289240,110240,890140
2006,13290,296370,200250,310060,84400,904390,8010,287060,204680,289830,114710,904390
2007,13790,310670,206890,324180,90150,945700,8580,300330,211050,302040,123670,945700
2008,14780,325610,218570,334430,96490,989900,9120,314090,219920,316110,130670,989900
2009,14810,325810,225420,331290,101430,998820,9200,307960,224760,322770,134120,998820
2010,14870,326120,223230,335600,102680,1002550,9190,308690,223640,322880,138140,1002550
2011,15750,339290,229490,346630,109750,1040960,9500,321760,230780,333870,145170,1040960
2012,15660,343880,227230,352720,111060,1050600,9650,326230,233230,333490,147960,1050600
2013,15480,346550,226230,358970,118890,1066180,9830,331650,238630,333510,152710,1066180


In [49]:
# Any function that reduces a group of values to one number can be the
# aggregation; here, the per-year count of values above the threshold.
trains.pivot_table(
    index='YEAR',
    values=numeric_columns,
    aggfunc=threshold_count,
)

Unnamed: 0_level_0,IN_0200_0600,IN_0600_0930,IN_0930_1500,IN_1500_1830,IN_1830_0200,IN_24_HOURS,OUT_0200_0600,OUT_0600_0930,OUT_0930_1500,OUT_1500_1830,OUT_1830_0200,OUT_24_HOURS
YEAR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2004,30,224,192,179,77,249,15,168,186,221,168,249
2005,32,223,189,177,74,247,15,167,184,220,167,247
2006,31,223,189,176,73,245,14,167,185,222,168,245
2007,35,224,187,174,78,245,17,165,183,222,169,245
2008,38,221,188,176,82,242,20,167,180,219,166,242
2009,35,222,184,178,90,245,20,171,180,219,169,245
2010,34,223,185,177,90,244,17,170,183,221,167,244
2011,35,226,188,178,89,245,18,168,184,223,173,245
2012,36,224,189,177,88,246,18,170,186,223,174,246
2013,39,226,189,171,87,246,19,167,185,219,178,246


In [50]:
# Totals per (LINE, YEAR) pair: a list of index keys produces a two-level
# row index. The string 'sum' replaces the builtin sum, which recent pandas
# releases deprecate as an aggregation argument; the result is the same.
pandas.pivot_table(trains,
                   index=['LINE', 'YEAR'],
                   values=numeric_columns,
                   aggfunc='sum')

Unnamed: 0_level_0,Unnamed: 1_level_0,IN_0200_0600,IN_0600_0930,IN_0930_1500,IN_1500_1830,IN_1830_0200,IN_24_HOURS,OUT_0200_0600,OUT_0600_0930,OUT_0930_1500,OUT_1500_1830,OUT_1830_0200,OUT_24_HOURS
LINE,YEAR,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Airport,2004,0,1330,1470,3160,720,6680,0,3360,1550,1480,310,6680
Airport,2005,0,1400,1550,3230,750,6930,0,3450,1620,1530,340,6930
Airport,2006,10,1480,1960,2990,770,7210,120,3440,1940,1330,390,7210
Airport,2007,20,1860,2290,3970,1100,9240,300,4400,2180,1820,530,9240
Airport,2008,40,1920,2540,4700,1260,10460,330,4940,2800,1880,540,10460
Airport,2009,30,2060,2620,3890,1280,9880,320,4220,2590,2190,580,9880
Airport,2010,30,2390,2670,4290,1460,10840,370,4590,2860,2380,630,10840
Airport,2011,40,2730,3310,6600,1990,14670,350,6270,3570,3310,1160,14670
Airport,2012,50,3410,4090,7260,2480,17290,550,7590,4010,3610,1510,17290
Airport,2013,50,4080,4940,7880,2760,19710,540,8210,4740,4340,1880,19710


In [52]:
# Several aggregations at once: the result's columns gain an outer level
# named after each function (sum, len, threshold_count).
trains.pivot_table(
    index=['LINE', 'YEAR'],
    values=['IN_24_HOURS', 'OUT_24_HOURS'],
    aggfunc=[sum, len, threshold_count],
)

Unnamed: 0_level_0,Unnamed: 1_level_0,sum,sum,len,len,threshold_count,threshold_count
Unnamed: 0_level_1,Unnamed: 1_level_1,IN_24_HOURS,OUT_24_HOURS,IN_24_HOURS,OUT_24_HOURS,IN_24_HOURS,OUT_24_HOURS
LINE,YEAR,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
Airport,2004,6680,6680,4,4,4,4
Airport,2005,6930,6930,4,4,4,4
Airport,2006,7210,7210,4,4,4,4
Airport,2007,9240,9240,4,4,4,4
Airport,2008,10460,10460,4,4,4,4
Airport,2009,9880,9880,4,4,4,4
Airport,2010,10840,10840,4,4,4,4
Airport,2011,14670,14670,4,4,4,4
Airport,2012,17290,17290,4,4,4,4
Airport,2013,19710,19710,4,4,4,4


In [54]:
# One row per LINE and one column per YEAR, each cell holding that line's
# total 24-hour entries for the year. The string 'sum' replaces the builtin
# sum, which recent pandas releases deprecate as an aggregation argument.
pandas.pivot_table(trains,
                   index=['LINE'],
                   values=["IN_24_HOURS"],
                   columns=["YEAR"],
                   aggfunc='sum')

Unnamed: 0_level_0,IN_24_HOURS,IN_24_HOURS,IN_24_HOURS,IN_24_HOURS,IN_24_HOURS,IN_24_HOURS,IN_24_HOURS,IN_24_HOURS,IN_24_HOURS,IN_24_HOURS,IN_24_HOURS
YEAR,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014
LINE,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
Airport,6680,6930,7210,9240,10460,9880,10840,14670,17290,19710,21880
Bankstown,48550,45030,46260,47620,52910,53880,52960,52830,53230,53340,54050
Blue Mountains,8660,8750,8710,8360,8110,8130,7940,7880,8310,7870,7980
CBD,252840,254910,268030,280840,291910,288920,292950,303640,310660,321830,334580
Carlingford,1450,1410,1410,1370,1380,1420,1470,1400,1270,1170,1340
Central Coast,16190,16250,15910,15890,15060,15120,14830,15320,15100,14800,14480
East Hills,48290,47290,47670,49160,51800,51650,48980,53080,52040,49910,51380
Eastern Suburbs,35120,34290,35580,36320,38020,36770,36560,39950,40230,41250,41940
Hunter,3530,3750,3960,3800,3810,3540,3390,3750,3640,3800,3940
Illawarra,81090,80320,78880,82270,85300,90120,87980,91420,89880,90660,99550


In [55]:
# Keep the multi-aggregation pivot so its two-level rows (LINE, YEAR) and
# two-level columns (function, value column) can be indexed below.
ugly_df = trains.pivot_table(
    index=['LINE', 'YEAR'],
    values=['IN_24_HOURS', 'OUT_24_HOURS'],
    aggfunc=[sum, len, threshold_count],
)

In [57]:
# A column of a two-level column index is addressed by a tuple
# (outer level, inner level); all rows are kept.
ugly_df.loc[:, ('sum', 'IN_24_HOURS')]

LINE                YEAR
Airport             2004      6680
                    2005      6930
                    2006      7210
                    2007      9240
                    2008     10460
                    2009      9880
                    2010     10840
                    2011     14670
                    2012     17290
                    2013     19710
                    2014     21880
Bankstown           2004     48550
                    2005     45030
                    2006     46260
                    2007     47620
                    2008     52910
                    2009     53880
                    2010     52960
                    2011     52830
                    2012     53230
                    2013     53340
                    2014     54050
Blue Mountains      2004      8660
                    2005      8750
                    2006      8710
                    2007      8360
                    2008      8110
                    2009      

In [58]:
ugly_df.loc[('Bankstown', 2014), ('sum', "IN_24_HOURS")]

54050