**2-D Arrays** with NumPy

In [4]:
import numpy as np

# Subway ridership for 5 stations on 10 different days
ridership = np.array([
    [   0,    0,    2,    5,    0],
    [1478, 3877, 3674, 2328, 2539],
    [1613, 4088, 3991, 6461, 2691],
    [1560, 3392, 3826, 4787, 2613],
    [1608, 4802, 3932, 4477, 2705],
    [1576, 3933, 3909, 4979, 2685],
    [  95,  229,  255,  496,  201],
    [   2,    0,    1,   27,    0],
    [1438, 3785, 3589, 4174, 2215],
    [1342, 4043, 4009, 4665, 3033]
])

# Change False to True for each block of code to see what it does

# Accessing elements
if False:
    print ridership[1, 3]
    print ridership[1:3, 3:5]
    print ridership[1, :]
    
# Vectorized operations on rows or columns
if False:
    print ridership[0, :] + ridership[1, :]
    print ridership[:, 0] + ridership[:, 1]
    
# Vectorized operations on entire arrays
if True:
    a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    b = np.array([[1, 1, 1], [2, 2, 2], [3, 3, 3]])
    print a + b

[[ 2  3  4]
 [ 6  7  8]
 [10 11 12]]


In [15]:
def mean_riders_for_max_station(ridership):
    '''
    Fill in this function to find the station with the maximum riders on the
    first day, then return the mean riders per day for that station. Also return the mean ridership overall for comparsion.
    
    Hint: NumPy's argmax() function might be useful:
    http://docs.scipy.org/doc/numpy/reference/generated/numpy.argmax.html
    '''
    overall_mean = ridership.mean()
    
    max_station=ridership[0,:].argmax() # find the station with max passengers on first day
    
    mean_for_max = ridership[:,max_station].mean() # find the mean for the max_station above
    
    return (overall_mean, mean_for_max)

In [16]:
print mean_riders_for_max_station(ridership)

(2342.5999999999999, 3239.9000000000001)


In [20]:
def min_and_max_riders_per_day(ridership):
    '''
    Fill in this function. First, for each subway station, calculate the mean ridership per day. 
    Then, out of all the subway stations, return the maximum and minimum of these values. That is, find the maximum
    mean-ridership-per-day and the minimum mean-ridership-per-day for any subway station.
    '''
    # mean ridership per station
    mean_per_station = ridership.mean(axis=0) # axis=0 will perform the airthmetic operation on column
    
    max_daily_ridership = mean_per_station.max()
    min_daily_ridership = mean_per_station.min()
    
    return (max_daily_ridership, min_daily_ridership)

In [21]:
print min_and_max_riders_per_day(ridership)

(3239.9000000000001, 1071.2)


## ** DataFrames in Pandas**

In [1]:
import pandas as pd

# Subway ridership for 5 stations on 10 different days
ridership_df = pd.DataFrame(
    data=[[   0,    0,    2,    5,    0],
          [1478, 3877, 3674, 2328, 2539],
          [1613, 4088, 3991, 6461, 2691],
          [1560, 3392, 3826, 4787, 2613],
          [1608, 4802, 3932, 4477, 2705],
          [1576, 3933, 3909, 4979, 2685],
          [  95,  229,  255,  496,  201],
          [   2,    0,    1,   27,    0],
          [1438, 3785, 3589, 4174, 2215],
          [1342, 4043, 4009, 4665, 3033]],
    index=['05-01-11', '05-02-11', '05-03-11', '05-04-11', '05-05-11',
           '05-06-11', '05-07-11', '05-08-11', '05-09-11', '05-10-11'],
    columns=['R003', 'R004', 'R005', 'R006', 'R007']
)

In [3]:
# Accessing elements

#print ridership_df.iloc[0]
print ridership_df.loc['05-05-11']
#print ridership_df['R003']
#print ridership_df.iloc[1, 3]

R003    1608
R004    4802
R005    3932
R006    4477
R007    2705
Name: 05-05-11, dtype: int64


In [27]:
# Pandas axis

print ridership_df.sum()
#print df.sum(axis=1)
#print df.values.sum()

R003    10712
R004    28149
R005    27188
R006    32399
R007    18682
dtype: int64


In [42]:
m= ridership_df.mean()
mean_ = m.mean()
print mean_

2342.6


In [33]:
max_station = ridership_df.iloc[0].argmax()

In [38]:
ridership_df[max_station].mean()

3239.9

In [43]:
def mean_riders_for_max_station(ridership):
    '''
    Fill in this function to find the station with the maximum riders on the
    first day, then return the mean riders per day for that station. Also
    return the mean ridership overall for comparsion.
    
    This is the same as a previous exercise, but this time the
    input is a Pandas DataFrame rather than a 2D NumPy array.
    '''
    overall_mean = ridership.values.mean()
    
    max_station = ridership.iloc[0].argmax()
    mean_for_max = ridership[max_station].mean()
    
    return (overall_mean, mean_for_max)

In [47]:
mean_riders_for_max_station(ridership_df)

(2342.6000000000004, 3239.9)

In [46]:
ridership_df.values.mean()

2342.5999999999999

# Loading data thru Pandas

In [50]:
subway_df = pd.read_csv('datasets\\nyc_subway_weather.csv')

In [51]:
subway_df.head()

Unnamed: 0,UNIT,DATEn,TIMEn,ENTRIESn,EXITSn,ENTRIESn_hourly,EXITSn_hourly,datetime,hour,day_week,...,pressurei,rain,tempi,wspdi,meanprecipi,meanpressurei,meantempi,meanwspdi,weather_lat,weather_lon
0,R003,05-01-11,00:00:00,4388333,2911002,0.0,0.0,2011-05-01 00:00:00,0,6,...,30.22,0,55.9,3.5,0.0,30.258,55.98,7.86,40.700348,-73.887177
1,R003,05-01-11,04:00:00,4388333,2911002,0.0,0.0,2011-05-01 04:00:00,4,6,...,30.25,0,52.0,3.5,0.0,30.258,55.98,7.86,40.700348,-73.887177
2,R003,05-01-11,12:00:00,4388333,2911002,0.0,0.0,2011-05-01 12:00:00,12,6,...,30.28,0,62.1,6.9,0.0,30.258,55.98,7.86,40.700348,-73.887177
3,R003,05-01-11,16:00:00,4388333,2911002,0.0,0.0,2011-05-01 16:00:00,16,6,...,30.26,0,57.9,15.0,0.0,30.258,55.98,7.86,40.700348,-73.887177
4,R003,05-01-11,20:00:00,4388333,2911002,0.0,0.0,2011-05-01 20:00:00,20,6,...,30.28,0,52.0,10.4,0.0,30.258,55.98,7.86,40.700348,-73.887177


In [52]:
subway_df.describe()

Unnamed: 0,ENTRIESn,EXITSn,ENTRIESn_hourly,EXITSn_hourly,hour,day_week,weekday,latitude,longitude,fog,...,pressurei,rain,tempi,wspdi,meanprecipi,meanpressurei,meantempi,meanwspdi,weather_lat,weather_lon
count,42649.0,42649.0,42649.0,42649.0,42649.0,42649.0,42649.0,42649.0,42649.0,42649.0,...,42649.0,42649.0,42649.0,42649.0,42649.0,42649.0,42649.0,42649.0,42649.0,42649.0
mean,28124860.0,19869930.0,1886.589955,1361.487866,10.046754,2.905719,0.714436,40.724647,-73.940364,0.009824,...,29.971096,0.224741,63.10378,6.927872,0.004618,29.971096,63.10378,6.927872,40.728555,-73.938693
std,30436070.0,20289860.0,2952.385585,2183.845409,6.938928,2.079231,0.451688,0.07165,0.059713,0.098631,...,0.137942,0.417417,8.455597,4.510178,0.016344,0.131158,6.939011,3.179832,0.06542,0.059582
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,40.576152,-74.073622,0.0,...,29.55,0.0,46.9,0.0,0.0,29.59,49.4,0.0,40.600204,-74.01487
25%,10397620.0,7613712.0,274.0,237.0,4.0,1.0,0.0,40.677107,-73.987342,0.0,...,29.89,0.0,57.0,4.6,0.0,29.913333,58.283333,4.816667,40.688591,-73.98513
50%,18183890.0,13316090.0,905.0,664.0,12.0,3.0,1.0,40.717241,-73.953459,0.0,...,29.96,0.0,61.0,6.9,0.0,29.958,60.95,6.166667,40.72057,-73.94915
75%,32630490.0,23937710.0,2255.0,1537.0,16.0,5.0,1.0,40.759123,-73.907733,0.0,...,30.06,0.0,69.1,9.2,0.0,30.06,67.466667,8.85,40.755226,-73.912033
max,235774600.0,149378200.0,32814.0,34828.0,20.0,6.0,1.0,40.889185,-73.755383,1.0,...,30.32,1.0,86.0,23.0,0.1575,30.293333,79.8,17.083333,40.862064,-73.694176


In [70]:
def correlation(x, y):
    '''
    Fill in this function to compute the correlation between the two
    input variables. Each input is either a NumPy array or a Pandas
    Series.
    
    correlation = average of (x in standard units) times (y in standard units)
    
    Remember to pass the argument "ddof=0" to the Pandas std() function!
    '''
    # Get std. dev for both x & y, ddof=0 - degrees of freedom to hold while calc std.dev
    
    x_std = x.std(axis=0,ddof=0)
    y_std = y.std(axis=0,ddof=0)
    
    # compute mean for each variable
    x_mean = x.mean()
    y_mean = y.mean()

    # Standardize the data: (x - x.mean)/std.dev(x)
    
    stdized_x = (x - x_mean)/x_std
    stdized_y = (y - y_mean)/y_std
    
    # correlation = average of (x in standard units) times (y in standard units)
    
    correlation = (stdized_x * stdized_y).mean()
    return correlation

entries = subway_df['ENTRIESn_hourly']
cum_entries = subway_df['ENTRIESn']
rain = subway_df['meanprecipi']
temp = subway_df['meantempi']

print correlation(entries, rain)
print correlation(entries, temp)
print correlation(rain, temp)
print correlation(entries, cum_entries)

0.0356485157722
-0.0266933483216
-0.229034323408
0.585895470766


# Examples of vectorized operations on DataFrames:

In [2]:
import pandas as pd

In [3]:
# Adding DataFrames with the column names

df1 = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9]})
df2 = pd.DataFrame({'a': [10, 20, 30], 'b': [40, 50, 60], 'c': [70, 80, 90]})
print df1 + df2

    a   b   c
0  11  44  77
1  22  55  88
2  33  66  99


In [4]:
# Adding DataFrames with overlapping column names 

df1 = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9]})
df2 = pd.DataFrame({'d': [10, 20, 30], 'c': [40, 50, 60], 'b': [70, 80, 90]})
print df1 + df2

    a   b   c   d
0 NaN  74  47 NaN
1 NaN  85  58 NaN
2 NaN  96  69 NaN


In [5]:
# Adding DataFrames with overlapping row indexes

df1 = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9]},
                   index=['row1', 'row2', 'row3'])
df2 = pd.DataFrame({'a': [10, 20, 30], 'b': [40, 50, 60], 'c': [70, 80, 90]},
                   index=['row4', 'row3', 'row2'])
print df1 + df2

         a     b     c
row1   NaN   NaN   NaN
row2  32.0  65.0  98.0
row3  23.0  56.0  89.0
row4   NaN   NaN   NaN


In [19]:

# Cumulative entries and exits for one station for a few hours.
entries_and_exits = pd.DataFrame({
    'ENTRIESn': [3144312, 3144335, 3144353, 3144424, 3144594,
                 3144808, 3144895, 3144905, 3144941, 3145094],
    'EXITSn': [1088151, 1088159, 1088177, 1088231, 1088275,
               1088317, 1088328, 1088331, 1088420, 1088753]
})

def get_hourly_entries_and_exits(entries_and_exits):
    '''
    Fill in this function to take a DataFrame with cumulative entries
    and exits (entries in the first column, exits in the second) and
    return a DataFrame with hourly entries and exits (entries in the
    first column, exits in the second).
    '''
    ## Use shift() function 
    hourly_entries = entries_and_exits['ENTRIESn'] - entries_and_exits['ENTRIESn'].shift(periods=1) # shift by 1
    hourly_exits = entries_and_exits['EXITSn'] - entries_and_exits['EXITSn'].shift(periods=1) 
    
    df = pd.DataFrame({
    'Hourly_ENTRIESn': hourly_entries,
    'Hourly_EXITSn': hourly_exits
            })
    return df

In [20]:
Hourly_enty_exit = get_hourly_entries_and_exits(entries_and_exits)

print Hourly_enty_exit

   Hourly_ENTRIESn  Hourly_EXITSn
0              NaN            NaN
1             23.0            8.0
2             18.0           18.0
3             71.0           54.0
4            170.0           44.0
5            214.0           42.0
6             87.0           11.0
7             10.0            3.0
8             36.0           89.0
9            153.0          333.0


# Using applymap()

In [25]:
# DataFrame applymap()
df = pd.DataFrame({
    'a': [1, 2, 3],
    'b': [10, 20, 30],
    'c': [5, 10, 15]
})

def add_one(x):
    return x + 1

print df, '\n'
print df.applymap(add_one)

   a   b   c
0  1  10   5
1  2  20  10
2  3  30  15 

   a   b   c
0  2  11   6
1  3  21  11
2  4  31  16


In [37]:
###    Fill in this function to convert the given DataFrame of numerical
###    grades to letter grades. Return a new DataFrame with the converted
###    grade

grades_df = pd.DataFrame(
    data={'exam1': [43, 81, 78, 75, 89, 70, 91, 65, 98, 87],
          'exam2': [24, 63, 56, 56, 67, 51, 79, 46, 72, 60]},
    index=['Andre', 'Barry', 'Chris', 'Dan', 'Emilio', 
           'Fred', 'Greta', 'Humbert', 'Ivan', 'James']
)

## first write a function to covert a single numeric value to a letter grade.

def convert_grade(grade):
    
    if grade >= 90:
        return 'A'
    elif grade >= 80:
        return 'B'
    elif grade >= 70:
        return 'C'
    elif grade >= 60:
        return 'C'
    else:
        return 'D'

# Now applymap() the function on to the dataframe
def convert_grades(grades):
    return grades.applymap(convert_grade)

In [38]:
print convert_grades(grades_df)

        exam1 exam2
Andre       D     D
Barry       B     C
Chris       C     D
Dan         C     D
Emilio      B     C
Fred        C     D
Greta       A     C
Humbert     C     D
Ivan        A     C
James       B     C


# Using apply() on a dataframe

#### apply() is more flexible than applymap(). The map function will apply transformation on ALL elements of a dataframe, if we need to apply different function to each column of the dataframe based on the column values the apply() is better suited than applymap()

In [41]:
grades_df = pd.DataFrame(
    data={'exam1': [43, 81, 78, 75, 89, 70, 91, 65, 98, 87],
          'exam2': [24, 63, 56, 56, 67, 51, 79, 46, 72, 60]},
    index=['Andre', 'Barry', 'Chris', 'Dan', 'Emilio', 
           'Fred', 'Greta', 'Humbert', 'Ivan', 'James']
)

# DataFrame apply()

def convert_grades_curve(exam_grades):
    # Pandas has a bult-in function that will perform this calculation
    # This will give the bottom 0% to 10% of students the grade 'F',
    # 10% to 20% the grade 'D', and so on. You can read more about
    # the qcut() function here:
    # http://pandas.pydata.org/pandas-docs/stable/generated/pandas.qcut.html
    return pd.qcut(exam_grades,
                   [0, 0.1, 0.2, 0.5, 0.8, 1],
                   labels=['F', 'D', 'C', 'B', 'A'])

# qcut() operates on a list, array, or Series. This is the
# result of running the function on a single column of the
# DataFrame.
print convert_grades_curve(grades_df['exam1'])

# qcut() does not work on DataFrames, but we can use apply()
# to call the function on each column separately
print grades_df.apply(convert_grades_curve)

Andre      F
Barry      B
Chris      C
Dan        C
Emilio     B
Fred       C
Greta      A
Humbert    D
Ivan       A
James      B
Name: exam1, dtype: category
Categories (5, object): [F < D < C < B < A]
        exam1 exam2
Andre       F     F
Barry       B     B
Chris       C     C
Dan         C     C
Emilio      B     B
Fred        C     C
Greta       A     A
Humbert     D     D
Ivan        A     A
James       B     B


In [52]:
'''
    Write a function to standardize each column of the given
    DataFrame. To standardize a variable, convert each value to the
    number of standard deviations it is above or below the mean.
'''

# Again first standardize one column, then we can call apply()

def stdize_column(column):
    return (column - column.mean())/(column.std(ddof=0))

In [53]:
def standardize(df):
    return df.apply(stdize_column)

In [55]:
#try
stdize_column(grades_df['exam1'])

Andre     -2.315341
Barry      0.220191
Chris      0.020017
Dan       -0.180156
Emilio     0.753987
Fred      -0.513779
Greta      0.887436
Humbert   -0.847401
Ivan       1.354508
James      0.620538
Name: exam1, dtype: float64

In [56]:
standardize(grades_df)

Unnamed: 0,exam1,exam2
Andre,-2.315341,-2.304599
Barry,0.220191,0.3864
Chris,0.020017,-0.0966
Dan,-0.180156,-0.0966
Emilio,0.753987,0.6624
Fred,-0.513779,-0.4416
Greta,0.887436,1.4904
Humbert,-0.847401,-0.7866
Ivan,1.354508,1.0074
James,0.620538,0.1794


In [57]:
import numpy as np

df = pd.DataFrame({
        'a': [4, 5, 3, 1, 2],
        'b': [20, 10, 40, 50, 30],
        'c': [25, 20, 5, 15, 10]
    })

# Change False to True for this block of code to see what it does

# DataFrame apply() - use case 2

print df.apply(np.mean)
print df.apply(np.max)

a     3.0
b    30.0
c    15.0
dtype: float64
a     5
b    50
c    25
dtype: int64


In [73]:
def second_largest_value(column):
    sorted_column = column.sort_values(ascending=False,inplace=False)
    
    ##iloc provides the index location of the sorted series where 0 has max value and 1 has second largest value
    return sorted_column.iloc[1]  

# try
print second_largest_value(df['a'])


4


In [78]:
def second_largest(df):
    '''
    Fill in this function to return the second-largest value of each 
    column of the input DataFrame.
    '''
    return df.apply(second_largest_value) # now apply columnwise using apply()

In [79]:
second_largest(df)

a     4
b    40
c    20
dtype: int64

In [80]:
second_largest(grades_df)

exam1    91
exam2    72
dtype: int64

# Adding Series to a Dataframe

In [81]:
# Adding a Series to a square DataFrame

s = pd.Series([1, 2, 3, 4])
df = pd.DataFrame({
    0: [10, 20, 30, 40],
    1: [50, 60, 70, 80],
    2: [90, 100, 110, 120],
    3: [130, 140, 150, 160]
})

print df
print '' # Create a blank line between outputs
print df + s

    0   1    2    3
0  10  50   90  130
1  20  60  100  140
2  30  70  110  150
3  40  80  120  160

    0   1    2    3
0  11  52   93  134
1  21  62  103  144
2  31  72  113  154
3  41  82  123  164


In [82]:
# Adding a Series to a one-row DataFrame 

s = pd.Series([1, 2, 3, 4])
df = pd.DataFrame({0: [10], 1: [20], 2: [30], 3: [40]})

print df
print '' # Create a blank line between outputs
print df + s

    0   1   2   3
0  10  20  30  40

    0   1   2   3
0  11  22  33  44


In [83]:
# Adding a Series to a one-column DataFrame

s = pd.Series([1, 2, 3, 4])
df = pd.DataFrame({0: [10, 20, 30, 40]})

print df
print '' # Create a blank line between outputs
print df + s

    0
0  10
1  20
2  30
3  40

    0   1   2   3
0  11 NaN NaN NaN
1  21 NaN NaN NaN
2  31 NaN NaN NaN
3  41 NaN NaN NaN


In [84]:
# Adding when DataFrame column names match Series index

s = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
df = pd.DataFrame({
    'a': [10, 20, 30, 40],
    'b': [50, 60, 70, 80],
    'c': [90, 100, 110, 120],
    'd': [130, 140, 150, 160]
})

print df
print '' # Create a blank line between outputs
print df + s

    a   b    c    d
0  10  50   90  130
1  20  60  100  140
2  30  70  110  150
3  40  80  120  160

    a   b    c    d
0  11  52   93  134
1  21  62  103  144
2  31  72  113  154
3  41  82  123  164


In [85]:
# Adding when DataFrame column names don't match Series index

s = pd.Series([1, 2, 3, 4])
df = pd.DataFrame({
    'a': [10, 20, 30, 40],
    'b': [50, 60, 70, 80],
    'c': [90, 100, 110, 120],
    'd': [130, 140, 150, 160]
})

print df
print '' # Create a blank line between outputs
print df + s

    a   b    c    d
0  10  50   90  130
1  20  60  100  140
2  30  70  110  150
3  40  80  120  160

    0   1   2   3   a   b   c   d
0 NaN NaN NaN NaN NaN NaN NaN NaN
1 NaN NaN NaN NaN NaN NaN NaN NaN
2 NaN NaN NaN NaN NaN NaN NaN NaN
3 NaN NaN NaN NaN NaN NaN NaN NaN


## Adding with series to a dataframe with .add() and index options

In [1]:
# Adding with axis='index'
import pandas as pd
import numpy as np

s = pd.Series([1, 2, 3, 4])
df = pd.DataFrame({
    0: [10, 20, 30, 40],
    1: [50, 60, 70, 80],
    2: [90, 100, 110, 120],
    3: [130, 140, 150, 160]
})
    
print df
print '' # Create a blank line between outputs
print df.add(s, axis='index')
# The functions sub(), mul(), and div() work similarly to add()

    0   1    2    3
0  10  50   90  130
1  20  60  100  140
2  30  70  110  150
3  40  80  120  160

    0   1    2    3
0  11  51   91  131
1  22  62  102  142
2  33  73  113  153
3  44  84  124  164


In [2]:
# Adding with axis='columns'
s = pd.Series([1, 2, 3, 4])
df = pd.DataFrame({
    0: [10, 20, 30, 40],
    1: [50, 60, 70, 80],
    2: [90, 100, 110, 120],
    3: [130, 140, 150, 160]
})

print df
print '' # Create a blank line between outputs
print df.add(s, axis='columns')
# The functions sub(), mul(), and div() work similarly to add()

    0   1    2    3
0  10  50   90  130
1  20  60  100  140
2  30  70  110  150
3  40  80  120  160

    0   1    2    3
0  11  52   93  134
1  21  62  103  144
2  31  72  113  154
3  41  82  123  164


In [5]:
grades_df = pd.DataFrame(
    data={'exam1': [43, 81, 78, 75, 89, 70, 91, 65, 98, 87],
          'exam2': [24, 63, 56, 56, 67, 51, 79, 46, 72, 60]},
    index=['Andre', 'Barry', 'Chris', 'Dan', 'Emilio', 
           'Fred', 'Greta', 'Humbert', 'Ivan', 'James']
)

print grades_df

         exam1  exam2
Andre       43     24
Barry       81     63
Chris       78     56
Dan         75     56
Emilio      89     67
Fred        70     51
Greta       91     79
Humbert     65     46
Ivan        98     72
James       87     60


In [14]:
def standardize(df):
    '''
    Fill in this function to standardize each column of the given
    DataFrame. To standardize a variable, convert each value to the
    number of standard deviations it is above or below the mean.
    
    This time, try to use vectorized operations instead of apply().
    You should get the same results as you did before.
    '''
    numerator = df.sub(df.mean(),axis='columns') # subtract mean from each value, this is still a dataframe
    
    denominator = df.std(ddof=0) ## this will be a series since std deviation for each column is one scalar value

    return numerator.div(denominator,axis='columns')



print standardize(grades_df)


            exam1     exam2
Andre   -2.315341 -2.304599
Barry    0.220191  0.386400
Chris    0.020017 -0.096600
Dan     -0.180156 -0.096600
Emilio   0.753987  0.662400
Fred    -0.513779 -0.441600
Greta    0.887436  1.490400
Humbert -0.847401 -0.786600
Ivan     1.354508  1.007400
James    0.620538  0.179400


In [29]:
def standardize_rows(df):
    '''
    Optional: Fill in this function to standardize each row of the given
    DataFrame. Again, try not to use apply().
    
    This one is more challenging than standardizing each column!
    '''
    mean_diffs = df.sub(df.mean(axis='columns'),axis='index')
    
    return mean_diffs.div(df.std(axis='columns',ddof=0),axis='index')


print standardize_rows(grades_df)

         exam1  exam2
Andre      1.0   -1.0
Barry      1.0   -1.0
Chris      1.0   -1.0
Dan        1.0   -1.0
Emilio     1.0   -1.0
Fred       1.0   -1.0
Greta      1.0   -1.0
Humbert    1.0   -1.0
Ivan       1.0   -1.0
James      1.0   -1.0
