In [53]:
import pandas as pd

# Examples of vectorized operations on DataFrames:
# Change False to True for each block of code to see what it does

# Adding DataFrames with the same column names
if False:
    df1 = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9]})
    df2 = pd.DataFrame({'a': [10, 20, 30], 'b': [40, 50, 60], 'c': [70, 80, 90]})
    # Cells are paired up by column name and row index, then added.
    # print(...) with one argument behaves the same on Python 2 and 3.
    print(df1 + df2)

# Adding DataFrames with overlapping column names
if False:
    df1 = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9]})
    df2 = pd.DataFrame({'d': [10, 20, 30], 'c': [40, 50, 60], 'b': [70, 80, 90]})
    # Only 'b' and 'c' exist in both frames; columns found in just one
    # frame ('a' and 'd') have nothing to add to and come out as NaN.
    print(df1 + df2)

# Adding DataFrames with overlapping row indexes
if False:
    df1 = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9]},
                       index=['row1', 'row2', 'row3'])
    df2 = pd.DataFrame({'a': [10, 20, 30], 'b': [40, 50, 60], 'c': [70, 80, 90]},
                       index=['row4', 'row3', 'row2'])
    # Rows are matched by label, not by position: 'row2' and 'row3' are
    # summed, while 'row1' and 'row4' (present in one frame only) are NaN.
    print(df1 + df2)

# --- Quiz ---
# Sample quiz data: running (cumulative) turnstile totals for a single
# station, one row per consecutive reading, written as
# (ENTRIESn, EXITSn) pairs.
entries_and_exits = pd.DataFrame(
    [
        (3144312, 1088151),
        (3144335, 1088159),
        (3144353, 1088177),
        (3144424, 1088231),
        (3144594, 1088275),
        (3144808, 1088317),
        (3144895, 1088328),
        (3144905, 1088331),
        (3144941, 1088420),
        (3145094, 1088753),
    ],
    columns=['ENTRIESn', 'EXITSn'],
)

def get_hourly_entries_and_exits(entries_and_exits):
    '''
    Convert cumulative entries and exits into per-reading (hourly) counts.

    input: pandas DataFrame of cumulative counts (entries in the first
           column, exits in the second), one row per reading
    output: pandas DataFrame with the same index and the same columns in
            the same order, where each row holds that reading minus the
            previous one. The first row is NaN because it has no
            previous reading to subtract.

    The subtraction is done with the vectorized DataFrame.diff() rather
    than a hand-written loop over positions, so it does not depend on the
    rows being labelled 0..n-1 and it runs unchanged on Python 2 and 3.
    '''
    return entries_and_exits.diff()

In [54]:
# pandas' built-in row-to-row difference on the sample data; the output
# below matches what get_hourly_entries_and_exits produces.
entries_and_exits.diff()

Unnamed: 0,ENTRIESn,EXITSn
0,,
1,23.0,8.0
2,18.0,18.0
3,71.0,54.0
4,170.0,44.0
5,214.0,42.0
6,87.0,11.0
7,10.0,3.0
8,36.0,89.0
9,153.0,333.0


In [55]:
# Run the quiz function on the sample data; row 0 is empty in the output
# because the first reading has no earlier reading to subtract.
get_hourly_entries_and_exits(entries_and_exits)

Unnamed: 0,ENTRIESn,EXITSn
0,,
1,23.0,8.0
2,18.0,18.0
3,71.0,54.0
4,170.0,44.0
5,214.0,42.0
6,87.0,11.0
7,10.0,3.0
8,36.0,89.0
9,153.0,333.0
