In [11]:
import pandas as pd

In [12]:
# Read BART.far data into DataFrame
df = pd.read_table('BART.far', header=None, names=['node_from', 'node_to', 
                                                  'fare', ';', 'name_from', 'name_to'])
df = df.drop(';', axis=1)
df.name_from = df.name_from.str.replace(" to", "")

In [26]:
df.head()

Unnamed: 0,node_from,node_to,fare,name_from,name_to
0,15538,15545,129,Dublin/ Pleasanton,West Dublin/Pleasanton
1,15538,15537,129,Dublin/ Pleasanton,Castro Valley
2,15538,15526,325,Dublin/ Pleasanton,Fremont
3,15538,15527,304,Dublin/ Pleasanton,Union City
4,15538,15528,269,Dublin/ Pleasanton,South Hayward


In [94]:
# Make DataFrame of new nodes with additive fares to each proxy station. 
# We will take fares from each proxy station to each other station in the BART network,
# Add the fare from the new station of interest (e.g. fare from Ballpark to Fruitvale)
# And find the minimum fare to each other station in the network

# Set additive fares below:

new_nodes = {
    'name':['Ballpark', '2nd/Mission', 'Union Square', 'Van Ness', 'Fillmore', 'Jack London Square'],
    'Fruitvale':[217, 217, 217, 217, 217, 0],
    'Montgomery': [0, 0, 0, 0, 0, 217],
    '12th St Oakland City Center': [217, 217, 217, 217, 217, 0]
}

testnodes = pd.DataFrame(new_nodes)
testnodes.set_index('name', inplace=True)

In [101]:
testnodes

Unnamed: 0_level_0,12th St Oakland City Center,Fruitvale,Montgomery
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Ballpark,217,217,0
2nd/Mission,217,217,0
Union Square,217,217,0
Van Ness,217,217,0
Fillmore,217,217,0
Jack London Square,0,0,217


In [128]:
def get_fares_from(df, name, add_fare):
    """
    This function gets all the fares from one station to every other in the original fare table.
    
    df : pandas DataFrame
        The original fare table
    name : str
        The name of the station that you want to get fares from
    add_fare : int
        Additional fare to add to each value in the existing fare table
        
    Returns a pandas Series of stations and fares
    """
    station = df.loc[(df.name_from == name) | (df.name_to == name)]
    station['destination'] = ''
    station.loc[station.name_from != name, 'destination'] = station.loc[station.name_from != name, 'name_from']
    station.loc[station.name_to != name, 'destination'] = station.loc[station.name_to != name, 'name_to']
    station.set_index('destination', inplace=True)
    
    series = pd.Series(data=(station.fare + add_fare), name=name)
    
    return series

In [76]:
fruitvale = get_fares_from(df, 'Fruitvale', 20)
mont = get_fares_from(df, 'Montgomery', 20)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


In [124]:
ballpark = testnodes.loc['Ballpark']

In [126]:
def min_fare(series, fare_df):
    """
    Returns minimum fare to each station based on a series where index is the proxy station
    that we calculate all fares from, and value is the additive fare (e.g. from new stop to
    existing proxy station)
    """
    ser = pd.Series()
    for index, value in ballpark.iteritems():
        fares = get_fares_from(df, index, value)
        ser = pd.concat([ser, fares], axis=1)

    fare = ser.min(axis=1)
    return fare

In [154]:
def get_all_fares(node_df, fare_df):
    """
    Takes in DataFrame of new nodes, with new station names as index, and proxy stations as columns,
    with additive fares to proxy stations as values in each column. Returns DataFrame in similar format to 
    original data. Need to add node IDs.
    """
    
    df = pd.DataFrame(columns=['name_from', 'name_to', 'fare'])
    
    for index, row in node_df.iterrows():
        results = pd.DataFrame(min_fare(row, fare_df))
        results.reset_index(inplace=True)
        results.columns = ['name_to', 'fare']
        results['name_from'] = index
        df = pd.concat([df,results], axis = 0)
        
    return df
        

In [155]:
get_all_fares(testnodes, df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,fare,name_from,name_to
0,231.0,Ballpark,12th St Oakland City Center
1,129.0,Ballpark,16th St Mission
2,231.0,Ballpark,19th St Oakland
3,129.0,Ballpark,24th St Mission
4,521.0,Ballpark,Alum Rock
5,262.0,Ballpark,Ashby
6,133.0,Ballpark,Balboa Park
7,322.0,Ballpark,Bay Fair
8,273.0,Ballpark,Berkeley
9,469.0,Ballpark,Berryessa
