In [48]:
import pandas as pd

In [49]:
def AddStaToLookup(row,lookup):
    name = row[1]
    node = row[0]
    lookup[name] = node

In [50]:
def get_fares_from(df, name, add_fare):
    """
    This function gets all the fares from one station to every other in the original fare table.
    
    df : pandas DataFrame
        The original fare table
    name : str
        The name of the station that you want to get fares from
    add_fare : int
        Additional fare to add to each value in the existing fare table
        
    Returns a pandas Series of stations and fares
    """
    station = df.loc[(df.name_from == name) | (df.name_to == name)]
    station['destination'] = ''
    station.loc[station.name_from != name, 'destination'] = station.loc[station.name_from != name, 'name_from']
    station.loc[station.name_to != name, 'destination'] = station.loc[station.name_to != name, 'name_to']
    station.set_index('destination', inplace=True)
    
    series = pd.Series(data=(station.fare + add_fare), name=name)
    
    return series

In [51]:
def min_fare(series, df):
    """
    Returns minimum fare to each station based on a series where index is the proxy station
    that we calculate all fares from, and value is the additive fare (e.g. from new stop to
    existing proxy station)
    
    df is original fare table
    """
    ser = pd.Series()
    for index, value in series.iteritems():
        fares = get_fares_from(df, index, value)
        ser = pd.concat([ser, fares], axis=1)

    fare = ser.min(axis=1)
    return fare

In [52]:
def get_all_fares(node_df, fare_df, output_df, nodelookup):
    """
    Takes in DataFrame of new nodes, with new station names as index, and proxy stations as columns,
    with additive fares to proxy stations as values in each column. Returns DataFrame in similar format to 
    original data. Need to add node IDs.
    """
    
    for index, row in node_df.iterrows():
        results = pd.DataFrame(min_fare(row, fare_df))
        results.reset_index(inplace=True)
        results.columns = ['name_to', 'fare']
        results['name_from'] = index
        output_df = pd.concat([output_df,results], axis = 0)
    
    output_df['node_from'] = output_df['name_from'].apply(lambda x: lookup[x])
    output_df['node_to'] = output_df['name_to'].apply(lambda x: lookup[x])
        
    return output_df

In [53]:
# Read BART.far data into DataFrame
df = pd.read_table('test_data/BART.far', header=None, names=['node_from', 'node_to', 
                                                  'fare', ';', 'name_from', 'name_to'])
df = df.drop(';', axis=1)
df.name_from = df.name_from.str.replace(" to", "")

In [54]:
# Read new stations definition
new_sta = pd.DataFrame.from_csv('test_data/new_station.csv')
new_sta['Station'] = new_sta.index
new_sta.reset_index(drop=True, inplace=True)
new_sta

Unnamed: 0,Node,Station
0,18001,Ballpark
1,18002,2nd/Mission
2,18003,Union Square
3,18004,Van ness
4,18005,Fillmore
5,18006,Jack London Square


In [55]:
# get unique node number and name:
lookup = {}
for x in list(set(list(df[['node_from','name_from']].apply(lambda x: str(x[0]) + '@' + x[1], axis=1).unique()) + list(df[['node_to','name_to']].apply(lambda x: str(x[0]) + '@' + x[1], axis=1).unique()))):
    lookup[x.split('@')[1]] = int(x.split('@')[0])
new_sta.apply(lambda x: AddStaToLookup(x, lookup), axis=1)

0    None
1    None
2    None
3    None
4    None
5    None
dtype: object

In [56]:
# Make DataFrame of new nodes with additive fares to each proxy station. 
# We will take fares from each proxy station to each other station in the BART network,
# Add the fare from the new station of interest (e.g. fare from Ballpark to Fruitvale)
# And find the minimum fare to each other station in the network
farelink = pd.DataFrame.from_csv('test_data/farelink.csv')

In [57]:
output_df = pd.DataFrame(columns=['node_from','node_to','fare', 'name_from', 'name_to'])
output_df = get_all_fares(farelink, df, output_df, lookup)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


In [61]:
result = df.append(output_df)
result['key'] = result[['node_from','node_to']].apply(lambda x: str(set([x[0],x[1]])), axis=1)
len(result)

1696

In [64]:
result.drop_duplicates('key', keep='first', inplace=True)