In [51]:
import pandas as pd

In [52]:
def AddStaToLookup(row, lookup):
    """
    Register one station in the name -> node lookup, modifying the dict in place.

    row : sequence or pandas Series
        Record whose first element is the node and whose second element is the name
    lookup : dict
        Mapping of name to node; the entry for this row's name is added or overwritten

    Returns None
    """
    # Rows handed over by DataFrame.apply(axis=1) are Series labelled by column name,
    # so plain integer keys only work through pandas' deprecated positional fallback.
    # .iloc is explicitly positional; lists and tuples are indexed directly.
    fields = row.iloc if hasattr(row, 'iloc') else row
    node = fields[0]
    name = fields[1]
    lookup[name] = node

In [53]:
def get_fares_from(df, name, add_fare):
    """
    This function gets all the fares from one station to every other in the original fare table.

    Rows are matched in both directions: a row counts if the station appears in either
    name_from or name_to, and the station at the other end becomes the destination.

    df : pandas DataFrame
        The original fare table (needs name_from, name_to and fare columns)
    name : str
        The name of the station that you want to get fares from
    add_fare : int
        Additional fare to add to each value in the existing fare table

    Returns a pandas Series named after the station, indexed by destination, holding
    fare + add_fare
    """
    touches_station = (df.name_from == name) | (df.name_to == name)
    # Work on an explicit copy: assigning into a filtered slice of df triggers
    # SettingWithCopyWarning and leaves it unclear which frame is modified.
    station = df.loc[touches_station].copy()

    # The destination is whichever end of the row is not the requested station.
    # A row whose both ends equal `name` keeps the empty-string destination.
    from_is_other = station.name_from != name
    to_is_other = station.name_to != name
    station['destination'] = ''
    station.loc[from_is_other, 'destination'] = station.loc[from_is_other, 'name_from']
    station.loc[to_is_other, 'destination'] = station.loc[to_is_other, 'name_to']
    station = station.set_index('destination')

    series = pd.Series(data=(station.fare + add_fare), name=name)

    return series

In [54]:
def min_fare(series, df):
    """
    Returns minimum fare to each station based on a series where index is the proxy station
    that we calculate all fares from, and value is the additive fare (e.g. from new stop to
    existing proxy station)

    series : pandas Series
        Index is the proxy station name, value is the fare added on top of that proxy's fares
    df : pandas DataFrame
        The original fare table

    Returns a pandas Series indexed by destination station holding the cheapest fare over
    all proxies. An empty input series gives an empty Series.
    """
    # Series.iteritems() was removed in pandas 2.0; items() is the supported spelling.
    # Collect one fare column per proxy and concatenate once instead of growing a
    # frame inside the loop.
    per_proxy = [get_fares_from(df, proxy, added) for proxy, added in series.items()]
    if not per_proxy:
        return pd.Series(dtype=float)

    # Columns are aligned on destination; min skips proxies with no fare to a destination.
    return pd.concat(per_proxy, axis=1).min(axis=1)

In [55]:
def get_all_fares(node_df, fare_df, output_df, nodelookup):
    """
    Takes in DataFrame of new nodes, with new station names as index, and proxy stations as columns,
    with additive fares to proxy stations as values in each column. Returns DataFrame in similar format to
    original data, including node IDs.

    node_df : pandas DataFrame
        New station names as index, proxy stations as columns, additive fares as values
    fare_df : pandas DataFrame
        The original fare table
    output_df : pandas DataFrame
        Frame the new rows are appended to (it is not modified; a new frame is returned)
    nodelookup : dict
        Mapping of station name to node ID, used to fill node_from and node_to.
        A name missing from the mapping raises KeyError.
    """
    # Collect the pieces and concatenate once rather than re-copying the growing
    # frame on every iteration.
    pieces = [output_df]
    for new_station, proxy_fares in node_df.iterrows():
        results = pd.DataFrame(min_fare(proxy_fares, fare_df)).reset_index()
        results.columns = ['name_to', 'fare']
        results['name_from'] = new_station
        pieces.append(results)
    output_df = pd.concat(pieces, axis=0)

    # Use the mapping that was passed in; the previous version silently read a
    # global named `lookup` and ignored the nodelookup argument.
    output_df['node_from'] = output_df['name_from'].apply(lambda x: nodelookup[x])
    output_df['node_to'] = output_df['name_to'].apply(lambda x: nodelookup[x])

    return output_df

In [103]:
# Read BART.far data into DataFrame
df = pd.read_table('test_data/BART.far', header=None,
                   names=['node_from', 'node_to', 'fare', ';', 'name_from', 'name_to'])
df = df.drop(';', axis=1)
# The origin column carries the " to" separator from the source file; remove it as a
# literal string (regex=False makes that explicit across pandas versions).
df['name_from'] = df['name_from'].str.replace(" to", "", regex=False)

# Read new stations definition
# DataFrame.from_csv was removed in pandas 1.0; read_csv with index_col=0 keeps the
# first CSV column as the index. Date parsing of the index is not requested because
# the index holds station names.
new_sta = pd.read_csv('test_data/new_station_alt1.csv', index_col=0)
new_sta['Station'] = new_sta.index
new_sta = new_sta.reset_index(drop=True)

# get unique node number and name:
# Pair the columns directly instead of joining them into "node@name" strings and
# splitting again, which broke on names containing '@' and iterated a set in
# arbitrary order.
lookup = {}
for node_col, name_col in (('node_from', 'name_from'), ('node_to', 'name_to')):
    pairs = df[[node_col, name_col]].drop_duplicates()
    lookup.update(zip(pairs[name_col].tolist(), pairs[node_col].astype(int).tolist()))

# Add the new stations on top of the existing ones.
for _, station_row in new_sta.iterrows():
    AddStaToLookup(station_row, lookup)

Unnamed: 0,node_from,node_to,fare,name_from,name_to
0,15538,15545,129,Dublin/ Pleasanton,West Dublin/Pleasanton
1,15538,15537,129,Dublin/ Pleasanton,Castro Valley
2,15538,15526,325,Dublin/ Pleasanton,Fremont
3,15538,15527,304,Dublin/ Pleasanton,Union City
4,15538,15528,269,Dublin/ Pleasanton,South Hayward


In [94]:
# Make a DataFrame of new nodes with additive fares to each proxy station.
# We take the fares from each proxy station to every other station in the BART network,
# add the fare from the new station of interest (e.g. fare from Ballpark to Fruitvale),
# and find the minimum fare to each other station in the network.
# DataFrame.from_csv was removed in pandas 1.0; read_csv with index_col=0 keeps the
# first CSV column (the new station name) as the index.
farelink = pd.read_csv('test_data/farelink_alt2_op.csv', index_col=0)

In [95]:
# Display the additive-fare table loaded in the previous cell.
farelink

Unnamed: 0_level_0,MacArthur,Fruitvale
Station,Unnamed: 1_level_1,Unnamed: 2_level_1
15th/Franklin,129,258
Jack London Square,258,129
Alameda,387,258
4th/Brannan,604,475
6th/Brannan,604,475
Howard,604,475
Hyde,604,475
Van Ness,604,475
Fillmore,604,475


In [10]:
# Start from an empty frame with the fare-table columns, then let get_all_fares
# return it extended with the fare rows for every station in farelink.
# Requires get_all_fares, farelink, df and lookup from earlier cells.
output_df = pd.DataFrame(columns=['node_from','node_to','fare', 'name_from', 'name_to'])
output_df = get_all_fares(farelink, df, output_df, lookup)

NameError: name 'get_all_fares' is not defined

In [97]:
# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
# The columns are put in the alphabetical order the old append produced, because the
# cell that adds links between new stations assigns whole rows positionally in that
# order. .copy() gives an independent frame so adding the key column is unambiguous.
result = pd.concat([df, output_df])[['fare', 'name_from', 'name_to', 'node_from', 'node_to']].copy()

# Direction-independent key: smaller node ID followed by the larger one, so A->B and
# B->A rows share a key and can be de-duplicated later.
result['key'] = [
    str(min(node_a, node_b)) + str(max(node_a, node_b))
    for node_a, node_b in zip(result['node_from'], result['node_to'])
]
len(result)

1855

In [98]:
# Keep the first row seen for each key, then renumber the rows from 0.
result = (
    result
    .drop_duplicates(subset='key', keep='first')
    .reset_index(drop=True)
)
result.head()

Unnamed: 0,fare,name_from,name_to,node_from,node_to,key
0,129,Dublin/ Pleasanton,West Dublin/Pleasanton,15538,15545,1553815545
1,129,Dublin/ Pleasanton,Castro Valley,15538,15537,1553715538
2,325,Dublin/ Pleasanton,Fremont,15538,15526,1552615538
3,304,Dublin/ Pleasanton,Union City,15538,15527,1552715538
4,269,Dublin/ Pleasanton,South Hayward,15538,15528,1552815538


In [87]:
# Need to manually add fare links between the new stations (done in the next cell, with a fare of 0)

In [99]:
# Add a zero-fare link for every ordered pair of distinct new stations.
# Rows are built as dicts keyed by column name and appended in one concat, so the
# values no longer depend on the column order of `result` and the frame is not
# re-allocated once per row.
allsta = list(new_sta.Station.unique())
new_links = []
for station in allsta:
    for item in allsta:
        if item == station:
            continue
        node_a, node_b = lookup[station], lookup[item]
        new_links.append({
            'fare': 0,
            'name_from': station,
            'name_to': item,
            'node_from': node_a,
            'node_to': node_b,
            # Same direction-independent key format as the rest of the table.
            'key': str(min([node_a, node_b])) + str(max([node_a, node_b])),
        })

if new_links:
    # Continue the existing integer row labels, as the row-by-row version did.
    first_label = 0 if result.empty else result.index.max() + 1
    link_rows = pd.DataFrame(new_links, index=range(first_label, first_label + len(new_links)))
    result = pd.concat([result, link_rows])

In [100]:
# Drop rows whose key was already seen (keeping the first), then renumber rows from 0.
result = (
    result
    .drop_duplicates(subset='key', keep='first')
    .reset_index(drop=True)
)
result.head()

Unnamed: 0,fare,name_from,name_to,node_from,node_to,key
0,129,Dublin/ Pleasanton,West Dublin/Pleasanton,15538,15545,1553815545
1,129,Dublin/ Pleasanton,Castro Valley,15538,15537,1553715538
2,325,Dublin/ Pleasanton,Fremont,15538,15526,1552615538
3,304,Dublin/ Pleasanton,Union City,15538,15527,1552715538
4,269,Dublin/ Pleasanton,South Hayward,15538,15528,1552815538


In [101]:
# Write the de-duplicated fare table, including its row index, to CSV.
result.to_csv('test_data/bartfare.csv')

In [96]:
# Read the exported fare table (output_atl1.csv, not BART.far) into a DataFrame.
# DataFrame.from_csv was removed in pandas 1.0; read_csv with index_col=0 keeps the
# first CSV column as the index.
df = pd.read_csv('test_data/output_atl1.csv', index_col=0)
df.head()

Unnamed: 0,node_from,node_to,fare,name_from,name_to,key,output
0,15536,15539,472,Pittsburg/ Bay Point,Colma,1553615539,15536 15539 472 ; Pittsburg/ Bay Point to C...
1,15536,15544,507,Pittsburg/ Bay Point,Warm Springs,1553615544,15536 15544 507 ; Pittsburg/ Bay Point to W...
2,15536,15520,364,Pittsburg/ Bay Point,Richmond,1552015536,15536 15520 364 ; Pittsburg/ Bay Point to R...
3,15536,15537,402,Pittsburg/ Bay Point,Castro Valley,1553615537,15536 15537 402 ; Pittsburg/ Bay Point to C...
4,15536,15527,455,Pittsburg/ Bay Point,Union City,1552715536,15536 15527 455 ; Pittsburg/ Bay Point to U...


In [97]:
# Every distinct origin station, in order of first appearance, plus 'Millbrae'
# appended at the end.
stations_from = df['name_from'].unique().tolist() + ['Millbrae']

In [98]:
# stations_to is bound to the same list object as stations_from (an alias, not a copy).
stations_to = stations_from

In [99]:
# Empty frame with one column per station; no rows are created here.
matrix = pd.DataFrame(columns=(stations_from))

In [100]:
# Fill the station-by-station fare matrix: 0 on the diagonal, otherwise the fare
# between the two stations taken from df.
for sta_from in stations_from:
    # The fares from this origin are the same for every destination, so look them
    # up once per row instead of once per cell.
    fares_from = get_fares_from(df, sta_from, 0)
    for sta_to in stations_to:
        if sta_from == sta_to:
            value = 0
        else:
            value = fares_from[sta_to]
        # DataFrame.set_value was removed in pandas 1.0; .loc assigns a single cell
        # and creates the row on first use.
        matrix.loc[sta_from, sta_to] = value

AttributeError: 'BlockManager' object has no attribute 'T'

In [190]:
# Write the fare matrix, including its row labels, to CSV.
matrix.to_csv('test_data/farematrix.csv')

In [125]:
# Read BART.far data into DataFrame
df = pd.read_table('test_data/BART.far', header=None,
                   names=['node_from', 'node_to', 'fare', ';', 'name_from', 'name_to'])
df = df.drop(';', axis=1)
# The origin column carries the " to" separator from the source file; remove it as a
# literal string (regex=False makes that explicit across pandas versions).
df['name_from'] = df['name_from'].str.replace(" to", "", regex=False)

# Read new stations definition
# DataFrame.from_csv was removed in pandas 1.0; read_csv with index_col=0 keeps the
# first CSV column as the index. Date parsing of the index is not requested because
# the index holds station names.
new_sta = pd.read_csv('test_data/new_station_alt4.csv', index_col=0)
new_sta['Station'] = new_sta.index
new_sta = new_sta.reset_index(drop=True)

# get unique node number and name:
# Pair the columns directly instead of joining them into "node@name" strings and
# splitting again, which broke on names containing '@' and iterated a set in
# arbitrary order.
lookup = {}
for node_col, name_col in (('node_from', 'name_from'), ('node_to', 'name_to')):
    pairs = df[[node_col, name_col]].drop_duplicates()
    lookup.update(zip(pairs[name_col].tolist(), pairs[node_col].astype(int).tolist()))

# Add the new stations on top of the existing ones.
for _, station_row in new_sta.iterrows():
    AddStaToLookup(station_row, lookup)

# Station-by-station fare matrix; the first CSV column holds the row labels.
matrix = pd.read_csv('test_data/farematrix_alt4.csv', index_col=0)

In [126]:
# Two independent lists of every station name in the lookup, one for each axis of
# the origin/destination pairing.
allstations_from = list(lookup)
allstations_to = list(lookup)

In [127]:
# One record per (origin, destination) pair, including a station paired with itself.
# The fare is read from the matrix cell at [origin, destination]. Records are
# collected first and turned into a frame once; growing the frame row by row with
# .loc re-allocates it on every assignment.
fare_records = [
    (lookup[station_from], lookup[station_to], matrix.at[station_from, station_to],
     station_from, station_to)
    for station_from in allstations_from
    for station_to in allstations_to
]
output_df = pd.DataFrame(fare_records, columns=['node_from','node_to','fare', 'name_from', 'name_to'])

In [128]:
# Treat missing fares as 0 and store fares as integers. The filled column is assigned
# back explicitly: fillna(inplace=True) on a column selection is chained assignment
# and does not update the frame under pandas copy-on-write.
output_df['fare'] = output_df['fare'].fillna(0).astype(int)
# Keep only pairs with a positive fare. .copy() makes the result an independent
# frame so later column assignments do not raise SettingWithCopyWarning.
output_df = output_df[output_df['fare'] > 0].copy()

In [129]:
# Direction-independent key: smaller node ID followed by the larger one, so A->B and
# B->A rows share a key. Columns are read by name rather than via positional x[0]/x[1]
# on a labelled row, which relies on deprecated pandas behaviour.
pair_keys = [
    str(min([int(node_a), int(node_b)])) + str(max([int(node_a), int(node_b)]))
    for node_a, node_b in zip(output_df['node_from'], output_df['node_to'])
]
# assign() returns a new frame, so this is safe even when output_df is a filtered slice.
output_df = output_df.assign(key=pair_keys)
# Keep the first row seen for each key, then renumber rows from 0.
output_df = output_df.drop_duplicates('key', keep='first').reset_index(drop=True)
output_df.head()

Unnamed: 0,node_from,node_to,fare,name_from,name_to,key
0,15536,15539,472,Pittsburg/ Bay Point,Colma,1553615539
1,15536,15544,507,Pittsburg/ Bay Point,Warm Springs,1553615544
2,15536,15520,364,Pittsburg/ Bay Point,Richmond,1552015536
3,15536,15537,402,Pittsburg/ Bay Point,Castro Valley,1553615537
4,15536,15527,455,Pittsburg/ Bay Point,Union City,1552715536


In [130]:
# Build one text line per row: "<node_from>   <node_to>  <fare> ; <name_from> to <name_to>".
# Columns are read by name so the line no longer depends on the frame's column order
# or on positional indexing of a labelled row.
output_df['output'] = [
    str(int(node_from)) + "   " + str(int(node_to)) + "  " + str(int(fare)) + " ; " + name_from + " to " + name_to
    for node_from, node_to, fare, name_from, name_to in zip(
        output_df['node_from'], output_df['node_to'], output_df['fare'],
        output_df['name_from'], output_df['name_to'])
]

In [131]:
# Write the final fare table, including its row index, to CSV.
output_df.to_csv('test_data/output_atl4.csv')