In [1]:


###############################################################################
#
# FILE: stadium_matching.ipynb
#
# BY: Timur Abbiasov
#
# DATE: Aug 25 2020
#
# DESC: This code contains three parts: 
# (1) first, I match subsidies data from Long 2013 to the main stadiums table
# (2) second, I match the data on stadium capacity from Wikipedia to the main stadiums table
# (3) finally, I use the matched data to conduct cost-benefit analysis of stadium subsidies using a number of assumptions
#
# COMMENT: 
#
###############################################################################

In [75]:
################################ Libraries #####################################

import sqlalchemy as db
import pandas as pd

import nltk
import os
from dotenv import load_dotenv

from datetime import datetime

import numpy as np
import re
import math
################################################################################

In [68]:
################################ Options and definitions #####################################

# Disable pandas' chained-assignment (SettingWithCopy) check globally:
# later cells add columns to / reset the index of filtered DataFrames.
pd.options.mode.chained_assignment = None

def flatten(l):
    """Return a flat list with the leaves of an arbitrarily nested iterable.

    Only `list` and `tuple` items are descended into; every other item
    (including dicts and strings) is kept as a single element. The outer
    argument may be any iterable, e.g. a `map` object or a pandas Series.
    """
    flat = []
    for element in l:
        if not isinstance(element, (list, tuple)):
            flat.append(element)
            continue
        # Nested container: splice its flattened content in place.
        flat += flatten(element)
    return flat

def percentile(n):
    """Build an aggregation function that returns the n-th quantile.

    Args:
        n: quantile as a fraction in [0, 1] (e.g. 0.25 for the first quartile).

    Returns:
        A callable taking an object with a `.quantile` method (e.g. a pandas
        Series) and returning `x.quantile(n)`. Its `__name__` is set to
        'percentile_<n*100>' so that groupby/agg output columns get a
        readable label.
    """
    def percentile_(x):
        return x.quantile(n)
    # No width padding: '{:2.0f}' produced 'percentile_ 5' (with an embedded
    # space) for n < 0.1. Names for n >= 0.1 are unchanged.
    percentile_.__name__ = 'percentile_{:.0f}'.format(n*100)
    return percentile_

# Maps the sport label of a stadium record to the league code that is looked up
# inside the subsidy records' `league` field.
leagueBySport = {'hockey': 'NHL', 'football': 'NFL', 'baseball': 'MLB', 'basketball': 'NBA'} 

################################ Env variables #####################################

# Read PostgreSQL credentials from a local .env file / the process environment.
# os.getenv returns None when a variable is not set.
load_dotenv()
PSQL_USER = os.getenv('PSQL_USER')
PSQL_PASS = os.getenv('PSQL_PASS')

In [None]:
################################################################################
####################### Part I: Subsidy Matching ###############################
################################################################################

In [3]:
def get_stadiums():
    """Read the stadiums table from PostgreSQL into a DataFrame.

    Connects with the PSQL_USER / PSQL_PASS credentials read from the
    environment and selects every row of the `stadiums` table.

    Returns:
        pandas.DataFrame with the columns stadium_id, stadium_sport,
        location_name, city and stadium_cbg.

    Raises:
        Whatever SQLAlchemy / pandas raise on connection or query failure;
        the engine is disposed in that case as well.
    """

    # PostgreSQL connection
    engine = db.create_engine(f'postgresql://{PSQL_USER}:{PSQL_PASS}@134.209.70.145/dataname2')

    # Plain string literal: the query contains no interpolated values.
    get_stadiums_table = """
            SELECT
                sname_place_id as stadium_id,
                sport as stadium_sport,
                location_name,
                city,
                cbg as stadium_cbg
            FROM
                stadiums
                ;
            """

    # Read stadiums into a dataframe; always release the connection pool,
    # even when the query raises.
    try:
        results = pd.read_sql(get_stadiums_table, con = engine)
    finally:
        engine.dispose()

    return results

In [5]:
# Create new table with subsidies using the data from Long 2013

# The source CSV has no header row, so column names are supplied here.
subsidy_columns = [
    'year_opened', 'location', "league", "facility_name",
    "total_cost_reported", "public_cost_reported",
    "total_cost_2010", "public_cost_2010", "public_share_pct",
]
subsidies = pd.read_csv("./data/subsidies_long_2013.csv", header=None, names=subsidy_columns)

# Keep the row position as an explicit key so matches can be joined back later.
subsidies = subsidies.assign(record_index=subsidies.index)

In [6]:
# Load the stadiums table from the database.
# Note: a stadium_id can appear on several rows (one per sport hosted at the venue).

stadiums = get_stadiums()

In [7]:
# Convert tables to dictionaries (to be used in the matching procedure)

# One dict per row, restricted to the fields the matching functions read.
subsidy_fields = ['record_index', 'location', 'league', 'facility_name']
stadium_fields = ['stadium_id', 'location_name', 'city']

subsidy_records = subsidies.loc[:, subsidy_fields].to_dict(orient='records')
stadium_records = stadiums.loc[:, stadium_fields].to_dict(orient='records')

In [8]:
# Define function that flags 'exact matches':
# returns True if every word of the stadium name appears in the subsidy record's facility name
# (by default the city/location comparison does not affect the result):

def compare_entries(stadium_record, subsidy_record, require_location=False):
    """Tell whether a stadium record 'exactly' matches a subsidy record.

    A match requires every space-separated word of the stadium's
    `location_name` to occur (case-insensitively, as a substring) in the
    subsidy record's `facility_name`.

    Args:
        stadium_record: dict with the keys 'location_name' and 'city'.
        subsidy_record: dict with the keys 'facility_name' and 'location'.
        require_location: when True, additionally require every word of the
            stadium's `city` to occur in the subsidy record's `location`.
            Defaults to False, which reproduces the previous behaviour: the
            location check used to be computed but was never part of the
            returned value.

    Returns:
        bool: True if the records match under the rules above. A blank
        stadium name never matches (it used to match every record vacuously).
    """
    def _all_words_in(words_source, haystack):
        # Empty tokens (from repeated spaces) are dropped; they are substrings
        # of everything and therefore carry no information.
        words = [word for word in words_source.lower().split(' ') if word]
        haystack = haystack.lower()
        return bool(words) and all(word in haystack for word in words)

    if not _all_words_in(stadium_record['location_name'], subsidy_record['facility_name']):
        return False
    if require_location:
        return _all_words_in(stadium_record['city'], subsidy_record['location'])
    return True

In [9]:
# Create a list that associates each stadium record with the list of subsidy records that matches exactly
# (using compare_entries)

# One single-key dict per stadium record: {stadium_id: [matching subsidy records]}.
# Each matching subsidy record is copied and tagged with the stadium_id.
stadium_matching = [
    {
        stadium_record['stadium_id']: [
            {**subsidy_record, 'stadium_id': stadium_record['stadium_id']}
            for subsidy_record in subsidy_records
            if compare_entries(stadium_record, subsidy_record)
        ]
    }
    for stadium_record in stadium_records
]

In [13]:
# Spot-check one entry: {stadium_id: [subsidy records matched by name]}.
print(stadium_matching[1])

{'sg:1cc8dbe5600542ceba0de4a2f15eb7a6': [{'record_index': 104, 'location': 'New York', 'league': 'NBA/ NHL', 'facility_name': 'Madison Square Garden (R)', 'stadium_id': 'sg:1cc8dbe5600542ceba0de4a2f15eb7a6'}, {'record_index': 150, 'location': 'New York', 'league': 'NBA/ NHL', 'facility_name': 'Madison Square Garden', 'stadium_id': 'sg:1cc8dbe5600542ceba0de4a2f15eb7a6'}, {'record_index': 206, 'location': 'New York', 'league': 'NBA/NHL', 'facility_name': 'Madison Square Garden III', 'stadium_id': 'sg:1cc8dbe5600542ceba0de4a2f15eb7a6'}]}


In [15]:
# save matched data to matched_data table, and save unmatched stadium records separately for further processing

# Stadiums whose list of exact matches is empty go on to fuzzy matching.
unmatched_stadium_ids = [
    next(iter(entry)) for entry in stadium_matching if not next(iter(entry.values()))
]
# reset_index() keeps the old index as an 'index' column and gives the frame
# a fresh 0..n-1 index.
unmatched_stadiums = stadiums[stadiums['stadium_id'].isin(unmatched_stadium_ids)].reset_index()

# Stadiums with at least one exact match: flatten their subsidy records into one table.
stadium_matched_records = [entry for entry in stadium_matching if next(iter(entry.values()))]
matched_rows = flatten([list(entry.values()) for entry in stadium_matched_records])
matched_data = pd.DataFrame.from_dict(matched_rows)

In [18]:
# define functions that return best matching subsidy record(s) for a given stadium record,
# using nltk edit distance for stadium name to determine similarity

def findMatches(stadium_name,start=None,end=None):
    """Rank all subsidy records by name similarity to `stadium_name`.

    Similarity is the nltk edit distance between the lower-cased names with
    newlines and slashes removed. Reads the module-level `subsidies` table.

    Args:
        stadium_name: stadium name to look up.
        start, end: slice bounds applied to the ranked list.

    Returns:
        list of dicts (facility_name, location, league, record_index,
        best_match_distance), closest first; ties keep the table order.
    """
    candidates = [
        {
            'facility_name': sub['facility_name'],
            'location': sub['location'],
            'league': sub['league'],
            'record_index': sub['record_index'],
            'best_match_distance': nltk.edit_distance(
                stadium_name.lower().replace('\n','').replace('/',''),
                sub['facility_name'].lower().replace('\n','').replace('/','')
            ),
        }
        for sub in subsidies.to_dict('records')
    ]
    # list.sort is stable, like sorted().
    candidates.sort(key=lambda candidate: candidate['best_match_distance'])
    return candidates[start:end]

def findBestMatch(stadium_name):
    """Return the single subsidy record whose name is closest to `stadium_name`."""
    closest = findMatches(stadium_name, end = 1)
    return closest[0]

In [22]:
# For every stadium without an exact match, find the subsidy record with the closest name.
# Result: a Series holding one dict (as returned by findBestMatch) per stadium.
matches = unmatched_stadiums['location_name'].apply(findBestMatch)

In [23]:
# Attach the best-match fields to the unmatched stadiums (joined on the 0..n-1 index).
matches = unmatched_stadiums.join(
    pd.DataFrame.from_records(matches)[
        ['record_index','location','league','facility_name','best_match_distance']
    ]
)

# Edit distance between the stadium's city and the matched record's location.
# Label-based access: positional `row[0]` on a label-indexed row is deprecated in pandas 2.x.
matches['city_distance'] = matches[['city','location']].apply(
    lambda row: nltk.edit_distance(row['city'].lower(), row['location'].lower()),
    axis=1
)

In [24]:
# Accept a fuzzy match when the smaller of the two edit distances (name, city)
# is below 4 and their product does not exceed 20.
filter_matches_by_city = (
    matches[['city_distance','best_match_distance']].min(axis=1) < 4) & (
    matches['best_match_distance']*matches['city_distance']<=20)

# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
matches_by_city = pd.concat([
    matched_data,
    matches[filter_matches_by_city][['record_index','location','league','facility_name','stadium_id']]
])

In [25]:
# Attach the full subsidy record to each (stadium_id, record_index) pair found so far.
subsidies_matched_v1 = matches_by_city[['record_index','stadium_id']].merge(
    subsidies, on='record_index', how='left'
)

In [29]:
# Stadiums that have neither an exact nor an accepted fuzzy match so far.
remaining_unmatched_stadiums = stadiums[~stadiums['stadium_id'].isin(subsidies_matched_v1['stadium_id'])]

In [440]:
# define a function that return best matching subsidy records for a given stadium record,
# using nltk edit distance for both stadium name and city name:
# returns all records with the minimum edit distance in city name 
# and sorts the results by edit distance in stadium name

def findCityMatches(stadium_dict,start=None,end=None):
    """Find candidate subsidy records for a stadium by city, league and name.

    Among all rows of the module-level `subsidies` table, keep those whose
    location has the minimum edit distance to the stadium's city and whose
    league field contains the stadium's league code, then rank them by edit
    distance between facility name and stadium name.

    Args:
        stadium_dict: dict with the keys 'stadium_id', 'city',
            'location_name' and 'stadium_sport' (a key of `leagueBySport`).
        start, end: slice bounds applied to the ranked candidates.

    Returns:
        list of dicts describing the candidates (closest name first; ties
        keep the table order). If no record qualifies, a one-element list
        with the stadium fields and 'best_city_match_distance': None.

    Raises:
        KeyError: if the stadium's sport is not in `leagueBySport`.
    """
    def _normalize(text):
        return text.lower().replace('\n','').replace('/','')

    stadium_id = stadium_dict['stadium_id']
    city_name = stadium_dict['city']
    stadium_name = stadium_dict['location_name']
    stadium_league = leagueBySport[stadium_dict['stadium_sport']]

    no_match = [{'stadium_id': stadium_id,
                 'stadium_name': stadium_name,
                 'stadium_city': city_name,
                 'stadium_league': stadium_league,
                 'best_city_match_distance': None}]

    records = subsidies.to_dict('records')
    if not records:
        # Nothing to compare against (previously an IndexError).
        return no_match

    # Normalise the stadium side once instead of once per subsidy record.
    city_key = _normalize(city_name)
    name_key = _normalize(stadium_name)

    city_distances = [
        nltk.edit_distance(city_key, _normalize(sub['location'])) for sub in records
    ]
    min_city_distance = min(city_distances)

    # The name distance is only needed for records that survive the
    # city/league filter, so it is computed for those alone.
    candidates = [
        {'stadium_id': stadium_id,
         'stadium_name': stadium_name,
         'stadium_city': city_name,
         'stadium_league': stadium_league,
         'sub_name': sub['facility_name'],
         'sub_city': sub['location'],
         'sub_league': sub['league'],
         'record_index': sub['record_index'],
         'best_city_match_distance': city_distance,
         'best_match_distance': nltk.edit_distance(
             name_key, _normalize(sub['facility_name'])
         )}
        for sub, city_distance in zip(records, city_distances)
        if city_distance <= min_city_distance and stadium_league in sub['league']
    ]
    candidates.sort(key = lambda item: item['best_match_distance'])

    if candidates:
        return candidates[start:end]
    return no_match
    
def findBestCityMatch(stadium_dict):
    """Return only the top-ranked candidate produced by findCityMatches."""
    top_candidates = findCityMatches(stadium_dict, end = 1)
    return top_candidates[0]

In [526]:
# findCityMatches({'location_name': 'Enterprise Center',
#                  'city': 'saint louis', 
#                  'stadium_sport': 'hockey', 
#                  'stadium_id': 'sg:ac283081b6e44c1cb19feb1a5ce6fa5d'
#                 })

In [504]:
# Obtain all candidate matches for each still-unmatched, non-soccer stadium
# using findCityMatches()

city_match_fields = ['stadium_id','location_name','city','stadium_sport']

non_soccer_unmatched = remaining_unmatched_stadiums.loc[
    remaining_unmatched_stadiums['stadium_sport'] != 'soccer',
    city_match_fields
]

# One list of candidate dicts per stadium row.
city_candidate_lists = non_soccer_unmatched.apply(
    lambda row: findCityMatches(dict(zip(city_match_fields, row))),
    axis=1
)

city_matches = pd.DataFrame.from_records(flatten(city_candidate_lists))



In [505]:
# Export the candidate matches to csv for manual verification
# (written without the DataFrame index).

city_matches.to_csv("./data/tmp/by_location_and_league.csv", index=False)

In [31]:
# Import manually verified matches.
# NOTE(review): this file lives under ./data/manual-matches/, not under ./data/tmp/ where the
# candidates are exported — presumably it is the hand-edited copy with a `match` column; confirm.

manual_matches = pd.read_csv("./data/manual-matches/by_location_and_league.csv")

In [33]:
# Merge the manual matches to the subsidies data:

# Accept the verification flag whether pandas parsed the `match` column as text ('1'),
# integer (1) or float (1.0); comparing against the string '1' alone silently selects
# nothing when the column is numeric.
is_verified_match = manual_matches['match'].astype(str).str.strip().isin(['1', '1.0'])

subsidies_matched_manual = pd.merge(
                            manual_matches.loc[is_verified_match, ['stadium_id','record_index']],
                            subsidies,
                            on='record_index', how='left')


In [34]:
# Combine the previously obtained matches with the manually verified ones
# (pd.concat replaces DataFrame.append, which was removed in pandas 2.0):

subsidies_matched_all = pd.concat([subsidies_matched_v1, subsidies_matched_manual])

In [533]:
# Export results: every (stadium_id, subsidy record) pair matched so far,
# written without the DataFrame index.

subsidies_matched_all.to_csv("./data/output/all_matches.csv", index=False)

In [40]:
# major_league_stadiums = stadiums[stadiums['stadium_sport']!='soccer']
# major_league_stadiums[
#     ~major_league_stadiums['stadium_id'].isin(subsidies_matched_all["stadium_id"])
# ]

In [35]:
# Collapse duplicate matches: keep one row per (stadium_id, record_index, year_opened),
# taking the first available value of each cost field.

subsidy_cost_fields = [
    'total_cost_reported',
    'public_cost_reported',
    'total_cost_2010',
    'public_cost_2010',
    'public_share_pct',
]

subsidies_by_stadium_record = (
    subsidies_matched_all
    .groupby(['stadium_id','record_index','year_opened'])
    .agg({field: 'first' for field in subsidy_cost_fields})
    .reset_index()
)

In [36]:
# Group the results by stadium_id and obtain total public costs.
# 'sum' skips NaN and returns 0 for an all-NaN group, i.e. the same result np.nansum gave;
# passing the numpy callable to agg() is deprecated in recent pandas.

subsidies_by_stadium = (
    subsidies_by_stadium_record
    .groupby(['stadium_id'])
    .agg({'public_cost_2010': 'sum'})
    .reset_index()
)

In [38]:
# Merge the data on total public costs with the initial stadiums table
# (a stadium hosting several sports yields one row per sport):

subsidies_by_stadium = subsidies_by_stadium.merge(stadiums, how='left', on='stadium_id')

In [39]:
# Preview: the same stadium_id appears once per sport played at the venue.
subsidies_by_stadium.head()

Unnamed: 0,stadium_id,public_cost_2010,stadium_sport,location_name,city,stadium_cbg
0,sg:008eedc461cd430f8be8e2450a1f5702,0.0,hockey,Verizon Center,washington,110010058002
1,sg:008eedc461cd430f8be8e2450a1f5702,0.0,basketball,Verizon Center,washington,110010058002
2,sg:022558c60a6b480aaa6bb0e7dadb4e6a,0.0,basketball,Bmo Harris Bradley Center,milwaukee,550791863001
3,sg:082e609ab2c544d6b33bec38563ad068,531.0,basketball,Amway Center,orlando,120950105001
4,sg:08fa621a2cb740d8a33603438abbd0fc,129.0,hockey,Xcel Energy Center,saint paul,271230342011


In [None]:
################################################################################
####################### Part II: Capacity Matching #############################
################################################################################

In [607]:
# Read in capacity data (one table per league group).
# The football/baseball table is queried with the columns Stadium / City / Type / Capacity;
# the NBA and NHL tables with Arena / Location / Opened / Capacity.

capacity_football_baseball = pd.read_csv("./data/wiki/capacity_football_baseball.csv")
capacity_nhl = pd.read_csv("./data/wiki/capacity_nhl.csv")
capacity_nba = pd.read_csv("./data/wiki/capacity_nba.csv")

In [613]:
# Define function that outputs candidate matches for each stadium based on stadium name and city name:

def findWikiMatches(stadium_dict, 
                    name_field = 'Stadium', 
                    city_field = 'City', 
                    type_field = 'Type', 
                    capacity_field = 'Capacity',
                    data = None,
                    start=None,end=None):
    """Find candidate capacity records for a stadium by name and city.

    Args:
        stadium_dict: dict with the keys 'stadium_id', 'city',
            'location_name' and 'stadium_sport'.
        name_field, city_field, type_field, capacity_field: column names of
            `data` holding the venue name, its city, an extra descriptive
            field (copied to 'wiki_type') and the capacity.
        data: DataFrame of capacity records. Defaults to the module-level
            `capacity_football_baseball` table, resolved at call time.
            NOTE(review): the previous default referenced `capacity_data`,
            which is not defined anywhere in this notebook (NameError on a
            fresh kernel); the football/baseball table is assumed because
            the default column names are only used for that table — confirm.
        start, end: slice bounds, applied only in the last fallback below.

    Returns:
        list of candidate dicts, chosen in this order:
        1. all records whose name is within edit distance < 3 of the stadium
           name (ordered by city distance);
        2. otherwise, among records at minimum city distance, the single
           closest name if its distance is < 6;
        3. otherwise those closest-city records ranked by name distance,
           sliced by [start:end].
        If `data` has no rows, a one-element list with the stadium fields
        and 'best_city_match_distance': None.
    """
    if data is None:
        data = capacity_football_baseball

    # Raw string: '\[' is an invalid escape in a normal string literal.
    footnote_pattern = r'(\[.*\])'

    stadium_id = stadium_dict['stadium_id']
    city_name = stadium_dict['city']
    stadium_name = stadium_dict['location_name']
    stadium_sport = stadium_dict['stadium_sport']

    records = data.to_dict('records')
    if not records:
        # Previously an IndexError; the no-match fallback was unreachable.
        return [{'stadium_id': stadium_id,
                 'stadium_name': stadium_name,
                 'stadium_city': city_name,
                 'best_city_match_distance': None}]

    # Normalise the stadium side once instead of once per record.
    stadium_city_key = city_name.lower().replace('\n','').replace('/','')
    stadium_name_key = stadium_name.lower().replace('\n','').replace('/','')

    candidates = []
    for item in records:
        # Drop bracketed footnote markers such as "[12]".
        wiki_name = re.sub(footnote_pattern, '', item[name_field])
        # str() lets the capacity column be either text ("18,000[5]") or an integer.
        capacity_text = re.sub(footnote_pattern, '', str(item[capacity_field]))
        candidates.append({
            'stadium_id': stadium_id,
            'stadium_name': stadium_name,
            'stadium_city': city_name,
            'stadium_sport': stadium_sport,
            'wiki_name': wiki_name,
            'wiki_city': item[city_field],
            'capacity': int(capacity_text.replace(",","")),
            'wiki_type': item[type_field],
            # Only the part before the first comma of the city field is compared.
            'best_city_match_distance': nltk.edit_distance(
                stadium_city_key,
                item[city_field].split(',')[0].lower().replace('\n','').replace('/','')
            ),
            'best_match_distance': nltk.edit_distance(
                stadium_name_key,
                wiki_name.lower().replace('\n','')
            ),
        })
    candidates.sort(key = lambda item: item['best_city_match_distance'])

    min_city_distance = candidates[0]['best_city_match_distance']

    # 1. Near-identical names win regardless of city.
    plausible_matches = [item for item in candidates if item['best_match_distance']<3]
    if plausible_matches:
        return plausible_matches

    # 2./3. Otherwise restrict to the closest city and rank by name distance.
    closest_city_candidates = sorted(
        [item for item in candidates if item['best_city_match_distance'] <= min_city_distance],
        key = lambda item: item['best_match_distance']
    )
    if closest_city_candidates[0]['best_match_distance']<6:
        return [closest_city_candidates[0]]
    return closest_city_candidates[start:end]

In [614]:
# obtain capacity record matches for each sport category:

wiki_match_fields = ['stadium_id','location_name','city','stadium_sport']

def _wiki_matches_for(sports, **match_options):
    """Run findWikiMatches for every stadium of the given sports; return all candidates as a DataFrame."""
    selected = subsidies_by_stadium.loc[
        subsidies_by_stadium['stadium_sport'].isin(sports), wiki_match_fields
    ]
    candidate_lists = selected.apply(
        lambda row: findWikiMatches(dict(zip(wiki_match_fields, row)), **match_options),
        axis=1
    )
    return pd.DataFrame.from_records(flatten(candidate_lists))

# The capacity table is passed explicitly rather than relying on findWikiMatches' default.
wiki_matches_fb = _wiki_matches_for(
    ['football','baseball'],
    data = capacity_football_baseball
)

# The NBA and NHL tables share the same column layout.
arena_columns = {'city_field': 'Location', 'name_field': 'Arena', 'type_field': 'Opened'}

wiki_matches_nba = _wiki_matches_for(['basketball'], data = capacity_nba, **arena_columns)

wiki_matches_nhl = _wiki_matches_for(['hockey'], data = capacity_nhl, **arena_columns)

In [618]:
# Output candidate matches to csv for manual review.
# NOTE(review): these go to ./tmp/ whereas the subsidy candidates are written to ./data/tmp/
# and the verified files are read from ./data/manual-matches/ — confirm the intended folder.

wiki_matches_fb.to_csv("./tmp/wiki_matches_football_baseball.csv", index=False)
wiki_matches_nba.to_csv("./tmp/wiki_matches_nba.csv", index=False)
wiki_matches_nhl.to_csv("./tmp/wiki_matches_nhl.csv", index=False)

In [43]:
# Read in the manually verified matches:

# Build the table with a single concat instead of growing it with DataFrame.append
# (removed in pandas 2.0).
wiki_data = pd.concat([
    pd.read_csv(f"./data/manual-matches/wiki_matches_{sport}.csv")
    for sport in ['nba','nhl','football_baseball']
])

# Keep only the rows flagged as verified; reset_index() keeps the per-file row number
# as an 'index' column.
wiki_data = wiki_data[wiki_data['match']==1].reset_index()

In [44]:
# Merge capacity data with subsidies by stadium table
# (left join: stadiums without a verified capacity record keep NaN capacity):

capacity_output_columns = [
    'stadium_id',
    'public_cost_2010',
    'stadium_sport',
    'location_name',
    'city',
    'match',
    'wiki_name',
    'wiki_city',
    'capacity'
]

merged_with_capacity = subsidies_by_stadium.merge(
    wiki_data,
    how = 'left',
    on = ['stadium_id','stadium_sport'],
)

subsidies_and_capacity_by_stadium = merged_with_capacity[capacity_output_columns]

In [45]:
# Read in the data on games:

# Single place to change when the project is moved.
# NOTE(review): this is an absolute, machine-specific path.
games_summary_dir = "/home/user/projects/stadiums/data/processed/descriptive"

# One summary table per sport, loaded from <sport>_stadiums_summary_2018.csv.
stadiums_games = {
    sport: pd.read_csv(f"{games_summary_dir}/{sport}_stadiums_summary_2018.csv")
    for sport in ['hockey', 'basketball', 'baseball', 'football']
}

stadiums_games['all'] = pd.DataFrame()

In [46]:
# Group games data by stadium:

# 'all' is rebuilt from scratch with one concat, so re-running this cell does not
# duplicate rows (and DataFrame.append was removed in pandas 2.0).
games_by_sport = []
for sport in ['hockey','football','basketball','baseball']:
    # Total number of games per stadium; 'sum' is the alias np.sum resolved to.
    games_per_stadium = (
        stadiums_games[sport]
        .groupby(['stadium_id'])
        .agg({'games': 'sum'})
        .reset_index()
    )
    games_per_stadium['stadium_sport'] = sport
    stadiums_games[sport] = games_per_stadium
    games_by_sport.append(games_per_stadium)

stadiums_games['all'] = pd.concat(games_by_sport)

In [48]:
# Gather all data on subsidies, capacity and games into the final table 'subsidies_descriptive':

is_not_soccer = subsidies_and_capacity_by_stadium['stadium_sport'] != 'soccer'

# Inner join: only stadium/sport pairs that have games data are kept.
subsidies_with_games = subsidies_and_capacity_by_stadium[is_not_soccer].merge(
    stadiums_games['all'],
    how = 'inner',
    on = ['stadium_id','stadium_sport']
)

# Drop rows without a verified capacity.
has_capacity = subsidies_with_games['capacity'].notnull()
subsidies_descriptive = subsidies_with_games[has_capacity]

In [51]:
# Number of distinct stadiums in the final table
# (the table itself can hold several rows per stadium, one per sport):

print(subsidies_descriptive['stadium_id'].nunique())

82


In [None]:
################################################################################
####################### Part III: Cost-Benefit Analysis ########################
################################################################################

In [52]:
# Define the externality multiplier on additional stadium visits (using the main results in the paper).
# Each value is the sum of two estimates; hockey uses the same value as basketball.
# NOTE(review): which two estimates are being added is not stated here — confirm against the paper.

visits_coef = {
    'basketball': 0.1963 + 0.0097,
    'baseball': 0.2929 + 0.0648,
    'football': 0.3978 + 0.1258,
    'hockey': 0.1963 + 0.0097}

In [53]:
# Define average capacity load on game days by sport (fraction of capacity filled), using the data from wiki: 
# https://en.wikipedia.org/wiki/List_of_attendance_figures_at_domestic_professional_sports_leagues

capacity_coef = {'basketball': 0.9406, 'baseball': 0.6647, 'football': 0.9613 , 'hockey': 0.9517}

In [60]:
# Define the dollar-per-customer value assumed for each business visit, and the interest rate
# used for discounting:
# NOTE(review): `individial_monetary_value` is misspelled; printSummary takes the value
# through its `e_value` argument, so this name is presumably passed in by the caller — confirm
# before renaming.

individial_monetary_value = 15
risk_free_rate = 0.06

In [54]:
# Attach the per-sport coefficients defined above to the main table.
# The dict lookups raise KeyError for a sport that has no coefficient.

subsidies_descriptive['visits_coef'] =  subsidies_descriptive['stadium_sport'].map(lambda x: visits_coef[x])
subsidies_descriptive['capacity_coef'] =  subsidies_descriptive['stadium_sport'].map(lambda x: capacity_coef[x])

In [55]:
# Read in data on stadium visits on game/no-game days

game_visits_long = pd.read_csv('/home/user/projects/stadiums/data/processed/descriptive/stadium_game_no_game_visits.csv')

# Long -> wide: one row per (stadium, sport), one column per value of `game`.
game_visits_wide = game_visits_long.pivot(
    index=['stadium_id','sport'], columns='game', values='stadium_visits'
).reset_index()
# Positional rename: assumes the no-game column sorts before the game column.
game_visits_wide.columns = ['stadium_id','stadium_sport','visits_nogame','visits_game']

# Compute the share of visits on non-game days (as a fraction of the average visits on game days) by stadium:

game_visits_wide['visits_ng_share'] = (
    game_visits_wide['visits_nogame']/game_visits_wide['visits_game']
)
# Average the share over the sports hosted by the same stadium.
game_days = (
    game_visits_wide
    .groupby(['stadium_id'])
    .agg({'visits_ng_share': 'mean'})
    .reset_index()
)

In [56]:
# Merge the no-game visit shares to the main table (left join: stadiums without visit data
# get NaN in visits_ng_share).
# Caution: the cell overwrites its own input; running it twice merges again and produces
# suffixed visits_ng_share_x / visits_ng_share_y columns.

subsidies_descriptive = subsidies_descriptive.merge(game_days, how='left', on='stadium_id')

In [63]:
# Function to calculate discounted years to return value:

def yearsToReturn(cost,cash_flow,rate):
    """Number of years of constant annual cash flow needed to recover `cost`.

    With a non-zero discount rate this solves
        cost = cash_flow * (1 - (1 + rate) ** -n) / rate
    for n; with rate == 0 it is the simple payback period cost / cash_flow.

    Args:
        cost: up-front cost.
        cash_flow: constant annual cash flow.
        rate: annual discount rate (0 or None for no discounting).

    Returns:
        float: the payback period in years, or inf when the cost is never
        recovered (non-positive cash flow, or a cash flow that does not
        exceed cost * rate).
    """
    # Zero or negative cash flow never pays back. Without this guard a zero
    # cash flow raised ZeroDivisionError and a negative one produced a
    # meaningless negative number of years.
    if cash_flow <= 0:
        return float('inf')
    if not rate:
        return cost/cash_flow
    try:
        return -1 * (math.log(1 - (cost * rate / cash_flow)))/math.log(1+rate)
    except ValueError:
        # log of a non-positive number: the discounted inflows never reach the cost.
        return float('inf')

In [None]:
# Function to calculate annual benefits to the surrounding businesses for each stadium:

def calculateBenefits(beta,e_value, games, capacity, capacity_share, no_game_share, no_game_scale):
    """Estimate yearly stadium attendance and the resulting benefit to nearby businesses.

    Args:
        beta: multiplier applied per stadium visit.
        e_value: dollar value applied per induced visit.
        games: number of game days per year.
        capacity: stadium capacity.
        capacity_share: fraction of capacity filled on a game day.
        no_game_share: non-game-day visits as a fraction of game-day visits.
        no_game_scale: weight given to non-game-day attendance.

    Returns:
        tuple (annual benefit, annual attendance), both divided by 10**6.
    """
    attended_on_game_days = games * capacity * capacity_share
    days_without_game = 365 - games
    attended_on_other_days = days_without_game * capacity * capacity_share * no_game_share
    yearly_attendance = attended_on_game_days + attended_on_other_days * no_game_scale

    per_million = 10.0**6
    benefit_in_millions = yearly_attendance * beta * e_value / per_million
    attendance_in_millions = yearly_attendance / per_million
    return (benefit_in_millions, attendance_in_millions)


In [66]:
# Function to print final summary tables:

def printSummary(df, e_value, no_game_scale):
    """Build the summary table of estimated benefits versus public costs.

    Rows with non-positive `public_cost_2010` are dropped. Attendance and
    benefits are estimated per row with calculateBenefits, summed per
    stadium, and summarised overall and by sport.

    Args:
        df: table with the columns stadium_id, location_name, stadium_sport,
            public_cost_2010, visits_coef, games, capacity, capacity_coef
            and visits_ng_share.
        e_value: dollar value per induced visit (passed to calculateBenefits).
        no_game_scale: weight of non-game-day attendance (passed to
            calculateBenefits).

    Returns:
        DataFrame indexed by (group, variable) with the columns Mean, Q25,
        Median and Q75. The first group (label True) covers all stadiums;
        the others are the sport categories.

    Uses the module-level `risk_free_rate`, `percentile`, `calculateBenefits`
    and `yearsToReturn`.
    """
    data = df[df['public_cost_2010']>0].copy()

    # Run the benefit model once per row; it returns (benefit, attendance).
    estimates = [
        calculateBenefits(beta, e_value, games, capacity, load, ng_share, no_game_scale)
        for beta, games, capacity, load, ng_share in zip(
            data['visits_coef'],
            data['games'],
            data['capacity'],
            data['capacity_coef'],
            data['visits_ng_share'],
        )
    ]
    data['annual_visits_est'] = [visits for _, visits in estimates]
    data['annual_benefit_est'] = [benefit for benefit, _ in estimates]

    # One row per stadium: benefits add up across sports, the public cost is shared.
    data = data.groupby(['stadium_id','location_name']).agg({
        'stadium_sport': (lambda x: ','.join(x)),
        'annual_visits_est': 'sum',
        'annual_benefit_est': 'sum',
        'public_cost_2010': 'first'
    })

    # Pool arenas hosting hockey and/or basketball. A set comparison makes the
    # pooling independent of row order ('basketball,hockey' used to be left out).
    arena_sports = {'basketball', 'hockey'}
    data['sport'] = data['stadium_sport'].map(
        lambda label: "hockey or basketball" if set(label.split(',')) <= arena_sports else label
    )

    # Payback periods (not part of the returned table). Label-based row access:
    # positional `row[0]` on a label-indexed row is deprecated in pandas 2.x.
    cost_and_benefit = data[['public_cost_2010','annual_benefit_est']]
    data['years_to_return_nd'] = cost_and_benefit.apply(
        lambda row: yearsToReturn(row['public_cost_2010'], row['annual_benefit_est'], 0),
        axis = 1
    )
    data['years_to_return_d'] = cost_and_benefit.apply(
        lambda row: yearsToReturn(row['public_cost_2010'], row['annual_benefit_est'], risk_free_rate),
        axis = 1
    )

    # Present value of a 30-year annuity of the annual benefit, net of the public cost.
    annuity_factor_30y = ( 1 - (1 + risk_free_rate) ** (-30) )/risk_free_rate
    data['npv_30y'] = data['annual_benefit_est']*annuity_factor_30y - data['public_cost_2010']

    stats = ['mean', percentile(0.25), percentile(0.50), percentile(0.75)]
    variables = [
        'annual_visits_est','annual_benefit_est','public_cost_2010',
        'npv_30y'
    ]
    aggfuncs = { v: stats for v in variables }

    # A constant grouping key puts all stadiums into a single group labelled True.
    summ_all = data.groupby([True]*len(data)).agg(aggfuncs).round(2)

    sum_by_sport = data.groupby(['sport']).agg(aggfuncs).round(2)

    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    res = pd.concat([summ_all, sum_by_sport]).rename(
        columns={
            'annual_benefit_est': 'Annual benefits ($M)',
            'annual_visits_est': 'Annual attendance (m)',
            'public_cost_2010': 'Public costs at 2010 ($M)',
            'npv_30y': 'NPV over 30 years ($M)',
#             'years_to_return_nd':'Payback period (years, not discounted)',
#             'years_to_return_d':'Payback period (years, discounted)',
            'percentile_25': 'Q25',
            'percentile_50': 'Median',
            'percentile_75': 'Q75',
            'mean': 'Mean'
        }
    ).stack(0)[['Mean','Q25','Median','Q75']]
    return res
        