In [2]:
import pandas as pd
import numpy as np
import warnings

pd.options.display.float_format = '{:.4f}'.format

In [3]:
def data_cleaning(csv):
    df = pd.read_csv(csv, low_memory=False)
    df = df[['street_nam', 'mgis_town', 'surfacetyp', 'surfacewid', 'assignedle', 'routesyste']]
    # converting assignedle to feet (currently in miles)
    df['assignedle_feet'] = df['assignedle'] * 5280
    df.drop('assignedle', axis=1, inplace=True)
    return df

def city_geometry(df, city_name, exclude_highways=True):
    city_df = df[(df.mgis_town == city_name)]
    if exclude_highways:
        city_df = city_df.loc[city_df['routesyste'] != "SR"]
        city_df = city_df.loc[city_df['routesyste'] != "US"]
        city_df = city_df.loc[city_df['routesyste'] != "I"]
    city_df.loc[city_df["surfacewid"]==0,"surfacewid"] = 24
    city_df["surface_area"] = city_df["assignedle_feet"]*city_df["surfacewid"]
    return city_df

def get_asphalt_concrete(df):
    asph = 0
    conc = 0
    asph_idx = set([0,2,3,4,5,7])
    conc_idx = set([6,8,9,10])
    df_surface_types = df.groupby(['surfacetyp'])['surface_area'].sum().reset_index()
    for i in range(len(df_surface_types)):
        if df_surface_types.iloc[i]["surfacetyp"] in asph_idx:
            asph+=df_surface_types.iloc[i]["surface_area"]
        elif df_surface_types.iloc[i]["surfacetyp"] in conc_idx:
            conc+=df_surface_types.iloc[i]["surface_area"]
    return asph,conc
    
def get_road_areas(city_name):
    city_name = city_name.upper()
    df = data_cleaning('./MASS_DOT_roads.csv')
    cgeom = city_geometry(df, city_name)
    asphalt_area, concrete_area = get_asphalt_concrete(cgeom)
    total_area = cgeom['surface_area'].sum()
    return asphalt_area,concrete_area,total_area

In [6]:
asphalt_area,concrete_area,total_area = get_road_areas('springfield')
print "ASPHALT AREA: {}".format(asphalt_area)
print "CONCRETE AREA: {}".format(concrete_area)
print "TOTAL AREA: {}".format(total_area)

ASPHALT AREA: 14188124.544
CONCRETE AREA: 68261919.264
TOTAL AREA: 82459864.608


In [26]:
def sanity_check(city_name, nohighways=False):
    city_name = city_name.upper()
    df = data_cleaning('./MASS_DOT_roads.csv')
    cgeom = city_geometry(df, city_name, exclude_highways=nohighways)
    df_surface_types = cgeom.groupby(['surfacetyp'])['surface_area'].sum().reset_index()
    
    # get material indices
    asph_idx = [0,6]
    conc_idx = [7]
    other_idx = [1,2,3,4,5,8,9,10]
    
    # initialize text
    asphalt_text = ""
    concrete_text = ""
    other_text = ""
    
    # initialize areas
    total_asphalt = 0
    total_concrete = 0
    total_other = 0
    
    print "ASPHALT INDICES: {}".format(asph_idx)
    print "CONCRETE INDICES: {}".format(conc_idx)
    print "OTHER INDICES: {}".format(other_idx)
    print ""
    
    # iterate through dataframe
    for i in range(len(df_surface_types)):
        if df_surface_types.iloc[i]["surfacetyp"] in asph_idx:
            area = df_surface_types.iloc[i]["surface_area"]
            total_asphalt += area
            asphalt_text += "        {}: {:,}\n".format(int(df_surface_types.iloc[i]["surfacetyp"]), area)
        if df_surface_types.iloc[i]["surfacetyp"] in conc_idx:
            area = df_surface_types.iloc[i]["surface_area"]
            total_concrete += area
            concrete_text += "        {}: {:,}\n".format(int(df_surface_types.iloc[i]["surfacetyp"]), area)
        if df_surface_types.iloc[i]["surfacetyp"] in other_idx:
            area = df_surface_types.iloc[i]["surface_area"]
            total_other += area
            other_text += "        {}: {:,}\n".format(int(df_surface_types.iloc[i]["surfacetyp"]), area)
    
    # print results
    print "CITY: {}".format(city_name)
    print "    ASPHALT ROADS: {:,}".format(total_asphalt)
    print asphalt_text.rstrip()
    print "    CONCRETE ROADS: {:,}".format(total_concrete)
    print concrete_text.rstrip()
    print "    OTHER ROADS: {:,}".format(total_other)
    print other_text.rstrip()
    
    print ""
    
    print(df_surface_types.to_string())

In [27]:
sanity_check('springfield')
# sanity_check('wellesley')
# sanity_check('pittsfield')
# sanity_check('salem')

ASPHALT INDICES: [0, 6]
CONCRETE INDICES: [7]
OTHER INDICES: [1, 2, 3, 4, 5, 8, 9, 10]

CITY: SPRINGFIELD
    ASPHALT ROADS: 82,101,166.752
        0: 6,707,733.12
        6: 75,393,433.632
    CONCRETE ROADS: 73,849.248
        7: 73,849.248
    OTHER ROADS: 7,557,743.424
        1: 9,820.8
        2: 2,102,525.04
        4: 5,385.6
        5: 5,372,480.784
        8: 67,531.2

   surfacetyp  surface_area
0           0  6707733.1200
1           1     9820.8000
2           2  2102525.0400
3           4     5385.6000
4           5  5372480.7840
5           6 75393433.6320
6           7    73849.2480
7           8    67531.2000


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [7]:
# sanity_check('springfield', nohighways=False)
# sanity_check('wellesley', nohighways=False)
# sanity_check('pittsfield', nohighways=False)
# sanity_check('salem', nohighways=False)

CITY: SPRINGFIELD


NameError: global name 'highways' is not defined

In [8]:
# sanity_check('springfield', nohighways=True)
# sanity_check('springfield', nohighways=False)

CITY: SPRINGFIELD


NameError: global name 'highways' is not defined