In [1]:
import numpy as np
import pandas as pd
from IPython.display import display, Markdown, Latex
# Load the responses CSV. No CSV column is used as the row index, and the
# 'date' column is parsed into datetimes.
df = pd.read_csv(
    'responses_complete.csv',
    index_col=None,
    parse_dates=['date'],
)

# Show every row when displaying frames/series instead of truncating them.
pd.options.display.max_rows = None


In [2]:
# Maps each multi-select question (a column whose raw value is a
# comma-separated list of chosen answers) to its predefined answer options.
multiple_answer_options = {
    'schools': [
        'CET',
        'PVC',
        'CHHS',
        'Homeschooled',
    ],
    'child_drive_reason': [
        'No - we do not drive, or prefer not to drive',
        'Our own personal preference',
        'Lack of available busing where we live',
        'The bus schedule does not match our schedule',
        'Safety concerns with buses',
        'Safety concerns with walking',
        'Safety concerns with bicycling',
        "My child's health condition",
    ],
    'child_no_walk_reason': [
        'No - they walk a lot',
        'My child does not like to walk',
        'We live too far to walk',
        "We don't have time to walk",
        'Fear of dangerous driving',
        'Lack of adequate sidewalks',
        'Lack of adequate crosswalks at busy intersections',
        'Lack of crossing guards at busy intersections',
        "My child's health condition",
        'Visually unappealing route',
    ],
    'child_no_bike_reason': [
        'No - they bicycle a lot',
        'My child does not like to bicycle',
        'My child is too young to bicycle',
        'We live too far to bicycle',
        'Our own personal preference',
        'Fear of dangerous driving',
        'Lack of adequate bike lanes',
        "My child's health condition",
        'Visually unappealing route',
        'Hills',
    ],
    'no_walk_reason': [
        'No - I walk a lot',
        'I live too far to walk',
        'I do not like to walk',
        "I don't have time to walk",
        'Fear of dangerous driving',
        'Lack of adequate sidewalks',
        'Lack of adequate crosswalks at intersections',
        'Lack of crossing guards at intersections',
        'My health condition',
        'My own personal preference',
        'Visually unappealing',
    ],
    'no_bike_reason': [
        'No - I bicycle a lot',
        'I live too far to bicycle',
        'I do not like to bicycle',
        "I don't have time to bicycle",
        'Fear of dangerous driving',
        'Lack of adequate bike lanes',
        'My health condition',
        'My own personal preference',
        'Visually unappealing',
    ],
    'drive_reason': [
        'No - I do not drive, or prefer not to drive',
        "I don't have time to walk or bicycle",
        'Safety concerns with walking',
        'Safety concerns with bicycling',
        'My own health condition',
        'My own personal preference',
    ],
}

# Each question also had a free-text 'other' field for custom answers;
# those are not listed here and are dealt with later.

In [3]:
# Register the most popular complaint tags as one more "question" with its
# own list of answer options.
multiple_answer_options['problem_tags'] = [
    'speeding', 'missing sidewalks', 'cpa', 'sidewalk condition',
    'aggressive driving', 'driver awareness', 's riverside', '129',
    'maple', 'cyclists breaking rules', 'cleveland', 'road surface',
    'municipal place', 'missing crosswalks', 'bushes on sidewalk', 'grand',
    'narrow roads', 'mt airy', 'shoprite', 'benedict',
    'five corners', 'van wyck', 'dummy light', 'crossing guard',
    'gottwald circle', 'n riverside', 'poor lighting', 'croton commons',
    'truesdale', 'hmb trail', 'cet', 'on-street parking',
    'old post n', 'olcott', 'old post s', 'morningside',
]

# Likewise register the most popular suggestion tags.
multiple_answer_options['suggestion_tags'] = [
    'bike lanes', 'more sidewalks', 'enforce speed', 'maintain sidewalks',
    'reduce speed', 'educate cyclists', 'more crosswalks', 'speed bumps',
    'no bike lanes', 'maintain crosswalks', 'enforce crosswalks',
    'sidewalk on mt airy s', 'reduce on-street parking', 'more traffic lights',
    'better lighting', 'more stop signs', 'dedicated pedestrian walk signals',
    'more signs', 'improve route 9 access', 'educate pedestrians',
    'outlaw cycling', 'enforce trimming of bushes', 'speed cameras',
    'parking on one side only', 'bike racks', 'traffic calming',
    'speed indicator signs',
]


## Response counts for each multiple-select question

In [4]:
# How many of the most frequent free-text 'other' answers to show per question.
N_OTHER_SHOWN = 10


def summarize_question(responses, question):
    """Tabulate how often each answer option of one question was selected.

    Parameters
    ----------
    responses : pandas.DataFrame
        Frame with one column per answer option, named
        ``<question>_<option>``, and optionally a free-text
        ``<question>_other`` column.
        NOTE(review): the option columns are presumably 0/1 (or boolean)
        indicators, so that their sum is a count -- confirm against the
        code that builds the CSV.
    question : str
        Question key, i.e. the shared column-name prefix without the
        trailing underscore.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``response`` (column name with the ``<question>_`` prefix
        removed) and sorted by descending count, with two columns:
        ``number`` (the column sum) and ``percent`` (that sum as a share of
        all selections made for this question -- not of respondents --
        rounded to a whole number and formatted like ``'40%'``).
    """
    prefix = '{}_'.format(question)
    other_col = '{}_other'.format(question)

    # Select the option columns by plain prefix match. The free-text 'other'
    # column shares the prefix but holds strings, so it is excluded by name.
    option_cols = [
        col for col in responses.columns
        if str(col).startswith(prefix) and col != other_col
    ]

    # numeric_only=True: never try to add up text columns. Older pandas
    # skipped them silently; newer versions raise or concatenate instead.
    counts = (
        responses[option_cols]
        .sum(numeric_only=True)
        .sort_values(ascending=False)
    )

    # Guard against 0/0 -> NaN, which cannot be cast to int.
    total = counts.sum()
    if total:
        percents = (100 * counts / total).round(0).astype(int)
    else:
        percents = counts.astype(int)

    summary = pd.DataFrame({
        'number': counts,
        'percent': percents.astype(str) + '%',
    })

    # Strip only the leading '<question>_' prefix from the row labels.
    return (
        summary
        .rename(index=lambda col: str(col)[len(prefix):])
        .rename_axis('response')
    )


# Show one summary table per question, followed by the most common
# free-text 'other' answers.
for question in multiple_answer_options:
    display(Markdown('## {}:'.format(question)))
    display(summarize_question(df, question))

    other_col = '{}_other'.format(question)
    if other_col in df.columns:
        display(Markdown('#### other responses:'))
        display(df[other_col].value_counts().head(N_OTHER_SHOWN))


## schools:

Unnamed: 0_level_0,number,percent
response,Unnamed: 1_level_1,Unnamed: 2_level_1
CET,104,40%
PVC,78,30%
CHHS,70,27%
Homeschooled,5,2%


#### other responses:

Circle School                        3
St. Augustine                        2
Happy Hearts                         2
Masters                              1
Currently pre-school but soon CET    1
Circle                               1
Circle School/Asbury Play & Learn    1
Windward and Hackley                 1
Happy Hearts Daycare!                1
Nursery School in Briarcliff         1
Name: schools_other, dtype: int64

## child_drive_reason:

Unnamed: 0_level_0,number,percent
response,Unnamed: 1_level_1,Unnamed: 2_level_1
No - we do not drive or prefer not to drive,77,36%
Our own personal preference,45,21%
The bus schedule does not match our schedule,35,16%
Safety concerns with walking,24,11%
Safety concerns with bicycling,17,8%
Lack of available busing where we live,7,3%
Safety concerns with buses,5,2%
My child's health condition,3,1%


#### other responses:

Weather                               2
Kids running late                     2
Bad weather                           2
Weather related events                1
kids too young                        1
inclement weather or lateness         1
Drive to Happy Hearts                 1
Bad monitoring of bus behavior.       1
Drive to nursery. Bus to CET          1
Teenagers can't wake up in morning    1
Name: child_drive_reason_other, dtype: int64

## child_no_walk_reason:

Unnamed: 0_level_0,number,percent
response,Unnamed: 1_level_1,Unnamed: 2_level_1
We live too far to walk,59,22%
No - they walk a lot,49,18%
We don't have time to walk,38,14%
Lack of adequate sidewalks,37,14%
Fear of dangerous driving,37,14%
Lack of adequate crosswalks at busy intersections,22,8%
My child does not like to walk,11,4%
Lack of crossing guards at busy intersections,9,3%
My child's health condition,2,1%
Visually unappealing route,1,0%


#### other responses:

Too young                                      5
Fear of dangerous drivers                      1
One walks and the other takes the bus          1
My child is too young. Having said that        1
My child carpools with friends                 1
Nursery school                                 1
Laziness                                       1
too young                                      1
Drives child to school. He walks back home.    1
School is out of district                      1
Name: child_no_walk_reason_other, dtype: int64

## child_no_bike_reason:

Unnamed: 0_level_0,number,percent
response,Unnamed: 1_level_1,Unnamed: 2_level_1
Fear of dangerous driving,66,24%
My child is too young to bicycle,57,20%
Lack of adequate bike lanes,46,16%
Hills,29,10%
We live too far to bicycle,27,10%
My child does not like to bicycle,27,10%
Our own personal preference,21,8%
My child's health condition,3,1%
Visually unappealing route,2,1%
No - they bicycle a lot,2,1%


#### other responses:

Too young                                                                                                3
Once in a while                                                                                          1
No side walks. South Mt Airy people go 45+ miles and hour                                                1
She's just not biking yet.                                                                               1
N/a                                                                                                      1
See above other                                                                                          1
And arrive at school sweaty? This is not the 60’s or 70’s.                                               1
First year we are considering it is this year for pvc                                                    1
too young                                                                                                1
She biked or skootered a lot when we 

## no_walk_reason:

Unnamed: 0_level_0,number,percent
response,Unnamed: 1_level_1,Unnamed: 2_level_1
No - I walk a lot,210,30%
Lack of adequate sidewalks,113,16%
I don't have time to walk,87,13%
My own personal preference,65,9%
Fear of dangerous driving,56,8%
I live too far to walk,51,7%
Lack of adequate crosswalks at intersections,46,7%
My health condition,27,4%
Visually unappealing,19,3%
I do not like to walk,9,1%


#### other responses:

Hills                                                                                                                           3
Sidewalks in bad condition coupled with hills is bad for my back and knees. Would walk more  if sidewalks were taken care of    1
I walk a lot but not as much as I’d like because of time constraints                                                            1
Timing                                                                                                                          1
Children pickups and drop offs                                                                                                  1
Looking for faster options                                                                                                      1
bike is faster                                                                                                                  1
coming back up mt. airy too hard                                                          

## no_bike_reason:

Unnamed: 0_level_0,number,percent
response,Unnamed: 1_level_1,Unnamed: 2_level_1
Fear of dangerous driving,126,21%
Lack of adequate bike lanes,124,21%
My own personal preference,90,15%
I do not like to bicycle,84,14%
I don't have time to bicycle,64,11%
No - I bicycle a lot,57,10%
My health condition,26,4%
I live too far to bicycle,20,3%
Visually unappealing,6,1%


#### other responses:

Hills                                                                                                                                          4
Fear of dangerous drivers                                                                                                                      3
Don’t know how                                                                                                                                 2
I currently don’t own a bicycle                                                                                                                2
We live on Mt Airy so returning to the house is a steep climb uphill.                                                                          1
No                                                                                                                                             1
very hilly and too many blind curves with fast cars                                                                               

## drive_reason:

Unnamed: 0_level_0,number,percent
response,Unnamed: 1_level_1,Unnamed: 2_level_1
My own personal preference,171,32%
I don't have time to walk or bicycle,134,25%
Safety concerns with bicycling,95,18%
Safety concerns with walking,60,11%
No - I do not drive or prefer not to drive,39,7%
My own health condition,28,5%


#### other responses:

Hills                                       6
Time                                        5
Too far                                     3
time                                        3
Convenience                                 3
Distance from my airy                       2
Sometimes I walk                            1
Require a vehicle for emergency response    1
Too far to carry groceries back             1
Hills.                                      1
Name: drive_reason_other, dtype: int64

## problem_tags:

Unnamed: 0_level_0,number,percent
response,Unnamed: 1_level_1,Unnamed: 2_level_1
speeding,125,9%
missing sidewalks,122,9%
cpa,87,6%
sidewalk condition,83,6%
aggressive driving,75,6%
driver awareness,74,6%
s riverside,70,5%
129,52,4%
maple,48,4%
road surface,46,3%


#### other responses:

crosswalk condition      6
radnor                   6
bungalow                 6
albany post road         5
upper village            5
side-by-side cyclists    4
station lot              4
nordica                  4
library                  4
pvc                      4
Name: problem_tags_other, dtype: int64

## suggestion_tags:

Unnamed: 0_level_0,number,percent
response,Unnamed: 1_level_1,Unnamed: 2_level_1
bike lanes,104,15%
more sidewalks,98,15%
enforce speed,66,10%
maintain sidewalks,64,9%
reduce speed,46,7%
educate cyclists,39,6%
more crosswalks,38,6%
speed bumps,24,4%
no bike lanes,17,3%
maintain crosswalks,16,2%


#### other responses:

lower speed limit                                      4
no parking next to corners                             3
clearer signs                                          3
lower speed limits                                     3
sharrows                                               3
dedicated bicycle trails                               3
more parking                                           2
raise speed limits                                     2
outlaw walking in road                                 2
require cyclists wear reflective apparel and lights    2
Name: suggestion_tags_other, dtype: int64