# Count number of responses for various topics
Comments are tagged at a fine-grained level, so each topic below combines several related tags: a response is counted once if it carries at least one of the topic's tags.

In [1]:
import numpy as np
import pandas as pd
from IPython.display import display, Markdown, Latex
# Load the responses CSV; the 'date' column is parsed into datetimes on read.
df = pd.read_csv(
    'responses_complete.csv',
    index_col=None,
    parse_dates=['date'],
)

# Don't truncate results.
pd.set_option('display.max_rows', None)  # display all rows
# Display the full text of columns. `None` is the supported "no limit" value;
# the legacy `-1` sentinel is deprecated since pandas 1.0 and is not accepted
# by newer releases. Very old pandas versions only accept an integer here, so
# fall back to `-1` when `None` is refused.
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    pd.set_option('display.max_colwidth', -1)


## Number of comments about 129/Maple

In [2]:
# A response counts toward 129/Maple when any of these tag columns equals 1.
filtered = df[[
    'problem_tags_maple',
    'problem_tags_129',
    'problem_tags_van wyck',
]].eq(1).any(axis=1)
df2 = df.loc[filtered]
display(len(df2))

105

## Number of comments about S Riverside

In [3]:
# A response counts toward S Riverside when any of these tag columns equals 1.
filtered = df[[
    'problem_tags_s riverside',
    'problem_tags_shoprite',
]].eq(1).any(axis=1)
df2 = df.loc[filtered]
display(len(df2))

83

## Number of comments about Cleveland

In [4]:
# A response counts toward Cleveland when any of these tag columns equals 1.
filtered = df[[
    'problem_tags_cleveland',
    'problem_tags_five corners',
    'problem_tags_cet',
]].eq(1).any(axis=1)
df2 = df.loc[filtered]
display(len(df2))

60

## Number of comments about Benedict Boulevard


In [5]:
# A response counts toward Benedict Boulevard when any of these tag columns equals 1.
filtered = df[[
    'problem_tags_benedict',
    'problem_tags_gottwald circle',
]].eq(1).any(axis=1)
df2 = df.loc[filtered]
display(len(df2))

40

## Number of comments about sidewalks

In [6]:
# A response counts toward sidewalks when any of these problem or suggestion
# tag columns equals 1.
filtered = df[[
    'problem_tags_sidewalk condition',
    'problem_tags_missing sidewalks',
    'problem_tags_bushes on sidewalk',
    'suggestion_tags_more sidewalks',
    'suggestion_tags_maintain sidewalks',
]].eq(1).any(axis=1)
df2 = df.loc[filtered]
display(len(df2))

207

## Number of comments about crosswalks

In [7]:
# A response counts toward crosswalks when any of these problem or suggestion
# tag columns equals 1.
filtered = df[[
    'problem_tags_missing crosswalks',
    'suggestion_tags_maintain crosswalks',
    'suggestion_tags_more crosswalks',
]].eq(1).any(axis=1)
df2 = df.loc[filtered]
display(len(df2))

57

## Number of comments about driver behavior

In [8]:
# A response counts toward driver behavior when any of these problem or
# suggestion tag columns equals 1.
filtered = df[[
    'problem_tags_aggressive driving',
    'problem_tags_speeding',
    'problem_tags_driver awareness',
    'suggestion_tags_enforce speed',
    'suggestion_tags_reduce speed',
    'suggestion_tags_speed bumps',
    'suggestion_tags_speed cameras',
    'suggestion_tags_traffic calming',
]].eq(1).any(axis=1)
df2 = df.loc[filtered]
display(len(df2))

227

## Insights into school bus ridership

### Number of respondents who say they don't have school bus service available

In [29]:
# Sum the "lack of available busing" drive-reason column across all responses.
num_no_bus = df.loc[
    :, 'child_drive_reason_Lack of available busing where we live'
].sum()
num_no_bus

7

### Frequency with which students take the bus

In [27]:
# Number of responses for each bus-ride frequency answer (missing answers excluded).
df.loc[:, 'child_bus_freq'].value_counts()

Every day, or almost every day    113
Rarely or never                   93 
Once in a while                   15 
Once every few days               9  
Name: child_bus_freq, dtype: int64

In [43]:
# Same breakdown as shares of all answers, formatted as one-decimal percentages.
(
    (df['child_bus_freq'].value_counts(normalize=True) * 100)
    .round(1)
    .astype(str)
    + '%'
)

Every day, or almost every day    49.1%
Rarely or never                   40.4%
Once in a while                   6.5% 
Once every few days               3.9% 
Name: child_bus_freq, dtype: object

### Number of respondents who told us about their school-aged children

In [30]:
# Respondents who answered the bus-frequency question (non-missing values only).
total_respondents_with_kids = df['child_bus_freq'].count()
total_respondents_with_kids

230

### Estimate of how many respondents don't have bus service where they live
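Based on our understanding that the school district has 1,600 students, including 1,048 with bus service, roughly 552 students (1,600 − 1,048), or about 34.5%, are not served by a bus. We round this to 35% and apply that share to our respondents to estimate how many do not have bus service available where they live.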

In [35]:
# Apply the assumed 35% no-bus-service share to the respondents with
# school-aged children, rounded to a whole number of respondents.
estimate_no_bus_service = round(0.35 * total_respondents_with_kids)
estimate_no_bus_service

80

In [40]:
# Express the rounded estimate as a whole-number percentage of respondents.
estimate_no_bus_service_as_percent = round(
    100 * (estimate_no_bus_service / total_respondents_with_kids)
)
f"{estimate_no_bus_service_as_percent}%"

'35%'

### Adjustment to account for estimated number of students who don't have bus service available

In [63]:
# Start from the raw response counts per bus-ride frequency.
df2 = df['child_bus_freq'].value_counts()
# Subtract the estimated number of students without bus service from the
# 'Rarely or never' response count. The row is addressed by its label rather
# than by position: value_counts() orders rows by frequency, so a positional
# index would silently adjust a different answer if the ranking changed.
df2.loc['Rarely or never'] -= estimate_no_bus_service
df2

Every day, or almost every day    113
Rarely or never                   13 
Once in a while                   15 
Once every few days               9  
Name: child_bus_freq, dtype: int64

Total number of respondents in our revised estimate

In [65]:
# Total of the adjusted counts. NOTE(review): this rebinds
# total_respondents_with_kids, which previously held the unadjusted total, so
# re-running earlier cells after this one will use the revised value.
total_respondents_with_kids = df2.agg('sum')
total_respondents_with_kids

150

In [72]:
# Each adjusted count as a whole-number percentage of the adjusted total.
df2.apply(lambda count: f"{round(count / df2.sum() * 100)}%")

Every day, or almost every day    75%
Rarely or never                   9% 
Once in a while                   10%
Once every few days               6% 
Name: child_bus_freq, dtype: object