## Open up the scrubbed data file

In [None]:
import numpy as np
import pandas as pd
from IPython.display import display, Markdown, Latex, clear_output
# show every row when displaying results instead of truncating them
pd.set_option('display.max_rows', None)

# load the scrubbed responses, parsing the 'date' column as dates
df = pd.read_csv(
    'responses_scrubbed_tagged.csv',
    index_col=None,
    parse_dates=['date'],
)

## Categorize every free text response from every respondent

In [None]:
# row position to resume from (useful if a previous run was interrupted);
# set to zero to start from the beginning
start_from = 103

# names of the columns that hold free text responses
fields = [
    'feelings',
    'problem_areas',
    'suggested_improvements',
    'additional_comments',
    'business_additional_comments',
    'final_comments',
]

def get_unique_values_from_csv_field(column, df):
    """Flatten a column of comma-separated tags into a list of single tags.

    Despite the name, repeated tags are kept (not de-duplicated) so that the
    caller can count how often each tag was used, e.g. with
    ``pd.Series(result).value_counts()``.

    Args:
        column: name of the column in ``df`` that holds comma-separated tags.
        df: the DataFrame to read the column from.

    Returns:
        A list of whitespace-stripped single tags, in row order. Missing cells
        (NaN/None) and empty tags are skipped, so they do not show up as
        'nan' or blank entries in the tag counts.
    """
    single_vals = []  # will contain single tags
    for combo in df[column].tolist():  # each cell is a comma-separated set of tags
        # untagged rows are missing values; str() would turn them into a 'nan' tag
        if pd.isnull(combo):
            continue
        # break up comma-separated values into single values and remove whitespace
        stripped = (val.strip() for val in str(combo).split(','))
        # drop empty tags left behind by blank cells or stray commas
        single_vals.extend(val for val in stripped if val)

    return single_vals
    
# Interactive tagging loop: show each respondent's free text answers, ask the
# editor for complaint and suggestion tags, store them in the row and save the
# file after every response.

# Make sure both tag columns exist before looping.  This is checked once, up
# front, instead of with a catch-all ``except`` inside the loop: a catch-all
# also traps interruptions (e.g. KeyboardInterrupt) and would then blank out
# tags that had already been saved.
for tag_column in ('problem_tags', 'suggestion_tags'):
    if tag_column not in df.columns:
        df[tag_column] = ''
    # keep the column as generic objects so string tags can be stored even if
    # it was loaded from the CSV with no values in it
    df[tag_column] = df[tag_column].astype(object)

# tags that have already been used, shown to the editor for re-use
problem_tags = get_unique_values_from_csv_field('problem_tags', df)
suggestion_tags = get_unique_values_from_csv_field('suggestion_tags', df)

# loop through every row in the data, counting rows as we go
for counter, (index, row) in enumerate(df.iterrows()):

    # skip to starting row
    if counter < start_from:
        continue

    clear_output()  # wipe the output clear

    display(Markdown('## Response #{}'.format(index)))

    # display each of this row's free text fields
    for field in fields:
        # display this person's response to this question if they gave one
        value = row[field]
        if not pd.isnull(value):
            display(Markdown('### {} comment\n{}'.format(field, value)))

    # ask the user for tags
    display(Markdown('## Tag this response'))
    display(Markdown("Enter tags that represent this person's complaints and suggestions.  Re-use tags used in others' responses, where applicable."))

    # display commonly used tags
    display(Markdown('### Common complaints'))
    display(pd.Series(problem_tags).value_counts())

    display(Markdown('### Common suggestions'))
    display(pd.Series(suggestion_tags).value_counts())

    # allow editor to tag this response
    # NOTE: hitting Enter stores an empty string, replacing any tags this row
    # already had
    my_problems = input("Complaints (or hit Enter to skip): ").strip()
    my_suggestions = input("Suggestions (or hit Enter to skip): ").strip()

    # store the tags in this record
    df.at[index, 'problem_tags'] = my_problems
    df.at[index, 'suggestion_tags'] = my_suggestions

    # update lists of tags that have already been used
    problem_tags = get_unique_values_from_csv_field('problem_tags', df)
    suggestion_tags = get_unique_values_from_csv_field('suggestion_tags', df)

    # save to file immediately just in case we have to interrupt the script mid-way
    df.to_csv('responses_scrubbed_tagged.csv', index=False)
    

## Response #131

### feelings comment
Depends on where I am walking. 

### suggested_improvements comment
More driver and cyclist awareness to their surroundings. 

### additional_comments comment
Areas of no sidewalks, I.e. sunset trail, river view trails. 

## Tag this response

Enter tags that represent this person's complaints and suggestions.  Re-use tags used in others' responses, where applicable.

### Common complaints

nan                                   425
missing sidewalks                      32
speeding                               32
sidewalk condition                     16
cyclists breaking rules                15
cpa                                    15
driver awareness                       13
s riverside                            12
129                                    11
maple                                  11
                                        9
road surface                            8
benedict                                7
municipal place                         7
cleveland                               7
aggressive drivers                      7
dummy light                             6
van wyck                                6
shoprite                                5
five corners                            5
olcott                                  5
poor lighting                           5
narrow roads                            5
mt airy                           

### Common suggestions

nan                                                    428
bike lanes                                              26
more sidewalks                                          21
reduce speed                                            14
more crosswalks                                         13
educate cyclists                                        11
enforce speed                                           10
maintain sidewalks                                       9
maintain crosswalks                                      8
                                                         7
street lights                                            5
outlaw cycling                                           4
improve route 9 access                                   4
enforce trimming of bushes                               3
traffic calming                                          3
enforce crosswalks                                       3
sidewalk on mt airy s                                   

## Save changes back to the CSV file

In [None]:
# final save: write the DataFrame to 'responses_scrubbed_tagged.csv',
# leaving out the row index
df.to_csv('responses_scrubbed_tagged.csv', index=False)