In [None]:
# Notebook parameters (NOTE(review): presumably overridden by whatever runs
# this notebook -- confirm; the values below are the defaults).
# year / month: passed to scaffolding.get_time_query and
# scaffolding.get_file_suffix further down.
year = None
month = None
# program: passed to scaffolding.get_file_suffix when naming the chart files.
program = "dfc-fermata"
# The next four values are not referenced by any of the cells reviewed here.
study_type = "study"
mode_of_interest = None
include_test_users = False
dynamic_labels = {  }
# use_imperial: passed to scaffolding.get_units to select the unit variables.
use_imperial = False

In [None]:
# Survey configuration for this program.
# TODO: probably going to end up passing this in rather than hard-coding it.
# NOTE(review): the UserProfileSurvey formPath points at a personal fork and
# branch, unlike TripConfirmSurvey which uses e-mission/main -- confirm.
survey_info = {
    "surveys": {
        "UserProfileSurvey": {
            "formPath": "https://raw.githubusercontent.com/JGreenlee/nrel-openpath-deploy-configs/fermata-demo/survey_resources/dfc-fermata/fermata-onboarding-v0.xml",
            "version": 1,
            "compatibleWith": 1,
            "dataKey": "manual/demographic_survey",
            "labelTemplate": {"en": "Answered"},
        },
        "TripConfirmSurvey": {
            "formPath": "https://raw.githubusercontent.com/e-mission/nrel-openpath-deploy-configs/main/survey_resources/dfc-fermata/fermata-ev-return-trip-v0.xml",
            "version": 1,
            "compatibleWith": 1,
            "dataKey": "manual/trip_user_input",
            "labelTemplate": {"en": "Answered"},
        },
    },
    "trip-labels": "ENKETO",
}

In [None]:
from collections import defaultdict
import urllib.request
import numpy as np
import pandas as pd
from xml.dom import minidom

from plots import *
import scaffolding

# `sns` is not imported explicitly in this cell -- presumably it arrives via
# `from plots import *` above (TODO confirm).
# NOTE(review): sns.set() runs after sns.set_style("whitegrid"); set() applies
# seaborn's own default style, so it likely overrides "whitegrid" -- confirm
# the intended order.
sns.set_style("whitegrid")
sns.set()
%matplotlib inline

# get metric vs imperial vars (which set is returned depends on use_imperial;
# none of these names are used by the cells reviewed here)
label_units, short_label, label_units_lower, distance_col, weight_unit = scaffolding.get_units(use_imperial)

In [None]:
#input: list of survey names from the config
#output: list of links to the sheets where questions/answers are
#will run n surveys times
def get_sheet_links(survey_list, surveys_config=None):
    """Build the list of XML sheet URLs for the given surveys.

    Parameters
    ----------
    survey_list : iterable of str
        Survey names; each must be a key of ``surveys_config['surveys']``.
    surveys_config : dict, optional
        Configuration holding a ``'surveys'`` mapping whose entries each have
        a ``'formPath'``. Defaults to the notebook-level ``survey_info``.

    Returns
    -------
    list of str
        One URL per survey, in input order, with the file extension of the
        form path replaced by ``xml`` (``.xml`` is appended when the file
        name has no extension).

    Raises
    ------
    KeyError
        If a survey name or its ``'formPath'`` is missing from the config.
    """
    if surveys_config is None:
        surveys_config = survey_info

    sheet_list = []
    for name in survey_list:
        form_path = surveys_config['surveys'][name]['formPath']
        #THIS ASSUMES THE SHEET HAS THE SAME NAME AS THE FORM PATH BUT WITH xml FILE TYPE
        # Only look for the extension in the last path segment, so a dot in
        # the host name or a directory is never mistaken for it.
        directory, slash, filename = form_path.rpartition('/')
        stem, dot, _extension = filename.rpartition('.')
        if not dot:
            stem = filename
        sheet_list.append(directory + slash + stem + '.xml')
    return sheet_list

#input: list of urls for the survey xml files
#output: two dictionaries to translate the questions/answers
#will run once per survey (n surveys times)
def build_dictionaries(url_list):
    """Fetch each survey XML form and collect the question label texts.

    For every ``<label>`` element whose parent element has a non-empty
    ``ref`` attribute, the last ``/``-separated segment of that ``ref`` is
    used as the key and the label's leading text as the value.

    Parameters
    ----------
    url_list : iterable of str
        URLs of the survey XML files to download and parse.

    Returns
    -------
    (dict, dict)
        ``(opt_dict, quest_dict)``. ``quest_dict`` maps question name to its
        label text. ``opt_dict`` is returned for the answer options but is
        currently never filled, so it is always empty.

    Notes
    -----
    Labels without a leading text node (e.g. empty ``<label/>`` elements)
    are skipped. When several forms define the same question name, the last
    one parsed wins.
    """
    opt_dict = {}
    quest_dict = {}

    for url in url_list:
        # Close the response as soon as the document has been parsed.
        with urllib.request.urlopen(url) as result:
            doc = minidom.parse(result)

        for label in doc.getElementsByTagName("label"):
            parent = label.parentNode
            # Only element parents can carry a "ref" attribute.
            if parent is None or parent.nodeType != parent.ELEMENT_NODE:
                continue
            ref = parent.getAttribute("ref")
            if not ref:
                continue

            text_node = label.firstChild
            # A label with no children, or one starting with a child element,
            # has no leading text to use as the question wording.
            if text_node is None or text_node.nodeType not in (
                text_node.TEXT_NODE,
                text_node.CDATA_SECTION_NODE,
            ):
                continue

            quest_dict[ref.split('/')[-1]] = text_node.data

    return opt_dict, quest_dict


#input: dataframe containing all trips that have non-blank user_input
#output: dataframe with questions in the columns and answers in the rows
#for loop will run n survey responses times (this could get big!)
def create_dataframe(df_trips_w_surveys):
    """Flatten survey responses into one row per response, one column per question.

    Parameters
    ----------
    df_trips_w_surveys : pandas.DataFrame
        Must have a ``user_input`` column whose values are dicts containing
        ``['trip_user_input']['data']['jsonDocResponse']``; the value under
        the first key of ``jsonDocResponse`` is taken as the answers.

    Returns
    -------
    pandas.DataFrame
        Nested answer keys are flattened and each column is named after the
        last ``.``-separated segment of its flattened key. Known form
        metadata columns are removed when present. Rows are indexed
        ``0..n-1``; an empty input yields an empty frame.

    Raises
    ------
    KeyError, IndexError
        If a ``user_input`` value lacks the expected nesting or its
        ``jsonDocResponse`` is empty.
    """
    # One record per response (this could get big!).
    records = []
    for user_input in df_trips_w_surveys['user_input']:
        doc_response = user_input['trip_user_input']['data']['jsonDocResponse']
        data_key = list(doc_response)[0]
        records.append(doc_response[data_key])

    # Normalising all records in one call handles the empty case and avoids
    # concatenating many single-row frames.
    df = pd.json_normalize(records)

    # Keep only the innermost key of each flattened "a.b.c" column name.
    df = df.rename(columns=lambda col: col.split('.')[-1])

    #drop the non-question columns, should leave behind all the questions
    #need to do this better, won't always be the same
    # errors='ignore' because not every form version emits all of these.
    non_question_cols = ['end', 'start', 'attrid', 'attrxmlns:orx', 'attrxmlns:jr', 'instanceID']
    df = df.drop(columns=non_question_cols, errors='ignore')

    return df

#input: list of labels that will end up on the chart
#output: translated to readable list, with multiples handled
#the for loop will run n times, where num_options <= n < all possible combinations of options
#if people are selecting many different combinations, could be large
def traslate_options(labels, opt_dict):
    """Translate answer codes into readable chart labels, in place.

    Each label is split on single spaces (a multi-select answer holds several
    codes), every code is looked up in ``opt_dict``, and the translations are
    joined with newlines.

    Parameters
    ----------
    labels : list of str
        Raw answer strings; the list is modified in place.
    opt_dict : dict
        Maps answer code to readable text. Codes missing from it are kept
        unchanged instead of raising ``KeyError``.

    Returns
    -------
    list of str
        The same ``labels`` list object, now holding the translated labels.
    """
    for i, label in enumerate(labels):
        readable = [opt_dict.get(code, code) for code in label.split(" ")]
        labels[i] = "\n".join(readable)

    return labels

#input: all of the responses to a single survey
#output: text with num responses and users
#THIS SHOULD GO IN SCAFFOLDING AND INCLUDE WHOLE POOL AT SOME POINT
def get_text(responses):
    """Summarise how many responses and distinct users a chart is based on.

    Parameters
    ----------
    responses : pandas.DataFrame
        All responses to a survey; must expose a ``user_id`` column.

    Returns
    -------
    str
        The summary sentence, which is also printed.
    """
    response_count = len(responses)
    user_count = responses.user_id.nunique()

    quality_text = f"Based on {response_count} responses from {user_count} users"
    print(quality_text)

    return quality_text

In [None]:
#list of all surveys that are not a "UserProfileSurvey"
# Filtering (instead of list.remove) keeps this cell working when the config
# has no "UserProfileSurvey" entry; list.remove would raise ValueError.
survey_list = [name for name in survey_info['surveys'] if name != 'UserProfileSurvey']
sheet_list = get_sheet_links(survey_list)

print('survey sheets: ', sheet_list)

In [None]:
#load all of the confirmed trips
tq = scaffolding.get_time_query(year, month)
all_confirmed_trips = scaffolding.load_all_confirmed_trips(tq)

#remove blank inputs (rows whose user_input is an empty dict)
has_user_input = all_confirmed_trips['user_input'] != {}
survey_trips = all_confirmed_trips[has_user_input].reset_index()

#survey counts df: pull the survey name out of each response
survey_trips['survey_name'] = survey_trips.user_input.apply(lambda sr: sr['trip_user_input']['data']['name'])

#gather the cols needed for charts and text
survey_trips = survey_trips[['survey_name', 'user_id', 'user_input']]

#get quality text (number of responses and distinct users)
qual_text = get_text(survey_trips)

In [None]:
#create translation dictionaries
opt_dict, quest_dict = build_dictionaries(sheet_list)

print(opt_dict)
print(quest_dict)

#format survey trips into responses dataframe
df_responses = create_dataframe(survey_trips)

In [None]:
file_suffix = scaffolding.get_file_suffix(year, month, program)

#create one plot per question
for col in df_responses.columns:
    filename = col + file_suffix
    print(filename)

    #will only show questions in the current survey (not older versions)
    if col not in quest_dict:
        continue

    answers = df_responses[col]
    answers = answers[answers != ""] #could have blank responses for non-mandatory ?s

    # Count once and derive both labels and values from the same result.
    # NaN answers are excluded here, so a question whose rows are all
    # blank/NaN yields no counts and is skipped instead of being plotted
    # with empty data.
    counts = answers.value_counts(dropna=True)
    if counts.empty:
        continue

    # Option codes are shown untranslated: traslate_options is not applied
    # because opt_dict is never populated by build_dictionaries.
    labels = counts.index.tolist()
    values = counts.tolist()

    pie_chart_purpose(quest_dict[col]+'\n'+qual_text, labels, values, filename)