In [6]:
#Week 10 deliverable ipynb notebook
#Group: The Data Pipers
#Members: Diego Martinez Echevarria, dmartinezechevarria@gmail.com, Spain/Denmark, Data Analyst
#         Jennifer Turley, jennifer.turley@ucdconnect.ie, Ireland, Data Analyst
#Problem Description: 
    #Data Collection Pipeline: this project collects data from surveys and
    #converts it into useful information.
#Github Repo Link: 
    #https://github.com/dme48/data-pipers
#EDA performed on the data: 
    #This question is not yet applicable to our project. 
    #Data is being collected via the conformed surveys and the pipeline.  
#Final Recommendation    
    #Our final recommendation for our surveys is to use primarily Typeform, relying 
    #less on Survey Monkey and Google Forms. Typeform allows for better formatting 
    #of questions and more free responses than either Survey Monkey or Google Forms. 
    #We are therefore sharing the link for our Typeform survey more widely. 
    #Our survey links are still active and collecting data; this data is being
    #successfully extracted and transformed into a pandas DataFrame by the
    #pipeline code. We will subsequently analyse the data.

#some of the code in progress is below, which can also be found at the above github link

In [7]:
###Functions related to the gathering of data from online forms

In [8]:
#required packages
import os
import json
import pandas as pd
from typing import Iterable
from typeform import Typeform

In [9]:
#Fetches answers
def fetch_typeform(login_filename: str = "login",
                   field_ids_filename="question_ids.json") -> pd.DataFrame:
    """
    Queries the responses of a Typeform form and returns them as a DataFrame.

    Arguments:
        login_filename (str): file handed to read_login, which yields the
            API token and the form id
        field_ids_filename (str): JSON file handed to read_field_ids, which
            yields the question-id -> field-name mapping
    Returns:
        pd.DataFrame built from the dict produced by extract_answers
    """
    # NOTE(review): extract_answers and format_answer are defined a second
    # time further down this notebook (SurveyMonkey versions). Once those
    # cells have run, the names resolve to the SurveyMonkey functions.
    field_by_question_id = read_field_ids(field_ids_filename)
    token, form_id = read_login(login_filename)

    client = Typeform(token)
    raw_responses: dict = client.responses.list(form_id)

    answers_by_field = extract_answers(raw_responses, field_by_question_id)
    return pd.DataFrame.from_dict(answers_by_field)


In [10]:
#Reads the file (filename) and returns it as a dictionary
def read_field_ids(filename: str):
    """
    Loads the JSON file `filename` and returns its parsed content.

    Arguments:
        filename (str): path of the JSON file. A relative path is resolved
            against the directory of this source file, or against the current
            working directory when `__file__` is not defined (e.g. when the
            code runs in a notebook kernel). An absolute path is used as is.
    Returns:
        The object decoded by json.load (fetch_typeform uses it as a dict
        mapping question ids to field names).
    """
    try:
        base_dir = os.path.dirname(os.path.realpath(__file__))
    except NameError:
        # No __file__ in an interactive/notebook session: use the working dir.
        base_dir = os.getcwd()

    # os.path.join keeps an absolute `filename` untouched, unlike "+ '/' +".
    path = os.path.join(base_dir, filename)
    with open(path, "r", encoding="utf-8") as json_file:
        return json.load(json_file)

In [11]:
#Reads the login file (filename) and returns the token and the form ID
def read_login(filename: str):
    """
    Reads the login file and returns its first two lines.

    Arguments:
        filename (str): path of the login file. A relative path is resolved
            against the directory of this source file, or against the current
            working directory when `__file__` is not defined (e.g. when the
            code runs in a notebook kernel). An absolute path is used as is.
    Returns:
        list with at most two strings; fetch_typeform unpacks it as
        (token, form_id). Surrounding whitespace is stripped from each entry
        so a stray space or tab does not end up inside the credentials.
    """
    try:
        base_dir = os.path.dirname(os.path.realpath(__file__))
    except NameError:
        # No __file__ in an interactive/notebook session: use the working dir.
        base_dir = os.getcwd()

    # os.path.join keeps an absolute `filename` untouched, unlike "+ '/' +".
    path = os.path.join(base_dir, filename)
    with open(path, "r") as f:
        login_fields = f.read().split("\n")
    return [field.strip() for field in login_fields[0:2]]


In [12]:
#Extracts the answers from a Typeform-formatted dict (all responses returned by the query)
def extract_answers(query_response: dict, ids_to_fields: dict):
    """
    Extracts the answers from a Typeform query result.

    Arguments:
        query_response (dict): Typeform's dict containing all responses; the
            individual responses are read from query_response["items"]
        ids_to_fields (dict): Connects question ids to corresponding fields
    Returns:
        dict containing the (formatted) answers to the form, in the shape of
        {field_a: [answer_a, answer_b, ...],
         field_b: [answer_a, answer_b, ...],...}.
        Each response contributes exactly one entry per field; a field with
        no answer in a response gets None, so every list has the same length.
    Raises:
        KeyError: if an answered question id is not present in ids_to_fields.
    """
    # dict.fromkeys drops duplicate field names while keeping their order.
    fields = list(dict.fromkeys(ids_to_fields.values()))
    final_answers = {field: [] for field in fields}

    for answer_set in query_response["items"]:
        # One row per response, pre-filled with None for unanswered fields.
        row = dict.fromkeys(fields)

        # A response may carry no "answers" entry (or a null one).
        for question in answer_set.get("answers") or []:
            field = ids_to_fields[question["field"]["id"]]
            # Assigning (instead of appending) keeps one value per field even
            # if the same field shows up twice in a response; the last wins.
            row[field] = format_answer(question)

        for field in fields:
            final_answers[field].append(row[field])

    return final_answers


# Formatter lookup keyed by answer type: each entry receives the raw value
# stored under that type in an answer and returns the value kept in the output.
FORMAT_GUIDE = dict(
    text=lambda value: value,
    boolean=lambda value: value,
    number=float,
    choice=lambda selected: selected["label"],
    choices=lambda selected: selected["labels"],
)



In [13]:
#Extracts the answer from a question and formats it
def format_answer(question):
    """
    Extracts the answer from a question and formats it.

    The question's "type" entry names both the key holding the raw answer
    and the FORMAT_GUIDE formatter applied to it.

    Arguments:
        question (dict): one answer entry; must contain "type" and a key
            named after that type
    Returns:
        The formatted answer. Answer types without an entry in FORMAT_GUIDE
        are returned unchanged instead of raising KeyError.
    """
    answer_type = question["type"]
    raw_answer = question[answer_type]

    formatter = FORMAT_GUIDE.get(answer_type)
    if formatter is None:
        # No dedicated formatter for this type: keep the raw value.
        return raw_answer
    return formatter(raw_answer)

In [14]:
#for SurveyMonkey
from surveymonkey.client import Client
import os
import json
import pandas as pd

In [15]:
def load_login(filename: str = "login.json") -> dict:
    """
    Reads surveymonkey credentials from filename and returns them as dict.

    Arguments:
        filename (str): path of the JSON credentials file. A relative path is
            resolved against the directory of this source file, or against
            the current working directory when `__file__` is not defined
            (e.g. when the code runs in a notebook kernel). An absolute path
            is used as is.
    Returns:
        The object decoded by json.load.
    """
    try:
        base_dir = os.path.dirname(os.path.realpath(__file__))
    except NameError:
        # No __file__ in an interactive/notebook session: use the working dir.
        base_dir = os.getcwd()

    # os.path.join keeps an absolute `filename` untouched, unlike "+ '/' +".
    path = os.path.join(base_dir, filename)
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)

In [16]:
def load_config(filename: str = "form_config.json") -> dict:
    """
    Loads the JSON config file at filename and returns its parsed content.

    The callers in this file index the result with the keys "id_to_field"
    and "id_to_choice", i.e. they treat it as a dict holding two dictionaries.

    Arguments:
        filename (str): path of the JSON config file. A relative path is
            resolved against the directory of this source file, or against
            the current working directory when `__file__` is not defined
            (e.g. when the code runs in a notebook kernel). An absolute path
            is used as is.
    Returns:
        The object decoded by json.load.
    """
    try:
        base_dir = os.path.dirname(os.path.realpath(__file__))
    except NameError:
        # No __file__ in an interactive/notebook session: use the working dir.
        base_dir = os.getcwd()

    # os.path.join keeps an absolute `filename` untouched, unlike "+ '/' +".
    path = os.path.join(base_dir, filename)
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)

In [17]:
def extract_questions(response):
    """
    Debug helper: prints the "answers" entry of every question found on the
    first page of a single response.

    Arguments:
        response (dict): a response containing response["pages"][0]["questions"]
    Returns:
        None; the answers are written to standard output, one per line.
    """
    # The question id was previously read into an unused local named `id`,
    # which shadowed the builtin; only the answers are needed here.
    for question in response["pages"][0]["questions"]:
        print(question["answers"])

In [18]:
def extract_answers(query_response: dict, config: dict) -> dict:
    """
    Extracts the answers from a Survey Monkey bulk-response query.

    Arguments:
        query_response (dict): Survey monkey's dict containing all responses;
            the filled forms are read from query_response["data"]
        config (dict): must contain "id_to_field" (connects question ids to
            corresponding fields) and "id_to_choice" (handed to format_answer)
    Returns:
        dict containing the (formatted) answers to the form, in the shape of
        {field_a: [answer_a, answer_b, ...],
         field_b: [answer_a, answer_b, ...],...}.
        Each filled form contributes exactly one entry per field; a field
        with no answer gets None, so every list has the same length.
    Raises:
        KeyError: if an answered question id is not present in
            config["id_to_field"].
    """
    id_to_field = config["id_to_field"]
    # dict.fromkeys drops duplicate field names while keeping their order.
    fields = list(dict.fromkeys(id_to_field.values()))
    final_answers = {field: [] for field in fields}

    for filled_form in query_response["data"]:
        # One row per filled form, pre-filled with None for unanswered fields.
        row = dict.fromkeys(fields)

        # Walk every page, not only the first one, so that questions on later
        # pages are kept and a form without pages simply yields an empty row.
        for page in filled_form["pages"]:
            for question in page["questions"]:
                field = id_to_field[question["id"]]
                row[field] = format_answer(
                    question["answers"], field, config["id_to_choice"])

        for field in fields:
            final_answers[field].append(row[field])

    return final_answers



In [19]:
def format_answer(answer: list, field: str, id_to_choice: dict):
    """
    Extracts the answer from a question and formats it according to its field.

    Arguments:
        answer (list): the question's "answers" entry, a list of dicts
        field (str): field name the question maps to; selects the formatter
        id_to_choice (dict): connects choice ids to the value to report
    Returns:
        For the field "actions_taken_self": a list with the id_to_choice
        value of every entry's "choice_id".
        For any other field: the "text" of the first entry converted to int.
    """
    def as_integer(entries):
        # Default: the answer's text is expected to hold an integer.
        return int(entries[0]["text"])

    def as_choice_values(entries):
        # Uses its own argument rather than the enclosing `answer` variable.
        return [id_to_choice[entry["choice_id"]] for entry in entries]

    field_formatters = {"actions_taken_self": as_choice_values}
    formatter = field_formatters.get(field, as_integer)
    return formatter(answer)



In [20]:
def fetch_monkey():
    """
    Fetches all responses of the SurveyMonkey form and returns them as a
    DataFrame.

    Credentials come from load_login() and the question/choice mappings from
    load_config(), both called with their default filenames.

    Returns:
        pd.DataFrame built from the dict produced by extract_answers
    """
    credentials = load_login()
    form_config = load_config()

    # The client takes exactly these four credential entries as keywords.
    client_keys = ("client_id", "client_secret", "redirect_uri", "access_token")
    survey_client = Client(**{key: credentials[key] for key in client_keys})

    bulk_responses = survey_client.get_survey_response_bulk(
        credentials["form_id"])

    return pd.DataFrame.from_dict(extract_answers(bulk_responses, form_config))