In [1]:
import requests 
import yaml
import json
import datetime as dt
import time
import os
import random
from azure.storage.blob import BlobServiceClient, BlobType
from email_validator import validate_email, EmailNotValidError
from utils.utils import log_azure, request, load_json, clean_field_text 

with open("api-key.yaml", "r") as file:
    data = yaml.full_load(file)

# SurveyMonkey Survey
SM_DATA = data['sm']['real']
# CareerOneStop Survey 
COS_DATA = data['cos']



---

##### **Main Functions** 

* `get_qa_key()` - GET (or load cached copy) of question/answer key from SM or COS 

<br>

* `combine_qa_keys()` - Combine the SM and COS question/answer keys into one combined key/translation map between the APIs
    - Generates a refreshed map if a change is detected in the SM survey or the COS survey. 
        - The COS survey should not change at all. 
        - Changes in the SM survey should not affect its ability to match the answer keys in the COS survey.

<br>

* `get_sm_responses()` - GET SM survey responses 

<br>

* `process_sm_responses()` - filter and process new SM responses from get_sm_responses()
    - Checks against DB for already processed responses 
    - Checks if response includes valid email address (`has_valid_email()`)
    - Checks for unexpected response ids vs. the combined Q/A key.
    - Loads new responses into database (into 'processing' table) until they are finished

<br>

* `translate_post_cos()` - Use combined answer to translate the processed SM responses to COS JSON format
    - Loads responses from processing table in
    - Adds matching COS information to response questions/answers 
    - If response is missing a required skills-survey answer, fills with "beginner" 

<br>

* `compose_email()` - Extract list of recommended jobs from COS response and compose an email.

In [2]:
## GET/Load question-answer keys for SurveyMonkey Survey and CareerOneStop Skills Matcher
def get_qa_key(api=None, fetch=False) -> dict:
    """
    Load list of questions/answers from either the Survey Monkey API `/details` endpoint or from CareerOneStop. 

    Args: 

    api (str):   Must be one of 'sm' (for Skills Monkey Survey) or 'cos' (for CareerOneStop)

    fetch (bool):   Whether to GET new question/answer key from api or to just use locally saved copy (default is False). 

          If loading our cached copy fails, automatically set to True.

          If fetch == True:
            If the GET request fails, we use our cached copy.
            If the question/answer details have changed, we update our cached copy. Any such changes may break combine_qa_keys() and this app as a whole.            

        Note 500 requests/month limit to SM -- if going to use fetch option, may want to only do so periodically.

    """

    # Set SM vs. COS variables
    if api == "sm": 
        url = f"{SM_DATA['base_url']}/details"
        headers = SM_DATA['headers']
        cached_fp = SM_DATA['survey-details-fp']
    elif api == "cos":
        url = COS_DATA['url']
        headers = COS_DATA['headers']
        cached_fp = COS_DATA['survey-details-fp']
    else:
        raise Exception("`api` must be one of `sm` (SurveyMonkey) or `cos` (CareerOneStop)")
    
    # Load cached details 
    cached_key = load_json(cached_fp)
    if cached_key is None: 
        log_azure(f"WARNING: Loading {cached_fp} failed. Fetching new {api.upper()} key.")
        fetch = True

    ## Attempt Request (if fetch == True)
    fetched_key = None
    if fetch: 
        try: 
            print(url, headers)
            response = request(url=url, headers=headers, method="GET")
            if response.status_code != 200:
                log_azure(f"WARNING: GET {api.upper()} survey details -- Response Code: {response.status_code} -- Proceeding with cached file: {cached_fp}")
            else: 
                fetched_key = response.json()
        except Exception as e: 
            log_azure(f"ERROR: GET {api.upper()} survey details -- Error: {str(e)} -- Proceeding with cached file: {cached_fp}")

    ## Return Block
    if fetched_key is None and cached_key is None:
        raise Exception(f"ERROR: Failed both to fetch new copy and to load cached copy of {api.upper()} Q/A key.")  
    elif fetched_key is None: 
        return cached_key
    elif fetched_key != cached_key: # This block will never run so long as one of 
        log_azure(f"WARNING: GET {api.upper()} survey details -- Fetched Q/A key conflicts with cached copy {cached_fp} -- Updating.")           
        with open(cached_fp, "w") as file: 
            json.dump(fetched_key, file)
        return fetched_key
    else: 
        log_azure(f"INFO: GET {api.upper()} survey details -- Fetched Q/A key matches cached copy {cached_fp}")
        return fetched_key

In [35]:
## Create translation map from Survey Monkey key to COS key 
def combine_qa_keys(fetch=False) -> dict: 
    """Creates translation map from SM to COS using the question/answer keys of each.
    
    Args: 

    fetch (bool): Setting for get_qa_key() -- whether to only load local cache of question/answer keys or to fetch new copies.
    
    """

    sm_key = get_qa_key("sm", fetch=fetch)
    cos_key = get_qa_key("cos", fetch=fetch)

    ## Prepare translation map
    combined_map = {
        'non-skills-matcher':[], # not to send to COS (background questions)
        'skills-matcher':[], # to send to COS skills matcher 
    }

    ## Adding relevant SM information to map
    sm_question_number = 1
    for p in sm_key['pages']:
        for q in p['questions']:
        
            question_type = 'skills-matcher' if "skills matcher" in p['title'].lower()  else "non-skills-matcher" 
            
            answers = None  
            if 'choice' in q['family']: # single_choice, multiple_choice 
                # Main answer choices
                answers = [{'id':{'sm':a['id']}, 'text':{'sm':clean_field_text(a['text'])}} for a in q['answers']['choices']]
                # 'Other' option
                if 'other' in q['answers'].keys(): 
                    answers.append({
                        'id':{'sm':q['answers']['other']['id']},
                        'text':{'sm':clean_field_text(q['answers']['other']['text'])}
                        })
            elif q['family'] == 'datetime':
                answers = [{'id':{'sm':q['answers']['rows'][0]['id']},
                            'text':{'sm':clean_field_text(q['answers']['rows'][0]['text'])}}]

            combined_map[question_type].append({
                'question_id':{'sm':q['id']},
                'page_number': p['position'],
                'question_number':{'sm':sm_question_number}, # q['position'] gives the question's position on the current page, not its absolute number
                'question_family':q['family'],
                'question_text':{'sm':clean_field_text([h['heading'] for h in q['headings']][0])},
                'answers':answers
            })

            sm_question_number += 1

    if len(combined_map['skills-matcher']) != len(cos_key['Skills']):
        error_text = f"ERROR: No. of skills-matcher questions retrieved from SM {len(combined_map['skills-matcher'])} doesn't match number in COS {len(cos_key['Skills'])}"
        log_azure(error_text)
        raise Exception(error_text)

    for n in range(len(combined_map['skills-matcher'])): 
        cos_q = cos_key['Skills'][n]
        cos_answer_ids = [{'id':cos_q["DataPoint20"], 'text':cos_q['AnchorFirst']},
                          {'id':cos_q["DataPoint35"], 'text':cos_q['AnchorSecond']}, 
                          {'id':cos_q["DataPoint50"], 'text':cos_q['AnchorThrid']}, 
                          {'id':cos_q["DataPoint65"], 'text':cos_q['AnchorFourth']}, 
                          {'id':cos_q["DataPoint80"], 'text':cos_q['AnchorLast']}]

        combined_map['skills-matcher'][n]['question_id']['cos'] = cos_q['ElementId']
        combined_map['skills-matcher'][n]['question_number']['cos'] = n + 1 # correcting for 0 index in loop
        combined_map['skills-matcher'][n]['question_text']['cos'] = cos_q['Question']

        # Add cos answer ids and text to answers array; 
        if len(combined_map['skills-matcher'][n]['answers']) != len(cos_answer_ids):
            error_text = f"ERROR: No. of answer options in SM question #{combined_map['skills-matcher'][n]['question_number']['sm']} != number of COS answer levels."
            log_azure(error_text)
            raise Exception(error_text)
        for m in range(len(combined_map['skills-matcher'][n]['answers'])):
            combined_map['skills-matcher'][n]['answers'][m]['id']['cos'] = cos_answer_ids[m]['id']
            combined_map['skills-matcher'][n]['answers'][m]['text']['cos'] = cos_answer_ids[m]['text']

    ## Casting question lists to dictionary, with keys being the survey monkey question ids, for easier lookup in translation
    # Making these lists to start with made the previous COS insertion step easier
    combined_map['skills-matcher'] = {q['question_id']['sm']:q for q in combined_map['skills-matcher']}

    return combined_map

In [36]:
combine_qa_keys()['skills-matcher']

{'143974309': {'question_id': {'sm': '143974309', 'cos': '2.C.1.a'},
  'page_number': 21,
  'question_number': {'sm': 42, 'cos': 1},
  'question_family': 'single_choice',
  'question_text': {'sm': 'How much do you know about business planning and leadership?',
   'cos': 'How much do you know about business planning and leadership?'},
  'answers': [{'id': {'sm': '1066658702', 'cos': 1.534},
    'text': {'sm': 'Beginner (Complete a timesheet)',
     'cos': 'Complete a timesheet'}},
   {'id': {'sm': '1066658703', 'cos': 2.4145},
    'text': {'sm': 'Basic', 'cos': ''}},
   {'id': {'sm': '1066658704', 'cos': 3.295},
    'text': {'sm': 'Skilled (Monitor project progress to complete it on time)',
     'cos': 'Monitor project progress to complete it on time'}},
   {'id': {'sm': '1066658705', 'cos': 4.1755},
    'text': {'sm': 'Advanced', 'cos': ''}},
   {'id': {'sm': '1066658706', 'cos': 5.056},
    'text': {'sm': 'Expert (Manage a $10m company)',
     'cos': 'Manage a $10m company'}}]},
 '143

In [26]:
## Get question/answer keys 
fetch = False 
sm_key = get_qa_key("sm", fetch=fetch)
cos_key = get_qa_key("cos", fetch=fetch)

## Prepare translation map
combined_map = {
    'non-skills-matcher':[], # not to send to COS (background questions)
    'skills-matcher':[], # to send to COS skills matcher 
}

## Adding relevant SM information to map
sm_question_number = 1
for p in sm_key['pages']:
    for q in p['questions']:
    
        question_type = 'skills-matcher' if "skills matcher" in p['title'].lower()  else "non-skills-matcher" 
        
        answers = None  
        if 'choice' in q['family']: # single_choice, multiple_choice 
            # Main answer choices
            answers = [{'id':{'sm':a['id']}, 'text':{'sm':clean_field_text(a['text'])}} for a in q['answers']['choices']]
            # 'Other' option
            if 'other' in q['answers'].keys(): 
                answers.append({
                    'id':{'sm':q['answers']['other']['id']},
                    'text':{'sm':clean_field_text(q['answers']['other']['text'])}
                    })
        elif q['family'] == 'datetime':
            answers = [{'id':{'sm':q['answers']['rows'][0]['id']},
                        'text':{'sm':clean_field_text(q['answers']['rows'][0]['text'])}}]

        combined_map[question_type].append({
            'question_id':{'sm':q['id']},
            'page_number': p['position'],
            'question_number':{'sm':sm_question_number}, # q['position'] gives the question's position on the current page, not its absolute number
            'question_family':q['family'],
            'question_text':{'sm':clean_field_text([h['heading'] for h in q['headings']][0])},
            'answers':answers
        })

        sm_question_number += 1

if len(combined_map['skills-matcher']) != len(cos_key['Skills']):
    error_text = f"ERROR: No. of skills-matcher questions retrieved from SM {len(combined_map['skills-matcher'])} doesn't match number in COS {len(cos_key['Skills'])}"
    log_azure(error_text)
    raise Exception(error_text)

for n in range(len(combined_map['skills-matcher'])): 
    cos_q = cos_key['Skills'][n]
    cos_answer_ids = [cos_q["DataPoint20"],
                    cos_q["DataPoint35"], 
                    cos_q["DataPoint50"], 
                    cos_q["DataPoint65"], 
                    cos_q["DataPoint80"]]

    combined_map['skills-matcher'][n]['question_id']['cos'] = cos_q['ElementId']
    combined_map['skills-matcher'][n]['question_number']['cos'] = n + 1 # correcting for 0 index in loop
    combined_map['skills-matcher'][n]['question_text']['cos'] = cos_q['Question']

    # Add cos answer ids and text to answers array; 
    if len(combined_map['skills-matcher'][n]['answers']) != len(cos_answer_ids):
        error_text = f"ERROR: No. of answer options in SM question #{combined_map['skills-matcher'][n]['question_number']['sm']} != number of COS answer levels."
        log_azure(error_text)
        raise Exception(error_text)
    for m in range(len(combined_map['skills-matcher'][n]['answers'])):
        combined_map['skills-matcher'][n]['answers'][m]['id']['cos'] = cos_answer_ids[m]

## Casting question lists to dictionary, with keys being the survey monkey question ids, for easier lookup in translation
# Making these lists to start with made the previous COS insertion step easier
combined_map['skills-matcher'] = {q['question_id']['sm']:q for q in combined_map['skills-matcher']}

combined_map

{'non-skills-matcher': [{'question_id': {'sm': '144588883'},
   'page_number': 1,
   'question_number': {'sm': 1},
   'question_family': 'single_choice',
   'question_text': {'sm': 'How did you learn about the survey?'},
   'answers': [{'id': {'sm': '1070603277'}, 'text': {'sm': 'Michael'}},
    {'id': {'sm': '1070603278'}, 'text': {'sm': 'Kim'}},
    {'id': {'sm': '1070603279'}, 'text': {'sm': "Ci'Aira"}},
    {'id': {'sm': '1070603280'}, 'text': {'sm': "Sade'"}},
    {'id': {'sm': '1070603281'}, 'text': {'sm': 'Jabrielle'}},
    {'id': {'sm': '1076832322'}, 'text': {'sm': 'Other (please specify)'}}]},
  {'question_id': {'sm': '150376558'},
   'page_number': 1,
   'question_number': {'sm': 2},
   'question_family': 'single_choice',
   'question_text': {'sm': 'Are you taking this survey online or in person?'},
   'answers': [{'id': {'sm': '1108449076'}, 'text': {'sm': 'In person'}},
    {'id': {'sm': '1108449077'}, 'text': {'sm': 'Online'}}]},
  {'question_id': {'sm': '143922396'},
   

In [21]:
if len(combined_map['skills-matcher']) != len(cos_key['Skills']):
    error_text = f"ERROR: No. of skills-matcher questions retrieved from SM {len(combined_map['skills-matcher'])} doesn't match number in COS {len(cos_key['Skills'])}"
    log_azure(error_text)
    raise Exception(error_text)

for n in range(len(combined_map['skills-matcher'])): 
    cos_q = cos_key['Skills'][n]
    cos_answer_ids = [cos_q["DataPoint20"],
                    cos_q["DataPoint35"], 
                    cos_q["DataPoint50"], 
                    cos_q["DataPoint65"], 
                    cos_q["DataPoint80"]]

    combined_map['skills-matcher'][n]['question_id']['cos'] = cos_q['ElementId']
    combined_map['skills-matcher'][n]['question_number']['cos'] = n + 1 # correcting for 0 index in loop
    combined_map['skills-matcher'][n]['question_text']['cos'] = cos_q['Question']
    # combined_map['skills-matcher'][n]['answer_ids'] = dict(zip(combined_map['skills-matcher'][n]['answer_ids'], cos_answer_ids))
    # Add cos answer ids and text to answers array; 
    if len(combined_map['skills-matcher'][n]['answers']) != len(cos_answer_ids):
        error_text = f"ERROR: No. of answer options in SM question #{combined_map['skills-matcher'][n]['question_number']['sm']} != number of COS answer levels."
        log_azure(error_text)
        raise Exception(error_text)
    for m in range(len(combined_map['skills-matcher'][n]['answers'])):
        combined_map['skills-matcher'][n]['answers'][m]['id']['cos'] = cos_answer_ids[m]

## Casting question lists to dictionary for easier lookup in translation -- keeping these as lists made the previous insertion step easier
combined_map['skills-matcher'] = {q['question_id']['sm']:q for q in combined_map['skills-matcher']}
combined_map['non-skills-matcher'] = {q['question_id']['sm']:q for q in combined_map['non-skills-matcher']}


In [22]:
combined_map['skills-matcher']

{'143974309': {'question_id': {'sm': '143974309', 'cos': '2.C.1.a'},
  'page_number': 21,
  'question_number': {'sm': 42, 'cos': 1},
  'question_family': 'single_choice',
  'question_text': {'sm': 'How much do you know about business planning and leadership?',
   'cos': 'How much do you know about business planning and leadership?'},
  'answers': [{'id': {'sm': '1066658702', 'cos': 1.534},
    'text': {'sm': 'Beginner (Complete a timesheet)'}},
   {'id': {'sm': '1066658703', 'cos': 2.4145}, 'text': {'sm': 'Basic'}},
   {'id': {'sm': '1066658704', 'cos': 3.295},
    'text': {'sm': 'Skilled (Monitor project progress to complete it on time)'}},
   {'id': {'sm': '1066658705', 'cos': 4.1755}, 'text': {'sm': 'Advanced'}},
   {'id': {'sm': '1066658706', 'cos': 5.056},
    'text': {'sm': 'Expert (Manage a $10m company)'}}]},
 '143974383': {'question_id': {'sm': '143974383', 'cos': '2.C.4.d'},
  'page_number': 21,
  'question_number': {'sm': 43, 'cos': 2},
  'question_family': 'single_choice',


In [97]:
combine_qa_keys()['skills-matcher']

{'143974309': {'question_id': {'sm': '143974309', 'cos': '2.C.1.a'},
  'question_number': {'sm': 1, 'cos': 1},
  'question_family': 'single_choice',
  'question_text': {'sm': 'How much do you know about business planning and leadership?',
   'cos': 'How much do you know about business planning and leadership?'},
  'answer_ids': {'1066658702': 1.534,
   '1066658703': 2.4145,
   '1066658704': 3.295,
   '1066658705': 4.1755,
   '1066658706': 5.056}},
 '143974383': {'question_id': {'sm': '143974383', 'cos': '2.C.4.d'},
  'question_number': {'sm': 2, 'cos': 2},
  'question_family': 'single_choice',
  'question_text': {'sm': 'How much do you know about plant, animal and cell functions?',
   'cos': 'How much do you know about plant, animal and cell functions?'},
  'answer_ids': {'1066659170': 1.372,
   '1066659171': 2.401,
   '1066659172': 3.43,
   '1066659173': 4.459,
   '1066659174': 5.488}},
 '143974479': {'question_id': {'sm': '143974479', 'cos': '1.A.3.c.3'},
  'question_number': {'sm': 

In [81]:
combined_map = {
    'non-skills-matcher':[], # not to send to COS (background questions)
    'skills-matcher':[], # to send to COS skills matcher 
}

for p in sm_key['pages']:
    for q in p['questions']:
        question_type = 'skills-matcher' if "skills matcher" in p['title'].lower()  else "non-skills-matcher" 
        answer_ids = [d['id'] for d in q['answers']['choices']] if ('answers' in q.keys() and 'choice' in q['family']) else None
        combined_map[question_type].append({
            'question_id':{'sm':q['id']},
            'question_number':{'sm':q['position']},
            'question_family':q['family'],
            'question_text':{'sm':[h['heading'] for h in q['headings']][0]},
            'answer_ids':answer_ids
        })


In [91]:
combine_qa_keys(fetch=False) 

{'non-skills-matcher': {'144588883': {'question_id': {'sm': '144588883'},
   'question_number': {'sm': 1},
   'question_family': 'single_choice',
   'question_text': {'sm': 'How did you learn about the survey?'},
   'answer_ids': ['1070603277',
    '1070603278',
    '1070603279',
    '1070603280',
    '1070603281']},
  '150376558': {'question_id': {'sm': '150376558'},
   'question_number': {'sm': 2},
   'question_family': 'single_choice',
   'question_text': {'sm': 'Are you taking this survey online or in person?'},
   'answer_ids': ['1108449076', '1108449077']},
  '143922396': {'question_id': {'sm': '143922396'},
   'question_number': {'sm': 3},
   'question_family': 'open_ended',
   'question_text': {'sm': 'What zip code do you currently live in?'},
   'answer_ids': None},
  '143922786': {'question_id': {'sm': '143922786'},
   'question_number': {'sm': 4},
   'question_family': 'datetime',
   'question_text': {'sm': 'What is your date of birth?'},
   'answer_ids': None},
  '150407860

In [126]:
[q for p in sm_key['pages'] for q in p['questions'] if 'answers' in q.keys() and 'choices' not in q['answers'].keys()]


[{'id': '143922786',
  'position': 4,
  'visible': True,
  'family': 'datetime',
  'subtype': 'date_only',
  'layout': None,
  'sorting': None,
  'required': None,
  'validation': {'type': 'date_us',
   'text': 'Please enter a valid date.',
   'max': None,
   'min': None,
   'sum': None,
   'sum_text': ''},
  'forced_ranking': False,
  'headings': [{'heading': 'What is your date of birth?'}],
  'href': 'https://api.surveymonkey.com/v3/surveys/513506444/pages/43673968/questions/143922786',
  'answers': {'rows': [{'position': 1,
     'visible': True,
     'text': 'Date / Time',
     'id': '1066314198'}]}}]

In [69]:
[h['heading'] for q in questions for h in q['headings']]

['How did you learn about the survey?',
 'Are you taking this survey online or in person?',
 'What zip code do you currently live in?',
 'What is your date of birth?',
 'Which of the following best describes you?',
 'What is the highest level of education that you have attained?',
 'Do you hold any professional certifications?',
 'What certifications do you hold?',
 'Are you currently employed?',
 'How many jobs do you currently work at?',
 'What are your current job title(s)',
 'Does your current compensation cover what you need to feel financially comfortable?',
 'What would it take for you to feel like you were earning enough to feel financially comfortable?',
 'Are you currently looking for another job?',
 'Where are you looking for another job?',
 'Why are you looking for another job?',
 'Are you seeking a promotion at your current job?',
 'What is your ideal job?',
 'What are some barriers that you feel you face to achieving your ideal job?',
 'Are you currently receiving unemplo

In [34]:

combined_map = {
    'non-skills-matcher':[], # not to send to COS (background questions)
    'skills-matcher':[], # to send to COS skills matcher 
}

for p in sm_key['pages']:
    for q in p['questions']:
        question_type = 'skills-matcher' if "skills matcher" in p['title'].lower()  else "non-skills-matcher" 
        try: 
            answer_ids = [d['id'] for d in q['answers']['choices']] if 'answers' in q.keys() else None
            combined_map[question_type].append({
                'question_id':{'sm':q['id']},
                'question_number':{'sm':q['position']},
                'question_text':{'sm':[h['heading'] for h in q['headings']]},
                'answer_ids':answer_ids
            })
        except: 
            display(q)


{'id': '143922786',
 'position': 4,
 'visible': True,
 'family': 'datetime',
 'subtype': 'date_only',
 'layout': None,
 'sorting': None,
 'required': None,
 'validation': {'type': 'date_us',
  'text': 'Please enter a valid date.',
  'max': None,
  'min': None,
  'sum': None,
  'sum_text': ''},
 'forced_ranking': False,
 'headings': [{'heading': 'What is your date of birth?'}],
 'href': 'https://api.surveymonkey.com/v3/surveys/513506444/pages/43673968/questions/143922786',
 'answers': {'rows': [{'position': 1,
    'visible': True,
    'text': 'Date / Time',
    'id': '1066314198'}]}}

In [23]:
# sm_key = get_qa_key(api="sm", fetch=False) 
# cos_key = get_qa_key(api="cos", fetch=False) 
combined_map = combine_qa_keys()
combined_map

# print("Survey Monkey Q/A Key:".upper())
# display(sm_key)
# print("\nCareerOneStop Q/A Key:".upper())
# display(cos_key)
# print("\nCombined SM/COS Q/A Key".upper())
# display(combined_map)

KeyError: 'choices'

In [8]:
def get_sm_api_response(per_page=10000, test_mode=True):
    """GET list of all survey responses from /surveys/{id}/responses/bulk?per_page={per_page}. 

    Arg: 

    per_pages (int): The amount of responses to retrieve per page. 

        - The SM API returns survey responses in pages, up to `per_page` responses per page. 
        - The SM API response begins with the *earliest* survey responses, NOT the most recent responses.
            - If all survey responses do not fit on one page, the response object of the GET request will include a link to the page 
            of the most recent responses. See `example_multiple_pages.json` for an example, in the `links.last` attribute.
        - Once the amount of total responses exceeds `per_page`, we will have to make a second API call to the most recent page 
        for the most recent responses.
            - I've set `per_page=10000` as a hacky way to get around this. The function will make the second API request if this amount is exceeded

    test (bool): mode to reducing API request by just loading cached copy.

    """
    if test_mode:
        with open("sm_responses.json", "r") as file:
            return json.load(file) 

    url =  SM_DATA['base_url'] + f"/responses/bulk?per_page={per_page}"
    response = request(url, headers=SM_DATA['headers'])
    response_json = response.json()

    if 'last' in response_json['links'].keys():
        url = response_json['links']['last']
        log_azure(f"WARNING: We've miraculously exceeded {per_page} survey responses. Making second request to {url}.")
        response = request(url, headers=SM_DATA['headers'])
        response_json = response.json()

    return response_json

def has_valid_email(sm_survey_response:dict) -> bool: 
    """Check if SM survey response includes a valid email address in the email question.
    Use to skip a response in process_sm_responses().
    """
    # TO-DO: The Survey Monkey Survey should at least validate the text format of the email address (though it doesn't support deliverability validation).
    # Link: https://help.surveymonkey.com/en/surveymonkey/create/validating-text-fields/

    # Load translation map
    combined_map = combine_qa_keys()

    # Identify the question_id of the email question in the answer key  
    email_question_id = [q['question_id']['sm'] for q in combined_map['non-skills-matcher'].values() 
                            if 'email' in q['question_text']['sm'][0].lower()][0]
    
    # Check if sm_response contains the email question (if any question is ommitted, the respondent left it blank)      
    questions = [q for p in sm_survey_response['pages'] for q in p['questions']]
    if not any(q['id'] == email_question_id for q in questions): 
        return False 
    
    # Validate email if one was provided 
    email_address = [q for q in questions if q['id'] == email_question_id][0]['answers'][0]['text']
    try:
        validate_email(email_address, check_deliverability=True)
        return True
    except EmailNotValidError as e:
        log_azure(f"WARNING: {sm_survey_response['id']} contains invalid email address: {email_address} -- {str(e)}. Skipping.")
        return False 


def process_sm_responses(sm_api_response) -> list[dict]: 
    """Filter and process new SM survey responses from get_sm_api_response()


        - Checks for unexpected question ids vs. the combined Q/A key.
        - Checks against DB for already processed responses 
        - Checks if response includes valid email address (`has_valid_email()`)
        - Loads (raw) new responses into database (into 'processing' table) until they are finished
           - When these responses are successfully sent to COS and then emailed to user, they will be moved to main DB table.
    """

    ## -- Read list of already processed responses from database (TO-DO) -- ##
    # Database useful to check against repeated user email or IP + answers (avoid redundant emails), for retries of failures 

    placeholder_processed_response_ids = []
    ## -------------------------------------------------------------------- ## 

    # Create/load question answer/key map 
    combined_map = combine_qa_keys()    

    ## 1.) Check for unexpected question ids in new responses vs. those in COS translation map. Attempt to refresh keys if there's a mismatch.
    # If there are still unexpected ids, there's probably an error with combine_qa_keys().
    new_resp_question_ids = set(q['id'] for resp in sm_api_response['data'] for p in resp['pages'] for q in p['questions']
                    if q['id'] not in placeholder_processed_response_ids)
    refreshes = 0
    while refreshes < 2: 
        skills_matcher_ids = set(combined_map['skills-matcher'].keys())
        non_skills_matcher_ids = set(combined_map['non-skills-matcher'].keys())
        expected_ids = skills_matcher_ids.union(non_skills_matcher_ids)
        unexpected_ids = new_resp_question_ids.difference(expected_ids)

        if len(unexpected_ids) > 0: 
            if refreshes == 0:  
                log_azure(f"WARNING: Unexpected question ids in SM responses: {unexpected_ids}. Refreshing question/answer key map.")
                combined_map = combine_qa_keys(fetch=True)
            elif refreshes == 1: 
                log_azure(f"ERROR: Unexpected question ids remain after refresh: {unexpected_ids}.") 
                raise Exception(f"ERROR: Unexpected question ids remain after refresh: {unexpected_ids}.")
            refreshes += 1
        else: 
            break 

    ## 2.) Filter for new survey responses and survey responses that have valid email addresses
    processed_responses = []
    for resp in sm_api_response['data']: 
        if resp['id'] not in placeholder_processed_response_ids and has_valid_email(resp): 

            resp_dict = {
            'response_id':resp['id'],
            'collector_id':resp['collector_id'], 
            'questions':[] 
            }

            # Add questions information
            for p in resp['pages']:
                for q in p['questions']:

                    # Get matching question dictionary from combined_map, based on question type and SM question_id
                    question_type = 'non-skills-matcher' if q['id'] in non_skills_matcher_ids else 'skills-matcher'
                    q_map = combined_map[question_type][q['id']] 

                    # Get rest of information from q_map
                    question_number = {'sm':q_map['question_number']['sm']}
                    if 'cos' in q_map['question_number'].keys():
                        question_number['cos'] = q_map['question_number']['cos']
                    
                    question_id = {'sm':q_map['question_id']['sm']}
                    if 'cos' in q_map['question_id'].keys():
                        question_id['cos'] = q_map['question_id']['cos']

                    if question_type == 'skills-matcher': 
                        answers = [{'sm':a['choice_id'],
                                    'cos':q_map['answer_ids'][a['choice_id']]} for a in q['answers']]
                    else: 
                        answers = [{'sm':a} for a in q['answers']]
                    # answers = []
                    # for a in q['answers']: 
                        # if 'choice_id' in a.keys(): 
                        #     answers.append({'sm':a['choice_id']})
                        # else: 
                        #     answers.append({'sm':a})

                    resp_dict['questions'].append({'question_id':question_id, 
                                                   'question_number':question_number, 
                                                   'question_type':question_type, 
                                                   'answers':answers})
                                                
            processed_responses.append(resp_dict)

    ## -- 3. Write (raw) new responses to processing table in DB (TO-DO)  -- ## 
    # load_to_db(...)
    ## -------------------------------------------------------------------- ## 

    return processed_responses

In [9]:
def translate_post_cos(processed_sm_responses): 
    """Translate processed SM survey responses to COS POST objects, retrieve responses."""

    cos_data = []
    for resp in processed_sm_responses:
        
        cos_request = {'SKAValueList':
                    [{'ElementId':q['question_id']['cos'], 
                      'DataValue':str(q['answers'][0]['cos'])} for q in resp['questions'] 
                        if q['question_type'] == 'skills-matcher']}
        
        cos_response = request(method="POST", 
                             url=COS_DATA['url'],
                             json=cos_request, 
                             headers=COS_DATA['headers'])
        
        cos_data.append({'sm_response_id':resp['response_id'],
                         'cos_request': cos_request, 
                         'cos_response':cos_response})

    return cos_data

In [30]:
sm_api_response = get_sm_api_response(test_mode=False)
processed_sm_responses = process_sm_responses(sm_api_response)
cos_requests = translate_post_cos(processed_sm_responses)

INFO: POST {'https://api.careeronestop.org/v1/skillsmatcher/XjV8e71wBCteYXb'} -- {200} -- 0.49 -- {datetime.datetime(2023, 9, 29, 23, 9, 24, 377236)}


In [12]:
sm_api_response = get_sm_api_response(test_mode=False)


TypeError: request() missing 2 required positional arguments: 'url' and 'json'

In [15]:
sm_api_response

{'data': [{'id': '114409718452',
   'recipient_id': '',
   'collection_mode': 'default',
   'response_status': 'completed',
   'custom_value': '',
   'first_name': '',
   'last_name': '',
   'email_address': '',
   'ip_address': '96.64.76.209',
   'logic_path': {},
   'metadata': {'contact': {}},
   'page_path': [],
   'collector_id': '427785863',
   'survey_id': '409346397',
   'custom_variables': {},
   'edit_url': 'https://www.surveymonkey.com/r/?sm=MKTi7zUaCT0NNV4xwYeK7ZznphgwMRWxwqKsI120yUFxpWGRWVD0l5Ev1zYBoZVK',
   'analyze_url': 'https://www.surveymonkey.com/analyze/browse/JrhdBA97A18icLNdv_2B26M4cs4Lx9WaWryCJ3TK_2F_2FzUk_3D?respondent_id=114409718452',
   'total_time': 41,
   'date_modified': '2023-09-11T16:21:35+00:00',
   'date_created': '2023-09-11T16:20:54+00:00',
   'href': 'https://api.surveymonkey.com/v3/surveys/409346397/responses/114409718452',
   'pages': [{'id': '45350810',
     'questions': [{'id': '150768414',
       'answers': [{'choice_id': '1110937950'}]},
     