In [1]:
import datetime as dt
import json
import os
import random
import time
from itertools import chain

import pandas as pd
import requests
import yaml
from azure.storage.blob import BlobServiceClient, BlobType
from email_validator import validate_email, EmailNotValidError

from funcs.funcs import combine_qa_keys, get_sm_survey_responses, process_sm_responses
from funcs.utils import log_azure, request, load_json, clean_field_text, has_valid_email, load_config

# Load API settings for both services from a local YAML file so that no
# credentials are written into the notebook itself.
# NOTE(review): yaml.full_load is used here; if the file only contains plain
# mappings/strings, yaml.safe_load would be the more restrictive choice — confirm
# before switching.
with open("api-key.yaml", "r") as file:
    data = yaml.full_load(file)

# SurveyMonkey Survey
# The 'real' entry under 'sm' is selected; the pagination cell reads
# SM_DATA['headers'] when requesting follow-up pages.
SM_DATA = data['sm']['real']
# CareerOneStop Survey
COS_DATA = data['cos']


---

##### **Main Functions** 

* `get_qa_key()` - GET (or load a cached copy of) the question/answer key from SM or COS 

<br>

* `combine_qa_keys()` - Combine the SM and COS question/answer keys into one combined key/translation map between the APIs
    - Generates a refreshed map if a change is detected in the SM survey or the COS survey. 
        - The COS survey should not change at all. 
        - The SurveyMonkey questions that map to the COS survey questions should not change (they are intended to be a direct port of the COS questions).

<br>

* `get_sm_survey_responses()` - GET SM survey responses 

<br>

* `process_sm_responses()` - filter and process new SM survey responses from `get_sm_survey_responses()`
    - Checks against DB for already processed responses 
    - Checks for unexpected question ids vs. the combined Q/A key.
        - Attempts to refresh the combined Q/A key if any unexpected question ids are found.
    - Adds matching information from the combined Q/A key to the survey responses   
    - Loads new responses into database (into 'processing' table) until they are finished

<br>

* `post_cos()` 
    - Creates COS JSON request objects from each SM survey response object 
        - If an SM survey response is missing an answer for a skills-survey question, the corresponding question in the COS object is filled with an answer of "Beginner"
    - POSTs each request object to the COS Skills Matcher API 
    - Stores the COS response alongside the original SM survey response, updating the database 
    
<br>

* `compose_email()` - Compose an email from a COS response object.

<br>

* `send_email()` - Send email if respondent provided valid email address. 

<br>

* `export_csv()` - Export data from database in csv/excel format for Social Contract analysis purposes      

In [2]:
# Fetch SurveyMonkey responses via the helper imported from funcs.funcs.
# The result is treated below as a single page: the pagination cell starts from
# it and follows its ['links']['next'] URL to collect the remaining pages.
sm_survey_responses = get_sm_survey_responses()

INFO: GET {'https://api.surveymonkey.com/v3/surveys/513506444/responses/bulk'} -- {200} -- 3.11 -- {datetime.datetime(2023, 10, 12, 10, 19, 54, 559654)}


In [50]:
# Inspect the raw payload returned by SurveyMonkey.
# NOTE(review): the rendered output contains respondent IP addresses and
# edit/analyze URLs — clear this cell's output before sharing or committing.
sm_survey_responses

{'data': [{'id': '118440150887',
   'recipient_id': '',
   'collection_mode': 'default',
   'response_status': 'completed',
   'custom_value': '',
   'first_name': '',
   'last_name': '',
   'email_address': '',
   'ip_address': '73.165.183.94',
   'logic_path': {},
   'metadata': {'contact': {}},
   'page_path': [],
   'collector_id': '452261040',
   'survey_id': '513506444',
   'custom_variables': {},
   'edit_url': 'https://www.surveymonkey.com/r/?sm=MSXCvuilN9Vf0ZobVXFjPswyoNLgo_2FfEjRoZb0vM7grlb8CTskkkTNj66m1GwMlq',
   'analyze_url': 'https://www.surveymonkey.com/analyze/browse/8xd9D8KA37YeLpeTvx8j2O9lfNbj2kqrYZtRcHvmtdM_3D?respondent_id=118440150887',
   'total_time': 1043,
   'date_modified': '2023-10-10T23:03:01+00:00',
   'date_created': '2023-10-10T22:45:38+00:00',
   'href': 'https://api.surveymonkey.com/v3/surveys/513506444/responses/118440150887',
   'pages': [{'id': '43673968',
     'questions': [{'id': '144588883',
       'answers': [{'choice_id': '1070603277'}]},
      

In [51]:
## GET all survey responses from survey monkey survey
# Starting from the page already fetched, keep following the 'next' link and
# collect every page's JSON payload in raw_responses (one dict per page).
MAX_CONSECUTIVE_FAILURES = 2   # give up on a page after this many non-200 responses in a row
RETRY_DELAY_SECONDS = 5        # pause before re-requesting a failed page

current_resp = sm_survey_responses
raw_responses = [current_resp]
retry_attempts = 0
# `.get('links') or {}` tolerates a payload that has no 'links' object at all,
# which would otherwise raise KeyError instead of simply ending pagination.
while 'next' in (current_resp.get('links') or {}) and retry_attempts < MAX_CONSECUTIVE_FAILURES:
    next_url = current_resp['links']['next']
    next_page_response = request(url=next_url, headers=SM_DATA['headers'], method='GET')
    if next_page_response.status_code != 200:
        print(f"Warning: {next_url} returned status code {next_page_response.status_code}")
        retry_attempts += 1
        # Only wait when another attempt will actually be made.
        if retry_attempts < MAX_CONSECUTIVE_FAILURES:
            time.sleep(RETRY_DELAY_SECONDS)
    else:
        # A successful page resets the failure counter (the limit is per page).
        retry_attempts = 0
        current_resp = next_page_response.json()
        raw_responses.append(current_resp)

# If a 'next' link is still pending, the loop stopped because of repeated
# failures. Say so explicitly: downstream cells would otherwise work on a
# silently truncated data set.
if 'next' in (current_resp.get('links') or {}):
    print(
        f"Warning: stopped after {retry_attempts} consecutive failed attempts; "
        f"raw_responses is INCOMPLETE ({len(raw_responses)} page(s) collected)."
    )


INFO: GET {'https://api.surveymonkey.com/v3/surveys/513506444/responses/bulk?start_created_at=2023-09-12T14%3A19%3A51%2B00%3A00&status=completed&sort_by=date_modified&sort_order=DESC&per_page=100&page=2'} -- {200} -- 1.79 -- {datetime.datetime(2023, 10, 12, 11, 32, 4, 168353)}
INFO: GET {'https://api.surveymonkey.com/v3/surveys/513506444/responses/bulk?start_created_at=2023-09-12T14%3A19%3A51%2B00%3A00&status=completed&sort_by=date_modified&sort_order=DESC&per_page=100&page=3'} -- {200} -- 1.66 -- {datetime.datetime(2023, 10, 12, 11, 32, 5, 963095)}
INFO: GET {'https://api.surveymonkey.com/v3/surveys/513506444/responses/bulk?start_created_at=2023-09-12T14%3A19%3A51%2B00%3A00&status=completed&sort_by=date_modified&sort_order=DESC&per_page=100&page=4'} -- {200} -- 1.41 -- {datetime.datetime(2023, 10, 12, 11, 32, 7, 383537)}
INFO: GET {'https://api.surveymonkey.com/v3/surveys/513506444/responses/bulk?start_created_at=2023-09-12T14%3A19%3A51%2B00%3A00&status=completed&sort_by=date_modified

In [53]:
# Snapshot the collected pages to a timestamped JSON file.
# Naive UTC timestamp, i.e. the same value utcnow() returned; utcnow() itself is
# deprecated since Python 3.12.
now = dt.datetime.now(dt.timezone.utc).replace(tzinfo=None)
# Built from portable pieces only. The original '%m-%-d-%Y_%T' relied on the
# platform-specific '%-d' and '%T' directives (not supported by every C library)
# and '%T' put ':' characters into the file name, which Windows does not allow.
# Month/day/year are rendered as before (day without zero padding); the time
# part now uses '-' as separator.
now_str = f"{now:%m}-{now.day}-{now:%Y}_{now:%H-%M-%S}"
with open(f'raw_responses_{now_str}.json','w') as file:
    json.dump(raw_responses, file)

In [58]:
## Process and request results from CareerOneStop
# Hand each raw page to process_sm_responses, in page order; each call's result
# is an iterable that gets flattened below.
per_page_results = [process_sm_responses(page) for page in raw_responses]

# Merge the per-page results into one flat list
processed_responses = list(chain.from_iterable(per_page_results))

# Persist next to the raw dump, reusing the same timestamp string.
# NOTE(review): unlike 'raw_responses_{now_str}.json' there is no '_' before the
# timestamp in this file name — kept as-is here.
with open(f"processed_responses{now_str}.json", "w") as file:
    json.dump(processed_responses, file)


In [82]:
# Concatenate the 'data' list of every fetched page into one list of response
# records and load it into a DataFrame (one row per response record).
raw_responses_data = list(chain.from_iterable(page['data'] for page in raw_responses))
df = pd.DataFrame(data=raw_responses_data)

In [84]:
# Show the first response as a single column (fields as rows) so that every
# field is visible without horizontal scrolling.
df.head(1).T

Unnamed: 0,0
id,118440150887
recipient_id,
collection_mode,default
response_status,completed
custom_value,
first_name,
last_name,
email_address,
ip_address,73.165.183.94
logic_path,{}


In [98]:
# For scroll through IPs
# Narrow df to the identifying columns plus every column whose name matches
# 'date' or 'url'. Note that this rebinds df to the narrowed frame.
id_columns = ['id','response_status','ip_address','collector_id']
date_url_columns = list(df.filter(regex='date|url').columns)
df = df[id_columns + date_url_columns]

# Tally responses per IP address and print the tally as "value, count" lines
# (CSV text with a space added after each comma for readability).
ip_counts_csv = df['ip_address'].value_counts().to_csv()
for line in ip_counts_csv.splitlines():
    print(line.replace(",",", "))

ip_address, count
38.150.14.131, 60
73.128.78.173, 19
38.152.33.129, 12
173.216.31.207, 4
167.21.42.154, 3
74.79.70.138, 2
71.201.134.77, 2
100.34.15.134, 2
100.11.43.96, 2
100.11.71.177, 2
63.70.2.233, 2
100.11.166.79, 2
47.150.252.83, 2
71.65.66.126, 2
100.34.10.158, 2
69.143.12.1, 2
73.165.183.94, 2
108.30.250.52, 2
173.17.205.70, 2
47.230.118.20, 2
76.106.59.99, 2
108.52.13.124, 2
108.226.206.131, 2
173.29.189.181, 2
108.48.108.65, 2
141.151.91.54, 2
73.57.101.16, 1
98.155.26.20, 1
69.245.177.224, 1
168.93.14.209, 1
75.97.237.24, 1
208.80.222.66, 1
154.84.170.146, 1
76.117.40.111, 1
75.199.13.35, 1
100.34.10.172, 1
68.163.33.40, 1
150.195.45.184, 1
216.151.183.159, 1
100.11.175.237, 1
122.8.25.87, 1
98.25.241.115, 1
68.202.27.124, 1
72.238.234.62, 1
98.211.70.111, 1
108.52.210.92, 1
168.93.3.12, 1
70.189.208.154, 1
168.93.19.189, 1
168.93.49.159, 1
168.93.48.210, 1
166.203.170.134, 1
168.93.19.209, 1
168.93.35.114, 1
168.93.10.235, 1
168.93.43.101, 1
24.211.234.112, 1
72.92.41.120,