In [84]:
import pandas as pd
import json
from collections import Counter, defaultdict
from datetime import datetime
import time
from unidecode import unidecode
from itertools import chain

DATA_DIR = 'C:/Users/igork/Downloads/New_Data_Hari/'

In [85]:
# irrelevant questions
USELESS_QS = "6 7 27 31 53 81 113 114".split()

questions = pd.read_csv(DATA_DIR + "Questions.txt", sep="|", dtype=str)

answers = pd.read_csv(DATA_DIR + "Answers.txt", sep="|", dtype=str)
answers["Descr"] = answers["Descr"].apply(lambda _: _.split(";")[-1].strip())

resp = pd.read_csv(DATA_DIR + "profileresponses.txt", sep="|", encoding='latin-1', dtype=str)
resp['ResponseText'] = resp['ResponseText'].fillna('')
resp['ResponseDate'] = resp['ResponseDate'].fillna('')

customers = pd.read_csv(DATA_DIR + "YC_member.txt", sep="|", encoding='latin-1', dtype=str)

In [86]:
class RespCollector(object):
    
    def __init__(self, df):
        
        self.info_dict = defaultdict(lambda: defaultdict())
        self.df = df
        self.personal_dict = defaultdict()
        self.kids_dict = defaultdict(lambda : defaultdict)
        self.work_dict = defaultdict()
        self.finance_dict = defaultdict()
        self.insurance_dict = defaultdict()
        self.transport_dict = defaultdict()
        self.phone_dict = defaultdict()
        self.internet_dict = defaultdict()
        self.devices_dict = defaultdict()
        self.health_dict = defaultdict()
        self.online_shopping_dict = defaultdict()
        self.property_dict = defaultdict()
        self.media_dict = defaultdict()
        self.optins_dict = defaultdict()
        self.travel_dict = defaultdict()
        self.interests_dict = defaultdict()
        self.drinks_dict = defaultdict()

    def _format_date(self, date_str):
        if isinstance(date_str, str) and (len(date_str) > 3):
            return datetime.strptime(" ".join(date_str.split()[:-1]), "%b %d %Y").strftime("%d/%m/%Y")
        
        return None
      
    def _add_top_level_answer(self, dk, ki, answer, mx=1):
        assert len(answer) == 1, 'ERROR: _add_top_level_answer only accepts a single-element list!'
        if ki not in self.info_dict:
            dk[ki] = [answer[0]]
        else:
            dk[ki].append(answer[0])
        if mx == 1:
            dk[ki] = dk[ki][0]
            
        return self
    
    def _remove_empty(self, dic):
        
        return {k: v for k, v in dic.items() if v}
    
    def _get_answer(self, row):
        """
        row has to be a row object
        """
        question = questions.loc[questions.Question_PK == row[1].Question_PK, "Descr"].values[0].lower().strip()
    
        # collect answer (from parts) as a list
        answer = []
    
        if row[1].ResponseText:
            answer.append(row[1].ResponseText.lower().strip())
        if row[1].ResponseDate:
            answer.append(self._format_date(row[1].ResponseDate))
    
        if not answer:
            possible_answer = answers.loc[(answers.Answer_PK == row[1].Answer_PK) & (answers.Question_PK == row[1].Question_PK), "Descr"].values
            if possible_answer:
                answer.append(possible_answer[0].strip().lower())
            else:
                answer = [None]
    
        # ---- P E R S O N A L
        
        if int(row[1].Question_PK) == 1:
            self._add_top_level_answer(self.personal_dict, "gender", answer)
        
        if int(row[1].Question_PK) == 2:
            self._add_top_level_answer(self.personal_dict, "dob", answer)
    
        if int(row[1].Question_PK) == 3:
            self.personal_dict["marital_status"] = [answer[0], self._format_date(row[1].RespondedTime)]
#             self._add_top_level_answer(self.personal_dict, "marital_status", answer)
    
        if int(row[1].Question_PK) == 4:
            self.personal_dict["home_postcode"] = [answer[0], self._format_date(row[1].RespondedTime)]
#             self._add_top_level_answer(self.personal_dict, "home_postcode", answer)
    
        if int(row[1].Question_PK) == 6:
            self._add_top_level_answer(self.personal_dict, "lives_in_state", answer)
    
        if int(row[1].Question_PK) == 7:
            self._add_top_level_answer(self.personal_dict, "lives_in_area", answer)
        
        if int(row[1].Question_PK) == 70:
            self._add_top_level_answer(self.personal_dict, "born_in", answer)
        
        if int(row[1].Question_PK) == 71:
            self._add_top_level_answer(self.personal_dict, "ancestry", answer)
        
        if int(row[1].Question_PK) == 72:
            self._add_top_level_answer(self.personal_dict, "languages_at_home", answer, 5)
        
        if int(row[1].Question_PK) == 73:
            self._add_top_level_answer(self.personal_dict, "religion", answer)
        
        if int(row[1].Question_PK) == 25:
            self.personal_dict["education"] = [answer[0], self._format_date(row[1].RespondedTime)]
        
        # ----- K I D S
        
        if int(row[1].Question_PK) == 11:
            self.kids_dict["kids_u18_in_household"] = [answer[0], self._format_date(row[1].RespondedTime)]
#             self._add_top_level_answer(self.kids_dict, "kids_u18_in_household", answer)
        
        if int(row[1].Question_PK) == 77:
            self.kids_dict["total_kids"] = [answer[0], self._format_date(row[1].RespondedTime)]
#             self._add_top_level_answer(self.kids_dict, "total_kids", answer)
        
        if int(row[1].Question_PK) == 113:
            self._add_top_level_answer(self.kids_dict, "is_pregnant", answer)
        
        if int(row[1].Question_PK) == 114:
            self._add_top_level_answer(self.kids_dict, "baby_due", answer)
        
        if int(row[1].Question_PK) in range(12,22):
            
            gnd = [p if p.isalpha() else None for p in answer][0] 
            dob = ["/".join(p.split("/")[-2:]) if "/" in p else None for p in answer][0]
            
            if "kids_info" not in self.kids_dict:
                self.kids_dict["kids_info"] = [{"gender": gnd, "dob": dob}]
            else:
                self.kids_dict["kids_info"].append({"gender": gnd, "dob": dob})                 
        
#             self._add_top_level_answer(self.info_dict, "education", answer)
    
        # ----- W O R K
        
        if int(row[1].Question_PK) == 5:
            self.work_dict["work_postcode"] = [answer[0], self._format_date(row[1].RespondedTime)]
        
        if int(row[1].Question_PK) == 27:
            self.work_dict["employment_status"] = [answer[0], self._format_date(row[1].RespondedTime)]
        
        if int(row[1].Question_PK) == 28:
            self.work_dict["industry"] = [answer[0], self._format_date(row[1].RespondedTime)]
        
        if int(row[1].Question_PK) == 29:
            self.work_dict["occupation"] = [answer[0], self._format_date(row[1].RespondedTime)]
        
        if int(row[1].Question_PK) == 30:
            self.work_dict["company_size"] = [answer[0], self._format_date(row[1].RespondedTime)]
        
#         if int(row[1].Question_PK) == 31:
#             self.work_dict["company_annual_turnover"] = answer
        
        # ----- F I N A N C E
        
        if int(row[1].Question_PK) == 26:
            self._add_top_level_answer(self.finance_dict, "main_salary_earner", answer)
        
        if int(row[1].Question_PK) == 32:
            self.finance_dict["annual_income"] = [answer[0], self._format_date(row[1].RespondedTime)]
#             self._add_top_level_answer(self.finance_dict, "annual_income", answer)
        
        if int(row[1].Question_PK) == 33:
            self._add_top_level_answer(self.finance_dict, "annual_household_income", answer)
        
        if int(row[1].Question_PK) == 34:
            self._add_top_level_answer(self.finance_dict, "ways_to_pay_bills", answer, 5)
        
        if int(row[1].Question_PK) == 35:
            self._add_top_level_answer(self.finance_dict, "financial_services", answer, 5)
        
        if int(row[1].Question_PK) == 36:
            self.finance_dict["financial_institutions"] = answer
        
        if int(row[1].Question_PK) == 37:
            self.finance_dict["main_financial_institutions"] = [answer[0], self._format_date(row[1].RespondedTime)]
#             self._add_top_level_answer(self.finance_dict, "main_financial_institutions", answer, 5)
        
        if int(row[1].Question_PK) == 38:
            self.finance_dict["numb_credit_store_cards"] = [answer[0], self._format_date(row[1].RespondedTime)]
#             self._add_top_level_answer(self.finance_dict, "numb_credit_store_cards", answer)
        
        if int(row[1].Question_PK) == 39:
            self._add_top_level_answer(self.finance_dict, "total_credit_limit", answer)
        
        if int(row[1].Question_PK) == 115:
            self.finance_dict["credit_card_types"] = [answer[0], self._format_date(row[1].RespondedTime)]
#             self._add_top_level_answer(self.finance_dict, "credit_card_types", answer, 5)
        
        # ----- I N S U R A N C E
        
        if int(row[1].Question_PK) == 40:
            self._add_top_level_answer(self.insurance_dict, "insurance_policies", answer, 5)
        
        if int(row[1].Question_PK) == 66:
            self._add_top_level_answer(self.insurance_dict, "has_health_insurance", answer)
            
        if int(row[1].Question_PK) == 67:
            self._add_top_level_answer(self.insurance_dict, "private_health_insurance_with", answer, 5)
            
        if int(row[1].Question_PK) == 81:
            self._add_top_level_answer(self.insurance_dict, "vehicle_insurance_expiration", answer)
        
        if int(row[1].Question_PK) == 84:
            self._add_top_level_answer(self.insurance_dict, "home_building_insurance_expiration", answer)
            
        if int(row[1].Question_PK) == 85:
            self._add_top_level_answer(self.insurance_dict, "home_contents_insurance_expiration", answer)
        
        if int(row[1].Question_PK) == 86:
            self._add_top_level_answer(self.insurance_dict, "life_insurance_expiration", answer)
        
        if int(row[1].Question_PK) == 87:
            self._add_top_level_answer(self.insurance_dict, "health_insurance_expiration", answer)
        
        if int(row[1].Question_PK) == 88:
            self._add_top_level_answer(self.insurance_dict, "boat_insurance_expiration", answer)
        
        if int(row[1].Question_PK) == 89:
            self._add_top_level_answer(self.insurance_dict, "caravan_insurance_expiration", answer)
            
        # ----- T R A N S P O R T A T I O N
                 
        if int(row[1].Question_PK) == 42:
            self.transport_dict["vehicle_owned"] = [answer[0], self._format_date(row[1].RespondedTime)]
#             self._add_top_level_answer(self.transport_dict, "vehicle_owned", answer)
        
        if int(row[1].Question_PK) == 43:
            self._add_top_level_answer(self.transport_dict, "cond_most_used_vehicle_when_purchased", answer)
        
        if int(row[1].Question_PK) == 44:
            self._add_top_level_answer(self.transport_dict, "vehicle_makes_owned", answer, 5)
        
        if int(row[1].Question_PK) == 45:
            self._add_top_level_answer(self.transport_dict, "value_most_used_vehicle_when_purchased", answer)
        
        if int(row[1].Question_PK) == 46:
            self._add_top_level_answer(self.transport_dict, "vehicle_types_owned", answer, 5)
        
        if int(row[1].Question_PK) == 47:
            self._add_top_level_answer(self.transport_dict, "main_transport_to_work", answer)
            
        if int(row[1].Question_PK) == 91:
            self._add_top_level_answer(self.transport_dict, "total_vehicles_in_hhold", answer)
     
        if int(row[1].Question_PK) == 92:
            self._add_top_level_answer(self.transport_dict, "year_bought_most_used_vehicle", answer)
        
        if int(row[1].Question_PK) == 116:
            self.transport_dict["plans_to_purchase_vehicle"] = [answer[0], self._format_date(row[1].RespondedTime)]
            
        # ----- P H O N E
        
        if int(row[1].Question_PK) == 48:
            self.phone_dict["owns_mobile"] = [answer[0], self._format_date(row[1].RespondedTime)]
#             self._add_top_level_answer(self.phone_dict, "owns_mobile", answer)
        
        if int(row[1].Question_PK) == 49:
            self._add_top_level_answer(self.phone_dict, "mobile_brand", answer)
            
        if int(row[1].Question_PK) == 80:
            self._add_top_level_answer(self.phone_dict, "mobile_number", answer)
        
        if int(row[1].Question_PK) == 50:
            self._add_top_level_answer(self.phone_dict, "who_pays_mobile", answer)
        
        if int(row[1].Question_PK) == 51:
            self.phone_dict["mobile_network"] = [answer[0], self._format_date(row[1].RespondedTime)]
#             self._add_top_level_answer(self.phone_dict, "mobile_network", answer)
        
        if int(row[1].Question_PK) == 52:
            self._add_top_level_answer(self.phone_dict, "mobile_on_contract", answer)
        
        if int(row[1].Question_PK) == 53:
            self._add_top_level_answer(self.phone_dict, "mobile_contract_expiration", answer)
        
        if int(row[1].Question_PK) == 59:
            self.phone_dict["landline_at_home"] = [answer[0], self._format_date(row[1].RespondedTime)]
#             self._add_top_level_answer(self.phone_dict, "landline_at_home", answer)
        
        if int(row[1].Question_PK) == 60:
            self._add_top_level_answer(self.phone_dict, "landline_provider", answer)

        # ----- I N T E R N E T
              
        if int(row[1].Question_PK) == 54:
            self._add_top_level_answer(self.internet_dict, "internet_at_home", answer)
        
        if int(row[1].Question_PK) == 55:
            self._add_top_level_answer(self.internet_dict, "type_internet_at_home", answer)
        
        if int(row[1].Question_PK) == 56:
            self.internet_dict["isp"] = [answer[0], self._format_date(row[1].RespondedTime)]
#             self._add_top_level_answer(self.internet_dict, "isp", answer)
        
        if int(row[1].Question_PK) == 93:
            self.internet_dict["social_networks"] = [answer[0], self._format_date(row[1].RespondedTime)]
#             self._add_top_level_answer(self.internet_dict, "social_networks", answer, 5)         
        
        # ----- D E V I C E S
        
        if int(row[1].Question_PK) == 57:
            self._add_top_level_answer(self.devices_dict, "owns_computer", answer)
                
        if int(row[1].Question_PK) == 58:
            self._add_top_level_answer(self.devices_dict, "computer_type", answer)
    
        if int(row[1].Question_PK) == 62:
            self.devices_dict["owns_devices"] = [answer[0], self._format_date(row[1].RespondedTime)]
#             self._add_top_level_answer(self.devices_dict, "owns_devices", answer, 5)
                    
        if int(row[1].Question_PK) == 117:
            self._add_top_level_answer(self.devices_dict, "deviced_purchased_upgraded_past_12_month", answer, 5)            
        
        # ----- H E A L T H
        
        if int(row[1].Question_PK) == 65:
            self._add_top_level_answer(self.health_dict, "conditions_suffered", answer, 5)
            
        if int(row[1].Question_PK) == 63:
            self.health_dict["smoker"] = [answer[0], self._format_date(row[1].RespondedTime)]
            self._add_top_level_answer(self.health_dict, "smoker", answer)
        
        if int(row[1].Question_PK) == 118:
            self._add_top_level_answer(self.health_dict, "type_of_cigarettes", answer)
        
        if int(row[1].Question_PK) == 119:
            self._add_top_level_answer(self.health_dict, "brands_of_cigarettes", answer, 5)
        
        if int(row[1].Question_PK) == 120:
            self._add_top_level_answer(self.health_dict, "brands_of_cigarette_papers", answer, 5)
        
        if int(row[1].Question_PK) == 64:
            self._add_top_level_answer(self.health_dict, "wears_glasses_or_lenses", answer)
        
        # ----- O N L I N E  S H O P P I N G
            
        if int(row[1].Question_PK) == 61:
            self._add_top_level_answer(self.online_shopping_dict, "online_purchasing_freq", answer)
        
        if int(row[1].Question_PK) == 69:
            self.online_shopping_dict["buying_groceries_online"] = [answer[0], self._format_date(row[1].RespondedTime)]
#             self._add_top_level_answer(self.online_shopping_dict, "buying_groceries_online", answer)

#         if int(row[1].Question_PK) == 68:
#             self._add_top_level_answer(self.online_shopping_dict, "role_in_buying_groceries", answer)
        
        if int(row[1].Question_PK) == 94:
            self._add_top_level_answer(self.online_shopping_dict, "main_supermarkets_for_groceries", answer, 5)
        
        if int(row[1].Question_PK) == 95:
            self._add_top_level_answer(self.online_shopping_dict, "regularly_shops_at_department_stopes", answer, 5) 
        
        # ----- P R O P E R T Y
        
        if int(row[1].Question_PK) == 9:
            self._add_top_level_answer(self.property_dict, "housing_type", answer)
        
        if int(row[1].Question_PK) == 41:
            self.property_dict["home_ownership_status"] = [answer[0], self._format_date(row[1].RespondedTime)]
#             self._add_top_level_answer(self.property_dict, "home_ownership_status", answer)
            
        if int(row[1].Question_PK) == 90:
            self._add_top_level_answer(self.property_dict, "bought_home_in", answer)
            
        if int(row[1].Question_PK) == 8:
            self._add_top_level_answer(self.property_dict, "people_in_household", answer)
    
        if int(row[1].Question_PK) == 10:
            self._add_top_level_answer(self.property_dict, "household_type", answer)

        # ----- M E D I A
        
        if int(row[1].Question_PK) == 75:
            self._add_top_level_answer(self.media_dict, "pay_tv_at_home", answer)
        
        if int(row[1].Question_PK) == 76:
            self._add_top_level_answer(self.media_dict, "pay_tv_provider", answer)
            
        if int(row[1].Question_PK) == 109:
            self._add_top_level_answer(self.media_dict, "reads_newspapers", answer, 5)
        
        if int(row[1].Question_PK) == 110:
            self._add_top_level_answer(self.media_dict, "reads_magazines", answer, 5)
        
        if int(row[1].Question_PK) == 111:
            self._add_top_level_answer(self.media_dict, "watches_sports", answer, 5)
        
        if int(row[1].Question_PK) == 112:
            self._add_top_level_answer(self.media_dict, "reads_news_portals", answer, 5)
            
        # ----- O P T - I N S
        
        if int(row[1].Question_PK) == 78:
            self._add_top_level_answer(self.optins_dict, "okayed_kids_online_surveys", answer)
        
        if int(row[1].Question_PK) == 79:
            self._add_top_level_answer(self.optins_dict, "wants_sms_offers", answer)
        
        if int(row[1].Question_PK) == 108:
            self._add_top_level_answer(self.optins_dict, "wants_wine_offers", answer)
        
        if int(row[1].Question_PK) == 23:
            self._add_top_level_answer(self.optins_dict, "wants_be_in_focus_group", answer)
        
        if int(row[1].Question_PK) == 24:
            self._add_top_level_answer(self.optins_dict, "wants_phone_interview", answer)
        
        # ----- T R A V E L
        
        if int(row[1].Question_PK) == 96:
            self._add_top_level_answer(self.travel_dict, "member_of_frequent_flyer", answer)
        
        if int(row[1].Question_PK) == 97:
            self.travel_dict["flights_past_12_months"] = [answer[0], self._format_date(row[1].RespondedTime)]
#             self._add_top_level_answer(self.travel_dict, "flights_past_12_months", answer)
        
        if int(row[1].Question_PK) == 98:
            self._add_top_level_answer(self.travel_dict, "purpose_flying_past_12_months", answer)
        
        if int(row[1].Question_PK) == 99:
            self._add_top_level_answer(self.travel_dict, "how_often_would_fly_for_business_a_year", answer)
        
        if int(row[1].Question_PK) == 100:
            self._add_top_level_answer(self.travel_dict, "how_often_would_fly_for_leisure_a_year", answer)
        
        if int(row[1].Question_PK) == 101:
            self._add_top_level_answer(self.travel_dict, "on_holidays_goes_to", answer)
        
        if int(row[1].Question_PK) == 102:
            self._add_top_level_answer(self.travel_dict, "rented_a_car_past_12_months", answer)
            
        # ----- I N T E R E S T S
        
        if int(row[1].Question_PK) == 74:
            self._add_top_level_answer(self.interests_dict, "pets", answer)
  
        if int(row[1].Question_PK) == 82:
            self._add_top_level_answer(self.interests_dict, "owns_swimming_pool", answer)

        if int(row[1].Question_PK) == 83:
            self._add_top_level_answer(self.interests_dict, "interested_in_activities", answer, 5)
        
        # ----- D R I N K S
        
        if int(row[1].Question_PK) == 103:
            self._add_top_level_answer(self.drinks_dict, "regular_alcoholic_drinks", answer, 5)
        
        if int(row[1].Question_PK) == 104:
            self._add_top_level_answer(self.drinks_dict, "energy_drinks", answer, 5)
        
        if int(row[1].Question_PK) == 105:
            self._add_top_level_answer(self.drinks_dict, "sports_drinks", answer, 5)
        
        if int(row[1].Question_PK) == 106:
            self._add_top_level_answer(self.drinks_dict, "bottles_wine_a_month_at_household", answer)
        
        if int(row[1].Question_PK) == 107:
            self._add_top_level_answer(self.drinks_dict, "how_much_ok_to_spend_bottle_wine", answer)
            
        return self
    
    def collect_answers(self):
        
        for row in self.df.iterrows():
            self._get_answer(row)
            
        self.personal_dict = self._remove_empty(self.personal_dict)
        if self.personal_dict:
            self.info_dict["personal"] = self.personal_dict
            
        self.work_dict = self._remove_empty(self.work_dict)
        if self.work_dict:
            self.info_dict["work"] = self.work_dict
            
        self.kids_dict = self._remove_empty(self.kids_dict)
        if self.kids_dict:
            self.info_dict["kids"] = self.kids_dict
   
        self.finance_dict = self._remove_empty(self.finance_dict)
        if self.finance_dict:
            self.info_dict["financial"] = self.finance_dict
        
        self.insurance_dict = self._remove_empty(self.insurance_dict)
        if self.insurance_dict:
            self.info_dict["insurance"] = self.insurance_dict
        
        self.transport_dict = self._remove_empty(self.transport_dict)
        if self.transport_dict:
            self.info_dict["transportation"] = self.transport_dict
            
        self.phone_dict = self._remove_empty(self.phone_dict)
        if self.phone_dict:
            self.info_dict["phone"] = self.phone_dict
        
        self.internet_dict = self._remove_empty(self.internet_dict)
        if self.internet_dict:
            self.info_dict["internet"] = self.internet_dict
        
        self.devices_dict = self._remove_empty(self.devices_dict)
        if self.devices_dict:
            self.info_dict["devices"] = self.devices_dict
        
        self.health_dict = self._remove_empty(self.health_dict)
        if self.health_dict:
            self.info_dict["health"] = self.health_dict
        
        self.property_dict = self._remove_empty(self.property_dict)
        if self.property_dict:
            self.info_dict["household"] = self.property_dict
        
        self.online_shopping_dict = self._remove_empty(self.online_shopping_dict)
        if self.online_shopping_dict:
            self.info_dict["shopping"] = self.online_shopping_dict
            
        self.media_dict = self._remove_empty(self.media_dict)
        if self.media_dict:
            self.info_dict["media"] = self.media_dict
        
        self.optins_dict = self._remove_empty(self.optins_dict)
        if self.optins_dict:
            self.info_dict["opt-ins"] = self.optins_dict
        
        self.travel_dict = self._remove_empty(self.travel_dict)
        if self.travel_dict:
            self.info_dict["travel"] = self.travel_dict
    
        self.interests_dict = self._remove_empty(self.interests_dict)
        if self.interests_dict:
            self.info_dict["interests"] = self.interests_dict
        
        self.drinks_dict = self._remove_empty(self.drinks_dict)
        if self.drinks_dict:
            self.info_dict["drinks"] = self.drinks_dict
        
        return self

In [None]:
# customers have a unique identifier member_pk
member_pks = sorted(customers.member_pk.unique())
print("total {} member pks".format(len(member_pks)))

total 4031876 member pks


In [None]:
cust_profiles = []

pt = 0

t0 = time.time()

for j, member_pk in enumerate(member_pks, 1):
    
    # collect this member's personal data
    first_name, last_name, customer_id = chain.from_iterable(customers.loc[customers.member_pk == member_pk,
                            ["FirstName", "LastName", "TicketekCustomerID"]].applymap(lambda x: unidecode(x.lower().replace("'","")) if isinstance(x, str) else None).values)
    
    this_resp = resp[resp.Member_PK == member_pk]
    rc = RespCollector(this_resp).collect_answers()
    
    #print(rc.info_dict)
    
    dk = {"name": first_name, "last_name": last_name, 
                          "member_id": member_pk, "ticketek_id": customer_id}
    
    dk.update(rc.info_dict)
    
    cust_profiles.append(dk)
    
#     print(cust_profiles)
    
    print("#{}/{}: {} {}".format(j, len(member_pks), first_name, last_name))
    
    if (len(cust_profiles) == 10000) or (j == len(member_pks)):
        
        pt += 1
        json.dump(cust_profiles, open("cprofile-p{}.json".format(pt), "w"))
        
        mins, secs = divmod(time.time() - t0, 60)
        hrs = mins//60
        print("elapsed time: {:.0f} hrs {:.0f} mins {:.0f} secs".format(hrs, mins - hrs*60, secs))
        cust_profiles = []

#1/4031876: joy henderson
#2/4031876: lyne magee
#3/4031876: luke tilley
#4/4031876: adam jolliffe
#5/4031876: alexander kostrin
#6/4031876: greg murtagh
#7/4031876: jesse beattie
#8/4031876: frank pfab
#9/4031876: garry silver
#10/4031876: alexander slaven
#11/4031876: eric gruetzner
#12/4031876: paul webb
#13/4031876: allan quick
#14/4031876: steve posselt
#15/4031876: alan edwards
#16/4031876: ben mcquillan
#17/4031876: teppo inkinen
#18/4031876: barry brandon
#19/4031876: nicholas waldron
#20/4031876: shaun okelly
#21/4031876: chris flavel
#22/4031876: colin rogers
#23/4031876: kevin mcdonald
#24/4031876: mike swain
#25/4031876: nathan sutton
#26/4031876: brenden topliss
#27/4031876: grant douglas
#28/4031876: alison stewart
#29/4031876: alexander guilfoyle
#30/4031876: darren curyer
#31/4031876: john shaw
#32/4031876: luke jackson
#33/4031876: kenneth ransley
#34/4031876: daniel munt
#35/4031876: ian hodson
#36/4031876: warrick leeson
#37/4031876: peter doyle
#38/4031876: rudy koo