In [1]:
import sys
import os
import json
import pandas as pd

# Project helper modules live on a network share rather than in an installed package,
# so their folders are appended to sys.path before importing from them.
# Raw strings keep every Windows backslash literal; a non-raw "...\Schema..." contains the
# invalid escape sequence "\S", which newer Python versions warn about.
sys.path.append(os.path.abspath(r"N:\CancerEpidem\BrBreakthrough\DeliveryProcess\Schema_and_Derivation_utils\Questionnaire\R0\scripts"))
from common_utils import load_and_pivot_data, load_schema, process_data, save_output, mask_pii, get_config, validate_data
from schema_utils import build_variable_mapping, extract_constraints, extract_var_types
# NOTE: `process_data` is imported again here, so this binding replaces the one taken
# from common_utils above; every later call uses the processing_utils version.
from processing_utils import process_data, save_change_tracking
from restructure_utils import restructure_pregnancies
import CleaningRules as cr
import NestedVariables as nv

sys.path.append(os.path.abspath(r"N:\CancerEpidem\BrBreakthrough\DeliveryProcess\Schema_and_Derivation_utils"))
from utilities import connect_DB, createLogger, read_data
from config import Delivery_log_path, test_server, r0_json_path, out_json_path

In [2]:
# Initialize
# Range expression handed to the data loader in the next cell.
# NOTE(review): presumably a SQL fragment over question IDs — confirm in common_utils.
question_range = 'BETWEEN 550 AND 739'

# Read the run configuration, then create this notebook's logger using the
# 'Delivery_log_path' entry from that configuration.
config = get_config()
logger = createLogger(
    'Pregnancies',
    config['Delivery_log_path'],
)

In [3]:
# Load data
# The loader receives the question range and the logger and returns two objects,
# bound here as `pivoted` and `dfPII`; `dfPII` is later passed to mask_pii.
# NOTE(review): presumably `pivoted` has one row per StudyID and one column per
# question variable — confirm against load_and_pivot_data in common_utils.
pivoted, dfPII = load_and_pivot_data(question_range, logger)

In [4]:
# Replace missing answers with empty strings and move the index into the columns.
# The result is bound to a new name instead of overwriting `pivoted`, so re-running
# this cell always starts from the loaded frame and never resets the index twice
# (which would add a spurious extra column to every participant record).
pivoted_filled = pivoted.fillna('').reset_index()

# Nested dict keyed by StudyID: {StudyID: {column_name: value, ...}, ...}
pivotedDict = pivoted_filled.set_index('StudyID').to_dict('index')

In [5]:
# Load the schema named 'Pregnancies_JSON', then walk down to the per-field
# definitions stored under additionalProperties -> properties.
schema = load_schema('Pregnancies_JSON')
additional_properties = schema["additionalProperties"]
schema_props = additional_properties["properties"]

In [6]:
# Derive the three lookup structures that are passed to process_data in the next step.
# The mapping is built from the whole schema; the constraint and type maps are built
# from the per-field definitions only.
# NOTE(review): presumably `variable_mapping` links raw question codes to schema
# variable names — confirm in schema_utils.
variable_mapping = build_variable_mapping(schema)
constraint_map = extract_constraints(schema_props)
var_type_map = extract_var_types(schema_props)

Extracted constraints for all fields
Extracted variable types for all fields


In [7]:
# Process data
# `process_data` is imported from both common_utils and processing_utils in the import
# cell; the processing_utils import comes last, so that is the function called here.
# Inputs: the per-participant dict, the three schema-derived maps, and the
# `newValMap` object from CleaningRules.
# The call returns two objects: the processed records and a change-tracking result.
# NOTE(review): `change_tracking` is not used again in the visible cells even though
# `save_change_tracking` is imported — confirm whether it should be persisted.
processed_data, change_tracking = process_data(
    pivotedDict,
    variable_mapping,
    var_type_map,
    constraint_map,
    cr.newValMap
)

Processed data for 113732 participants


In [8]:
# Preview one participant only. Echoing the whole dict would write every participant's
# record into the saved notebook, which bloats the file and exposes participant-level
# data before the PII-masking step. The slice also copes with an empty result.
{study_id: processed_data[study_id] for study_id in list(processed_data)[:1]}

{100002: {'Gen07_full@Q5_13_D_10': None,
  'Gen07_full@Q5_13_D_11': None,
  'Gen07_full@Q5_13_D_12': None,
  'Gen07_full@Q5_13_D_7': None,
  'Gen07_full@Q5_13_D_8': None,
  'Gen07_full@Q5_13_D_9': None,
  'Gen07_full@Q5_13_M_10': None,
  'Gen07_full@Q5_13_M_11': None,
  'Gen07_full@Q5_13_M_12': None,
  'Gen07_full@Q5_13_M_13': None,
  'Gen07_full@Q5_13_M_7': None,
  'Gen07_full@Q5_13_M_8': None,
  'Gen07_full@Q5_13_M_9': None,
  'Gen07_full@Q5_13_Y_10': None,
  'Gen07_full@Q5_13_Y_11': None,
  'Gen07_full@Q5_13_Y_12': None,
  'Gen07_full@Q5_13_Y_13': None,
  'Gen07_full@Q5_13_Y_7': None,
  'Gen07_full@Q5_13_Y_8': None,
  'Gen07_full@Q5_13_Y_9': None,
  'Gen07_full@Q5_14_10_1': None,
  'Gen07_full@Q5_14_11_1': None,
  'Gen07_full@Q5_14_12_1': None,
  'Gen07_full@Q5_14_13_1': None,
  'Gen07_full@Q5_14_7_1': None,
  'Gen07_full@Q5_14_8_1': None,
  'Gen07_full@Q5_14_9_1': None,
  'Gen07_full@Q5_15_10': None,
  'Gen07_full@Q5_15_11': None,
  'Gen07_full@Q5_15_12': None,
  'Gen07_full@Q5_15_

In [9]:
# Reshape the processed records using the schema and the variable mapping.
# NOTE(review): judging by the displayed result, each participant gets a 'Pregnancies'
# list of per-pregnancy dicts plus participant-level fields — confirm in restructure_utils.
json_data = restructure_pregnancies(processed_data, schema, variable_mapping)

In [10]:
# Preview one participant only. Echoing the whole structure would write every
# participant's record into the saved notebook, and at this point the data has not yet
# been through the PII-masking step. The slice also copes with an empty result.
{study_id: json_data[study_id] for study_id in list(json_data)[:1]}

{100002: {'Pregnancies': [{'R0_PregNum': 1,
    'R0_Preg_BirthWghtG': None,
    'R0_Preg_BirthWghtlbs': None,
    'R0_Preg_BirthWghtOzs': None,
    'R0_Preg_BreastfeedingWks': None,
    'R0_Preg_MilkSuppression': None,
    'R0_Preg_EndDay': 18,
    'R0_Preg_EndMnth': 8,
    'R0_Preg_EndYr': 99,
    'R0_Preg_Outcome': 7,
    'R0_Preg_DurationWks': 6,
    'R0_Preg_SevereVomiting': 2,
    'R0_Preg_Eclampsia': 2,
    'R0_Preg_ChildSex': None}],
  'R0_InfertilityPeriod': None,
  'R0_InfertilityAge': None,
  'R0_BreastfeedingBreast': None,
  'R0_FirstPregWghtKG': None,
  'R0_FirstPregWghtlbs': None,
  'R0_FirstPregWghtStone': None,
  'R0_LastPregWghtKG': None,
  'R0_LastPregWghtlbs': None,
  'R0_LastPregWghtStone': None,
  'R0_PregHrmns': 2,
  'R0_PregHrmnsAge': None,
  'R0_InfertilityDiagnosis': None,
  'R0_InfertilityReason': None,
  'R0_InfertilityReasonOther': None,
  'R0_FertilityDrugs': None,
  'R0_FertilityTreatmentAgeFirst': None,
  'R0_FertilityTreatmentAgeLast': None,
  'R0_EverPre

In [11]:
# Check the restructured data against the loaded schema.
# NOTE(review): this runs before the PII-masking cell, so the masked data that is
# finally saved is not re-validated — confirm that masking cannot break schema validity.
validate_data(json_data, schema)

JSON data is valid.


In [None]:
# Remove PII
# Rebinds `json_data` to the result of masking it with the PII frame from the load step.
# This cell must run before the save cell, otherwise the unmasked data is written out;
# use Restart & Run All to guarantee the order.
# NOTE(review): the cell feeds its own output back in on a re-run — presumably
# mask_pii is safe to apply twice; confirm in common_utils.
json_data = mask_pii(json_data, dfPII)

In [13]:
# Save output
# Writes whatever `json_data` currently holds under the name 'Output_Pregnancies' and
# reports the destination through the logger.
# NOTE(review): nothing here checks that PII masking has run — confirm the masking cell
# was executed in this kernel session before saving.
save_output(json_data, 'Output_Pregnancies', logger)

2025-06-13 13:29:28 - INFO: Saved output to N:\CancerEpidem\BrBreakthrough\DeliveryProcess\Data_Output_Testing\Output_Pregnancies.json


Output saved: N:\CancerEpidem\BrBreakthrough\DeliveryProcess\Data_Output_Testing\Output_Pregnancies.json
