In [1]:
import os, json, csv
import pandas as pd
from django.apps import apps as django_apps
from django.core.exceptions import ValidationError
from edc_base.utils import get_utcnow, age
from dateutil.relativedelta import relativedelta
from django.db.models import OuterRef, Subquery
from edc_constants.constants import MALE, FEMALE, YES, POS, NEG
from collections import defaultdict
from tqdm import tqdm
from pre_flourish.helper_classes import MatchHelper
from flourish_prn.models import ChildOffStudy
from flourish_child.models import Appointment as ChildAppointment
from flourish_follow.models import Contact
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"

In [2]:
def get_child_consent(subject_identifier):
    """Return the participant's most recent child consent.

    Filters ``CaregiverChildConsent`` by ``subject_identifier`` and picks the
    row with the latest ``consent_datetime``.

    Returns ``None`` when no consent exists for the identifier.
    """
    consents = CaregiverChildConsent.objects.filter(
        subject_identifier=subject_identifier)
    try:
        return consents.latest('consent_datetime')
    except CaregiverChildConsent.DoesNotExist:
        return None

In [3]:
def check_offstudy(subject_identifier):
    """Return True when a ``ChildOffStudy`` record exists for the identifier."""
    return ChildOffStudy.objects.filter(
        subject_identifier=subject_identifier).exists()

In [4]:
def get_fu_schedule_name(cohort_name):
    """Return the follow-up schedule names configured for a cohort.

    Selects ``CohortSchedules`` rows for ``cohort_name`` whose on-schedule
    model starts with ``flourish_child`` and whose schedule type is
    ``followup`` or ``sq_followup``.

    Returns a flat ``values_list`` queryset of ``schedule_name`` values.
    """
    followup_types = ['followup', 'sq_followup']
    child_fu_schedules = CohortSchedules.objects.filter(
        cohort_name=cohort_name,
        onschedule_model__startswith='flourish_child',
        schedule_type__in=followup_types)
    return child_fu_schedules.values_list('schedule_name', flat=True)

In [5]:
def has_done_fu(subject_identifier, cohort_name):
    """Return True if the child has any visit on a follow-up schedule.

    The follow-up schedules considered are those returned by
    ``get_fu_schedule_name(cohort_name)``.
    """
    fu_schedule_names = get_fu_schedule_name(cohort_name)
    return ChildVisit.objects.filter(
        subject_identifier=subject_identifier,
        schedule_name__in=fu_schedule_names).exists()

In [6]:
def get_init_contact_datetime(subject_identifier):
    """Return the date of the participant's earliest ``Contact`` record.

    Falls back to today's date (``get_utcnow().date()``) when the participant
    has no ``Contact`` rows, or when the earliest row carries no
    ``contact_datetime`` value.
    """
    try:
        contact = Contact.objects.filter(
            subject_identifier=subject_identifier).earliest('contact_datetime')
    except Contact.DoesNotExist:
        return get_utcnow().date()
    # A ``getattr`` default only covers a *missing* attribute; a stored NULL
    # comes back as None and would fail on ``.date()``, so check explicitly.
    contact_datetime = getattr(contact, 'contact_datetime', None)
    if contact_datetime is None:
        return get_utcnow().date()
    return contact_datetime.date()

In [7]:
# Age bands in years; each tuple is matched as lower <= age < upper.
age_range_to_group = [(7, 9.5), (9.5, 14), (14, 17), (17, 21)]


def bcpp_age_range(age):
    """Return the ``(lower, upper)`` band from ``age_range_to_group`` containing ``age``.

    Bands are half-open: the lower bound is included, the upper bound is not.
    Returns ``None`` when ``age`` is ``None`` or falls outside every band.
    """
    if age is None:
        return None
    return next(
        (band for band in age_range_to_group if band[0] <= age < band[1]),
        None)

In [8]:
# Shared helper instance and gender labels used to bucket participants.
match_helper_cls = MatchHelper()
gender_map = {MALE: 'male', FEMALE: 'female'}


def get_matrix_grouping(participants, cohort='cohort_c', use_reference=False, is_bcpp=False):
    """Bucket participants by BMI group, age range and gender.

    Args:
        participants: iterable of measurement records. Each must expose
            ``child_visit.subject_identifier``; ``child_height`` (cm) and
            ``child_weight_kg`` are read with a default of 0.
        cohort: accepted for callers that pass it; not used by the body.
        use_reference: when True, age is calculated at the participant's
            first contact date (``get_init_contact_datetime``) instead of
            today.
        is_bcpp: when True, the age range comes from ``bcpp_age_range``
            instead of ``MatchHelper.age_range``.

    Returns:
        ``(bmi_age_data, subject_data)``: nested defaultdicts keyed
        ``[bmi_group][age_range][gender]`` holding, respectively, a count and
        the list of child subject identifiers.

    Participants are skipped when they have no consent, are off study, lack
    any of height/weight/dob/gender, or fall outside every BMI or age group.
    """
    counts = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
    members = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
    reference_dt = get_utcnow().date()

    for participant in tqdm(participants):
        child_identifier = participant.child_visit.subject_identifier
        if use_reference:
            reference_dt = get_init_contact_datetime(
                subject_identifier=child_identifier)
        child_consent = get_child_consent(child_identifier)
        is_offstudy = check_offstudy(child_identifier)
        if is_offstudy or not child_consent:
            continue

        child_dob = getattr(child_consent, 'child_dob', None)
        gender = getattr(child_consent, 'gender', None)
        child_height = getattr(participant, 'child_height', 0)  # centimetres
        child_weight_kg = getattr(participant, 'child_weight_kg', 0)
        if not (child_height and child_weight_kg and child_dob and gender):
            continue

        elapsed = age(child_dob, reference_dt)
        age_years = elapsed.years + (elapsed.months / 12)
        height_m = child_height / 100
        bmi = child_weight_kg / pow(height_m, 2)  # kg / m^2
        gender_label = gender_map.get(gender)
        bmi_group = match_helper_cls.bmi_group(bmi)
        age_range = match_helper_cls.age_range(age_years)
        if is_bcpp:
            age_range = bcpp_age_range(age_years)
        if not bmi_group or not age_range:
            continue

        counts[bmi_group][age_range][gender_label] += 1
        members[bmi_group][age_range][gender_label].append(child_identifier)
    return counts, members

In [9]:
def get_current_cohort_name(subject_identifier):
    """Return the name of the participant's current cohort.

    Looks up the ``Cohort`` row flagged ``current_cohort=True`` for the
    identifier. Returns ``None`` when no such row exists.
    """
    try:
        current = Cohort.objects.get(
            subject_identifier=subject_identifier, current_cohort=True)
    except Cohort.DoesNotExist:
        return None
    return current.name

In [32]:
def generate_matrix_dict(bmi_age_data, subject_data):
    """Flatten the nested matrix groupings into one export row per cell.

    Walks every ``[bmi_group][age_group][gender_group]`` key present in
    ``bmi_age_data`` and reads the matching identifier list from
    ``subject_data``. Each identifier is checked with ``has_done_fu`` against
    its current cohort.

    Returns:
        ``(export_data, all_identifiers, no_fu_identifiers)`` where
        ``export_data`` is a list of dicts with keys ``bmi_group``,
        ``age_group``, ``gender_group``, ``count``, ``subject_identifiers``,
        ``has_fu`` and ``fu_sidx``.
    """
    rows = []
    all_sidxs = []
    without_fu = []
    for bmi_group, age_data in bmi_age_data.items():
        for age_group, gender_data in age_data.items():
            for gender_group in gender_data:
                subject_identifiers = subject_data[bmi_group][age_group][gender_group]
                with_fu = []
                for sidx in subject_identifiers:
                    cohort_name = get_current_cohort_name(sidx)
                    if has_done_fu(sidx, cohort_name):
                        with_fu.append(sidx)
                    else:
                        without_fu.append(sidx)
                all_sidxs.extend(subject_identifiers)
                rows.append({
                    'bmi_group': bmi_group,
                    'age_group': age_group,
                    'gender_group': gender_group,
                    'count': len(subject_identifiers),
                    'subject_identifiers': subject_identifiers,
                    'has_fu': len(with_fu),
                    'fu_sidx': with_fu,
                })
    return rows, all_sidxs, without_fu

In [33]:
""" Returns a list of participants taken off study during the
    ANC enrolment i.e. before the child has been delivered
    or enrolled on study.
"""
# Subject identifiers of caregivers that have an off-study record.
caregiver_offstudy_sidx = CaregiverOffStudy.objects.values_list('subject_identifier', flat=True)
# Child identifiers that have a delivery record.
deliveries = MaternalDelivery.objects.values_list('child_subject_identifier', flat=True)
# ANC enrolments without a delivery record whose caregiver is off study.
anc_enrols = AntenatalEnrollment.objects.exclude(
    child_subject_identifier__in=deliveries).filter(subject_identifier__in=caregiver_offstudy_sidx)
# Child identifiers of those enrolments; read by later cells and by anc_exposure_list().
anc_offstudy_before_del = anc_enrols.values_list('child_subject_identifier', flat=True)

In [34]:
# Subject identifiers of children with an off-study record; used by later cells to exclude them.
offstudies = ChildOffStudy.objects.values_list('subject_identifier', flat=True)

def _anc_hiv_status(anc_enrol):
    """Resolve the HIV status to use for one ANC enrolment.

    Prefers the result of a completed rapid test recorded on the
    participant's cohort A antenatal schedule; otherwise falls back to
    ``enrollment_hiv_status`` from the enrolment itself.
    """
    try:
        onsch = OnScheduleCohortAAntenatal.objects.get(
            subject_identifier=anc_enrol.subject_identifier,
            child_subject_identifier=anc_enrol.child_subject_identifier)
    except OnScheduleCohortAAntenatal.DoesNotExist:
        # Report the identifier, then fall back to the enrolment status.
        # Without a schedule there is nothing to look the rapid test up by;
        # continuing would read an unbound (first iteration) or stale
        # (previous participant) ``onsch``.
        print(anc_enrol.subject_identifier)
        return anc_enrol.enrollment_hiv_status
    try:
        hivtest = HIVRapidTestCounseling.objects.get(
            maternal_visit__subject_identifier=anc_enrol.subject_identifier,
            maternal_visit__schedule_name=onsch.schedule_name,
            rapid_test_done=YES)
    except HIVRapidTestCounseling.DoesNotExist:
        return anc_enrol.enrollment_hiv_status
    return hivtest.result


def anc_exposure_list(exposure_status=POS):
    """Return child identifiers of ANC enrolments matching an HIV status.

    Children with an off-study record (``offstudies``) and children whose
    caregiver went off study before delivery (``anc_offstudy_before_del``)
    are left out.

    Args:
        exposure_status: HIV status to match; defaults to ``POS``.

    Returns:
        List of ``child_subject_identifier`` values.
    """
    # Materialise once so each membership test is a set lookup instead of a
    # scan over the queryset's results.
    offstudy_before_delivery = set(anc_offstudy_before_del)
    anc_sidxs = []
    ancs = AntenatalEnrollment.objects.exclude(child_subject_identifier__in=offstudies)
    for anc_enrol in ancs:
        if anc_enrol.child_subject_identifier in offstudy_before_delivery:
            continue
        if _anc_hiv_status(anc_enrol) == exposure_status:
            anc_sidxs.append(anc_enrol.child_subject_identifier)
    return anc_sidxs

In [35]:
# HEU matrix breakdown

# Study child identifiers flagged as HIV exposed in the child dataset.
exposed_participants = ChildDataset.objects.filter(
    infant_hiv_exposed__in=['Exposed', 'exposed']).values_list('study_child_identifier', flat=True)

# Consented exposed children, minus those off study and those whose caregiver
# went off study before delivery. De-duplicated because a child can have more
# than one consent row.
heu_sidx = CaregiverChildConsent.objects.filter(
    study_child_identifier__in=exposed_participants).exclude(
    subject_identifier__in=offstudies).exclude(
    subject_identifier__in=anc_offstudy_before_del).values_list('subject_identifier', flat=True)
heu_sidx = list(set(heu_sidx))

# Keep only children whose current cohort name does not contain 'sec'.
primary_heu_sidx = Cohort.objects.filter(
    subject_identifier__in=heu_sidx, current_cohort=True).exclude(name__icontains='sec').values_list(
    'subject_identifier', flat=True)
primary_heu_sidx = list(set(primary_heu_sidx))

print('"Total HEU":', len(heu_sidx))
print('"Total HEU Primary":', len(primary_heu_sidx))

# Correlated subquery: for each outer row, the id of the newest measurement
# (by report_datetime) for the same child, so `participants` holds one
# measurement per child.
latest_measurements = ChildClinicalMeasurements.objects.filter(
    child_visit__subject_identifier__in=primary_heu_sidx).filter(
    child_visit__subject_identifier=OuterRef(
        'child_visit__subject_identifier')).order_by('-report_datetime').values('id')[:1]
participants = ChildClinicalMeasurements.objects.filter(
    id=Subquery(latest_measurements), ).select_related('child_visit')

# Group into the BMI/age/gender matrix, then flatten into export rows.
# `export_data` and `no_fu_sidxs` are reused by later export cells.
bmi_age_data, subject_data = get_matrix_grouping(participants, is_bcpp=True)
export_data, sidxs_heu, no_fu_sidxs = generate_matrix_dict(bmi_age_data, subject_data)

"Total HEU": 740
"Total HEU Primary": 267


100%|██████████| 267/267 [00:02<00:00, 90.47it/s]


In [36]:
# Current, EXPOSED cohort rows other than cohort_a, excluding off-study
# children and those whose caregiver went off study before delivery.
cidxs = Cohort.objects.exclude(
    subject_identifier__in=offstudies).exclude(subject_identifier__in=anc_offstudy_before_del).exclude(
    name='cohort_a').filter(current_cohort=True, exposure_status='EXPOSED').values_list(
    'subject_identifier', flat=True)

In [37]:
# Primary HEU identifiers that are absent from the cohort-based list `cidxs`.
difference = set(primary_heu_sidx) - set(cidxs)
len(difference)

5

In [38]:
# Check participants not on the matrix pool and reasons:
# B142-040990012-2-10 - offstudy, does not have clinical measurements
missing = {}
matrix_data = {}
for pidx in difference:
    offstudy = ChildOffStudy.objects.filter(subject_identifier=pidx).exists()
    study_status = 'offstudy' if offstudy else 'onstudy'
    try:
        latest_measurement = ChildClinicalMeasurements.objects.filter(
            child_visit__subject_identifier=pidx).latest('report_datetime')
    except ChildClinicalMeasurements.DoesNotExist:
        print(f'missing measurements for {pidx}, {study_status}')
        # Keyed by participant so every missing PID is kept. The previous
        # ``missing.update({'pid': ...})`` rewrote the same two keys, so the
        # dict only ever held the last participant.
        missing[pidx] = {'pid': pidx, 'study_status': study_status}
    else:
        child_height = getattr(latest_measurement, 'child_height', 0)
        child_weight_kg = getattr(latest_measurement, 'child_weight_kg', 0)
        child_consent = get_child_consent(pidx)
        child_dob = getattr(child_consent, 'child_dob', None)
        gender = getattr(child_consent, 'gender', None)
        if all([child_height, child_weight_kg, child_dob, gender, ]):
            _age = age(child_dob, get_utcnow())
            _age = _age.years + (_age.months / 12)
            _bmi = child_weight_kg / pow((child_height / 100), 2)  # kg / m^2
            # NOTE(review): follow-up is checked against 'cohort_c' for every
            # participant, whatever their current cohort - confirm intended.
            print(f'{pidx}: {_age}, {_bmi}, {gender}, {study_status}, {has_done_fu(pidx, "cohort_c")}')
        else:
            print(f'incomplete, {pidx}: {child_height}, {child_weight_kg}, {child_dob}, {gender}')

B142-040990809-1-10: 5.083333333333333, 15.61792666364870982344952467, M, onstudy, False
B142-040990669-9-10: 5.083333333333333, 16.03579268608491599635061215, F, onstudy, False
B142-040990783-8-10: 5.083333333333333, 14.51597700065981713639362805, F, onstudy, False
B142-040990789-5-10: 5.166666666666667, 14.8, F, onstudy, False
B142-040990875-2-10: 5.166666666666667, 15.01565503131006262012524025, M, onstudy, False


In [40]:
# Follow-up (FU) reports
# age_group [7, 9.5): list the PIDs who will be 10 years or older between Apr 1st and Mar 31st.
# age_group [9.5, 14): list the PIDs who had their FU at or after 10 years of age.
# bmi > 18 and age_group [14, 17): list the PIDs in the group.
import pytz
import datetime

def _age_in_years(child_dob, as_of):
    """Return age at ``as_of`` as completed years plus completed months / 12."""
    elapsed = age(child_dob, as_of)
    return elapsed.years + (elapsed.months / 12)


def generate_fu_reports(dataset, reference_dt=None):
    """Build the three follow-up reports from matrix export rows.

    Args:
        dataset: iterable of dicts as produced by ``generate_matrix_dict``
            (keys read: ``age_group``, ``bmi_group``, ``subject_identifiers``,
            ``fu_sidx``).
        reference_dt: date at which children in the ``(7, 9.5)`` band are
            tested for being 10 or older. Defaults to 31 March 2025, the
            previously hard-coded cut-off.

    Returns:
        ``(gt_10, fu_at_10, pid_list)``:
        * ``gt_10`` - one dict per child in band ``(7, 9.5)`` aged >= 10 at
          ``reference_dt``.
        * ``fu_at_10`` - one dict per child in band ``(9.5, 14)`` whose
          follow-up visit happened at age >= 10.
        * ``pid_list`` - identifiers in BMI group ``'>18'`` and band
          ``(14, 17)``.
    """
    if reference_dt is None:
        reference_dt = datetime.date(2025, 3, 31)

    gt_10 = []
    fu_at_10 = []
    pid_list = []

    for data in dataset:
        age_group = data.get('age_group')
        bmi_group = data.get('bmi_group')

        if age_group == (7, 9.5):
            for sidx in data.get('subject_identifiers', []):
                child_dob = getattr(get_child_consent(sidx), 'child_dob', None)
                if child_dob is None:
                    # No consent/dob on record: age cannot be calculated.
                    continue
                age_at_reference = _age_in_years(child_dob, reference_dt)
                if age_at_reference >= 10:
                    current_age = _age_in_years(child_dob, get_utcnow().date())
                    gt_10.append({'subject_identifier': sidx,
                                  'bmi_group': bmi_group,
                                  'child_age': round(current_age, 2),
                                  'age_at_mar': round(age_at_reference, 2), })

        if age_group == (9.5, 14):
            for sidx in data.get('fu_sidx', []):
                cohort_name = get_current_cohort_name(sidx)
                schedule_names = get_fu_schedule_name(cohort_name)
                # ``.get()`` raised MultipleObjectsReturned for a child with
                # more than one follow-up visit. Use the most recent visit so
                # a child with any FU at/after 10 is reported.
                # NOTE(review): confirm latest (rather than first) is intended.
                fu_visit = ChildVisit.objects.filter(
                    subject_identifier=sidx,
                    schedule_name__in=schedule_names).latest('report_datetime')
                fu_report_datetime = fu_visit.report_datetime.astimezone(
                    pytz.timezone('Africa/Gaborone'))
                child_dob = getattr(get_child_consent(sidx), 'child_dob', None)
                if child_dob is None:
                    continue
                child_age_at_fu = _age_in_years(child_dob, fu_report_datetime.date())
                if child_age_at_fu >= 10:
                    current_age = _age_in_years(child_dob, get_utcnow().date())
                    fu_at_10.append({'subject_identifier': sidx,
                                     'bmi_group': bmi_group,
                                     'child_age': round(current_age, 2),
                                     'age_at_fu': round(child_age_at_fu, 2), })

        if bmi_group == '>18' and age_group == (14, 17):
            # Default to [] so a row without identifiers does not raise.
            pid_list.extend(data.get('subject_identifiers', []))

    return gt_10, fu_at_10, pid_list

# Build the three follow-up reports from the matrix rows currently held in `export_data`.
heu_gt_10, heu_fu_at10, heu_pid_list = generate_fu_reports(export_data)

In [122]:
# C HEU No Followup details
# Writes one CSV row per identifier in `no_fu_sidxs` (set by the most recent
# generate_matrix_dict() call) with consent date, age, BMI and gender.
import pytz
tz = pytz.timezone('Africa/Gaborone')

no_fu_data = []
for sidx in no_fu_sidxs:
    consent = CaregiverChildConsent.objects.filter(subject_identifier=sidx).earliest('consent_datetime')
    measurements = ChildClinicalMeasurements.objects.filter(
        child_visit__subject_identifier=sidx)
    child_age = age(consent.child_dob, get_utcnow().date())
    child_age = child_age.years + (child_age.months / 12)
    # BMI stays blank when there is no usable measurement. Previously
    # ``round(None, 2)`` raised TypeError for a child without measurements,
    # and a zero/empty height failed in the division.
    child_bmi = ''
    measurement = measurements.order_by('-report_datetime').first()
    height = getattr(measurement, 'child_height', None)
    weight = getattr(measurement, 'child_weight_kg', None)
    if height and weight:
        child_bmi = str(round(weight / pow((height / 100), 2), 2))  # kg / m^2
    no_fu_data.append({'subject_identifier': sidx,
                       'consent_datetime': consent.consent_datetime.astimezone(tz).strftime('%d-%m-%Y %H:%M'),
                       'child_age': str(round(child_age, 2)),
                       'child_bmi': child_bmi,
                       'gender': consent.gender})

import csv
# Fixed header so an empty `no_fu_data` still yields a valid (header-only) file
# instead of an IndexError on ``no_fu_data[0]``.
keys = ['subject_identifier', 'consent_datetime', 'child_age', 'child_bmi', 'gender']
with open('primary_heu_no_fus.csv', 'w', newline='') as output_file:
    dict_writer = csv.DictWriter(output_file, keys)
    dict_writer.writeheader()
    dict_writer.writerows(no_fu_data)

In [39]:
# Write the matrix rows in `export_data` to CSV; columns follow the keys of the first row.
import csv
keys = export_data[0].keys()
with open('primary_heu_matrix.csv', mode='w', newline='') as output_file:
    dict_writer = csv.DictWriter(output_file, fieldnames=keys)
    dict_writer.writeheader()
    for row in export_data:
        dict_writer.writerow(row)

In [136]:
# HEU breakdown for participants older than 10 years.

# Study child identifiers flagged as HIV exposed; also read by the 9.5-10 years cell.
exposed = ChildDataset.objects.filter(
    infant_hiv_exposed__in=['Exposed', 'exposed']).values_list('study_child_identifier', flat=True)

# Date-of-birth cut-off. NOTE(review): the cut-off is 10 years *and 1 month*
# ago while the printed labels say "> 10 years" - confirm the extra month is intended.
cohort_c = get_utcnow() - relativedelta(years=10, months=1)
sidxs = CaregiverChildConsent.objects.filter(
    study_child_identifier__in=exposed, child_dob__lte=cohort_c).values_list('subject_identifier', flat=True)
sidxs = list(set(sidxs))
print('"> 10 years, HEU":', len(sidxs))

# Current cohort whose name does not contain '_sec'.
primary_sidxs = Cohort.objects.filter(
    subject_identifier__in=sidxs, current_cohort=True).exclude(name__icontains='_sec').values_list('subject_identifier', flat=True)
primary_sidxs = list(set(primary_sidxs))
print('"> 10 years, [Primary cohort] HEU":', len(primary_sidxs))

# Current cohort is exactly 'cohort_c'.
primary_c = Cohort.objects.filter(
    subject_identifier__in=sidxs, name='cohort_c', current_cohort=True).values_list('subject_identifier', flat=True)
primary_c = list(set(primary_c))
print('"> 10 years, [Primary C] HEU":', len(primary_c))

# Current cohort is exactly 'cohort_b'.
primary_b = Cohort.objects.filter(
    subject_identifier__in=sidxs, name='cohort_b', current_cohort=True).values_list('subject_identifier', flat=True)
primary_b = list(set(primary_b))
print('"> 10 years, [Primary B] HEU":', len(primary_b))

# Current cohort name contains '_sec'.
sec_aims = Cohort.objects.filter(
    subject_identifier__in=sidxs, current_cohort=True, name__icontains='_sec').values_list('subject_identifier', flat=True)
sec_aims = list(set(sec_aims))
print('"> 10 years, [Secondary aims] HEU":', len(sec_aims))

"> 10 years, HEU": 529
"> 10 years, [Primary cohort] HEU": 120
"> 10 years, [Primary C] HEU": 108
"> 10 years, [Primary B] HEU": 12
"> 10 years, [Secondary aims] HEU": 409


In [9]:
# HEU breakdown for participants aged 9.5 to 10 years.
# `exposed` is not defined in this cell: it is assigned in the
# "older than 10 years" HEU cell, which must be run first.
upper_limit = get_utcnow() - relativedelta(years=9, months=6)
lower_limit = get_utcnow() - relativedelta(years=10, )
# Date of birth between 10 years ago and 9.5 years ago.
sidxs = CaregiverChildConsent.objects.filter(
    study_child_identifier__in=exposed,
    child_dob__range=[lower_limit, upper_limit]).values_list('subject_identifier', flat=True)
sidxs = list(set(sidxs))
len(sidxs)

73

In [141]:
# HUU matrix breakdown

# Children flagged as HIV unexposed in the child dataset.
unexposed_participants = ChildDataset.objects.filter(
    infant_hiv_exposed__in=['Unexposed', 'unexposed'])

# Subset whose study child identifier contains 'P' (case-insensitive);
# treated as the BCPP participants below.
bcpp_unexposed = unexposed_participants.filter(
    study_child_identifier__icontains='P').values_list('study_child_identifier', flat=True)

# Everyone else, i.e. the unexposed children that are not in the BCPP subset.
unexposed_participants = unexposed_participants.exclude(
    study_child_identifier__in=bcpp_unexposed).values_list('study_child_identifier', flat=True)

# Consented, not off-study subject identifiers for each of the two subsets.
huu_sidxs = CaregiverChildConsent.objects.exclude(
    subject_identifier__in=offstudies).filter(
    study_child_identifier__in=unexposed_participants).values_list('subject_identifier', flat=True)

bcpp_sidxs = CaregiverChildConsent.objects.exclude(
    subject_identifier__in=offstudies).filter(
    study_child_identifier__in=bcpp_unexposed).values_list('subject_identifier', flat=True)

huu_sidxs = list(set(huu_sidxs))
bcpp_sidxs = list(set(bcpp_sidxs))
print('"Total Prior Participants HUU":', len(huu_sidxs))
print('"Total BCPP HUU":', len(bcpp_sidxs))

# Combined pool of both subsets.
final_huu = huu_sidxs + bcpp_sidxs

# Children whose current cohort name does not contain 'sec'.
primary_huu = Cohort.objects.filter(
    subject_identifier__in=final_huu, current_cohort=True).exclude(name__icontains='sec').values_list(
    'subject_identifier', flat=True)
primary_huu = list(set(primary_huu))
print('"Total Primary HUU":', len(primary_huu))

# Per-cohort split of the combined pool by exact current cohort name.
c_primary_huu = Cohort.objects.filter(
    subject_identifier__in=final_huu,
    name='cohort_c',
    current_cohort=True).values_list('subject_identifier', flat=True)

b_primary_huu = Cohort.objects.filter(
    subject_identifier__in=final_huu,
    name='cohort_b',
    current_cohort=True).values_list('subject_identifier', flat=True)

a_primary_huu = Cohort.objects.filter(
    subject_identifier__in=final_huu,
    name='cohort_a',
    current_cohort=True).values_list('subject_identifier', flat=True)

# De-duplicate; note c_primary_huu is left as a set while the other two are lists.
c_primary_huu = set(c_primary_huu)
b_primary_huu = list(set(b_primary_huu))
a_primary_huu = list(set(a_primary_huu))

# NOTE(review): the cohort C label says "> 10 years" but the query above
# applies no age filter - confirm the label.
print('"> 10 years, [Primary cohort C] HUU":', len(c_primary_huu))
print('"[Primary cohort B] HUU":', len(b_primary_huu))
print('"[Primary cohort A] HUU":', len(a_primary_huu))


"Total Prior Participants HUU": 157
"Total BCPP HUU": 45
"Total Primary HUU": 202
"> 10 years, [Primary cohort C] HUU": 138
"[Primary cohort B] HUU": 63
"[Primary cohort A] HUU": 1


In [142]:
# HUU Prior participants matrix breakdown
# NOTE(review): `primary_huu` is derived from `final_huu`, which also contains
# the BCPP identifiers - confirm the "prior participants" title.
# Correlated subquery: id of the newest measurement per child, so
# `participants` holds one measurement per child in `primary_huu`.
latest_measurements = ChildClinicalMeasurements.objects.filter(
    child_visit__subject_identifier__in=primary_huu).filter(
    child_visit__subject_identifier=OuterRef(
        'child_visit__subject_identifier')).order_by('-report_datetime').values('id')[:1]
participants = ChildClinicalMeasurements.objects.filter(
    id=Subquery(latest_measurements), ).select_related('child_visit')

# Overwrites `export_data` and `no_fu_sidxs` from any earlier matrix run.
bmi_age_data, subject_data = get_matrix_grouping(participants, is_bcpp=True)
export_data, sidxs_huu, no_fu_sidxs = generate_matrix_dict(bmi_age_data, subject_data)

100%|██████████| 202/202 [00:02<00:00, 87.63it/s]


In [148]:
# Primary HUU identifiers that did not land in any matrix cell.
difference = set(primary_huu) - set(sidxs_huu)
len(difference)

40

In [146]:
# Write the matrix rows in `export_data` to CSV; columns follow the keys of the first row.
import csv
keys = export_data[0].keys()
with open('primary_huu_matrix.csv', mode='w', newline='') as output_file:
    dict_writer = csv.DictWriter(output_file, fieldnames=keys)
    dict_writer.writeheader()
    for row in export_data:
        dict_writer.writerow(row)

In [150]:
# HUU No Followup details
# Writes one CSV row per identifier in `no_fu_sidxs` (set by the most recent
# generate_matrix_dict() call) with consent date, age, BMI and gender.
import pytz
tz = pytz.timezone('Africa/Gaborone')

no_fu_data = []
for sidx in no_fu_sidxs:
    consent = CaregiverChildConsent.objects.filter(subject_identifier=sidx).earliest('consent_datetime')
    measurements = ChildClinicalMeasurements.objects.filter(
        child_visit__subject_identifier=sidx)
    child_age = age(consent.child_dob, get_utcnow().date())
    child_age = child_age.years + (child_age.months / 12)
    # BMI stays blank when there is no usable measurement. Previously
    # ``round(None, 2)`` raised TypeError for a child without measurements,
    # and a zero/empty height failed in the division.
    child_bmi = ''
    measurement = measurements.order_by('-report_datetime').first()
    height = getattr(measurement, 'child_height', None)
    weight = getattr(measurement, 'child_weight_kg', None)
    if height and weight:
        child_bmi = str(round(weight / pow((height / 100), 2), 2))  # kg / m^2
    no_fu_data.append({'subject_identifier': sidx,
                       'consent_datetime': consent.consent_datetime.astimezone(tz).strftime('%d-%m-%Y %H:%M'),
                       'child_age': str(round(child_age, 2)),
                       'child_bmi': child_bmi,
                       'gender': consent.gender})

import csv
# Fixed header so an empty `no_fu_data` still yields a valid (header-only) file
# instead of an IndexError on ``no_fu_data[0]``.
keys = ['subject_identifier', 'consent_datetime', 'child_age', 'child_bmi', 'gender']
with open('primary_huu_no_fus.csv', 'w', newline='') as output_file:
    dict_writer = csv.DictWriter(output_file, keys)
    dict_writer.writeheader()
    dict_writer.writerows(no_fu_data)

In [165]:
# HUU BCPP participants matrix breakdown
# Correlated subquery: id of the newest measurement per child, so
# `participants` holds one measurement per child in `bcpp_sidxs`.
latest_measurements = ChildClinicalMeasurements.objects.filter(
    child_visit__subject_identifier__in=bcpp_sidxs).filter(
    child_visit__subject_identifier=OuterRef(
        'child_visit__subject_identifier')).order_by('-report_datetime').values('id')[:1]
participants = ChildClinicalMeasurements.objects.filter(
    id=Subquery(latest_measurements), ).select_related('child_visit')

# Overwrites `export_data`, `sidxs_huu` and `no_fu_sidxs` from earlier matrix runs.
bmi_age_data, subject_data  = get_matrix_grouping(participants, is_bcpp=True)
export_data, sidxs_huu, no_fu_sidxs = generate_matrix_dict(bmi_age_data, subject_data)

# Export the rows; raises IndexError on `export_data[0]` if no row was produced.
import csv
keys = export_data[0].keys()
with open('bcpp_huu_matrix.csv', 'w', newline='') as output_file:
    dict_writer = csv.DictWriter(output_file, keys)
    dict_writer.writeheader()
    dict_writer.writerows(export_data)

100%|██████████| 45/45 [00:00<00:00, 79.52it/s]


In [166]:
# BCPP identifiers that did not land in any matrix cell.
difference = list(set(bcpp_sidxs) - set(sidxs_huu))

In [149]:
# For each identifier in `difference`, print why it may be absent from the
# matrix: no measurements, incomplete data, or its computed age/BMI.
missing = {}
matrix_data = {}
for pidx in difference:
    offstudy = ChildOffStudy.objects.filter(subject_identifier=pidx).exists()
    study_status = 'offstudy' if offstudy else 'onstudy'
    try:
        latest_measurement = ChildClinicalMeasurements.objects.filter(
            child_visit__subject_identifier=pidx).latest('report_datetime')
    except ChildClinicalMeasurements.DoesNotExist:
        print(f'missing measurements for {pidx}, {study_status}')
        # Keyed by participant so every missing PID is kept. The previous
        # ``missing.update({'pid': ...})`` rewrote the same two keys, so the
        # dict only ever held the last participant.
        missing[pidx] = {'pid': pidx, 'study_status': study_status}
    else:
        child_height = getattr(latest_measurement, 'child_height', 0)
        child_weight_kg = getattr(latest_measurement, 'child_weight_kg', 0)
        child_consent = get_child_consent(pidx,)
        child_dob = getattr(child_consent, 'child_dob', None)
        gender = getattr(child_consent, 'gender', None)
        if all([child_height, child_weight_kg, child_dob, gender, ]):
            _age = age(child_dob, get_utcnow())
            _age = _age.years + (_age.months / 12)
            _bmi = child_weight_kg / pow((child_height / 100), 2)  # kg / m^2
            # NOTE(review): follow-up is checked against 'cohort_c' for every
            # participant, whatever their current cohort - confirm intended.
            print(f'{pidx}: {_age}, {_bmi}, {gender}, {study_status}, {has_done_fu(pidx, "cohort_c")}')
        else:
            print(f'incomplete, {pidx}: {child_height}, {child_weight_kg}, {child_dob}, {gender}')

B142-040990828-1-10: 6.916666666666667, 15.33574851299443737875926568, M, onstudy, False
B142-040990505-5-10: 6.416666666666667, 11.06770833333333333333333333, M, onstudy, False
B142-040990425-6-10: 6.75, 14.12918108419838523644752018, M, onstudy, False
B142-040990484-3-10: 6.666666666666667, 22.49134948096885813148788927, F, onstudy, False
B142-040990457-9-10: 6.416666666666667, 13.74471357170319108035371011, F, onstudy, False
B142-040990475-1-10: 6.75, 15.82673885880882965431070387, F, onstudy, False
B142-040990352-2-10: 6.333333333333333, 14.91431010085128366388895128, M, onstudy, False
B142-040990468-6-10: 6.333333333333333, 13.47293239185131077022968915, F, onstudy, False
B142-040991276-2-10: 10.833333333333334, 14.96445940890385334829779274, F, onstudy, False
B142-040990096-5-70: 6.416666666666667, 13.89418647768213110271166731, M, onstudy, False
B142-040990364-7-10: 6.75, 15.04805586333067072070911387, F, onstudy, False
B142-040990513-9-10: 6.166666666666667, 14.1143141829534370

In [83]:
# Subject identifiers whose current cohort row is marked UNEXPOSED.
cidxs = Cohort.objects.filter(
    current_cohort=True, exposure_status='UNEXPOSED').values_list('subject_identifier', flat=True)

# Cohort-based unexposed identifiers that are absent from `huu_sidxs`.
difference = set(cidxs) - set(huu_sidxs)
len(difference)

4

In [84]:
# Check participants not on the cohort C HUU matrix pool and reasons:
# B142-040991049-3-10 - offstudy, does not have clinical measurements
missing = {}
matrix_data = {}
for pidx in difference:
    offstudy = ChildOffStudy.objects.filter(subject_identifier=pidx).exists()
    study_status = 'offstudy' if offstudy else 'onstudy'
    try:
        latest_measurement = ChildClinicalMeasurements.objects.filter(
            child_visit__subject_identifier=pidx).latest('report_datetime')
    except ChildClinicalMeasurements.DoesNotExist:
        print(f'missing measurements for {pidx}, {study_status}')
        # Keyed by participant so every missing PID is kept. The previous
        # ``missing.update({'pid': ...})`` rewrote the same two keys, so the
        # dict only ever held the last participant.
        missing[pidx] = {'pid': pidx, 'study_status': study_status}
    else:
        child_height = getattr(latest_measurement, 'child_height', 0)
        child_weight_kg = getattr(latest_measurement, 'child_weight_kg', 0)
        child_consent = get_child_consent(pidx)
        child_dob = getattr(child_consent, 'child_dob', None)
        gender = getattr(child_consent, 'gender', None)
        if all([child_height, child_weight_kg, child_dob, gender, ]):
            _age = age(child_dob, get_utcnow())
            _age = _age.years + (_age.months / 12)
            _bmi = child_weight_kg / pow((child_height / 100), 2)  # kg / m^2
            print(f'{pidx}: {_age}, {_bmi}, {gender}, {study_status}, {has_done_fu(pidx, "cohort_c")}')
        else:
            print(f'incomplete, {pidx}: {child_height}, {child_weight_kg}, {child_dob}, {gender}')

missing measurements for B142-040991049-3-10, offstudy
B142-040990076-7-10: 13.0, 16.40603566529492455418381344, M, offstudy, False
B142-040990347-2-10: 13.083333333333334, 17.93902746283698664651045603, M, onstudy, True
B142-040990321-7-10: 12.25, 18.30808080808080808080808081, F, offstudy, False


In [18]:
# HEU cohort C secondary aims eligibility to cohort C primary
from flourish_caregiver.helper_classes.cohort_assignment import CohortAssignment

# Children currently on 'cohort_c_sec' with EXPOSED status, de-duplicated.
secondary_aims = Cohort.objects.filter(
    exposure_status='EXPOSED', name='cohort_c_sec', current_cohort=True).values_list(
    'subject_identifier', flat=True)
secondary_aims = list(set(secondary_aims))
offstudy_pidx = []
eligible = []
ineligible = []
for pidx in tqdm(secondary_aims):
    offstudy = ChildOffStudy.objects.filter(subject_identifier=pidx).exists()
    if offstudy:
        offstudy_pidx.append(pidx)
        continue
    # Check eligibility for primary cohort
    try:
        child_consent = CaregiverChildConsent.objects.filter(
            subject_identifier=pidx).latest('consent_datetime')
    except CaregiverChildConsent.DoesNotExist:
        print(f'missing consent {pidx}')
        continue

    child_dataset = getattr(child_consent, 'child_dataset', None)
    try:
        maternal_dataset_obj = MaternalDataset.objects.get(
            study_maternal_identifier=getattr(
                child_dataset, 'study_maternal_identifier', None))
    except MaternalDataset.DoesNotExist:
        # Report instead of silently dropping the participant from both lists.
        print(f'missing maternal dataset {pidx}')
        continue

    arv_regimen = getattr(
            maternal_dataset_obj, 'mom_pregarv_strat', None)
    cohort = CohortAssignment(
        # Use this participant's own date of birth. The previous bare
        # ``child_dob`` was never assigned in this cell, so it picked up
        # whatever value an earlier cell had left in the kernel.
        child_dob=child_consent.child_dob,
        enrolment_dt=get_utcnow().date(),
        child_hiv_exposure=getattr(
            child_dataset, 'infant_hiv_exposed', None),
        arv_regimen=arv_regimen, )
    # Eligible for primary: older than 10, HEU, and on a 3-drug ART combination.
    if (cohort.child_age > 10
            and cohort.hiv_exposed_uninfected
            and cohort.art_3drug_combination):
        eligible.append(pidx)
    else:
        ineligible.append({'pid': pidx, 'age': cohort.child_age, 'arv_regimen': arv_regimen})


100%|██████████| 391/391 [00:07<00:00, 53.11it/s]


In [19]:
# Summary counts from the eligibility loop in the previous cell.
print(f'Total HEU cohort C secondary aims enrolled: {len(secondary_aims)}')
print(f'Offstudy pids: {len(offstudy_pidx)}')
print(f'Eligible pids: {len(eligible)}')
print(f'Ineligible pids: {len(ineligible)}')

Total HEU cohort C secondary aims enrolled: 391
Offstudy pids: 11
Eligible pids: 306
Ineligible pids: 74


In [20]:
# Matrix breakdown for the secondary-aims children found eligible for primary.
# Correlated subquery: id of the newest measurement per child, so
# `participants` holds one measurement per child in `eligible`.
latest_measurements = ChildClinicalMeasurements.objects.filter(
    Q(child_visit__subject_identifier__in=eligible) & Q(
        child_visit__subject_identifier=OuterRef(
            'child_visit__subject_identifier'))).order_by('-report_datetime').values('id')[:1]
participants = ChildClinicalMeasurements.objects.filter(
    id=Subquery(latest_measurements), ).select_related('child_visit')

bmi_age_data, subject_data = get_matrix_grouping(participants, cohort='cohort_c_sec')
# generate_matrix_dict() returns three values; unpacking only two raised
# ValueError. The no-follow-up list gets its own name so it does not clobber
# `no_fu_sidxs` used by the primary no-follow-up export cells.
export_data, sidxs_heu, sec_no_fu_sidxs = generate_matrix_dict(bmi_age_data, subject_data)

100%|██████████| 305/305 [00:03<00:00, 84.53it/s]


In [21]:
# Write the rows in `export_data` to sec_heu_matrix.csv.
# `csv` is already imported in the notebook's first cell, so it is not
# re-imported here.
if not export_data:
    # Fail with a clear message instead of an IndexError on export_data[0].
    raise ValueError('export_data is empty; nothing to write to sec_heu_matrix.csv')
keys = list(export_data[0].keys())  # header columns are taken from the first row
with open('sec_heu_matrix.csv', 'w', newline='') as output_file:
    dict_writer = csv.DictWriter(output_file, fieldnames=keys)
    dict_writer.writeheader()
    dict_writer.writerows(export_data)

In [22]:
# Eligible subject identifiers that are absent from the matrix pool;
# the cell displays how many there are.
difference = set(eligible).difference(sidxs_heu)
len(difference)

10

In [23]:
# Check C SEC participants not on the matrix pool and reasons:
# for each one, print either that no clinical measurement exists, or the
# age / BMI / gender / study status derived from the latest measurement.
missing = {}  # subject identifier -> study status, for pids with no measurements
matrix_data = {}  # NOTE(review): never populated in this cell
for pidx in difference:
    offstudy = ChildOffStudy.objects.filter(subject_identifier=pidx).exists()
    study_status = 'offstudy' if offstudy else 'onstudy'
    try:
        latest_measurement = ChildClinicalMeasurements.objects.filter(
            child_visit__subject_identifier=pidx).latest('report_datetime')
    except ChildClinicalMeasurements.DoesNotExist:
        print(f'missing measurements for {pidx}, {study_status}')
        # Key by pid: updating fixed 'pid'/'study_status' keys would overwrite
        # the previous entry and keep only the last missing participant.
        missing[pidx] = study_status
    else:
        child_height = getattr(latest_measurement, 'child_height', 0)
        child_weight_kg = getattr(latest_measurement, 'child_weight_kg', 0)
        # NOTE(review): called with the measurement object and a `cohort`
        # keyword; the one-argument helper defined near the top of the notebook
        # would not accept this, so presumably a later redefinition is in
        # effect. Confirm.
        child_consent = get_child_consent(latest_measurement, cohort='cohort_c_sec')
        child_dob = getattr(child_consent, 'child_dob', None)
        gender = getattr(child_consent, 'gender', None)
        if all([child_height, child_weight_kg, child_dob, gender, ]):
            # Age as of now, expressed in fractional years.
            _age = age(child_dob, get_utcnow())
            _age = _age.years + (_age.months / 12)
            # BMI = weight (kg) / height (m)^2; the division by 100 treats
            # child_height as centimetres.
            _bmi = child_weight_kg / pow((child_height / 100), 2)
            print(f'{pidx}: {_age}, {_bmi}, {gender}, {study_status}')
        else:
            print(f'incomplete, {pidx}: {child_height}, {child_weight_kg}, {child_dob}, {gender}, {study_status}')

B142-040990744-0-10: 10.75, 14.94202988405976811953623907, M, onstudy
B142-040990283-9-10: 12.666666666666666, 14.90879576231364005297107950, M, onstudy
B142-040990797-8-10: 10.166666666666666, 14.96783740304370365142218793, M, onstudy
B142-040990862-0-10: 10.916666666666666, 14.92194674012855831037649219, F, onstudy
B142-040990212-8-10: 10.583333333333334, 14.98127340823970037453183521, M, onstudy
B142-040990571-7-10: 16.916666666666668, 14.92961174895533287354456570, M, onstudy
B142-040990028-8-10: 10.5, 14.90007767813007555963561895, F, onstudy
B142-040991015-4-10: 13.666666666666666, 17.95070066910743592980684257, M, onstudy
missing measurements for B142-040991169-9-10, onstudy
B142-040990205-2-10: 10.833333333333334, 14.90759434768796547534507747, M, onstudy


In [44]:
# Distinct subject identifiers with a successful contact for which an
# appointment was scheduled and has a date.
contacts = list(set(
    Contact.objects
    .filter(contact_success=YES, appt_scheduled=YES, appt_date__isnull=False)
    .values_list('subject_identifier', flat=True)
))

# Correlated check: does the same subject also have a 'cohort_c_sec' record?
subquery = (
    Cohort.objects
    .filter(subject_identifier=OuterRef('subject_identifier'), name='cohort_c_sec')
    .values('subject_identifier')
)
# Contacted subjects whose current cohort is 'cohort_c', with EXPOSED status,
# and who also have a 'cohort_c_sec' record.
c_sec_sq = (
    Cohort.objects
    .annotate(has_c_sec=Exists(subquery))
    .filter(
        name='cohort_c',
        current_cohort=True,
        has_c_sec=True,
        exposure_status='EXPOSED',
        subject_identifier__in=contacts,
    )
    .values_list('subject_identifier', flat=True)
)

# Most recent measurement per subject (newest report_datetime first).
latest_measurements = (
    ChildClinicalMeasurements.objects
    .filter(
        child_visit__subject_identifier__in=c_sec_sq,
        child_visit__subject_identifier=OuterRef('child_visit__subject_identifier'),
    )
    .order_by('-report_datetime')
    .values('id')[:1]
)
participants = (
    ChildClinicalMeasurements.objects
    .filter(id=Subquery(latest_measurements))
    .select_related('child_visit')
)

# First measurement per subject (oldest report_datetime first).
earliest_measurements = (
    ChildClinicalMeasurements.objects
    .filter(
        child_visit__subject_identifier__in=c_sec_sq,
        child_visit__subject_identifier=OuterRef('child_visit__subject_identifier'),
    )
    .order_by('report_datetime')
    .values('id')[:1]
)
earliest_participants = (
    ChildClinicalMeasurements.objects
    .filter(id=Subquery(earliest_measurements))
    .select_related('child_visit')
)


# Matrix rows built from the latest measurements.
latest_bmi_age_data, latest_subject_data = get_matrix_grouping(
    participants, cohort='cohort_c')
latest_export_data, latest_sidxs_heu = generate_matrix_dict(
    latest_bmi_age_data, latest_subject_data)

# Matrix rows built from the earliest measurements (use_reference=True).
earliest_bmi_age_data, earliest_subject_data = get_matrix_grouping(
    earliest_participants, cohort='cohort_c', use_reference=True)
earliest_export_data, earliest_sidxs_heu = generate_matrix_dict(
    earliest_bmi_age_data, earliest_subject_data)

100%|██████████| 18/18 [00:00<00:00, 68.67it/s]
100%|██████████| 18/18 [00:00<00:00, 74.77it/s]


In [47]:
# Write the rows in `latest_export_data` to latest_sec_heu_matrix.csv.
# `csv` is already imported in the notebook's first cell, so it is not
# re-imported here.
if not latest_export_data:
    # Fail with a clear message instead of an IndexError on [0].
    raise ValueError(
        'latest_export_data is empty; nothing to write to latest_sec_heu_matrix.csv')
keys = list(latest_export_data[0].keys())  # header columns from the first row
with open('latest_sec_heu_matrix.csv', 'w', newline='') as output_file:
    dict_writer = csv.DictWriter(output_file, fieldnames=keys)
    dict_writer.writeheader()
    dict_writer.writerows(latest_export_data)

In [48]:
# Write the rows in `earliest_export_data` to earliest_sec_heu_matrix.csv.
# The leading bare `earliest_export_data` expression was a no-op (only a
# cell's last expression is displayed) and `csv` is already imported in the
# notebook's first cell, so both are dropped.
if not earliest_export_data:
    # Fail with a clear message instead of an IndexError on [0].
    raise ValueError(
        'earliest_export_data is empty; nothing to write to earliest_sec_heu_matrix.csv')
keys = list(earliest_export_data[0].keys())  # header columns from the first row
with open('earliest_sec_heu_matrix.csv', 'w', newline='') as output_file:
    dict_writer = csv.DictWriter(output_file, fieldnames=keys)
    dict_writer.writeheader()
    dict_writer.writerows(earliest_export_data)

In [43]:
# For every participant, build single-participant matrix rows from the latest
# and from the earliest measurement and print them together with the
# subject identifier list returned for the earliest measurement.
#
# Loop-local results use a `single_` prefix so that the notebook-level
# latest_*/earliest_* names are not rebound to the last participant's
# one-row data once the loop finishes.
for participant in participants:
    # Earliest measurement belonging to the same subject as `participant`.
    earliest_participant = earliest_participants.get(
        child_visit__subject_identifier=participant.child_visit.subject_identifier)

    single_latest_bmi_age, single_latest_subjects = get_matrix_grouping(
        [participant, ], cohort='cohort_c')
    single_latest_rows, single_latest_sidxs = generate_matrix_dict(
        single_latest_bmi_age, single_latest_subjects)

    single_earliest_bmi_age, single_earliest_subjects = get_matrix_grouping(
        [earliest_participant, ], cohort='cohort_c', use_reference=True)
    single_earliest_rows, single_earliest_sidxs = generate_matrix_dict(
        single_earliest_bmi_age, single_earliest_subjects)
    print(single_latest_rows, '\n', single_earliest_rows, '\n', single_earliest_sidxs)

100%|██████████| 1/1 [00:00<00:00, 58.45it/s]
100%|██████████| 1/1 [00:00<00:00, 64.41it/s]


[{'bmi_group': '>18', 'age_group': (14, 17), 'gender_group': 'male', 'count': 1, 'has_fu': 1}] 
 [{'bmi_group': '15-17.9', 'age_group': (14, 17), 'gender_group': 'male', 'count': 1, 'has_fu': 1}] 
 ['B142-040990346-4-10']


100%|██████████| 1/1 [00:00<00:00, 59.40it/s]
100%|██████████| 1/1 [00:00<00:00, 53.76it/s]


[{'bmi_group': '15-17.9', 'age_group': (17, 21), 'gender_group': 'male', 'count': 1, 'has_fu': 1}] 
 [{'bmi_group': '15-17.9', 'age_group': (14, 17), 'gender_group': 'male', 'count': 1, 'has_fu': 1}] 
 ['B142-040991164-0-10']


100%|██████████| 1/1 [00:00<00:00, 69.66it/s]
100%|██████████| 1/1 [00:00<00:00, 68.32it/s]


[{'bmi_group': '15-17.9', 'age_group': (9.5, 14), 'gender_group': 'male', 'count': 1, 'has_fu': 1}] 
 [{'bmi_group': '>18', 'age_group': (9.5, 14), 'gender_group': 'male', 'count': 1, 'has_fu': 1}] 
 ['C142-040990193-0-10']


100%|██████████| 1/1 [00:00<00:00, 74.17it/s]
100%|██████████| 1/1 [00:00<00:00, 64.96it/s]


[{'bmi_group': '15-17.9', 'age_group': (9.5, 14), 'gender_group': 'male', 'count': 1, 'has_fu': 1}] 
 [{'bmi_group': '<14.9', 'age_group': (9.5, 14), 'gender_group': 'male', 'count': 1, 'has_fu': 1}] 
 ['B142-040990622-8-10']


100%|██████████| 1/1 [00:00<00:00, 70.18it/s]
100%|██████████| 1/1 [00:00<00:00, 69.10it/s]


[{'bmi_group': '15-17.9', 'age_group': (14, 17), 'gender_group': 'male', 'count': 1, 'has_fu': 1}] 
 [{'bmi_group': '15-17.9', 'age_group': (14, 17), 'gender_group': 'male', 'count': 1, 'has_fu': 1}] 
 ['B142-040990197-1-10']


100%|██████████| 1/1 [00:00<00:00, 71.83it/s]
100%|██████████| 1/1 [00:00<00:00, 69.19it/s]


[{'bmi_group': '>18', 'age_group': (17, 21), 'gender_group': 'male', 'count': 1, 'has_fu': 1}] 
 [{'bmi_group': '>18', 'age_group': (14, 17), 'gender_group': 'male', 'count': 1, 'has_fu': 1}] 
 ['B142-040990618-6-10']


100%|██████████| 1/1 [00:00<00:00, 74.20it/s]
100%|██████████| 1/1 [00:00<00:00, 68.74it/s]


[{'bmi_group': '>18', 'age_group': (14, 17), 'gender_group': 'male', 'count': 1, 'has_fu': 1}] 
 [{'bmi_group': '>18', 'age_group': (14, 17), 'gender_group': 'male', 'count': 1, 'has_fu': 1}] 
 ['B142-040991079-0-10']


100%|██████████| 1/1 [00:00<00:00, 120.29it/s]
100%|██████████| 1/1 [00:00<00:00, 109.35it/s]


[{'bmi_group': '>18', 'age_group': (9.5, 14), 'gender_group': 'female', 'count': 1, 'has_fu': 1}] 
 [{'bmi_group': '>18', 'age_group': (9.5, 14), 'gender_group': 'female', 'count': 1, 'has_fu': 1}] 
 ['B142-040991029-5-10']


100%|██████████| 1/1 [00:00<00:00, 70.14it/s]
100%|██████████| 1/1 [00:00<00:00, 69.38it/s]


[{'bmi_group': '>18', 'age_group': (9.5, 14), 'gender_group': 'male', 'count': 1, 'has_fu': 1}] 
 [{'bmi_group': '>18', 'age_group': (9.5, 14), 'gender_group': 'male', 'count': 1, 'has_fu': 1}] 
 ['B142-040990786-1-60']


100%|██████████| 1/1 [00:00<00:00, 67.98it/s]
100%|██████████| 1/1 [00:00<00:00, 69.62it/s]


[{'bmi_group': '<14.9', 'age_group': (9.5, 14), 'gender_group': 'male', 'count': 1, 'has_fu': 1}] 
 [{'bmi_group': '<14.9', 'age_group': (9.5, 14), 'gender_group': 'male', 'count': 1, 'has_fu': 1}] 
 ['B142-040991095-6-10']


100%|██████████| 1/1 [00:00<00:00, 72.23it/s]
100%|██████████| 1/1 [00:00<00:00, 73.18it/s]


[{'bmi_group': '15-17.9', 'age_group': (14, 17), 'gender_group': 'male', 'count': 1, 'has_fu': 1}] 
 [{'bmi_group': '15-17.9', 'age_group': (14, 17), 'gender_group': 'male', 'count': 1, 'has_fu': 1}] 
 ['B142-040990620-2-10']


100%|██████████| 1/1 [00:00<00:00, 72.91it/s]
100%|██████████| 1/1 [00:00<00:00, 69.03it/s]


[{'bmi_group': '>18', 'age_group': (9.5, 14), 'gender_group': 'male', 'count': 1, 'has_fu': 1}] 
 [{'bmi_group': '>18', 'age_group': (9.5, 14), 'gender_group': 'male', 'count': 1, 'has_fu': 1}] 
 ['B142-040990290-4-10']


100%|██████████| 1/1 [00:00<00:00, 71.52it/s]
100%|██████████| 1/1 [00:00<00:00, 66.71it/s]


[{'bmi_group': '>18', 'age_group': (9.5, 14), 'gender_group': 'male', 'count': 1, 'has_fu': 1}] 
 [{'bmi_group': '>18', 'age_group': (9.5, 14), 'gender_group': 'male', 'count': 1, 'has_fu': 1}] 
 ['B142-040990763-0-10']


100%|██████████| 1/1 [00:00<00:00, 66.67it/s]
100%|██████████| 1/1 [00:00<00:00, 68.22it/s]


[{'bmi_group': '<14.9', 'age_group': (9.5, 14), 'gender_group': 'male', 'count': 1, 'has_fu': 1}] 
 [{'bmi_group': '>18', 'age_group': (9.5, 14), 'gender_group': 'male', 'count': 1, 'has_fu': 1}] 
 ['B142-040990995-8-10']


100%|██████████| 1/1 [00:00<00:00, 73.54it/s]
100%|██████████| 1/1 [00:00<00:00, 67.41it/s]


[{'bmi_group': '15-17.9', 'age_group': (17, 21), 'gender_group': 'male', 'count': 1, 'has_fu': 1}] 
 [{'bmi_group': '15-17.9', 'age_group': (14, 17), 'gender_group': 'male', 'count': 1, 'has_fu': 1}] 
 ['C142-040990607-9-10']


100%|██████████| 1/1 [00:00<00:00, 72.17it/s]
100%|██████████| 1/1 [00:00<00:00, 70.34it/s]


[{'bmi_group': '>18', 'age_group': (9.5, 14), 'gender_group': 'male', 'count': 1, 'has_fu': 1}] 
 [{'bmi_group': '>18', 'age_group': (9.5, 14), 'gender_group': 'male', 'count': 1, 'has_fu': 1}] 
 ['B142-040990892-7-10']


100%|██████████| 1/1 [00:00<00:00, 68.83it/s]
100%|██████████| 1/1 [00:00<00:00, 70.25it/s]


[{'bmi_group': '>18', 'age_group': (14, 17), 'gender_group': 'male', 'count': 1, 'has_fu': 1}] 
 [{'bmi_group': '>18', 'age_group': (14, 17), 'gender_group': 'male', 'count': 1, 'has_fu': 1}] 
 ['B142-040990273-0-10']


100%|██████████| 1/1 [00:00<00:00, 72.11it/s]
100%|██████████| 1/1 [00:00<00:00, 70.62it/s]

[{'bmi_group': '>18', 'age_group': (9.5, 14), 'gender_group': 'male', 'count': 1, 'has_fu': 1}] 
 [{'bmi_group': '>18', 'age_group': (9.5, 14), 'gender_group': 'male', 'count': 1, 'has_fu': 1}] 
 ['B142-040990137-7-10']



