In [1]:
import os
import sys

import django

# Make the Django project importable from this notebook. The entry is a
# relative path, so it is resolved against the kernel's working directory.
sys.path.append('../')  # add path to project root dir

# Settings module Django loads when django.setup() is called below.
os.environ["DJANGO_SETTINGS_MODULE"] = "flourish.settings"

# Lets the synchronous ORM be used from the notebook kernel, which runs cells
# inside an event loop (see Django's async-safety documentation).
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"

# Must come after both environment variables above have been set.
django.setup()

  * Reading config from flourish.ini
Loading Data Encryption (init)...
 * loading keys from /Users/nimrodmunatsi/source/flourish-code-space/flourish/crypto_fields
 * loading rsa.restricted.public ... Done.
 * loading rsa.restricted.private ... Done.
 * loading rsa.local.public ... Done.
 * loading rsa.local.private ... Done.
 * loading aes.local ... Done.
 * loading aes.restricted ... Done.
 * loading salt.local ... Done.
 * loading salt.restricted ... Done.
 Done loading Data Encryption (init)...


        failure to do so will cause the tasks to be retriggered before completion. 
        See https://django-q.readthedocs.io/en/latest/configure.html#retry for details.
  warn(


Loading Data Encryption ...
 * found encryption keys in /Users/nimrodmunatsi/source/flourish-code-space/flourish/crypto_fields.
 * using model django_crypto_fields.crypt.
 Done loading Data Encryption.
Loading Edc Consent ...
 * checking for site consents ...
 * registered consents 'consents' from 'flourish_caregiver'
 * registered consents 'consents' from 'flourish_child'
 * registered consents 'consents' from 'pre_flourish'
 * registered consents 'consents' from 'flourish_facet'
 * flourish_caregiver.subjectconsent 1 covering 2020-07-01 UTC to 2025-06-30 UTC
 * flourish_caregiver.subjectconsent 2 covering 2020-07-01 UTC to 2025-06-30 UTC
 * flourish_caregiver.subjectconsent 3 covering 2020-07-01 UTC to 2025-06-30 UTC
 * flourish_caregiver.subjectconsent 4 covering 2020-07-01 UTC to 2025-06-30 UTC
 * flourish_caregiver.tbadolconsent 1 covering 2020-07-01 UTC to 2025-06-30 UTC
 * flourish_caregiver.tbinformedconsent 1 covering 2020-07-01 UTC to 2025-06-30 UTC
 * flourish_child.childdum

In [2]:
from django.db.models import Count
from flourish_caregiver.models import SubjectConsent


def mothers_with_multiple_kids():
    """
    Return the subject identifiers of consents linked to more than one child consent.

    Each SubjectConsent row is annotated with the number of related
    'caregiverchildconsent' rows; only rows where that count exceeds one are kept.

    Returns:
        A flat ``values_list`` queryset of ``subject_identifier`` values, one
        per matching SubjectConsent row.
        NOTE(review): no ``distinct()`` is applied, so an identifier may repeat
        if several consent rows of the same mother match - confirm.
    """
    with_child_count = SubjectConsent.objects.annotate(
        child_count=Count('caregiverchildconsent'))
    multiple_children = with_child_count.filter(child_count__gt=1)
    return multiple_children.values_list('subject_identifier', flat=True)

In [3]:

from itertools import combinations


def find_objects_within_90_days(object_list, max_days=90):
    """
    Return the first pair of objects whose ``report_datetime`` values are at
    most ``max_days`` whole days apart.

    Pairs are examined in the order produced by ``itertools.combinations``.
    The gap is measured in whole elapsed days, so with the default a gap of
    90 days and 23 hours still counts as within 90 days.

    Parameters:
        object_list: Iterable of objects exposing a ``report_datetime``
            attribute (e.g. SubjectConsent instances).
        max_days: Largest gap, in whole days, that still counts as a match.
            Defaults to 90.

    Returns:
        tuple: ``(obj1, obj2)`` for the first matching pair, or None when no
        pair matches (including when fewer than two objects are given).
    """
    for obj1, obj2 in combinations(object_list, 2):
        # Take abs() of the timedelta itself rather than of its ``.days``:
        # a negative timedelta floors ``.days`` (-90 days 12 h -> -91), which
        # made the outcome depend on the order of the two objects.
        gap = abs(obj1.report_datetime - obj2.report_datetime)
        if gap.days <= max_days:
            return obj1, obj2
    return None

In [4]:
def compare_instances(instance1, instance2, exclude_fields=None):
    """
    Report, field by field, whether two model instances hold equal values.

    The fields compared are those listed in ``instance1._meta.fields``; each
    value is read with ``getattr`` using the field's ``name``.

    Parameters:
        instance1, instance2: Instances of the same class.
        exclude_fields: Optional collection of field names to leave out of the
            result. When empty or None, every field is compared.

    Returns:
        dict: Maps each compared field name to True when both instances hold
              equal values for it, otherwise False. Keys follow the order of
              ``_meta.fields``.

    Raises:
        AssertionError: If the two instances are not of the same class.
    """
    assert instance1.__class__ == instance2.__class__, "Instances must be of the same class."

    # An empty tuple excludes nothing, mirroring the "no exclusions" case.
    skipped = exclude_fields or ()
    candidate_names = (model_field.name for model_field in instance1._meta.fields)

    return {
        name: getattr(instance1, name) == getattr(instance2, name)
        for name in candidate_names
        if name not in skipped
    }


In [15]:
def add_to_mothers_with_different_data(mother, visit_crf_model, unique_visit_code,
                                       corresponding_visit_code, registry=None):
    """
    Record that a CRF model differs between two visits of a mother.

    The two visit codes are sorted before being formatted, so the same pair
    passed in either order produces the same entry and is stored only once
    per mother and model.

    Parameters:
        mother: Key under which the entry is stored.
        visit_crf_model: CRF model label used as the key of the stored entry.
        unique_visit_code, corresponding_visit_code: The two visit codes whose
            CRF data differ.
        registry: Optional dict to update. Defaults to the module-level
            ``mothers_with_different_data`` dict, which therefore only has to
            exist when no registry is passed.

    Returns:
        None. ``registry`` is updated in place with
        ``{visit_crf_model: '(code_a, code_b)'}`` appended to
        ``registry[mother]``.
    """
    if registry is None:
        # Looked up at call time, so the global may be defined in a later cell
        # as long as that cell has run before the first call.
        registry = mothers_with_different_data

    first_code, second_code = sorted([unique_visit_code, corresponding_visit_code])
    visit_str = f'({first_code}, {second_code})'

    entries = registry.setdefault(mother, [])
    already_recorded = any(
        visit_crf_model in entry and entry[visit_crf_model] == visit_str
        for entry in entries
    )
    if already_recorded:
        return
    entries.append({visit_crf_model: visit_str})

In [16]:
import pytz
from tqdm import tqdm
from flourish_caregiver.models import MaternalVisit
from django.apps import apps as django_apps


# Filled in by add_to_mothers_with_different_data():
# mother -> list of {crf model label: '(visit code, visit code)'}.
mothers_with_different_data = {}
tz = pytz.timezone('Africa/Gaborone')

# Field names skipped when two CRF instances are compared; the list is the
# same for every comparison, so it is built once.
exclude_fields = ['id', '_state', 'created', 'modified',
                  'user_created', 'device_created', 'device_modified',
                  'form_as_json', 'site', 'consent_model',
                  'consent_version', 'maternal_visit',
                  'report_datetime', 'user_modified',
                  'hostname_created', 'hostname_modified', 'revision']

for mother in tqdm(mothers_with_multiple_kids()):
    mother_visits = MaternalVisit.objects.filter(subject_identifier=mother)
    unique_mother_visits_codes = list(
        set(mother_visits.values_list('visit_code', flat=True)))

    for unique_visit_code in unique_mother_visits_codes:
        if unique_visit_code == '2100T':
            continue

        # Earliest visit recorded for this mother under the current visit code.
        unique_visit = mother_visits.filter(
            visit_code=unique_visit_code).earliest('report_datetime')
        if getattr(unique_visit, 'visits', None) is None:
            continue

        # Date window around the appointment's timepoint, bounded by the
        # visit definition's rlower / rupper offsets.
        appt = unique_visit.appointment
        visit_definition = appt.visits.get(appt.visit_code)
        ideal_timepoint = appt.timepoint_datetime
        earliest_appt_dt = (ideal_timepoint - visit_definition.rlower).astimezone(tz)
        latest_appt_dt = (ideal_timepoint + visit_definition.rupper).astimezone(tz)

        # Latest visit of the same mother that falls inside that window but
        # belongs to a different schedule.
        try:
            corresponding_maternal_visit = mother_visits.filter(
                report_datetime__range=(earliest_appt_dt, latest_appt_dt),
            ).exclude(
                schedule_name=unique_visit.schedule_name,
            ).latest('report_datetime')
        except MaternalVisit.DoesNotExist:
            continue

        visit_crfs = unique_visit.visits.get(unique_visit.visit_code).crfs
        for visit_crf in visit_crfs:
            crf_model_cls = django_apps.get_model(visit_crf.model)

            # Only compare when the CRF exists for both visits.
            try:
                crf_model_objs = crf_model_cls.objects.get(
                    maternal_visit=unique_visit)
                corresponding_crf_model_objs = crf_model_cls.objects.get(
                    maternal_visit=corresponding_maternal_visit)
            except crf_model_cls.DoesNotExist:
                continue

            comparison_result = compare_instances(
                crf_model_objs, corresponding_crf_model_objs, exclude_fields)

            # One differing field is enough to flag this CRF for the visit pair.
            if not all(comparison_result.values()):
                add_to_mothers_with_different_data(
                    mother, visit_crf.model,
                    unique_visit_code, corresponding_maternal_visit.visit_code)



100%|██████████| 141/141 [00:27<00:00,  5.14it/s]


{'B142-040990669-9': [{'flourish_caregiver.sociodemographicdata': '(2002M, 3001M)'},
  {'flourish_caregiver.covid19': '(2002M, 3001M)'},
  {'flourish_caregiver.sociodemographicdata': '(2001M, 3000M)'},
  {'flourish_caregiver.covid19': '(2001M, 3000M)'},
  {'flourish_caregiver.sociodemographicdata': '(2003M, 3002M)'}],
 'B142-040990529-5': [{'flourish_caregiver.sociodemographicdata': '(1000M, 2008M)'},
  {'flourish_caregiver.medicalhistory': '(1000M, 2008M)'},
  {'flourish_caregiver.relationshipfatherinvolvement': '(1000M, 2008M)'}],
 'B142-040990464-5': [{'flourish_caregiver.sociodemographicdata': '(1000M, 2008M)'},
  {'flourish_caregiver.medicalhistory': '(1000M, 2008M)'},
  {'flourish_caregiver.relationshipfatherinvolvement': '(1000M, 2008M)'}],
 'B142-040990466-0': [{'flourish_caregiver.sociodemographicdata': '(2010M, 3000M)'},
  {'flourish_caregiver.medicalhistory': '(2010M, 3000M)'},
  {'flourish_caregiver.sociodemographicdata': '(1000M, 2003M)'},
  {'flourish_caregiver.medicalhis

In [8]:
import csv
import re


def transform_model_name(model_name):
    """
    Turn a dotted model label into a space-separated, human-readable string.

    Underscores become spaces, the label is split on dots, each dot-separated
    part is passed through ``str.capitalize`` (first character upper-cased,
    the rest lower-cased) and the parts are joined with single spaces.

    Example:
        'flourish_caregiver.sociodemographicdata'
        -> 'Flourish caregiver Sociodemographicdata'
    """
    dotted_parts = model_name.replace('_', ' ').split('.')
    return ' '.join(map(str.capitalize, dotted_parts))

{'B142-040990669-9': ['flourish_caregiver.sociodemographicdata_2002M',
  'flourish_caregiver.covid19_2002M',
  'flourish_caregiver.caregiverclinicalmeasurements_2100T',
  'flourish_caregiver.sociodemographicdata_2001M',
  'flourish_caregiver.covid19_2001M',
  'flourish_caregiver.sociodemographicdata_3001M',
  'flourish_caregiver.covid19_3001M',
  'flourish_caregiver.sociodemographicdata_2003M',
  'flourish_caregiver.sociodemographicdata_2002M',
  'flourish_caregiver.covid19_2002M',
  'flourish_caregiver.caregiverclinicalmeasurements_2100T',
  'flourish_caregiver.sociodemographicdata_2001M',
  'flourish_caregiver.covid19_2001M',
  'flourish_caregiver.sociodemographicdata_3001M',
  'flourish_caregiver.covid19_3001M',
  'flourish_caregiver.sociodemographicdata_2003M'],
 'B142-040990529-5': ['flourish_caregiver.sociodemographicdata_2008M',
  'flourish_caregiver.medicalhistory_2008M',
  'flourish_caregiver.relationshipfatherinvolvement_2008M',
  'flourish_caregiver.sociodemographicdata_2008

In [None]:
def format_data_to_csv(data, csv_file_path):
    """
    Write the collected mismatch data to a CSV file.

    Parameters:
        data: Mapping of a key to a list of dicts; every ``{type: value}``
            pair in those dicts becomes one CSV row.
        csv_file_path: Path of the file to write. It is opened in 'w' mode,
            so an existing file is overwritten.

    The file starts with the header row ``Key, Type, Data``. The ``Type``
    column holds the dict key after it has been passed through
    ``transform_model_name``.
    """
    def _rows():
        # Flatten {key: [{type: value}, ...]} into [key, readable type, value].
        for key, entries in data.items():
            for entry in entries:
                for data_type, data_value in entry.items():
                    yield [key, transform_model_name(data_type), data_value]

    with open(csv_file_path, mode='w', newline='') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(['Key', 'Type', 'Data'])
        writer.writerows(_rows())

In [None]:
# Export the mismatches collected in mothers_with_different_data to a CSV file;
# the relative path is resolved against the kernel's working directory.
# NOTE(review): the file name spelling ('missmatch_matenal') is kept as-is -
# confirm nothing depends on it before renaming.
format_data_to_csv(mothers_with_different_data, 'missmatch_matenal_crfs.csv')