# Safety Bloods Date & Time Reconciliation

In [1]:
import csv
import json
import pprint
import datetime
import os
import re
from matplotlib import pyplot

%matplotlib inline

## Setup Output Folder

_Global_ scope variables:

In [2]:
outputfolder = './test/safety/output'

Create the __output folder__ in which all output files will be placed.

In [3]:
if not os.path.exists(outputfolder):
    os.makedirs(outputfolder, 0o755) # owner execution 

View contents of folder:

In [4]:
%ls -l ./test

total 0
drwxr-xr-x  9 herbsca  staff  288 17 Apr 11:48 [34mpk[m[m/
drwxr-xr-x  7 herbsca  staff  224 17 Apr 14:22 [34msafety[m[m/


Define output file writing functions:

In [5]:
def writeFile(filename: str, contents: object):
    # Create function to allow for file creation in output.
    path = os.path.join(outputfolder, filename)
    with open(path, 'w') as outputfile:
        outputfile.write(pprint.pformat(contents))

def writeJSON(filename: str, contents: object):
    # Create function to allow for file creation in output.
    path = os.path.join(outputfolder, filename)
    with open(path, 'w') as outputfile:
        outputfile.write(json.dumps(contents))

## Create Mapping

Map csv file records to the csv file/subject records of medrio.

In [6]:
randfilename = './test/safety/rand_coh1.csv'

subjectMap = dict() # dictionary for randomisation to subject id.

randCol = 'DARandNum_C' # column header for randomisation number
subjectCol = 'Subject ID' # column header for subject id

with open(randfilename) as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        subjectMap[f'{row[randCol]}'] = row[subjectCol]

writeFile('subjectMapping.txt', subjectMap)

## Read Configuration File

Context variables:

In [7]:
configfilename = './test/safety/config.json'

Parse config file and establish relationships.

In [8]:
configDict = dict()

with open(configfilename) as jsonfile:
    configDict = json.load(jsonfile)

pprint.pp(configDict)

{'match': {'visit': {'Screening': 0,
                     'Period 1 Day-2': 1,
                     'Period 1 Day 2': 2,
                     'Period 2 Day 2': 4,
                     'Period 2 Day-2': 3,
                     'Period 2 Day 7/EOS': 5,
                     'Unscheduled': 6,
                     'Day -2 [Period 1]': 1,
                     'Day 2 [Period 1]': 2,
                     'Day -2 [Period 2]': 3,
                     'Day 2 [Period 2]': 4,
                     'Day 7 [Period 2] EOS': 5}}}


## Read Medrio Source File

Create dictionary mapping for each subject to allow for hash map search of data.

Context variables:

In [9]:
sourcefilename = './test/safety/source_coh1.csv'

sourceMap = dict() # dictionary for subject data

subjectidcol = 'Subject ID'
visitCol = 'Visit'

# Regular expression objects
datetimeregex = re.compile(r'dattim', flags=re.I)
coagregex = re.compile(r'coag', flags=re.I)
chemregex = re.compile(r'chem', flags=re.I)
glucregex = re.compile(r'gluc', flags=re.I)
serregex = re.compile(r'ser', flags=re.I)
haemregex = re.compile(r'haem', flags=re.I)
commregex = re.compile(r'comm', flags=re.I)

# timeregex = re.compile(r'tim', flags=re.I)
# periodRegex = re.compile(r'period.*([0-9])', flags=re.I)

Parse the source file and structure data into the __sourceMap__ dictionary variable.

In [10]:
# Read File
with open(sourcefilename) as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        # Get Period of Row
        try:
            visitid = configDict['match']['visit'][f'{row[visitCol]}']
        except:
            print(f'Error: Visit could not be matched from configuration file: {row[visitCol]}')
            continue
        
        if row[subjectidcol] not in sourceMap:
            sourceMap[f'{row[subjectidcol]}'] = dict()
        if visitid not in sourceMap[f'{row[subjectidcol]}']:
            sourceMap[f'{row[subjectidcol]}'][f'{visitid}'] = dict()
        
        # Record data values
        for key in row:
            k = 'comment'
            if row[key] == '':
                continue # skip empty cell values
            value = None
            if datetimeregex.search(key) is not None:
                # Date
                value = datetime.datetime.strptime(row.get(key).strip(), '%m/%d/%Y %H:%M')
                k = 'datetime'
            elif coagregex.search(key) is not None:
                k = 'coag'
                value = row.get(key, '')
            elif chemregex.search(key) is not None:
                k = 'chem'
                value = row.get(key, '')
            elif glucregex.search(key) is not None:
                k = 'gluc'
                value = row.get(key, '')
            elif serregex.search(key) is not None:
                k = 'ser'
                value = row.get(key, '')
            elif haemregex.search(key) is not None:
                k = 'haem'
                value = row.get(key, '')
            elif commregex.search(key) is not None:
                k = 'comm'
                value = row.get(key, '')
            else:
                k = key
                value = row.get(key, '')
            
            sourceMap[f'{row[subjectidcol]}'][f'{visitid}'][f'{k}'] = value

writeFile('medrioData.txt', sourceMap)
pprint.pp(sourceMap)

{'S007-14101': {'0': {'\ufeffMedrio ID': '0007',
                      'Group': 'Cohort 1',
                      'Subject ID': 'S007-14101',
                      'Visit': 'Screening',
                      'Form': 'Safety Blood Collection',
                      'datetime': datetime.datetime(2019, 11, 7, 16, 6),
                      'coag': 'Yes',
                      'chem': 'Yes',
                      'ser': 'Yes',
                      'haem': 'Yes',
                      'comm': 'pt had not fasted when he arrived, returned in '
                              'the afternoon after fasting 8hrs'},
                '1': {'\ufeffMedrio ID': '0007',
                      'Group': 'Cohort 1',
                      'Subject ID': 'S007-14101',
                      'Visit': 'Day -2 [Period 1]',
                      'Form': 'Safety Blood Collection',
                      'datetime': datetime.datetime(2019, 12, 1, 9, 55),
                      'chem': 'Yes',
                      'haem':

## Read Comparison File

Context variables:

In [11]:
comparisonMap = dict() # emtpy dictionary

comparisonfilename = './test/safety/comparison_coh1.csv'

#periodRegex = re.compile(r'period.*([0-9])', flags=re.I)
coagregex = re.compile(r'coag', flags=re.I)
chemregex = re.compile(r'chem', flags=re.I)
glucregex = re.compile(r'gluc', flags=re.I) # compared if 'TestPanel' is blank
serregex = re.compile(r'ser', flags=re.I)
haemregex = re.compile(r'haem', flags=re.I)
commregex = re.compile(r'comm', flags=re.I)

# scheduleCol = 'Scheduled time (hrs post dose)'
subjectCol = 'PatientID1'
dateCol = 'CollDate'
timeCol = 'CollTime'
visitCol = 'Visit'
panelCol = 'TestPanel'
nameCol = 'ResultName'

Parse comparison file data and structure into the __comparisonMap__ dictionary

In [16]:
# Read File
with open(comparisonfilename) as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        if row[dateCol] == '':
            continue # Skip empty rows
        
        # Interpret Visit
        try:
            visitid = configDict['match']['visit'][f'{row[visitCol]}']
        except:
            print(f'Error: Visit could not be matched from configuration file: {row[visitCol]}')
            continue
            
        if row[subjectCol] not in comparisonMap:
            comparisonMap[f'{row[subjectCol]}'] = dict()
        if visitid not in comparisonMap[f'{row[subjectCol]}']:
            comparisonMap[f'{row[subjectCol]}'][visitid] = dict()
        
        # Fetch infomation
        info = dict()
        timeval = {
            'date': datetime.datetime.strptime(row.get(dateCol).strip(), '%d/%m/%Y'),
            'time': datetime.datetime.strptime(row.get(timeCol).strip(), '%H:%M:%S')
        }
        k = None # test name
        
        if coagregex.search(row[panelCol]) is not None:
            k = 'coag'
            info['coag'] = {**timeval}
        elif chemregex.search(row[panelCol]) is not None:
            k = 'chem'
            info['chem'] = {**timeval}
        elif serregex.search(row[panelCol]) is not None:
            k = 'ser'
            info['ser'] = {**timeval}
        elif haemregex.search(row[panelCol]) is not None:
            k = 'haem'
            info['haem'] = {**timeval}
        elif row[panelCol] == '':
            if glucregex.search(row[nameCol]) is not None:
                k = 'gluc'
                info['gluc'] = {**timeval}
            else:
                continue # not interested
        else:
            continue # not interested
        
        # Append new information if verified to be identical
        if k not in comparisonMap[f'{row[subjectCol]}'][visitid]:
            # print(comparisonMap[f'{row[subjectCol]}'][visitid])
            comparisonMap[f'{row[subjectCol]}'][visitid] = info # remember values
        elif comparisonMap[f'{row[subjectCol]}'][visitid][k]['date'] != info[k]['date']:
            # the date in script does not match previously recorded dates
            error = {
                'subject': row[subjectCol],
                'visit': row[visitCol],
                **info
            }
            pprint.pp(error)
            pprint.pp(comparisonMap[f'{row[subjectCol]}'][visitid])
            raise Exception("ERROR: date for data point is not consistent within file!")
        elif comparisonMap[f'{row[subjectCol]}'][visitid][k]['time'] != info[k]['time']:
            # the date in script does not match previously recorded dates
            error = {
                'subject': row[subjectCol],
                'visit': row[visitCol],
                **info
            }
            pprint.pp(error)
            pprint.pp(comparisonMap[f'{row[subjectCol]}'][visitid])
            raise Exception("ERROR: time for data point is not consistent within file!")
        else:
            # append information
            comparisonMap[f'{row[subjectCol]}'][visitid].update(info)
            #pprint.pp(comparisonMap[f'{row[subjectCol]}'][visitid])
            

writeFile('comparisonData.txt', comparisonMap)
pprint.pp(comparisonMap)

{'subject': 'S025',
 'visit': 'Period 1 Day-2',
 'chem': {'date': datetime.datetime(2019, 12, 4, 0, 0),
          'time': datetime.datetime(1900, 1, 1, 10, 25)}}
{'chem': {'date': datetime.datetime(2019, 12, 1, 0, 0),
          'time': datetime.datetime(1900, 1, 1, 9, 26)}}


Exception: ERROR: date for data point is not consistent within file!