# Study Summary Report
> Author: Clayton Herbst

## Business Needs Statement
The CRO needs to be able to review the state of a study on demand, following the progression of study participants over time using key metrics captured within the study.

## Task Description
Generate a report that reflects the current data within the study, collating all ECG, Vital Signs and PK data on a per-tab basis while ensuring that the links to study participants, forms, visits and timepoints remain intact. The report must accurately reflect the data stored in the study database.

In [1]:
import csv
import datetime
import json
import os
import pprint
import re

from matplotlib import pyplot
from openpyxl import Workbook, load_workbook

from src import FileReader, ParseData, FileWriter, ErrorLogger

%matplotlib inline

## Setup Output Folder

Create the __output folder__ in which all output files will be placed.

In [2]:
# Set the output folder location.
# Every helper in this notebook that writes a file joins its file name onto
# this path; it is also passed to FileWriter and ErrorLogger. The path is
# relative to the working directory the notebook runs in.
outputfolder = './test/output'

In [3]:
# Create the output folder (and any missing parents), requesting mode 0o755
# (rwxr-xr-x, subject to the process umask).
# exist_ok=True makes re-running this cell a no-op and removes the
# check-then-create race of a separate os.path.exists() test. If the path
# exists but is not a directory, FileExistsError is raised here instead of
# failing later on the first write.
os.makedirs(outputfolder, mode=0o755, exist_ok=True)

View contents of folder:

In [4]:
%ls -l ./test

total 26840
-rw-r--r--@ 1 herbsca  staff  13733314 16 Jun 19:44 browse_data.xlsx
-rw-r--r--@ 1 herbsca  staff      3729 16 Jun 20:56 config.json
drwxr-xr-x  6 herbsca  staff       192 16 Jun 21:56 output/
-rw-r--r--@ 1 herbsca  staff       165 16 Jun 19:43 ~$browse_data.xlsx


## Define Utility Functions
These functions write the output files. Each one is given a _dictionary variable_ (`contents`) and converts that structure into the target file format (CSV -> columns, JSON -> key-value pairs, etc.).

In [5]:
def writeFile(filename: str, contents: object):
    """Write a pretty-printed text rendering of ``contents`` to the output folder.

    ``filename`` is joined onto the module-level ``outputfolder``. The file is
    opened in write mode, so an existing file of the same name is replaced.
    A confirmation line is printed once the file has been written.
    """
    destination = os.path.join(outputfolder, filename)
    with open(destination, 'w') as handle:
        rendered = pprint.pformat(contents)
        handle.write(rendered)
    print('Text file write complete.')

def writeJSON(filename: str, contents: object):
    """Serialise ``contents`` as JSON into a file in the output folder.

    ``filename`` is joined onto the module-level ``outputfolder``. The file is
    opened in write mode, so an existing file of the same name is replaced.
    A confirmation line is printed once the file has been written.
    """
    destination = os.path.join(outputfolder, filename)
    with open(destination, 'w') as handle:
        serialised = json.dumps(contents)
        handle.write(serialised)
    print('JSON file write complete.')

def _source_and_comparison(record: dict, field: str):
    """Return the (source, comparison) pair of ``record[field]``, or ('-', '-').

    The pair is only reported when ``field`` is present in the record and its
    ``error`` entry is truthy.
    """
    if field in record and record[field]['error']:
        return record[field]['source'], record[field]['comparison']
    return '-', '-'


def writeErrorCSV(filename: str, contents: object):
    """Write the collected error records to a CSV file in the output folder.

    ``contents`` is a mapping. Its ``'errors'`` and ``'total'`` keys are
    skipped; every other key maps to an iterable of error records (dicts).

    Each record must provide ``subject``, ``visit``, ``type``, ``collection``
    and ``panel``. Optional ``date`` / ``time`` entries are dicts with
    ``error``, ``source`` and ``comparison`` keys: their source/comparison
    values are written only when ``error`` is truthy, otherwise ``'-'`` is
    written. An optional ``msg`` fills the ``message`` column (empty when
    absent).

    Raises:
        KeyError: if a record lacks one of the mandatory keys.
    """
    fieldnames = ['subjectid', 'type', 'collection', 'visit', 'panel', 'sourceDate', 'sourceTime', 'comparisonDate', 'comparisonTime', 'message']
    path = os.path.join(outputfolder, filename)
    # newline='' hands line-ending control to the csv module; without it,
    # platforms that translate '\n' end up with a blank line after every row.
    with open(path, 'w', newline='') as outputfile:
        writer = csv.DictWriter(outputfile, fieldnames=fieldnames)
        writer.writeheader()  # place headers in csv file.
        for key, records in contents.items():
            # 'errors' and 'total' are not lists of error records.
            if key in ('errors', 'total'):
                continue
            for data in records:
                sourcedate, comparisondate = _source_and_comparison(data, 'date')
                sourcetime, comparisontime = _source_and_comparison(data, 'time')
                writer.writerow({
                    'subjectid': data['subject'],
                    'visit': data['visit'],
                    'type': data['type'],
                    'collection': data['collection'],
                    'panel': data['panel'],
                    'sourceDate': sourcedate,
                    'sourceTime': sourcetime,
                    'comparisonDate': comparisondate,
                    'comparisonTime': comparisontime,
                    'message': data.get('msg', ''),
                })
    print('CSV file write complete.')

## Parse Config File

__EDIT HERE:__ Specify where the config file can be found.

In [6]:
# Path of the JSON config file that is opened and parsed with json.load below
# (relative to the working directory the notebook runs in).
config_file_path = './test/config.json'

Parse config file and establish relationships.

In [7]:
# Load the study configuration into config_dict.
# JSON text is UTF-8 by specification, so the encoding is stated explicitly
# instead of being left to the platform's default locale encoding.
with open(config_file_path, encoding='utf-8') as jsonfile:
    config_dict = json.load(jsonfile)

# Echo the parsed configuration so it can be checked by eye.
pprint.pp(config_dict)

{'tabs': ['ECG',
          'Vital Signs',
          'Brief Physical',
          'DSST',
          'Neurological',
          'Physical Exam',
          'Dose Administration',
          'Adverse Events',
          'Orthostatic Vital'],
 'general_headers': ['Medrio ID', 'Group', 'Subject ID', 'Visit', 'Form'],
 'map': {'Orthostatic Vital': {'_formregex': 'Orthostatic',
                               '_colregex': {'date': 'dat',
                                             'supinetime': 'SuMeTim',
                                             'supinesys': 'SuSys',
                                             'supinedias': 'SuDia',
                                             'supineheart': 'SuHr',
                                             'supinerr': 'SuRR',
                                             'supinetemp': 'SuTe',
                                             'standingreview': 'ReviewOrthoS',
                                             'rp': 'RPRev',
                           

## Process Workbook

__EDIT HERE:__ Specify where the input file can be found.

In [8]:
# Path of the workbook that is handed to FileReader below.
input_file_path = './test/browse_data.xlsx'

# Key used to look up the form name among the general (per-row) values
# returned by parser.process_generalised_cells().
form_name_column_header = 'Form'

Read the xlsx file and create a searchable dictionary variable.

In [None]:
# Read the workbook row by row, classify every row by its form name and hand
# it to the matching ParseData routine; rows whose form name has no configured
# type are logged as warnings and skipped.

# -- Create File Reader Object --
filereader = FileReader(input_file_path)

# -- Get File Contents --
headers = filereader.getSheetHeaders()  # all medrio variables etc
ws = filereader.getWorksheet()  # worksheet for the file read

# -- Get General Headers --
general_headers = config_dict.get("general_headers", [])
# NOTE(review): passed to the parsers as the number of leading columns to
# ignore (the original comment said "rows") -- confirm against ParseData.
ignore_col_before = len(general_headers)

# -- Create File Processing Object --
parser = ParseData(headers, config_dict.get('map'), general_headers=general_headers)

# -- Create Error Logger Object --
err_logger = ErrorLogger(outputfolder, headers=general_headers)

# -- Parse Contents --
# To test on a subset, break out of the loop once row_id exceeds a limit.
row_id = 0  # stays 0 when the worksheet has no rows
for row_id, row in enumerate(ws.rows, start=1):
    # -- Skip the xlsx headers --
    if row_id == 1:
        continue

    # -- Get the common headers --
    general = parser.process_generalised_cells(row)

    # -- Find information about form name --
    form_info = parser.generate_form_type(general.get(form_name_column_header, None))

    # -- Extract information from form_info --
    formtype, is_triplicate, form_typemap = form_info[0], form_info[1], form_info[2]

    # -- No formtype defined for form name --
    if formtype is None:
        skipped_message = f"Skipped form name: {general.get(form_name_column_header, 'UNKNOWN')}."
        if err_logger is not None:
            err_logger.add({
                **general,
                'type': 'WARNING',
                'message': skipped_message,
                "function": "Process Workbook",
            })
        else:
            print(skipped_message)
        continue

    # -- Pick the processing routine for this form --
    if not is_triplicate:
        process_row = parser.process_loop
    elif form_typemap.get("_triplicate_id_regex") is None:
        # no id regex configured: use medrio order
        process_row = parser.process_triplicate_order
    else:
        # try find id in the name
        process_row = parser.process_triplicate_loop

    data = process_row(row, formtype, ignore_col_before, err_logger=err_logger, start_dict=general)
    if data is None:
        continue

    # -- ADD TO OVERALL DICTIONARY --
    parser.add_row(data, formtype)  # use the formtype as the output file tab name


In [None]:
# Dump the parsed data to temp_output.json in the output folder so it can be
# inspected. Uncomment the next line to print it in the notebook instead.
# pprint.pp(parser.get_data())
writeJSON('temp_output.json', parser.get_data())

In [None]:
# -- FORMAT THE DATA --
# Delegates to ParseData.format_data, passing the shared error logger.
# NOTE(review): presumably reformats the parsed rows held by the parser before
# they are exported -- confirm in src.ParseData.
parser.format_data(err_logger=err_logger)

## Output Results to XLSX File

In [None]:
# File name passed to FileWriter for the generated workbook
output_filename = 'output'

# Tabs to include, as listed in the config file (None when the key is absent)
tabs = config_dict.get('tabs')

# Build the writer from the destination, tab list, shared headers and parsed data
writer = FileWriter(
    outputfolder,
    output_filename,
    tabs,
    general_headers,
    parser.get_data(),
)

# Write one worksheet per key of the parsed data
for tab in parser.get_data():
    writer.write_worksheet(tab)

# Save the file to disk
writer.save()

## Output Error Log

In [None]:
# Emit the error log collected while processing the workbook.
# NOTE(review): write() presumably renders the logged entries and save()
# stores the resulting file in the output folder -- confirm in src.ErrorLogger.
# The call order is kept exactly as is.
err_logger.write()
err_logger.save()