# Create requisition and aliquot datasets and export to CSV

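### This notebook creates two main files:
1. `lab_requisitions_{project}_{timestamp}.csv`
2. `lab_aliquots_{project}_{timestamp}.csv`

It also writes intermediate exports (`consents_{project}_{timestamp}.csv`, `visit_{project}_{timestamp}.csv`, `requisition_{project}_{timestamp}.csv`) and `lab_aliquots_missing_{project}_{timestamp}.csv`. All files are written to your home directory.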


### This notebook accesses MySQL tables, not models
This notebook reads the MySQL tables directly rather than through model classes, so a complete project VENV is not required. The `edc_*` packages imported below must still be installed.


### Testing your DB connection

See notebook `test_db_connection`

In [None]:
# imports
import os
import numpy as np
import pandas as pd
import sys

from copy import copy
from datetime import datetime
from edc_base.constants import DEFAULT_BASE_FIELDS
from edc_lab.model_mixins.requisition import RequisitionStatusMixin
from edc_constants.constants import YES, NO, NEG, UNK
from edc_pdutils import Database, Aliquot, Consent, Visit, Requisition
from edc_pdutils.table_to_dataframe import Helper, TableToDataframe
from pprint import pprint

### Change the project settings to your needs:

In [None]:
# Label embedded in the name of every CSV file this notebook writes.
project = 'bcpp_clinic'  # for export file name
# Prefix used to build the consent, visit and requisition table names.
table_prefix = 'bcpp_clinic'

In [None]:
# settings
# Datetime pattern with microseconds; not referenced in the visible cells.
date_format = '%Y-%m-%d %H:%M:%S.%f'
# Passed as `date_format` to the final `to_csv` exports.
export_date_format = '%Y-%m-%d'
# Passed as `local_tz` to Consent, Visit, Requisition and Aliquot.
local_tz = 'Africa/Gaborone'
# Field separator for the final `to_csv` exports; '|' is the noted alternative.
delimiter = ',' # '|'
# Not referenced in the visible cells; the trailing comment shows an example value.
start_date = None   # datetime(2013, 10, 1)

# tables
# The three subject tables share `table_prefix`; the aliquot table name is fixed.
consent_table = f'{table_prefix}_subject_subjectconsent'
visit_table = f'{table_prefix}_subject_subjectvisit'
requisition_table = f'{table_prefix}_subject_subjectrequisition'
aliquot_table = 'edc_lab_aliquot'

In [None]:
# Compact YYYYMMDDHHMMSS stamp (local time) shared by all export file names.
timestamp = f'{datetime.today():%Y%m%d%H%M%S}'

Import consents

In [None]:
# Build the Consent reader for `consent_table`, keep its dataframe as
# `df_subjects`, and preview the first rows.
consent = Consent(
    local_tz=local_tz,
    table_name=consent_table,
)
df_subjects = consent.dataframe
df_subjects.head()

In [None]:
# Show the earliest and latest consent datetimes, one per line.
consent_datetimes = df_subjects['consent_datetime']
print(consent_datetimes.min(), consent_datetimes.max(), sep='\n')

In [None]:
# Write the consents to a timestamped CSV in the home directory and echo
# the expanded path that was written.
consents_csv = os.path.expanduser(f'~/consents_{project}_{timestamp}.csv')
df_subjects.to_csv(consents_csv, index=False)
print(consents_csv)

In [None]:
# consent = Consent(filename='/Users/erikvw/consents_bcpp_20171014172658.csv')
# df_subjects = consent.dataframe
# df_subjects.head()

Import visit model

In [None]:
# Build the Visit reader for `visit_table`, handing it the consent
# dataframe, keep its dataframe as `df_visit`, and preview the first rows.
visit = Visit(
    local_tz=local_tz,
    df_subjects=df_subjects,
    table_name=visit_table,
)
df_visit = visit.dataframe
df_visit.head()

In [None]:
# Summarise the visit dataframe via its `info()` method.
df_visit.info()

In [None]:
# Show the earliest and latest visit datetimes, one per line.
visit_datetimes = df_visit['visit_datetime']
print(visit_datetimes.min(), visit_datetimes.max(), sep='\n')

In [None]:
# Write the visits to a timestamped CSV in the home directory and echo
# the expanded path that was written.
visit_csv = os.path.expanduser(f'~/visit_{project}_{timestamp}.csv')
df_visit.to_csv(visit_csv, index=False)
print(visit_csv)

In [None]:
# df_visit = pd.read_csv('/Users/erikvw/visit_bcpp_20171014173100.csv', low_memory=False)
# df_visit = visit.dataframe
# df_visit.head()

Import requisitions

In [None]:
# df_requisition: start from a copy of the Requisition default columns and
# append every RequisitionStatusMixin field name that is not already listed.
requisition_cols = copy(Requisition.default_columns)
status_field_names = [
    field.name
    for field in RequisitionStatusMixin._meta.get_fields()
    if field.name not in requisition_cols
]
requisition_cols.extend(status_field_names)

requisition = Requisition(
    local_tz=local_tz,
    df_visit=df_visit,
    columns=requisition_cols,
    table_name=requisition_table,
)
df_requisition = requisition.dataframe
df_requisition.head()

In [None]:
# Write the requisitions to a timestamped CSV in the home directory and
# echo the expanded path that was written.
requisition_csv = os.path.expanduser(f'~/requisition_{project}_{timestamp}.csv')
df_requisition.to_csv(requisition_csv, index=False)
print(requisition_csv)

In [None]:
# Number of requisition rows per `panel_name` value.
df_requisition.groupby('panel_name').size()

In [None]:
# Number of requisition rows per `study_site` value.
# NOTE(review): later cells group by 'study_site_name' instead - confirm
# that both columns exist in df_requisition.
df_requisition.groupby('study_site').size()

In [None]:
# Show the earliest and latest requisition datetimes, one per line.
requisition_datetimes = df_requisition['requisition_datetime']
print(requisition_datetimes.min(), requisition_datetimes.max(), sep='\n')

Import aliquots

In [None]:
# Build the Aliquot reader for `aliquot_table`, handing it the requisition
# dataframe, keep its dataframe as `df_aliquot`, and preview the first rows.
aliquot = Aliquot(
    local_tz=local_tz,
    df_requisition=df_requisition,
    table_name=aliquot_table,
)
df_aliquot = aliquot.dataframe
df_aliquot.head()

In [None]:
# Show the earliest and latest aliquot datetimes, one per line.
aliquot_datetimes = df_aliquot['aliquot_datetime']
print(aliquot_datetimes.min(), aliquot_datetimes.max(), sep='\n')

In [None]:
# Summarise the aliquot dataframe via its `info()` method.
df_aliquot.info()

### Export to CSV

In [None]:
# Export all requisitions to CSV as a single file in the home directory.
# The path is expanded explicitly, as in the earlier export cells, so the
# write does not depend on pandas expanding '~' and the location reported
# below is the real path of the file.
path = os.path.expanduser(f'~/lab_requisitions_{project}_{timestamp}.csv')
df_requisition.to_csv(path, index=True, date_format=export_date_format, sep=delimiter)
sys.stdout.write(f'* {path}\n')

In [None]:
# Report the number of requisition rows and of distinct subject identifiers.
distinct_subjects = df_requisition['subject_identifier'].drop_duplicates()
subject_count = len(distinct_subjects)
count = len(df_requisition.index)
print(f'{count} requisitions found for {subject_count} subjects.')

In [None]:
# Number of requisition rows per `study_site_name` value.
df_requisition.groupby(['study_site_name']).size()

In [None]:
# Count of non-null `study_site_name` values per site.
# `axis=1` was removed: it groups columns rather than rows, so a column
# label is not a valid key there, and the argument is deprecated in recent
# pandas releases. `.count()` makes the cell display a result rather than
# a bare groupby object.
# grouping = df_requisition.groupby(['study_site_name', 'requisition_datetime'])

df_requisition.groupby('study_site_name')['study_site_name'].count()

In [None]:
# Export all aliquots to CSV as a single file in the home directory.
# The path is expanded explicitly, as in the earlier export cells, so the
# write does not depend on pandas expanding '~' and the location reported
# below is the real path of the file.
path = os.path.expanduser(f'~/lab_aliquots_{project}_{timestamp}.csv')
df_aliquot.to_csv(path, index=True, date_format=export_date_format, sep=delimiter)
sys.stdout.write(f'* {path}\n')

### Aliquots missing a requisition

In [None]:
# Preview the aliquot rows selected by using the `missing_requisition`
# column as a row mask.
df_aliquot[df_aliquot['missing_requisition']].head()

In [None]:
# Export the aliquots selected by the `missing_requisition` mask to CSV.
# The path is expanded explicitly, as in the earlier export cells, so the
# write does not depend on pandas expanding '~' and the location reported
# below is the real path of the file.
path = os.path.expanduser(f'~/lab_aliquots_missing_{project}_{timestamp}.csv')
df_aliquot[df_aliquot['missing_requisition']].to_csv(
    path, index=True, date_format=export_date_format, sep=delimiter)
sys.stdout.write(f'* {path}\n')

In [None]:
# Print the number of aliquot rows per value of each column below,
# followed by a '---' separator after every table.
summary_columns = (
    'medium',
    'aliquot_type',
    'alpha_code',
    'numeric_code',
    'condition',
    'missing_requisition',
)
for column in summary_columns:
    print(df_aliquot.groupby(column).size())
    print('---')