In [1]:
# import packages and set options
from formatting_functions_open_source import *
from utility_functions import *
import numpy as np
import pandas as pd
import xlsxwriter
import pandas.io.formats.excel
import warnings
import re

# Clear pandas' default Excel header style at class level so that header cells
# can be given custom formatting instead (this allows custom formatting of header).
# The assignment is on the ExcelFormatter class itself, so it applies to every
# formatter created after this cell runs.
# NOTE(review): this presumably only matters where pandas itself writes the header
# (e.g. DataFrame.to_excel) -- confirm the formatting helpers depend on it.
pandas.io.formats.excel.ExcelFormatter.header_style = None


In [2]:
# suppress the close file warning from xlsxwriter, which comes up in error
warnings.filterwarnings('ignore')
warnings.warn("Calling close() on already closed file.")
warnings.warn('Do not show this message')

In [3]:
# load in data: one CSV per report table, read in the order listed
(
    oneD_df,
    twoD_df,
    threeD_row_df_1,
    threeD_row_df_2,
    multiindex_test_df,
) = (
    pd.read_csv(csv_path)
    for csv_path in (
        'Data/client_data.csv',                  # -> oneD_df
        'Data/avg_medical_svc_cost.csv',         # -> twoD_df
        'Data/clients_by_gender.csv',            # -> threeD_row_df_1
        'Data/clients_by_gender_2.csv',          # -> threeD_row_df_2
        'Data/test_set_datatype_multiindex.csv'  # -> multiindex_test_df
    )
)


In [4]:
# set indices
# Each frame is rebound to the re-indexed result rather than mutated with
# inplace=True; the resulting index/columns are the same.

# single-level row index
twoD_df = twoD_df.set_index('Department')

# row multiindexes (outermost level first)
threeD_row_df_1 = threeD_row_df_1.set_index(['Department', 'Gender'])
threeD_row_df_2 = threeD_row_df_2.set_index(['Department', 'Gender', 'Trans'])
multiindex_test_df = multiindex_test_df.set_index(['department', 'gender'])

In [5]:
# set date vars to datetime
# Each listed column is parsed with pd.to_datetime and written back under the
# same column name.

for date_col in ('birth_date', 'admit_date', 'chart_start', 'chart_end'):
    oneD_df[date_col] = pd.to_datetime(oneD_df[date_col])

for date_col in ('date_var', 'date_alt_var', 'datetime_var', 'datetime_alt_var'):
    multiindex_test_df[date_col] = pd.to_datetime(multiindex_test_df[date_col])

In [7]:
# create excel report

# xlsxwriter's `nan_inf_to_errors` option would keep NaNs from breaking report
# creation, but it is deliberately left off: any NaN in the report is a bug that
# needs to be fixed at the source. To switch it on while debugging, pass
# options={'nan_inf_to_errors': True} to pd.ExcelWriter
# (NOTE(review): newer pandas expects engine_kwargs={'options': {...}} -- confirm
# against the installed version).
writer = pd.ExcelWriter('Reports/Example Clinical Report.xlsx', engine='xlsxwriter')

# the underlying xlsxwriter workbook, plus one worksheet per table (in tab order)
ecr_wb = writer.book

oneD_sheet, twoD_sheet, threeDrow_sheet1, threeDrow_sheet2, test_sheet = map(
    ecr_wb.add_worksheet,
    [
        'Example Client Data',
        'Avg Svc Cost by Dept',
        'Clients by Dept & Gender',
        'Clients by Dept & Gender NEW',
        'Test',
    ],
)


# FORMATTING EXAMPLE CLIENT DATA (1 dimensional data)

# the (dataframe, workbook, worksheet) triple every helper call below starts with
oneD_target = (oneD_df, ecr_wb, oneD_sheet)

# format header
format_header(*oneD_target, header_offset=2, clean_header=True)

# data
insert_data(*oneD_target, header_offset=2)

# columns that only get a width, no explicit data type
for width_only_col in ('client_id', 'doctor'):
    set_col_width(*oneD_target, width_only_col, method='all')

# columns with an explicit data type, as (column name, data type) pairs
for typed_col, col_dtype in (
    ('cell_count', 'numeric'),
    ('glucose_level', 'decimal_1'),
    ('sucrose_level', 'decimal_2'),
    ('procedure_cost', 'dollar'),
    ('insurance_pymt', 'dollar_cents'),
    ('pct_paid', 'percent'),
    ('client_pay_pct', 'percent_1'),
    ('recovery_rate', 'percent_2'),
    ('birth_date', 'date'),
    ('admit_date', 'date_alt'),
    ('chart_start', 'datetime'),
    ('chart_end', 'datetime_alt'),
):
    set_col_data_type(*oneD_target, typed_col, col_dtype, col_width_method='all')

# borders
table_bottom_border(*oneD_target, header_offset=2)
table_right_border(*oneD_target, header_offset=2)

# title
insert_title(*oneD_target, 'Example Client Data')


# FORMATTING AVG SVC COST BY DEPT (2 dimensional data)

# the (dataframe, workbook, worksheet) triple every helper call below starts with
twoD_target = (twoD_df, ecr_wb, twoD_sheet)

# header
last_col_highlight_header(*twoD_target, header_offset=2)

# index
format_index(*twoD_target, header_offset=2)

# data: every value cell is written with the 'dollar_cents' data type
insert_data(*twoD_target, header_offset=2, data_type='dollar_cents')
set_column_widths(*twoD_target)

# borders
table_bottom_border(*twoD_target, header_offset=2)
table_right_border(*twoD_target, header_offset=2)

# title
insert_title(*twoD_target, 'Avg Service Cost by Department')


# FORMATTING CLIENTS BY DEPT & GENDER and CLIENTS BY DEPT & GENDER NEW
# (3 dimensional data with row multiindex)
# Both sheets go through the same sequence of helper calls, so the sequence
# lives in one private function that is called once per sheet.

def _format_row_multiindex_sheet(df, workbook, sheet, title):
    """Run the row-multiindex formatting sequence for one table on `sheet`.

    Calls, in order: header, index (merge cells, then format), data written
    with data_type='numeric', column widths, bottom/right borders, and
    finally the title. Every step that takes an offset uses header_offset=2.
    """
    target = (df, workbook, sheet)

    # header
    last_col_highlight_header(*target, header_offset=2)

    # index
    merge_row_index_cells(*target, header_offset=2)
    format_row_multiindex(*target, header_offset=2)

    # data
    insert_row_multiindex_data(*target, header_offset=2, data_type='numeric')
    set_column_widths(*target)

    # borders
    table_bottom_border(*target, header_offset=2)
    table_right_border(*target, header_offset=2)

    # title
    insert_title(*target, title)


# CLIENTS BY DEPT & GENDER
_format_row_multiindex_sheet(
    threeD_row_df_1,
    ecr_wb,
    threeDrow_sheet1,
    'Unique Clients by Department & Gender',
)

# CLIENTS BY DEPT & GENDER NEW
_format_row_multiindex_sheet(
    threeD_row_df_2,
    ecr_wb,
    threeDrow_sheet2,
    'Unique Clients by Department & Gender (New Gender Classification Method)',
)


# TEST
# Exercises set_row_multiindex_col_dtype with one column per supported data type.

# the (dataframe, workbook, worksheet) triple every helper call below starts with
test_target = (multiindex_test_df, ecr_wb, test_sheet)

# format header
format_header(*test_target, header_offset=2, clean_header=True)

# index
merge_row_index_cells(*test_target, header_offset=2)
format_row_multiindex(*test_target, header_offset=2)

# data
insert_row_multiindex_data(*test_target, header_offset=2)

# Each test column is named '<data type>_var' and must be assigned that same
# data type. 'datetime_var' was previously given 'date' (a copy-paste slip),
# so the 'datetime' format was never exercised on this sheet.
for test_dtype in (
    'numeric',
    'decimal_1',
    'decimal_2',
    'dollar',
    'dollar_cents',
    'percent',
    'percent_1',
    'percent_2',
    'date',
    'date_alt',
    'datetime',
    'datetime_alt',
):
    set_row_multiindex_col_dtype(*test_target, test_dtype + '_var', test_dtype, header_offset=2)

set_column_widths(*test_target, method='all')

# borders
table_bottom_border(*test_target, header_offset=2)
table_right_border(*test_target, header_offset=2)

# title
insert_title(*test_target, 'Set Multiindex Data Type Test')

# save workbook
# close() writes the file to disk and releases the handle. A separate save()
# call beforehand is redundant -- it makes the workbook get closed twice -- and
# ExcelWriter.save() no longer exists in pandas >= 2.0.
writer.close()