In [1]:
# import packages and set options
from formatting_functions_open_source import *
from utility_functions import *
import numpy as np
import pandas as pd
import xlsxwriter
import pandas.io.formats.excel

# this allows custom formatting of header
pandas.io.formats.excel.ExcelFormatter.header_style = None


In [2]:
# load in data
twoD_df = pd.read_csv('Data/avg_medical_svc_cost.csv')

threeD_row_df_1 = pd.read_csv('Data/clients_by_gender.csv')

threeD_row_df_2 = pd.read_csv('Data/clients_by_gender_2.csv')

In [3]:
# set indices
twoD_df.set_index('Department', inplace=True)

threeD_row_df_1.set_index(['Department', 'Gender'], inplace=True)

threeD_row_df_2.set_index(['Department','Gender','Trans'], inplace=True)

In [17]:
def format_row_multiindex_test(df, wb, sheet, header_offset=0, column_offset=0):

    # This function will apply formatting to your index to bold it and give a right border
    ## Meant only for dataframes with row mulitiindex and and number of columns levels
    ### NOTE: will break if not all index categories are present in each index!
    ### NOTE: will also break if row indices are not arranged in least categories to most categories order (which is pandas standard)
    ### if you are not importing with to_excel(), the merge_row_index_cells() function must be applied first

    # ARGUMENTS
    
    ## MANDATORY:
    ### df is your data from your dataframe
    ### wb is your workbook
    ### sheet is your worksheet

    ## OPTIONAL:
    ### header_offset is the number of rows to skip if you want blank rows on top for title etc. defaults to 0
    ### column_offset is the number of columns to shift to the right if you do not want your table to start on column A. defaults to 0

    #getting count of row_indices
    num_row_indices = len(df.index.names)

    # getting rows per category in first index
    rows_per_major_index = int(len(df)/len(df.index.unique(0)))

    # exit function with error if it is not a multiindex
    if num_row_indices == 1:
        raise Exception("Function is not meant for single row index datasets.")
    else:
        pass

    # getting the count of categories per index:
    
    # create an empty list to hold the values:
    cat_counts = []

    # iterating over our row indices:
    for col_num in range(num_row_indices):
        # get count of unique values
        cat_count = len(df.index.unique(col_num))
        # append them to list
        cat_counts.append(cat_count)
        
    # create a empty list to hold the values
    cat_row_counts = []

    # iterating through the number of row indices we have:
    for col_num in range(num_row_indices):
        # get the category count of the index
        cat_count = len(df.index.unique(col_num))
        # if it is the major index[0]:
        if col_num == 0:
            # rows_per_cat is the rows_per_major_index
            rows_per_cat = rows_per_major_index
        else:
            # else rows_per_cat of last index divided by the category count of current index
            rows_per_cat = int(cat_row_counts[col_num-1]/cat_counts[col_num])
        # append rows_per_cat value to list
        cat_row_counts.append(rows_per_cat)

    cat_total_rows = []

    for col_num in range(num_row_indices):
        if col_num == 0:
            cat_rows = 1
        else:
            cat_rows = cat_counts[col_num] * cat_total_rows[col_num-1]
        cat_total_rows.append(cat_rows)


    # this will try to get the count of column levels you have if it's a multiindex but if it fails since it's only one level
    try:
        num_col_indices = len(df.columns.levshape)
    # then it will assign a value of 1 for column_indices
    except:
        num_col_indices = 1  

     
    # creating formats
    index_format = wb.add_format({'bold':True,'valign':'vcenter'})
    index_bottom_row_format = wb.add_format({'bold':True,'valign':'vcenter','bottom':True})
    last_index_format = wb.add_format({'bold':True,'valign':'vcenter','right':True})
    last_index_bottom_format = wb.add_format({'bold':True,'valign':'vcenter','bottom':True,'right':True})
    
    # iterating over our indices:
    for col_num in range(num_row_indices):
        # if it is the first (major) index:
        if col_num == 0:
            # iterating over the values in the index:
            for row_num, value in enumerate(df.index.get_level_values(col_num)):
                # insert index value and apply bottom border index format to all cells
                sheet.write(row_num + num_col_indices + header_offset, col_num + column_offset, value, index_bottom_row_format)
                #skip
        # if it is the last (one row per category) index:
        elif col_num == max(range(num_row_indices)):
            # iterating over the values in the index:
            for row_num, value in enumerate(df.index.get_level_values(col_num)):
                # if it is the last row before a new major index category:
                if (row_num+1)%rows_per_major_index==0:
                    # apply last index bottom format
                    sheet.write(row_num + num_col_indices + header_offset, col_num + column_offset, value, last_index_bottom_format)
                else:
                    # apply last index format
                   sheet.write(row_num + num_col_indices + header_offset, col_num + column_offset, value, last_index_format) 
        else:        
            # for all other indices iterate over index values:
            for row_num, value in enumerate(df.index.get_level_values(col_num)):
                # if it is the last row in the index category:
                if (row_num+1)%(cat_total_rows[col_num]*cat_counts[col_num+1])==0:
                    # insert index value and apply bottom border index formatting
                    sheet.write(row_num + num_col_indices + header_offset, col_num + column_offset, value, index_bottom_row_format)
                    print(row_num+1, cat_total_rows[col_num], row_num + num_col_indices + header_offset, col_num + column_offset, value)
                else:
                    # else insert index value and apply no border index formating
                     sheet.write(row_num + num_col_indices + header_offset, col_num + column_offset, value, index_format)



    # set index column widths 

    # create empty list to hold max_index_lengths
    max_index_lengths = []

    # iterating over row indices:
    for col_num in range(num_row_indices):
        # store the length of all index values in a list
        index_values = [len(value) for i, value in enumerate(df.index.get_level_values(col_num))]
        # get the max width of the longest value or title, whichever is longer
        ## + 1 for 'wiggle room'
        max_index_length = max(max(index_values), len(df.index.names[col_num])) + 1
        # add that to the max_index_lengths list
        max_index_lengths.append(max_index_length)

    # iterating over row indices again:
    for col_num in range(num_row_indices):
        # set width to matching max index length
        sheet.set_column(col_num + column_offset, col_num + column_offset, max_index_lengths[col_num])

#format_row_multiindex_test(threeD_row_df_2, 'wb', 'sheet', header_offset=2, column_offset=0)

In [18]:
# create excel report

# nan_inf_to_errors will prevent nans from breaking report creation
## but any nans in the report are a bug that will need to be fixed

writer = pd.ExcelWriter('Reports/Example Clinical Report.xlsx', engine='xlsxwriter', options={'nan_inf_to_errors':True})

# exporting analyses to excel sheets
#twoD_df.to_excel(writer, sheet_name='Avg Svc Cost by Dept')

# creating workbook with sheets
ecr_wb = writer.book

twoD_sheet = ecr_wb.add_worksheet('Avg Svc Cost by Dept')

threeDrow_sheet1 = ecr_wb.add_worksheet('Clients by Dept & Gender')

threeDrow_sheet2 = ecr_wb.add_worksheet('Clients by Dept & Gender NEW')


# FORMATTING AVG SVC COST BY DEPT (2 dimensional data)

# header
last_col_highlight_header(twoD_df, ecr_wb, twoD_sheet, header_offset=2)

# index
format_index(twoD_df, ecr_wb, twoD_sheet, header_offset=2)

# data
insert_data(twoD_df, ecr_wb, twoD_sheet, header_offset=2, data_type='dollar_cents')

set_column_widths(twoD_df, ecr_wb, twoD_sheet)

# borders
table_bottom_border(twoD_df, ecr_wb, twoD_sheet, header_offset=2)

table_right_border(twoD_df, ecr_wb, twoD_sheet, header_offset=2)

# title
insert_title(twoD_df, ecr_wb, twoD_sheet, 'Avg Service Cost by Department')


# FORMATTING CLIENTS BY DEPT & GENDER (3 dimensional data with row mulitiindex)

# header
last_col_highlight_header(threeD_row_df_1, ecr_wb, threeDrow_sheet1, header_offset=2)

# index
merge_row_index_cells(threeD_row_df_1, ecr_wb, threeDrow_sheet1, header_offset=2)

format_row_multiindex_test(threeD_row_df_1, ecr_wb, threeDrow_sheet1, header_offset=2)

# data
insert_row_multiindex_data(threeD_row_df_1, ecr_wb, threeDrow_sheet1, header_offset=2, data_type='numeric')

set_column_widths(threeD_row_df_1, ecr_wb, threeDrow_sheet1)

# borders
table_bottom_border(threeD_row_df_1, ecr_wb, threeDrow_sheet1, header_offset=2)

table_right_border(threeD_row_df_1, ecr_wb, threeDrow_sheet1, header_offset=2)

# title
insert_title(threeD_row_df_1, ecr_wb, threeDrow_sheet1, 'Unique Clients by Department & Gender')


# FORMATTING CLIENTS BY DEPT & GENDER NEW (3 dimensional data with row mulitiindex)

# header
last_col_highlight_header(threeD_row_df_2, ecr_wb, threeDrow_sheet2, header_offset=2)

# index
merge_row_index_cells(threeD_row_df_2, ecr_wb, threeDrow_sheet2, header_offset=2)

format_row_multiindex_test(threeD_row_df_2, ecr_wb, threeDrow_sheet2, header_offset=2)

# data
insert_row_multiindex_data(threeD_row_df_2, ecr_wb, threeDrow_sheet2, header_offset=2, data_type='numeric')

set_column_widths(threeD_row_df_2, ecr_wb, threeDrow_sheet2)

# borders
table_bottom_border(threeD_row_df_2, ecr_wb, threeDrow_sheet2, header_offset=2)

table_right_border(threeD_row_df_2, ecr_wb, threeDrow_sheet2, header_offset=2)

# title
insert_title(threeD_row_df_2, ecr_wb, threeDrow_sheet2, 'Unique Clients by Department & Gender (New Gender Classification Method)')

# save workbook
writer.save()
writer.close()

8 4 10 1 Unknown
16 4 18 1 Unknown
24 4 26 1 Unknown
32 4 34 1 Unknown
40 4 42 1 Unknown


  writer = pd.ExcelWriter('Reports/Example Clinical Report.xlsx', engine='xlsxwriter', options={'nan_inf_to_errors':True})
