# Create invoice attachments
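- Only the code cells in the first two sections (*Select input file* and *Create invoice attachments*) need to be run; the cells under *Debug and fix a case of missing rows* are historical.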

# Select input file

In [None]:
import pandas as pd
from pathlib import Path

# --- Configuration: adjust these values for each invoicing run ---------------
# Name of the facility; printed at the top of every attachment.
PROVIDER = 'Light Microscopy Unit'
# Directory that holds the invoice exported from OpenIRIS.
#INVOICE_DIR = '/work/data/OpenIRIS/LMU-20210113'
INVOICE_DIR = '/work/data/OpenIRIS/BIU_'
# File name of the invoice workbook inside INVOICE_DIR.
INVOICE_NAME = 'Invoice30_fixed.xlsx'

# --- Validate the inputs up front --------------------------------------------
# Fail early, and say which path is wrong, instead of failing later inside pandas.
INVOICE_DIR = Path(INVOICE_DIR)
if not INVOICE_DIR.is_dir():
    raise ValueError('Please check INVOICE_DIR: ' + str(INVOICE_DIR)
                     + ' is not an existing directory.')
INVOICE_FILE = INVOICE_DIR / INVOICE_NAME
# is_file() rather than exists(): a directory with that name must not pass the check.
if not INVOICE_FILE.is_file():
    raise ValueError('Please check INVOICE_FILE: ' + str(INVOICE_FILE)
                     + ' is not an existing file.')

# Output folder for the per-cost-center attachments
# ("erittelyt" is Finnish for "itemisations"); created on first use.
ERITTELYT = INVOICE_DIR / 'erittelyt'
ERITTELYT.mkdir(exist_ok=True)

df = pd.read_excel(INVOICE_FILE)

# Force WBS (cost center) codes to be strings, so that numeric and textual codes
# are compared and grouped consistently further down.
# NOTE(review): astype(str) turns missing codes into the literal string 'nan';
# confirm the invoice has no blank cost center cells.
df['Cost center code'] = df['Cost center code'].astype(str)


In [None]:
# Quick sanity check: (row count, column count) of the loaded invoice.
df.shape

# Create invoice attachments

In [None]:
from copy import copy
from datetime import datetime

from openpyxl import Workbook
from openpyxl.styles import Alignment, Font
from openpyxl.styles.borders import Border, Side
from openpyxl.utils import get_column_letter
from openpyxl.utils.dataframe import dataframe_to_rows


# Names of the invoice columns this notebook reads.
# Grouping keys
COL_GROUP = 'Group'
COL_WBS = 'Cost center code'
# Booking details
COL_START = 'Booking start'
COL_END = 'Booking end'
COL_RESOURCE = 'Resource/Product'
COL_USER = 'User name'
# Pricing
COL_PRICE = 'Price'
COL_QTY = 'Qty'
COL_CHARGE = 'Charge'
COL_PRICE_ITEM = 'Price item'
# Free-text fields
COL_REQUEST_TITLE = 'Request Title'
COL_DISCOUNT = 'Discount'
COL_BOOKING_TITLE = 'Booking title'
COL_BOOKING_COMMENTS = 'Booking comments'
COL_COMMENTS_CHARGE = 'Comments (charge)'
COL_PRODUCT_COMMENTS = 'Product comments'

# Columns written to the attachments, in left-to-right order
# (the position in this list determines the spreadsheet column).
cols = [
    COL_START,
    COL_END,
    COL_RESOURCE,
    COL_USER,
    COL_PRICE,
    COL_QTY,
    COL_CHARGE,
    COL_PRICE_ITEM,
    COL_REQUEST_TITLE,
    COL_BOOKING_TITLE,
    COL_BOOKING_COMMENTS,
    COL_DISCOUNT,
    COL_COMMENTS_CHARGE,
    COL_PRODUCT_COMMENTS,
]

# Order the invoice rows by group, then by cost center code.
# inplace=True mutates the `df` loaded in the first cell.
df.sort_values(by=[COL_GROUP, COL_WBS], inplace=True)
print(df.shape)

# Distinct cost center codes; the loop at the end of this cell writes one attachment per code.
WBSs = df[COL_WBS].unique()

# Billing cycle shown in each attachment: earliest booking start and latest booking end,
# formatted as DD.MM.YYYY.
# NOTE(review): `.strftime` requires that pandas parsed both columns as datetimes - confirm.
begin = df[COL_START].min().strftime('%d.%m.%Y')
end = df[COL_END].max().strftime('%d.%m.%Y')



def get_col_index(col):
    """Return the spreadsheet column letter(s) for the column named ``col``.

    The letter follows from the position of ``col`` in the notebook-level
    ``cols`` list: the first entry maps to 'A', the second to 'B', and so on.
    openpyxl's ``get_column_letter`` replaces the former fixed alphabet string,
    so the lookup keeps working when ``cols`` grows (including past 'Z').

    Raises:
        ValueError: if ``col`` is not listed in ``cols``.
    """
    # list.index() is 0-based, spreadsheet columns are 1-based.
    return get_column_letter(cols.index(col) + 1)
    

def get_col_num_index(col):
    """Return the 1-based spreadsheet column number for the column named ``col``.

    The number is the position of ``col`` in the notebook-level ``cols`` list
    plus one.  It is computed directly from the list position rather than by
    converting a column letter back with ``ord()``, which only works for
    single-letter columns.

    Raises:
        ValueError: if ``col`` is not listed in ``cols``.
    """
    return cols.index(col) + 1
    
def apply_formats(ws):
    """Apply the attachment layout to worksheet ``ws``.

    Sets column widths, alignment, number formats and the base font.  Columns
    are located by name through ``get_col_index`` / ``get_col_num_index``.
    Every step works on whole columns or on the whole used range (row 1 to
    ``ws.max_row``), so rows above the data table are formatted as well.
    """
    # Column widths, listed in the order they are applied.
    column_widths = (
        (COL_START, 16),
        (COL_END, 16),
        (COL_RESOURCE, 26),
        (COL_USER, 24),
        (COL_PRICE, 8),
        (COL_QTY, 8),
        (COL_CHARGE, 10),
        (COL_PRICE_ITEM, 10),
        (COL_REQUEST_TITLE, 55),
        (COL_DISCOUNT, 8),
        (COL_BOOKING_TITLE, 40),
        (COL_BOOKING_COMMENTS, 40),
        (COL_COMMENTS_CHARGE, 40),
        (COL_PRODUCT_COMMENTS, 40),
    )
    for name, width in column_widths:
        ws.column_dimensions[get_col_index(name)].width = width

    # Free-text columns: let long text wrap inside the cell.
    wrapped_columns = (COL_BOOKING_TITLE, COL_BOOKING_COMMENTS,
                       COL_COMMENTS_CHARGE, COL_PRODUCT_COMMENTS)
    for name in wrapped_columns:
        for cell in ws[get_col_index(name)]:
            cell.alignment = Alignment(wrapText = True)

    # Booking start / end: centered.
    for name in (COL_START, COL_END):
        for cell in ws[get_col_index(name)]:
            cell.alignment = Alignment(horizontal='center')

    # Price, quantity and charge: fixed two decimals (charge with a euro sign), centered.
    numeric_formats = (
        (COL_PRICE, '##0.00'),
        (COL_QTY, '##0.00'),
        (COL_CHARGE, '###0.00€'),
    )
    for name, number_format in numeric_formats:
        for cell in ws[get_col_index(name)]:
            cell.number_format = number_format
            cell.alignment = Alignment(horizontal='center')

    # Base font for every cell of the used range.  The extent is read only now,
    # after the column passes above have been made.
    last_row = ws.max_row
    last_col = ws.max_column
    for row_no in range(1, last_row + 1):
        for col_no in range(1, last_col + 1):
            ws.cell(row=row_no, column=col_no).font = Font(name='Calibri', size='10')

    # Date-time display format for the booking start / end columns, from row 1 down.
    for name in (COL_START, COL_END):
        col_no = get_col_num_index(name)
        for row_no in range(1, last_row + 1):
            ws.cell(row=row_no, column=col_no).number_format = 'YYYY-MM-DD HH:mm'

    
def prepare_sheet(ws, tmp):
    """Fill worksheet ``ws`` with the invoice attachment for one cost center.

    Layout: a summary block in columns C-D (rows 1-13), the booking table with
    the columns listed in ``cols`` (header on row 14), and the total charge in
    the row below the table, under the charge column.  ``apply_formats`` is
    applied afterwards and the title, total and header fonts are set on top.

    Args:
        ws: openpyxl worksheet to fill.  It should be empty: the table is
            appended below the last used row and is expected to start on row 14.
        tmp: non-empty DataFrame holding the rows of a single cost center.  It
            must contain every column in ``cols`` plus 'Group',
            'Group head(s) text', 'Organization', 'Cost center code',
            'Remit code' and 'Price type'.

    The payer details are taken from the first row of ``tmp``.  The sheet title
    is built from that row's group and cost center code, so the function does
    not depend on any notebook-level loop variable.
    """
    # Positional access (.iloc[0]) works whatever index `tmp` carries;
    # label access ([0]) only works when the index has been reset.
    group = tmp[COL_GROUP].iloc[0]
    email = tmp['Group head(s) text'].iloc[0]
    billing_address = ''
    organization = tmp['Organization'].iloc[0]
    cost_center = tmp[COL_WBS].iloc[0]
    remit_code = tmp['Remit code'].iloc[0]
    price_type = tmp['Price type'].iloc[0]
    total_sum = tmp[COL_CHARGE].sum()

    # Title from the data passed in (previously read the global loop variable `wbs`).
    ws.title = group + ' ' + str(cost_center)

    # summary
    ws['C1'] = PROVIDER
    ws['C3'] = 'Billing cycle:'
    ws['D3'] = str(begin) + '-' + str(end)
    ws['C4'] = 'Group/Payer:'
    ws['D4'] = group
    ws['C5'] = 'Email address:'
    ws['D5'] = email
    ws['C6'] = 'Billing address:'
    ws['D6'] = billing_address
    ws['C7'] = 'Organization:'
    ws['D7'] = organization
    ws['C8'] = 'Cost center:'
    ws['D8'] = cost_center
    ws['C9'] = 'Remit code:'
    ws['D9'] = remit_code
    ws['C10'] = 'Price type:'
    ws['D10'] = price_type
    ws['C11'] = 'Total sum:'
    ws['D11'] = total_sum
    ws['D11'].number_format = '##,##0.00€'
    # Two spacer rows: writing to them makes row 13 the last used row,
    # so the first appended row (the table header) lands on row 14.
    ws['D12'] = ''
    ws['D13'] = ''

    # summary (above) includes a couple of empty lines, the row after that is the table header
    HEADER_ROW = 14

    # pick the specified columns from IRIS invoice
    tmp = tmp[cols]

    # append dataframe rows to worksheet (header first)
    for r in dataframe_to_rows(tmp, index=False, header= True):
        ws.append(r)

    # rename charge comment column
    ws.cell(row=HEADER_ROW, column=get_col_num_index(COL_COMMENTS_CHARGE)).value = 'Discount comments'

    # add total to end of charge column
    ws.cell(column=get_col_num_index(COL_CHARGE),row=ws.max_row+1).value = total_sum

    # Generic formatting first; the specific fonts below then override it.
    apply_formats(ws)

    # bold unit name
    ws['C1'].font = Font(name='Calibri', size='14', bold=True)

    # total sum
    ws['C11'].font = Font(name='Calibri', size='14', bold=True)
    ws['D11'].font = Font(name='Calibri', size='14', bold=True)

    # header row: bold, centered, with a thin line above and below
    border = Border(top=Side(style='thin'), bottom=Side(style='thin'))
    for cell in ws[HEADER_ROW]:
        cell.font = Font(name='Calibri', bold=True)
        cell.border = border
        cell.alignment = Alignment(horizontal='center')
        
        
# --- Summary workbook: every invoice row, reduced to the attachment columns ---
wb = Workbook()
summary_ws = wb.active
for row in dataframe_to_rows(df[cols], index=False, header= True):
    summary_ws.append(row)

apply_formats(summary_ws)
summary_path = INVOICE_DIR / (INVOICE_FILE.stem + '_new_summary.xlsx')
wb.save(summary_path)


# --- One separate workbook per cost center (WBS) code ---
for wbs in WBSs:
    wbs_rows = df[df[COL_WBS] == wbs].reset_index()

    # Nothing to write when no row carries this code.
    if len(wbs_rows[COL_GROUP]) == 0:
        print('no group found for WBS: ' + str(wbs))
        continue

    attachment_wb = Workbook()
    attachment_ws = attachment_wb.active
    prepare_sheet(attachment_ws, wbs_rows)
    # The file is named after the sheet title that prepare_sheet assigned.
    attachment_wb.save(ERITTELYT / (attachment_ws.title + '.xlsx'))

    


# Debug and fix a case of missing rows
- This section is a historical remnant (as of 2021-09-21); please ignore any errors in the cells below.
- At some point, WBS codes could be either `int` or `str`.
- As a result, one of the two groups (the `int` codes or the `str` codes) was dropped from an invoice.
- `diff_erittelyt.ipynb` was used to add the missing rows to the next invoice.
- The cells below were used to format the fixed attachments.

In [None]:
# Split the cost center codes by Python type.
WBS_int = [code for code in WBSs if isinstance(code, int)]
WBS_str = [code for code in WBSs if isinstance(code, str)]

# Each figure is reported three times: for all codes, the int codes and the str codes.
code_sets = (WBSs, WBS_int, WBS_str)

# number of codes
for code_set in code_sets:
    print(len(code_set))

# shape of the matching invoice rows
for code_set in code_sets:
    print(df[df[COL_WBS].isin(code_set)].shape)

# total quantity of the matching invoice rows
for code_set in code_sets:
    print(df[df[COL_WBS].isin(code_set)][COL_QTY].sum())


In [None]:
# Display the array of unique cost center (WBS) codes.
WBSs


In [None]:
# Re-create every attachment found in one folder as a formatted copy in a
# sibling folder whose name ends in '_formatted'.
ERITTELYT_DIFF_ADDED = Path('/work/data/OpenIRIS/LMU-20210225/erittelyt_diff_added')
ERITTELYT_DIFF_ADDED_FORMATTED = ERITTELYT_DIFF_ADDED.with_name(
    ERITTELYT_DIFF_ADDED.name + '_formatted')
ERITTELYT_DIFF_ADDED_FORMATTED.mkdir(exist_ok=True)

# NOTE: the loop rebinds the notebook-level names `df`, `wb` and `ws`.
for f in ERITTELYT_DIFF_ADDED.glob("*.xlsx"):
    print(str(f))

    df = pd.read_excel(f)
    wb = Workbook()
    ws = wb.active
    prepare_sheet(ws, df)
    # Same file name as the input, written to the formatted folder.
    formatted_path = ERITTELYT_DIFF_ADDED_FORMATTED / f.name
    wb.save(formatted_path)


In [None]:
# Inspect one attachment: `f` is whichever file the glob loop in the previous cell visited last.
# The bare `f` line displays nothing, because a cell only echoes its final expression.
f
df = pd.read_excel(f)
df.head()

In [None]:
# Print every row of `df` (header row first) in the form produced by dataframe_to_rows.
for r in dataframe_to_rows(df, index=False, header= True):
    print(r)