# Generate Documents - Svenska Kyrkan i Wien

In [1]:
# import sys
# !python -m pip install openpyxl

### Preliminaries

In [2]:
import pandas as pd
import numpy as np
from pathlib import Path

In [3]:
BASE_DIR = Path.cwd()
print(BASE_DIR)

C:\Users\atw10wp4\Jupyter\Enterprise\SvenskaKyrkan


In [4]:
inputDirectory = 'input_pask'
outputDirectory = 'output_pask'

# Inputs
fileNameProducts = 'wc-product-export.csv'
fileNameOrders = 'orders.xlsx'
fileNameTimeslots = 'tidsbokning.csv'
fileNameProductPlacement = 'product_list.xlsx'

# Outputs
fileNameMasterList = 'masterlista.xlsx'

fileNameAllData = 'allData.xlsx'

### Read Data

In [5]:
dfProducts = pd.read_csv(BASE_DIR / inputDirectory / fileNameProducts)

In [6]:
if 'Artikelnummer' in dfProducts.columns:
    dfProducts.rename(columns={'Artikelnummer':'SKU'}, inplace=True)
    
if 'Typ' in dfProducts.columns:
    dfProducts.rename(columns={'Typ':'Type'}, inplace=True)
    
if 'Regulärer Preis' in dfProducts.columns:
    dfProducts.rename(columns={'Regulärer Preis':'Regular price'}, inplace=True)

In [7]:
dfOrders = pd.read_excel(str(BASE_DIR / inputDirectory / fileNameOrders))

In [8]:
if 'E-Mail-Adresse' in dfOrders.columns:
    dfOrders.rename(columns={'E-Mail-Adresse':'Email (Billing)'}, inplace=True)
    
if 'Telefon' in dfOrders.columns:
    dfOrders.rename(columns={'Telefon':'Telephone'}, inplace=True)
    
if 'Phone (Billing)' in dfOrders.columns:
    dfOrders.rename(columns={'Phone (Billing)':'Telephone'}, inplace=True)
    
if 'Info_OK' in dfOrders.columns:
    dfOrders.rename(columns={'Info_OK':'Info'}, inplace=True)

In [9]:
dfTimeslots = pd.read_csv(BASE_DIR / inputDirectory / fileNameTimeslots, encoding='latin_1', 
                         parse_dates=['Time', 'app_date_1', 'app_datetime_1'], index_col=False)

In [10]:
dfProductPlacement = pd.read_excel(str(BASE_DIR / inputDirectory / fileNameProductPlacement), sheet_name='Produktlista')

In [11]:
# dfOrders[['Full Name (Billing)', 'Email (Billing)', 'Telephone', 'Order Total Amount','Order Date', 'Customer Note' ]].drop_duplicates()

### Merge and Select Data

In [12]:
# Merge orders with timeslots
dfAllData = pd.merge(dfOrders, dfTimeslots, left_on='Email (Billing)', right_on='Email')

In [13]:
# Merge with products
dfAllData = pd.merge(dfAllData, dfProducts, on='SKU')

In [14]:
# Merge with product placements
dfAllData = pd.merge(dfAllData, dfProductPlacement, left_on='SKU', right_on='Artikelnummer', suffixes=[None, '_y'])

In [15]:
# Fields with data
dfAllData = dfAllData[['Order Number', 'Full Name (Billing)', 'Email (Billing)',
                       'Order Total Amount', 'Order Date', 'Order Status', 'Customer Note',
                       'Telephone', 'Quantity', 'Product Name', 'Item Cost (inc. tax)',
                       'Item #', 'Info', 'Time', 'app_date_1', 'app_slot_1',
                       'Type', 'Artikelnummer', 'Kurzbeschreibung',
                       'Beschreibung', 'Lager', 'Regular price', 'Kategorien',
                       'Placering/1', 'Placering/2', 'PlockOmr', 'PlockHylla']]

In [16]:
# Standardized column namings
dfAllData.columns = ['OrderNumber', 'FullName', 'Email',
                     'OrderTotalAmount', 'OrderDate', 'OrderStatus', 'CustomerNote',
                     'Telephone', 'Quantity', 'ProductName', 'ItemCost',
                     'ItemID', 'Info', 'OrderTime', 'PickUpDate', 'PickUpTime',
                     'Type', 'ArticleNumber', 'ProductNameDE',
                     'Description', 'Warehouse', 'RegularPrice', 'Categories',
                     'Placement_1', 'Placement_2', 'PickUpArea', 'PickUpShelf']

In [17]:
# Selection of relevant data and more logical sorting of columns
dfAllData = dfAllData[['OrderNumber', 'OrderDate', 'OrderTime', 
                       'FullName', 'Email', 'Telephone', 'CustomerNote',
                       'ItemID', 'Info', 'Type', 'ArticleNumber', 'ProductName', 'ProductNameDE',
                       'Quantity',  'ItemCost', 'OrderTotalAmount',  'OrderStatus',
                       'RegularPrice', 'Categories',
                       'PickUpDate', 'PickUpTime',
                       'Placement_1', 'Placement_2', 'PickUpArea', 'PickUpShelf']]

In [18]:
dfAllData.to_excel(BASE_DIR / outputDirectory / fileNameAllData)

### Diffs

In [19]:
orderEmails = dfOrders['Email (Billing)'].unique()

In [20]:
timeSlotEmails = dfTimeslots['Email'].unique()

##### Order - har ej bokat tid

In [21]:
np.setdiff1d(orderEmails, timeSlotEmails)

array(['anki.windauer@gmx.at', 'elisabeth.ebermann@gmail.com',
       'k.hieke@telia.com'], dtype=object)

### Plocklista

In [22]:
from openpyxl import Workbook, load_workbook
import shutil
from copy import copy

In [23]:
def getFirstCellForNamedRange(workbook, sheet, range_name):
    cellNameList = []
    firstCellTuple = ()
    if range_name in workbook.defined_names:
        cellNameList = [(s, c) for s, c in workbook.defined_names[range_name].destinations if (s == sheet.title)]
    if cellNameList:
        firstCellTuple = cellNameList[0]
    return firstCellTuple[1]

In [24]:
def fillCellForAttribute(rangeName, workbook, template, sheet, row_offset, value):
    templateCellNumber = getFirstCellForNamedRange(workbook, template, rangeName)
    targetCellNumber = template[templateCellNumber].offset(row=row_offset).coordinate
    sheet[targetCellNumber] = value

In [25]:
def getMergedCellsOrNone(sheet, row, column):
    cell = sheet.cell(row, column)
    for mergedCell in sheet.merged_cells.ranges:
        if (cell.coordinate in mergedCell):
            return mergedCell
    return None

In [26]:
def insertRowAndCopyFormat(template, sheet, source_row, target_row):
    sheet.insert_rows(idx=target_row)
    sheet.row_dimensions[target_row].height = template.row_dimensions[source_row].height
    for col in range(1, 11):
        sheet.cell(row=target_row, column=col)._style = template.cell(row=source_row, column=col)._style
        mergedCells = getMergedCellsOrNone(template, source_row, col)
        if mergedCells is not None and col == mergedCells.bounds[0]:
            mergedCellsCopy = copy(mergedCells)
            if target_row > source_row:
                mergedCellsCopy.shift(row_shift=target_row-source_row)
                sheet.merge_cells(range_string=(mergedCellsCopy.coord))

In [27]:
fileNamePackList = 'pack_list.xlsx'
fullPathPackListTemplate = BASE_DIR / 'templates' / 'pack_list_template.xlsx'

In [28]:
Path.unlink(BASE_DIR / outputDirectory / fileNamePackList, missing_ok=True)
shutil.copyfile(fullPathPackListTemplate, BASE_DIR / outputDirectory / fileNamePackList)

WindowsPath('C:/Users/atw10wp4/Jupyter/Enterprise/SvenskaKyrkan/output_pask/pack_list.xlsx')

In [29]:
workbook = load_workbook(filename=BASE_DIR / outputDirectory / fileNamePackList)

In [30]:
template_sheet = workbook['template']

In [31]:
dfGroupedByOrderNumber = dfAllData.groupby('OrderNumber')

In [32]:
articleCell = getFirstCellForNamedRange(workbook, template_sheet, 'ArticleNumber')
headerRow = template_sheet[articleCell].row

for order, group in dfGroupedByOrderNumber:
    new_sheet = workbook.copy_worksheet(template_sheet)
    new_sheet.title = 'PO_' + str(order)
    for rangeName in ['FullName', 'OrderTotalAmount', 'CustomerNote', 'OrderNumber', 'Telephone', 'PickUpDate', 'PickUpTime']:
        fieldValue = group.iloc[0][rangeName]
        fillCellForAttribute(rangeName, workbook, template_sheet, new_sheet, 0, fieldValue)
    new_sheet.delete_rows(idx=headerRow, amount=2)
    dfGroupedByPickUpArea = group.groupby('PickUpArea')
    i = 0
    d = 0
    for area, subGroup in dfGroupedByPickUpArea:
        for index, row in subGroup.iterrows():
            # index is the (arbitrary) row number and cannot be used instead of i!
            insertRowAndCopyFormat(template_sheet, new_sheet, headerRow, headerRow+i)
            for rangeName in ['ArticleNumber', 'Quantity', 'ProductName', 'ItemCost', 'PickUpArea']:
                fieldValue = row[rangeName]
                fillCellForAttribute(rangeName, workbook, template_sheet, new_sheet, i, fieldValue)
            i+=1
            fillCellForAttribute('RowNumber', workbook, template_sheet, new_sheet, i-1, i-d)
        insertRowAndCopyFormat(template_sheet, new_sheet, headerRow+1, headerRow+i)
        i+=1
        d+=1
    for j in range(0,10):
        offset = headerRow+i
        new_sheet.row_dimensions[offset+j].height = template_sheet.row_dimensions[headerRow+2+j].height
    fillCellForAttribute('NumberOfRows', workbook, template_sheet, new_sheet, 0, 'Ant.Orderrader ' + str(i-d))

In [33]:
workbook.remove(template_sheet)

In [34]:
workbook.save(filename=BASE_DIR / outputDirectory / fileNamePackList)

### Hämtlista

In [35]:
from datetime import datetime

In [36]:
weekDayDict = {
0: 'Mån',
1: 'Tis',
2: 'Ons',
3: 'Tor',
4: 'Fre',
5: 'Lör',
6: 'Sön'}

In [37]:
fileNamePickUpList = 'pick_up_list.xlsx'
fullPathPickUpTemplate = BASE_DIR / 'templates' / 'pick_up_template.xlsx'

In [38]:
Path.unlink(BASE_DIR / outputDirectory / fileNamePickUpList, missing_ok=True)
shutil.copyfile(fullPathPickUpTemplate, BASE_DIR / outputDirectory / fileNamePickUpList)

WindowsPath('C:/Users/atw10wp4/Jupyter/Enterprise/SvenskaKyrkan/output_pask/pick_up_list.xlsx')

In [39]:
workbook = load_workbook(filename=BASE_DIR / outputDirectory / fileNamePickUpList)

In [40]:
template_sheet = workbook['template']

In [41]:
dfGroupedByPickUpDate = dfAllData.drop_duplicates(subset=['OrderNumber']).groupby('PickUpDate')

In [42]:
pickUpTimeCell = getFirstCellForNamedRange(workbook, template_sheet, 'PickUpTime')
orderNumberCol = template_sheet[pickUpTimeCell].column

for ts, group in dfGroupedByPickUpDate:
    new_sheet = workbook.copy_worksheet(template_sheet)
    new_sheet.title = 'Hämtas ' + str(ts.date())
    fillCellForAttribute('Weekday', workbook, template_sheet, new_sheet, 0, weekDayDict[ts.weekday()])
    fillCellForAttribute('PickUpDate', workbook, template_sheet, new_sheet, 0, ts.date())
    fillCellForAttribute('CreationDateTime', workbook, template_sheet, new_sheet, 0, str(datetime.now()))
    headerRow = template_sheet[pickUpTimeCell].row
    new_sheet.delete_rows(idx=headerRow, amount=2)
    dfGroupedByPickUpTime = group.groupby('PickUpTime')
    i = 0
    d = 0
    for time, subGroup in dfGroupedByPickUpTime:
        insertRowAndCopyFormat(template_sheet, new_sheet, headerRow, headerRow+i)
        fillCellForAttribute('PickUpTime', workbook, template_sheet, new_sheet, i, time)
        i+=1
        d+=1
        for index, row in subGroup.iterrows():
            insertRowAndCopyFormat(template_sheet, new_sheet, headerRow+1, headerRow+i)
            for rangeName in ['OrderNumber', 'FullName', 'Telephone', 'OrderTotalAmount', 'CustomerNote']:
                fieldValue = row[rangeName]
                fillCellForAttribute(rangeName, workbook, template_sheet, new_sheet, i-1, fieldValue)
            i+=1
            fillCellForAttribute('RowNumber', workbook, template_sheet, new_sheet, i-2, i-d)

In [43]:
workbook.remove(template_sheet)

In [44]:
workbook.save(filename=BASE_DIR / outputDirectory / fileNamePickUpList)

### Masterlista

In [45]:
dfMasterList = dfAllData[['OrderNumber', 'FullName', 'Email', 'OrderTotalAmount', 'OrderDate', 
              'OrderStatus', 'Telephone', 'CustomerNote', 'InfoOK', 'PickUpDate', 'PickUpTime']]

KeyError: "['InfoOK'] not in index"

In [None]:
dfMasterList.columns = ['Order Number', 'Full Name (Billing)', 'Email (Billing)', 'Order Total Amount', 'Order Date', 
              'Order Status', 'Telephone', 'Customer Note', 'Info ok', 'Hämtningsdag', 'Hämtning Tid']

In [None]:
dfMasterList = dfMasterList.drop_duplicates()

In [None]:
dfMasterList.to_excel(BASE_DIR / outputDirectory / fileNameMasterList)