# Load libraries

In [221]:
import sys
sys.path.insert(0,'/Users/jarad')

import pandas as pd
import numpy as np
from db2 import *

import datetime as dt
import calendar

import glob

import matplotlib.pyplot as plt
%matplotlib inline

pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)
pd.options.display.precision = 5

# Load formatter

In [2]:
def format_(df, fmt):
    # make a copy so u don't alter the origial
    dfcpy = df.copy()
    
    # format headers
    col_list = []
    for col in dfcpy.columns:
        new_col = col.split('_')
        new_col = ' '.join(new_col)
        new_col = new_col.title()
        col_list.append(new_col)
    
    dfcpy.columns = col_list
    
    # format columns
    for ix, f in enumerate(fmt):
        if f == 0:
            pass
        elif f == 1:
            dfcpy.iloc[:, ix] = ['${:,.0f}'.format(x) for x in dfcpy.iloc[:, ix]]            
        elif f == 2:               
            dfcpy.iloc[:, ix] = [ '{:,.2f}%'.format(x * 100) for x in dfcpy.iloc[:, ix]]           
            
    return dfcpy

# add to pandas module
pd.DataFrame.format_ = format_

# Set constraints

In [417]:
date_start = '2017-01-01'
date_end = '2017-08-31'

# Get db sales data

In [4]:
sales = pd.read_sql(
'''
SELECT
DATE(o.date_purchased) AS date,
DATE_FORMAT(o.date_purchased, '%Y-%m') AS 'year and month',
SUM((op.products_quantity - op.products_quantity_free) * op.products_price) AS revenue
FROM orders o
JOIN orders_products op ON o.orders_id = op.orders_id
WHERE o.orders_status != 9
AND o.orders_status != 10
AND o.payment_method != 'Replacement Order'
AND DATE(o.date_purchased) BETWEEN ' '''+ date_start +''' ' AND ' '''+ date_end +''' '
GROUP BY DATE_FORMAT(o.date_purchased, '%Y-%m') 
''', db)

sales['date'] = pd.to_datetime(sales['date'])

In [5]:
sales.format_([0,0,1])

Unnamed: 0,Date,Year And Month,Revenue
0,2017-01-01,2017-01,"$3,676,805"
1,2017-02-01,2017-02,"$3,197,529"
2,2017-03-01,2017-03,"$3,997,553"
3,2017-04-01,2017-04,"$3,158,621"
4,2017-05-01,2017-05,"$3,605,169"
5,2017-06-01,2017-06,"$3,571,425"
6,2017-07-01,2017-07,"$3,117,731"
7,2017-08-01,2017-08,"$3,438,147"


# Get and clean QuickBooks data

In [6]:
# get all CSVs
path = r'/Users/jarad/Desktop/Desktop/The Five Pillars Project/QB CSVs/'
all_files = glob.glob(path + '/*.csv')
frame = pd.DataFrame()
list_ = []

for file_ in all_files:
    df = pd.read_csv(file_)
    list_.append(df)
    
qb_main = pd.concat(list_)   

# make a copy
qb = qb_main.copy()

# drop columns of all nulls
qb.dropna(how = 'all', axis = 1, inplace = True)

# distinguish between income and expenses
qb.rename(columns = {'Unnamed: 4':'main cat',
                    'Unnamed: 5':'sub cat',
                    'Unnamed: 3':'income/expense'}, inplace = True)

# fill out income/expense
qb['income/expense'].fillna(method = 'ffill',inplace = True)

# drop "totals" rows
qb['main cat'].fillna('', inplace = True)
qb = qb[~qb['main cat'].str.contains('Total')]
qb['main cat'].replace('', np.nan, inplace = True)
qb['main cat'].fillna(method = 'ffill', inplace = True)

qb['sub cat'].fillna('', inplace = True)
qb = qb[~qb['sub cat'].str.contains('Total')]
qb['sub cat'].replace('', np.nan, inplace = True)
qb['sub cat'].fillna(method = 'ffill', inplace = True)
qb['sub cat'].fillna(qb['main cat'], axis = 0, inplace = True)

# drop columns
qb.drop(['Unnamed: 0','Unnamed: 1','Unnamed: 2','Num','Clr','Split','Balance'], 1, inplace = True)

# clean strings
main_fix = qb['main cat'].str.split('·', expand = True)
main_fix[1] = main_fix[1].fillna(main_fix[0])
main_fix.drop(0, 1, inplace = True)
main_fix.rename(columns = {1:'main cat'}, inplace = True)

# merge clean strings
qb.drop('main cat', 1, inplace = True)
qb = pd.merge(qb, main_fix, left_index = True, right_index = True, copy = False)

# clean strings
sub_fix = qb['sub cat'].str.split('·', expand = True)
sub_fix[1] = sub_fix[1].fillna(sub_fix[0])
sub_fix.drop(0, 1, inplace = True)
sub_fix.rename(columns = {1:'sub cat'}, inplace = True)

# merge clean strings
qb.drop('sub cat', 1, inplace = True)
qb = pd.merge(qb, sub_fix, left_index = True, right_index = True, copy = False)

# fill nan with main cat
qb['sub cat'] = np.where(qb['sub cat'].isnull(), qb['main cat'], qb['sub cat'])

# fix numbers
qb['Amount'] = qb['Amount'].str.replace(',','')
qb['Amount'] = pd.to_numeric(qb['Amount'])

# get rid of where amount is null
qb = qb[qb['Amount'].isnull() == False]

# fil string nulls
qb['Name'].fillna('', inplace = True)

# fix memo columns
qb['Memo'].fillna('',inplace = True)

# fix date column and add another
qb = qb[qb['Date'].isnull() == False]
qb['Date'] = pd.to_datetime(qb['Date'])
qb['year and month'] = pd.to_datetime(qb['Date'].dt.year.map(int).map(str) + '-' + qb['Date'].dt.month.map(str))
qb['year and month'] = [str(x)[:7] for x in qb['year and month']]

# fix headers
qb.columns = [x.lower() for x in qb.columns]

# fix more
qb['main cat'] = [x.strip() for x in qb['main cat']]
qb['main cat'] = [x.lower() for x in qb['main cat']]

qb['sub cat'] = [x.strip() for x in qb['sub cat']]
qb['sub cat'] = [x.lower() for x in qb['sub cat']]

qb['income/expense'] = [x.lower() for x in qb['income/expense']]
qb['type'] = [x.lower() for x in qb['type']]
qb['memo'] = [x.lower() for x in qb['memo']]

# organize columns
qb = qb[['income/expense','date', 'year and month','main cat','sub cat','type','name','amount','memo']]

def string_clean(x):
    if x == '401kcontribution':
        return '401k contribution'
    elif x == 'healthcontribution':
        return 'health contribution'
    elif x == 'visioncontribution':
        return 'vision contribution'
    elif x == 'dentalcontribution':
        return 'dental contribution'
    else:
        return x
    
qb['main cat'] = qb['main cat'].apply(string_clean)    

if qb[qb.isnull().any(1)].empty:
    print ('no nulls!')
else:
    print ('you have nulls')

no nulls!


### View line entry types

In [7]:
[x for x in qb['type'].unique()]

['deposit',
 'general journal',
 'sales receipt',
 'check',
 'credit card credit',
 'credit card charge']

### View types

In [8]:
[x for x in qb['type'].unique()]

['deposit',
 'general journal',
 'sales receipt',
 'check',
 'credit card credit',
 'credit card charge']

# About some "general journal" entries
* Some entries contain large deductions
* these are addressing past reconciliation needs or to reclassify during past months
* for the sake of this project we'll exclude these types of entries

In [9]:
general_journal_exclude_list = ['reclassify','reconciliation','a/r','to post','accrued','discrepancy']
general_journal_exclude = '|'.join(general_journal_exclude_list)

excluded = qb[qb['memo'].str.contains(general_journal_exclude)]

qb = qb[~qb['memo'].str.contains(general_journal_exclude)]

In [10]:
[x for x in qb['memo'][(qb['type'] == 'general journal') & (qb['amount'] < 0)].unique()]

['amazon product refund',
 'amazon product refunds',
 'invoice refund',
 'amazon fee refund march 28, 2017']

### Check totals 
* Compare these with the actual CSV from Jan to Aug 2017

In [11]:
format_(qb.groupby('income/expense')[['amount']].sum(), [1])

Unnamed: 0_level_0,Amount
income/expense,Unnamed: 1_level_1
expense,"$29,534,452"
income,"$30,393,195"


# Groupby income/expense

In [12]:
in_exp = qb.copy()

# groupby
in_exp = in_exp.groupby(['year and month','income/expense'])[['amount']].sum().unstack(1).fillna(0).reset_index()

# clean columns
in_exp.columns = ['year and month','expense','income']

# create YTD row
in_exp = pd.concat([in_exp, pd.DataFrame({'year and month':'YTD',
                                          'expense':in_exp['expense'].sum(),
                                          'income':in_exp['income'].sum()}, index = [0])])

# get percentages
in_exp['expense as % of income'] = in_exp['expense']/in_exp['income']

# organize headers
in_exp = in_exp[['year and month','expense','income','expense as % of income']]

in_exp.format_([0,1,1,2])

Unnamed: 0,Year And Month,Expense,Income,Expense As % Of Income
0,2017-01,"$2,878,325","$3,858,782",74.59%
1,2017-02,"$3,223,365","$3,804,452",84.73%
2,2017-03,"$3,541,673","$4,353,168",81.36%
3,2017-04,"$3,790,626","$3,554,433",106.65%
4,2017-05,"$3,843,972","$3,544,212",108.46%
5,2017-06,"$4,567,287","$3,815,079",119.72%
6,2017-07,"$3,507,905","$3,579,063",98.01%
7,2017-08,"$4,181,298","$3,884,005",107.65%
0,YTD,"$29,534,452","$30,393,195",97.17%


# All expenses

In [13]:
format_(qb[qb['income/expense'] == 'expense'].groupby('main cat')[['amount']].sum().sort_values('amount', ascending = False), [1])

Unnamed: 0_level_0,Amount
main cat,Unnamed: 1_level_1
cost of goods,"$12,559,742"
payroll expenses,"$3,705,013"
freight in,"$3,062,122"
marketing,"$2,635,941"
contractors,"$1,767,544"
rent,"$1,542,950"
tax personal,"$1,165,543"
insurance,"$522,504"
merchant account fees,"$473,827"
health contribution,"$357,134"


# All income

In [14]:
format_(qb[qb['income/expense'] == 'income'].groupby('main cat')[['amount']].sum().sort_values('amount', ascending = False), [1])

Unnamed: 0_level_0,Amount
main cat,Unnamed: 1_level_1
kit sales,"$30,271,118"
interest income,"$53,188"
contractor,"$30,000"
"other inc, bus","$24,167"
ads (youtube),"$10,765"
"royalties, inc","$2,350"
apple app store,"$1,607"


### Income by month

In [15]:
income_by_month = qb.copy()

# groupby
income_by_month = income_by_month[income_by_month['income/expense'] == 'income'].groupby('year and month', as_index = False)[['amount']].sum()

# rename
income_by_month.rename(columns = {'amount':'income'}, inplace = True)

# get YTD
income_by_month = pd.concat([income_by_month, pd.DataFrame({'year and month':'YTD',
                                                            'income':income_by_month['income'].sum()},
                                                            index = [0])])
# organize headers
income_by_month = income_by_month[['year and month','income']]

income_by_month.format_([0,1])

Unnamed: 0,Year And Month,Income
0,2017-01,"$3,858,782"
1,2017-02,"$3,804,452"
2,2017-03,"$4,353,168"
3,2017-04,"$3,554,433"
4,2017-05,"$3,544,212"
5,2017-06,"$3,815,079"
6,2017-07,"$3,579,063"
7,2017-08,"$3,884,005"
0,YTD,"$30,393,195"


### Total income

In [16]:
total_income = income_by_month['income'].sum()

# Rent

### Rent - by month

In [45]:
rent_list = ['rent','utilities']

rent_by_month = qb.copy()

# groupby
rent_by_month = rent_by_month[rent_by_month['main cat'].isin(rent_list)].groupby(['year and month','main cat'])[['amount']].sum().unstack(1).fillna(0)

# rename columns
rent_by_month.columns = rent_by_month.columns.get_level_values(1)
rent_by_month.reset_index(inplace = True)
rent_by_month['total building expenses'] = rent_by_month.iloc[:,1:4].sum(1)

# merge with income
rent_by_month = pd.merge(rent_by_month, income_by_month, on = 'year and month', copy = False)


# create YTD
rent_by_month = pd.concat([rent_by_month, pd.DataFrame({'year and month':'YTD',
                                                        'rent':rent_by_month['rent'].sum(),
                                                        'utilities':rent_by_month['utilities'].sum(),
                                                        'income':rent_by_month['income'].sum(),
                                                        'total building expenses':rent_by_month['total building expenses'].sum()}, index = [0])])
# get percentages
rent_by_month['building expenses as % of income'] = rent_by_month[['rent','utilities']].sum(1)/rent_by_month['income']

# organize headers
rent_by_month = rent_by_month[['year and month','rent','utilities','income','total building expenses','building expenses as % of income']]

# reindex
rent_by_month.reset_index(drop = True, inplace = True)

rent_by_month.format_([0,1,1,1,1,2])

Unnamed: 0,Year And Month,Rent,Utilities,Income,Total Building Expenses,Building Expenses As % Of Income
0,2017-01,"$146,084","$1,087","$3,858,782","$147,171",3.81%
1,2017-02,"$279,539","$1,421","$3,804,452","$280,959",7.39%
2,2017-03,"$148,965","$1,332","$4,353,168","$150,298",3.45%
3,2017-04,"$194,288","$1,236","$3,554,433","$195,524",5.50%
4,2017-05,"$190,630","$1,694","$3,544,212","$192,324",5.43%
5,2017-06,"$192,966","$1,614","$3,815,079","$194,580",5.10%
6,2017-07,"$194,712","$1,318","$3,579,063","$196,029",5.48%
7,2017-08,"$195,767","$1,437","$3,884,005","$197,204",5.08%
8,YTD,"$1,542,950","$11,138","$30,393,195","$1,554,089",5.11%


### Rent - averages

In [495]:
rent_monthly_avg = rent_by_month.copy()
rent_monthly_avg.drop('income', 1, inplace = True)
rent_monthly_avg = pd.DataFrame(rent_monthly_avg.mean())
rent_monthly_avg.reset_index(inplace = True)
rent_monthly_avg.rename(columns = {'index':'building expense type',0:'monthly avg'}, inplace = True)
rent_monthly_avg

Unnamed: 0,building expense type,monthly avg
0,rent,342877.88222
1,utilities,2475.12889
2,total building expenses,345353.01111
3,building expenses as % of income,0.0515


# Ad Spend

### Ad Spend by month

In [324]:
ad_spend_by_month = qb.copy()

# groupby
ad_spend_by_month = ad_spend_by_month[ad_spend_by_month['main cat'] == 'marketing'].groupby('year and month', as_index = False)[['amount']].sum()

# rename colimns
ad_spend_by_month.rename(columns = {'amount':'ad spend'}, inplace = True)

# merge with income
ad_spend_by_month = pd.merge(ad_spend_by_month, income_by_month, on = 'year and month', copy = False)

# create YTD 
ad_spend_by_month = pd.concat([ad_spend_by_month, pd.DataFrame({'year and month':'YTD',
                                                               'ad spend':ad_spend_by_month['ad spend'].sum(),
                                                               'income':ad_spend_by_month['income'].sum()}, index = [0])])
# get percentages
ad_spend_by_month['ad spend as % of income'] = ad_spend_by_month['ad spend']/ad_spend_by_month['income']

# organize headers
ad_spend_by_month = ad_spend_by_month[['year and month','ad spend','income','ad spend as % of income']]

# reindex
ad_spend_by_month.reset_index(drop = True, inplace = True)

ad_spend_by_month.format_([0,1,1,2])

Unnamed: 0,Year And Month,Ad Spend,Income,Ad Spend As % Of Income
0,2017-01,$494,"$3,858,782",0.01%
1,2017-02,"$373,682","$3,804,452",9.82%
2,2017-03,"$357,064","$4,353,168",8.20%
3,2017-04,"$275,340","$3,554,433",7.75%
4,2017-05,"$410,632","$3,544,212",11.59%
5,2017-06,"$455,177","$3,815,079",11.93%
6,2017-07,"$404,874","$3,579,063",11.31%
7,2017-08,"$358,677","$3,884,005",9.23%
8,YTD,"$2,635,941","$30,393,195",8.67%


### Ad Spend - monthly averages

In [488]:
ad_spend_monthly_avg = ad_spend_by_month.copy()
ad_spend_monthly_avg.drop('income',1,inplace = True)
ad_spend_monthly_avg = pd.DataFrame(ad_spend_monthly_avg.iloc[:-1].mean())
ad_spend_monthly_avg.reset_index(inplace = True)
ad_spend_monthly_avg.rename(columns = {'index':'ad spend type',0:'monthly avg'}, inplace = True)
ad_spend_monthly_avg

Unnamed: 0,ad spend type,monthly avg
0,ad spend,329492.56625
1,ad spend as % of income,0.08731


### Ad Spend  - by type

In [347]:
ad_spend_by_type = qb.copy()

# groupby
ad_spend_by_type = ad_spend_by_type[ad_spend_by_type['main cat'] == 'marketing'].groupby('name', as_index = False)[['amount']].sum().sort_values('amount', ascending = False)

# add total row
ad_spend_by_type = pd.concat([ad_spend_by_type, pd.DataFrame({'name':'YTD','amount':ad_spend_by_type['amount'].sum()}, index = [0])])

# get percentages
ad_spend_by_type['ad spend as % of total ad spend'] = ad_spend_by_type['amount']/ad_spend_by_type['amount'].iloc[:-1].sum()
ad_spend_by_type['ad spend as % of income'] = ad_spend_by_type['amount']/total_income

# organize headers
ad_spend_by_type = ad_spend_by_type[['name','amount','ad spend as % of total ad spend','ad spend as % of income']]

ad_spend_by_type.format_([0,1,2,2])

Unnamed: 0,Name,Amount,Ad Spend As % Of Total Ad Spend,Ad Spend As % Of Income
1,"Google, Inc.","$2,622,315",99.48%,4.31%
0,FACEBK,"$8,152",0.31%,0.01%
3,Python Software Fdn.,"$5,000",0.19%,0.01%
2,La France Corp - Pac T,$474,0.02%,0.00%
0,YTD,"$2,635,941",100.00%,4.34%


# Wages

In [21]:
wages_type_list = ['401k contribution','contractor','contractors','dental contribution',
                   'health contribution','insurance','payroll expenses','pension administration - 401k',
                   'vision contribution']

### Wages - by month

In [388]:
wages_by_month = qb.copy()

wages_by_month = wages_by_month[(wages_by_month['main cat'].isin(wages_type_list)) & (qb['income/expense'] == 'expense')].groupby(['year and month','main cat'])[['amount']].sum().unstack(1).fillna(0)
wages_by_month.columns = wages_by_month.columns.get_level_values(1)
wages_by_month.reset_index(inplace = True)

wages_by_month['total wages'] = wages_by_month.iloc[:,1:].sum(1)
wages_by_month = pd.merge(wages_by_month, income_by_month, on = 'year and month', copy = False)

wages_by_month = pd.concat([wages_by_month, pd.DataFrame({'year and month':'YTD',
                                         'payroll expenses':wages_by_month['payroll expenses'].sum(),
                                         'contractors':wages_by_month['contractors'].sum(),
                                         '401k contribution':wages_by_month['401k contribution'].sum(),
                                         'pension administration - 401k':wages_by_month['pension administration - 401k'].sum(),
                                         'health contribution':wages_by_month['health contribution'].sum(),
                                         'dental contribution':wages_by_month['dental contribution'].sum(),
                                         'vision contribution':wages_by_month['vision contribution'].sum(),
                                         'insurance':wages_by_month['insurance'].sum(),
                                         'total wages':wages_by_month['total wages'].sum()}, index = [0])])

wages_by_month['total wages as % of income'] = wages_by_month['total wages']/wages_by_month['income']
wages_by_month['total wages as % of income'].fillna(wages_by_month['total wages']/total_income, inplace = True)

wages_by_month = wages_by_month[['year and month',
                                'payroll expenses',
                                'contractors',
                                '401k contribution',
                                'pension administration - 401k',
                                 
                                'health contribution',
                                'dental contribution',
                                'vision contribution',                                

                                'insurance',
                                'total wages',
                                'total wages as % of income']]
# reindex
wages_by_month.reset_index(drop = True, inplace = True)

wages_by_month.format_([0,1,1,1,1,1,1,1,1,1,2])

Unnamed: 0,Year And Month,Payroll Expenses,Contractors,401K Contribution,Pension Administration - 401K,Health Contribution,Dental Contribution,Vision Contribution,Insurance,Total Wages,Total Wages As % Of Income
0,2017-01,"$406,531","$160,561","$10,467",$0,"$40,017","$1,025",$438,"$66,887","$685,927",17.78%
1,2017-02,"$393,069","$185,522","$10,358",$0,"$39,165","$1,003",$429,"$60,750","$690,295",18.14%
2,2017-03,"$415,116","$223,529","$10,811","$1,571","$40,019","$1,021",$444,"$68,249","$760,760",17.48%
3,2017-04,"$428,373","$288,573","$10,789",$0,"$40,706","$1,052",$458,"$64,867","$834,818",23.49%
4,2017-05,"$441,394","$246,978","$10,460",$0,"$42,274","$1,087",$479,"$64,012","$806,685",22.76%
5,2017-06,"$693,366","$220,861","$15,907","$1,702","$65,763","$1,680",$744,"$67,733","$1,067,758",27.99%
6,2017-07,"$458,269","$232,804","$12,051",$0,"$44,528","$1,125",$499,"$2,268","$751,543",21.00%
7,2017-08,"$468,895","$208,715","$12,063",$0,"$44,662","$1,127",$500,"$127,737","$863,699",22.24%
8,YTD,"$3,705,013","$1,767,544","$92,906","$3,274","$357,134","$9,122","$3,990","$522,504","$6,461,486",10.63%


### Wages - monthly avg

In [437]:
wages_by_month_avg = pd.DataFrame(wages_by_month.iloc[:,1:].mean())
wages_by_month_avg.reset_index(inplace = True)
wages_by_month_avg.rename(columns = {'index':'wage type',0:'monthly avg'}, inplace = True)
wages_by_month_avg.sort_values('monthly avg', ascending = False, inplace = True)
wages_by_month_avg

Unnamed: 0,wage type,monthly avg
8,total wages,1435890.0
0,payroll expenses,823336.0
1,contractors,392787.0
7,insurance,116112.0
4,health contribution,79363.2
2,401k contribution,20645.8
5,dental contribution,2027.04
6,vision contribution,886.716
3,pension administration - 401k,727.447
9,total wages as % of income,0.201663


### Wages  - by wage type

In [23]:
wages_by_wage_type = qb.copy()

# groupby
wages_by_wage_type = wages_by_wage_type[(wages_by_wage_type['main cat'].isin(wages_type_list)) & (qb['income/expense'] == 'expense')].groupby('main cat', as_index = False)[['amount']].sum().rename(columns = {'amount':'wage expense'}).sort_values('wage expense', ascending = False)

# creat total
wages_by_wage_type = pd.concat([wages_by_wage_type, pd.DataFrame({'main cat':'YTD',
                                                                 'wage expense':wages_by_wage_type['wage expense'].sum()}, index = [0])])

# get percentage
wages_by_wage_type['wages as % of income'] = wages_by_wage_type['wage expense']/total_income

# rename
wages_by_wage_type.rename(columns = {'main cat':'expense type'}, inplace = True)

wages_by_wage_type.format_([0,1,2])

Unnamed: 0,Expense Type,Wage Expense,Wages As % Of Income
5,payroll expenses,"$3,705,013",6.10%
1,contractors,"$1,767,544",2.91%
4,insurance,"$522,504",0.86%
3,health contribution,"$357,134",0.59%
0,401k contribution,"$92,906",0.15%
2,dental contribution,"$9,122",0.02%
7,vision contribution,"$3,990",0.01%
6,pension administration - 401k,"$3,274",0.01%
0,YTD,"$6,461,486",10.63%


### Employee wages breakdown

In [503]:
payroll_expense = qb.copy()

# groupby
payroll_expense = payroll_expense[payroll_expense['main cat'] == 'payroll expenses'].groupby(['year and month','sub cat'])[['amount']].sum().unstack(1).fillna(0)

# rename colimns
payroll_expense.columns = payroll_expense.columns.get_level_values(1)
payroll_expense.reset_index(inplace = True)

# get total row
payroll_expense['total payroll'] = payroll_expense.iloc[:,1:].sum(1)

# merge income
payroll_expense = pd.merge(payroll_expense, income_by_month, on = 'year and month')

# get YTD
payroll_expense = pd.concat([payroll_expense, pd.DataFrame({'year and month':'YTD',
                                                           'service fee':payroll_expense['service fee'].sum(),
                                                           'taxes':payroll_expense['taxes'].sum(),
                                                           'wages':payroll_expense['wages'].sum(),
                                                           'total payroll':payroll_expense['total payroll'].sum(),
                                                           'income':payroll_expense['income'].sum()}, index = [0])])
# get percentages
payroll_expense['total payroll as % of income'] = payroll_expense['total payroll']/payroll_expense['income']

# organize columns
payroll_expense = payroll_expense[['year and month','wages','taxes','service fee','total payroll','income','total payroll as % of income']]

# drop income column
payroll_expense.drop('income',1,inplace = True)

payroll_expense.format_([0,1,1,1,1,2])

Unnamed: 0,Year And Month,Wages,Taxes,Service Fee,Total Payroll,Total Payroll As % Of Income
0,2017-01,"$366,569","$39,288",$674,"$406,531",10.54%
1,2017-02,"$355,711","$36,933",$426,"$393,069",10.33%
2,2017-03,"$377,384","$36,294","$1,437","$415,116",9.54%
3,2017-04,"$392,419","$33,936","$2,018","$428,373",12.05%
4,2017-05,"$408,376","$32,573",$445,"$441,394",12.45%
5,2017-06,"$641,812","$51,096",$458,"$693,366",18.17%
6,2017-07,"$424,445","$33,363",$461,"$458,269",12.80%
7,2017-08,"$434,253","$34,087",$555,"$468,895",12.07%
0,YTD,"$3,400,969","$297,570","$6,474","$3,705,013",12.19%


### Employee wage - monthly avg

In [504]:
payroll_expense_avg = pd.DataFrame(payroll_expense.mean())
payroll_expense_avg.reset_index(inplace = True)
payroll_expense_avg.rename(columns = {'index':'full time employee wage type',0:'monthly avg'}, inplace = True)
payroll_expense_avg

Unnamed: 0,full time employee wage type,monthly avg
0,wages,755770.88222
1,taxes,66126.57556
2,service fee,1438.74667
3,total payroll,823336.20444
4,total payroll as % of income,0.12239


# COGS

### COGS - by month

In [25]:
cogs_list = ['cost of goods']

cogs = qb.copy()

# filter
cogs = cogs[(cogs['income/expense'] == 'expense') & (cogs['main cat'].isin(cogs_list))]

# groupby
cogs_by_month = cogs.groupby('year and month', as_index = False)[['amount']].sum().rename(columns = {'amount':'total COGS'})

# merge with income
cogs_by_month = pd.merge(cogs_by_month, income_by_month, on = 'year and month', copy = False)

# create YTD
cogs_by_month = pd.concat([cogs_by_month, pd.DataFrame({'year and month':'YTD',
                                                       'total COGS':cogs_by_month['total COGS'].sum(),
                                                       'income':cogs_by_month['income'].sum()}, index = [0])])

# get percentages
cogs_by_month['COGS as % of income'] = cogs_by_month['total COGS']/cogs_by_month['income']

# organize headers
cogs_by_month = cogs_by_month[['year and month','total COGS','income','COGS as % of income']]

cogs_by_month.format_([0,1,1,2])

Unnamed: 0,Year And Month,Total Cogs,Income,Cogs As % Of Income
0,2017-01,"$1,298,484","$3,858,782",33.65%
1,2017-02,"$1,341,918","$3,804,452",35.27%
2,2017-03,"$1,592,339","$4,353,168",36.58%
3,2017-04,"$1,676,560","$3,554,433",47.17%
4,2017-05,"$1,756,226","$3,544,212",49.55%
5,2017-06,"$1,479,675","$3,815,079",38.78%
6,2017-07,"$1,513,858","$3,579,063",42.30%
7,2017-08,"$1,900,681","$3,884,005",48.94%
0,YTD,"$12,559,742","$30,393,195",41.32%


### COGS by vendor

In [26]:
cogs_by_vendor = cogs.groupby('name', as_index = False)[['amount']].sum().rename(columns = {'name':'vendor','amount':'COGS'}).sort_values('COGS', ascending = False)

first = cogs_by_vendor['COGS'].quantile(0.25)
third = cogs_by_vendor['COGS'].quantile(0.75)

def quantile_ranking(x):
    if x < first:
        return 'bottom 25%'
    elif x >= first and x < third:
        return 'middle 50%'
    else:
        return 'top 25%'
    
cogs_by_vendor['quantile rank by COGS'] = cogs_by_vendor['COGS'].apply(quantile_ranking)

In [27]:
cogs_by_vendor['COGS as % of income'] = cogs_by_vendor['COGS']/total_income

cogs_by_vendor.format_([0,1,0,2]).head()

Unnamed: 0,Vendor,Cogs,Quantile Rank By Cogs,Cogs As % Of Income
45,Cixi Intec,"$1,859,736",top 25%,3.06%
4,AQS Inc.,"$1,349,378",top 25%,2.22%
143,MCM Electronics Inc,"$828,461",top 25%,1.36%
10,Allied Electronics,"$683,522",top 25%,1.12%
200,Raspberry PI Foundation,"$652,630",top 25%,1.07%


* Put the below block on hold
* We try and link revenue to the skus that we bought from the vendors listed in quickbooks
* The vendor names in QB do not exactly match the vendor names in the database
* You need to take some time and clean these up
* Once you clean these up you can also look at discrepancy rates per vendor

In [28]:
vendors_to_skus = pd.read_sql(
'''
SELECT
p1.po_source_name AS vendor,
p2.sku_id,
p2.po_stuff_adjusted_cost AS 'cost per item',
p2.po_stuff_qty AS 'qty purchased'
FROM purchase_orders p1
JOIN po_stuff p2 ON p1.po_id = p2.po_id
WHERE YEAR(p1.po_date_created) = 2017
AND po_source_name IN '''+ str(tuple(cogs_by_vendor['vendor'])) +'''
''', db)

In [29]:
parts_to_skus = pd.read_sql(
'''
SELECT
k.part_id,
p1.sku_id,
p1.contains_sku_id,
p1.pts_quantity
FROM products_to_stuff p1
JOIN skus k ON p1.sku_id = k.sku_id

WHERE p1.sku_id = 8487
''', db)

In [30]:
parts_to_revenue = pd.read_sql(
'''
SELECT
op.part_id,
SUM((op.products_quantity - op.products_quantity_free) * op.products_price) AS revenue
FROM orders_products op
JOIN orders o ON op.orders_id = o.orders_id AND YEAR(o.date_purchased) = 2017
GROUP BY op.part_id
''', db)

* Put the above block on hold

# Shipping

In [31]:
shipping_list = ['freight in','landing costs-duty/taxes','postage and delivery','shipping logistics and supplies',]

shipping_by_month = qb.copy()

# filter
shipping_by_month = shipping_by_month[(shipping_by_month['income/expense'] == 'expense') & (shipping_by_month['main cat'].isin(shipping_list))]

# groupby
shipping_by_month = shipping_by_month.groupby(['year and month','main cat'])[['amount']].sum().unstack(1).fillna(0)

# rename columns
shipping_by_month.columns = shipping_by_month.columns.get_level_values(1)
shipping_by_month.reset_index(inplace= True)

# get total shipping rows
shipping_by_month['total shipping'] = shipping_by_month.iloc[:, 1:].sum(1)

# merge with income
shipping_by_month = pd.merge(shipping_by_month, income_by_month, on = 'year and month')

# create YTD
shipping_by_month = pd.concat([shipping_by_month, pd.DataFrame({'year and month':'YTD',
                                                               'freight in':shipping_by_month['freight in'].sum(),
                                                               'landing costs-duty/taxes':shipping_by_month['landing costs-duty/taxes'].sum(),
                                                               'postage and delivery':shipping_by_month['postage and delivery'].sum(),
                                                               'shipping logistics and supplies':shipping_by_month['shipping logistics and supplies'].sum(),
                                                               'total shipping':shipping_by_month['total shipping'].sum(),
                                                               'income':shipping_by_month['income'].sum()}, index = [0])])

# get percentages
shipping_by_month['total shipping as % of income'] = shipping_by_month['total shipping']/shipping_by_month['income']

# organize headers
shipping_by_month = shipping_by_month[['year and month',
                                        'freight in',
                                        'postage and delivery',
                                        'shipping logistics and supplies',
                                        'landing costs-duty/taxes',                                       
                                        'total shipping',
                                        'income',
                                        'total shipping as % of income']]

shipping_by_month.format_([0,1,1,1,1,1,1,2])

Unnamed: 0,Year And Month,Freight In,Postage And Delivery,Shipping Logistics And Supplies,Landing Costs-Duty/Taxes,Total Shipping,Income,Total Shipping As % Of Income
0,2017-01,"$412,395","$30,815","$3,062",$0,"$446,272","$3,858,782",11.57%
1,2017-02,"$363,797","$28,501","$2,233",$0,"$394,531","$3,804,452",10.37%
2,2017-03,"$402,189","$38,503","$5,303","$22,500","$468,494","$4,353,168",10.76%
3,2017-04,"$401,183","$26,127","$15,771",$0,"$443,082","$3,554,433",12.47%
4,2017-05,"$406,433","$30,959","$9,716",$0,"$447,108","$3,544,212",12.62%
5,2017-06,"$286,017","$25,880","$25,583",$0,"$337,480","$3,815,079",8.85%
6,2017-07,"$363,478","$20,800","$19,077",$0,"$403,354","$3,579,063",11.27%
7,2017-08,"$426,630","$21,443","$9,775",$0,"$457,848","$3,884,005",11.79%
0,YTD,"$3,062,122","$223,027","$90,521","$22,500","$3,398,169","$30,393,195",11.18%


### Shipping - monthly avg

In [459]:
shipping_avg = shipping_by_month.copy()
shipping_avg.drop('income',1,inplace = True)
shipping_avg = pd.DataFrame(shipping_avg.mean())
shipping_avg.reset_index(inplace = True)
shipping_avg.rename(columns = {'index':'shipping type',0:'monthly average'}, inplace = True)
shipping_avg

Unnamed: 0,shipping type,monthly average
0,freight in,680471.54889
1,postage and delivery,49561.50222
2,shipping logistics and supplies,20115.7
3,landing costs-duty/taxes,5000.0
4,total shipping,755148.75111
5,total shipping as % of income,0.11207


# Excel start
* "add table" [example](http://xlsxwriter.readthedocs.io/example_tables.html)
* "working with worksheet tables" [here](http://xlsxwriter.readthedocs.io/working_with_tables.html)
* [format methods](http://xlsxwriter.readthedocs.io/format.html#set_font_size)
* [html colors](https://www.w3schools.com/colors/colors_shades.asp)
* [win32](https://stackoverflow.com/questions/33665865/adjust-cell-width-in-excel)
* [loop through worksheets](https://github.com/jmcnamara/XlsxWriter/issues/333)

# Set up worksheet variables

In [470]:
import xlsxwriter

#===== create workbook
workbook = xlsxwriter.Workbook(workbook_title + '.xlsx')

#===== create temp workbook for formatting
writer_temp = pd.ExcelWriter('The Five Pillars TEMP.xlsx')

#===== table styles
table_style = 'table style light 15'

#===== colors
color01 = '#cfe0e8'
color02 = '#b7d7e8'
color03 = '#87bdd8'
color04 = '#daebe8'

workbook_title = 'The Five Pillars'
current_period = 'August 2018'

#===== formats
money = workbook.add_format({'num_format': '$#,##0'})
percent = workbook.add_format({'num_format': '0.00%'})
number = workbook.add_format({'num_format': '#,##0'})

center = workbook.add_format({'valign':'vcenter',
                              'align':'center'})

current_period_format = workbook.add_format({'bold':True,
                                      'font_size':14})

table_title01 = workbook.add_format({'valign':'vcenter',
                                  'align':'center',
                                  'font_size':14,
                                  'bold':True,
                                  'bg_color':color01,
                                  'border':1})

table_title02 = workbook.add_format({'valign':'vcenter',
                                  'align':'center',
                                  'font_size':14,
                                  'bold':True,
                                  'bg_color':color02,
                                  'border':1})

table_title03 = workbook.add_format({'valign':'vcenter',
                                  'align':'center',
                                  'font_size':14,
                                  'bold':True,
                                  'bg_color':color03,
                                  'border':1})

table_title04 = workbook.add_format({'valign':'vcenter',
                                  'align':'center',
                                  'font_size':14,
                                  'bold':True,
                                  'bg_color':color04,
                                  'border':1})

worksheet_title01 = workbook.add_format({'font_size':26,
                                      'bold':True,
                                      'bg_color':color01,
                                      'valign':'vcenter',
                                      'align':'center',
                                      'border': 1})

worksheet_title02 = workbook.add_format({'font_size':26,
                                      'bold':True,
                                      'bg_color':color02,
                                      'valign':'vcenter',
                                      'align':'center',
                                      'border': 1})

worksheet_title03 = workbook.add_format({'font_size':26,
                                      'bold':True,
                                      'bg_color':color03,
                                      'valign':'vcenter',
                                      'align':'center',
                                      'border': 1})

worksheet_title04 = workbook.add_format({'font_size':26,
                                      'bold':True,
                                      'bg_color':color04,
                                      'valign':'vcenter',
                                      'align':'center',
                                      'border': 1})


# Excel table header formatter

In [471]:
def xl_header(df, fmt):
    header_list = []
    for ix, f in enumerate(fmt):
        temp_dict = {}
        if f == 0:
            temp_dict['header'] = df.iloc[:, ix].name
            temp_dict['header_format'] = center
        elif f == 1:
            temp_dict['header'] = df.iloc[:, ix].name
            temp_dict['format'] = money
            temp_dict['header_format'] = center
        elif f == 2:
            temp_dict['header'] = df.iloc[:, ix].name            
            temp_dict['format'] = percent
            temp_dict['header_format'] = center
        elif f == 3:
            temp_dict['header'] = df.iloc[:, ix].name            
            temp_dict['format'] = number
            temp_dict['header_format'] = center
        header_list.append(temp_dict)
    return header_list

# Add worksheets

In [472]:
#worksheet1 = workbook.add_worksheet('overview')
worksheet2 = workbook.add_worksheet('rent')
worksheet3 = workbook.add_worksheet('ad spend')
worksheet4 = workbook.add_worksheet('wages')
worksheet5 = workbook.add_worksheet('cogs')
worksheet6 = workbook.add_worksheet('shipping')

# Worksheet 01 Overview

In [473]:
#===== WORKSHEET 01

# Worksheet 02 Rent

In [474]:
worksheet2.merge_range(0,0,2,12, workbook_title, worksheet_title02)

worksheet2.merge_range(6, 0, 6, 0 + len(rent_by_month.columns) - 1, 'Building Expenses by Month', table_title02)
worksheet2.add_table(7, 0, 7 + len(rent_by_month.index), 0 + len(rent_by_month.columns) - 1,
                    {'data':rent_by_month.values.tolist(),
                    'columns':xl_header(rent_by_month, [0,1,1,1,1,2]),
                    'style': table_style})
rent_by_month.to_excel(writer_temp, 'rent', startrow = 7, startcol = 0, index = False)

#=====

worksheet2.merge_range(6, 8, 6, 8 + len(rent_monthly_avg.columns) - 1, 'Monthly Averages', table_title02)
worksheet2.add_table(7, 8, 7 + len(rent_monthly_avg.index), 8 + len(rent_monthly_avg.columns) - 1,
                    {'data':rent_monthly_avg.values.tolist(),
                     'columns':xl_header(rent_monthly_avg, [1,1,1,1,2]),
                     'style': table_style,
                     'autofilter':0})
rent_monthly_avg.to_excel(writer_temp, 'rent', startrow = 7, startcol = 8, index = False)

#=====

chart = workbook.add_chart({'type':'column'})
chart.add_series({'categories':['rent', 8, 0, 8 + len(rent_by_month.index) - 2, 0],
                               'values':['rent', 8, 4, 8 + len(rent_by_month.index) - 2, 4]})
chart.set_legend({'none':True})
chart.set_title({'name':'Total Building Expenses'})
chart.set_x_axis({'name':'Year and Month'})
chart.set_y_axis({'name':'Total Building Expenses',
                               'num_format':'$#,##0'})
chart.set_plotarea({'border':{'color':'black','width':2},
                                 'fill':{'color':color02}})
worksheet2.insert_chart(50, 0, chart)

#=====

chart = workbook.add_chart({'type':'line'})
chart.add_series({'categories':['rent', 8, 0, 8 + len(rent_by_month.index) - 2, 0],
                               'values':['rent', 8, 5, 8 + len(rent_by_month.index) - 2, 5],
                                'line':{'width':5}})
chart.set_legend({'none':True})
chart.set_title({'name':'Building Expenses as a % of Income'})
chart.set_x_axis({'name':'Year and Month'})
chart.set_y_axis({'name':'%', 'num_format':'0.00%'})
chart.set_plotarea({'border':{'color':'black','width':2},
                                 'fill':{'color':color02}})
worksheet2.insert_chart(60, 0, chart)

# Worksheet 03 Ad Spend

In [475]:
worksheet3.merge_range(0,0,2,12, workbook_title, worksheet_title03
                     )
worksheet3.merge_range(6, 0, 6, 0 + len(ad_spend_by_month.columns) - 1, 'Ad Spend by Month', table_title03)
worksheet3.add_table(7, 0, 7 + len(ad_spend_by_month.index), 0 + len(ad_spend_by_month.columns) - 1,
                    {'data':ad_spend_by_month.values.tolist(),
                    'columns':xl_header(ad_spend_by_month, [0,1,1,2]),
                    'style': table_style})
ad_spend_by_month.to_excel(writer_temp, 'ad spend', startrow = 7, startcol = 0, index = False)

#=====

worksheet3.merge_range(6, 8, 6, 8 + len(ad_spend_monthly_avg.columns) - 1, 'Monthly Averages', table_title03)
worksheet3.add_table(7, 8, 7 + len(ad_spend_monthly_avg.index), 8 + len(ad_spend_monthly_avg.columns) - 1,
                    {'data':ad_spend_monthly_avg.values.tolist(),
                     'columns':xl_header(ad_spend_monthly_avg, [1,1,2]),
                     'style': table_style,
                     'autofilter':0})
ad_spend_monthly_avg.to_excel(writer_temp, 'ad spend', startrow = 7, startcol = 8, index = False)

#=====

worksheet3.merge_range(18, 0, 18, 0 + len(ad_spend_by_type.columns) - 1, 'Ad Spend by Type', table_title03)
worksheet3.add_table(19, 0, 19 + len(ad_spend_by_type.index), 0 + len(ad_spend_by_type.columns) - 1,
                    {'data':ad_spend_by_type.values.tolist(),
                     'columns':xl_header(ad_spend_by_type, [0,1,2,2]),
                     'style': table_style,
                     'autofilter':0})
ad_spend_by_type.to_excel(writer_temp, 'ad spend', startrow = 19, startcol = 0, index = False)

#=====

worksheet3_chart01 = workbook.add_chart({'type':'column'})
worksheet3_chart01.add_series({'categories':['ad spend', 8, 0, 8 + len(ad_spend_by_month.index) - 2, 0],
                               'values':['ad spend', 8, 1, 8 + len(ad_spend_by_month.index) - 2, 1]})
worksheet3_chart01.set_legend({'none':True})
worksheet3_chart01.set_title({'name':'Ad Spend by Month'})
worksheet3_chart01.set_x_axis({'name':'Year and Month'})
worksheet3_chart01.set_y_axis({'name':'Ad Spend',
                               'num_format':'$#,##0'})
worksheet3_chart01.set_plotarea({'border':{'color':'black','width':2},
                                 'fill':{'color':color03}})
worksheet3.insert_chart(50, 0, worksheet3_chart01)

#=====

chart = workbook.add_chart({'type':'line'})
chart.add_series({'categories':['ad spend', 8, 0, 8 + len(ad_spend_by_month.index) - 2, 0],
                               'values':['ad spend', 8, 3, 8 + len(ad_spend_by_month.index) - 2, 3],
                                'line':{'width':5}})
chart.set_legend({'none':True})
chart.set_title({'name':'Ad Spend as a % of Income'})
chart.set_x_axis({'name':'Year and Month'})
chart.set_y_axis({'name':'%', 'num_format':'0.00%'})
chart.set_plotarea({'border':{'color':'black','width':2},
                                 'fill':{'color':color02}})

worksheet3.insert_chart(50, 0, chart)

# Worksheet 04 Wages

In [476]:
worksheet4.merge_range(0,0,2,12, workbook_title, worksheet_title04)

worksheet4.merge_range(6, 0, 6, 0 + len(wages_by_month.columns) - 1, 'Wages by Month', table_title04)
worksheet4.add_table(7, 0, 7 + len(wages_by_month.index), 0 + len(wages_by_month.columns) - 1,
                    {'data':wages_by_month.values.tolist(),
                    'columns':xl_header(wages_by_month, [0,1,1,1,1,1,1,1,1,1,2]),
                    'style': table_style})
wages_by_month.to_excel(writer_temp, 'wages', startrow = 7, startcol = 0, index = False)

#=====

worksheet4.merge_range(30, 1, 30, 1 + len(wages_by_month_avg.columns) - 1, 'Monthly Averages', table_title04)
worksheet4.add_table(31, 1, 31 + len(wages_by_month_avg.index), 1 + len(wages_by_month_avg.columns) - 1,
                    {'data':wages_by_month_avg.values.tolist(),
                     'columns':xl_header(wages_by_month_avg, [0,1]),
                     'style': table_style})
wages_by_month_avg.to_excel(writer_temp, 'wages', startrow = 31, startcol = 1, index = False)

#=====

worksheet4.merge_range(18, 1, 18, 1 + len(wages_by_wage_type.columns) - 1, 'Wages by Wage Type', table_title04)
worksheet4.add_table(19, 1, 19 + len(wages_by_wage_type.index), 1 + len(wages_by_wage_type.columns) - 1,
                    {'data':wages_by_wage_type.values.tolist(),
                     'columns':xl_header(wages_by_wage_type, [0,1,2]),
                     'style': table_style})
wages_by_wage_type.to_excel(writer_temp, 'wages', startrow = 19, startcol = 1, index = False)

#=====

worksheet4.merge_range(18, 5, 18, 5 + len(payroll_expense.columns) - 1, 'Full Time Employee Wages Breakdown', table_title04)
worksheet4.add_table(19, 5, 19 + len(payroll_expense.index), 5 + len(payroll_expense.columns) - 1,
                    {'data':payroll_expense.values.tolist(),
                     'columns':xl_header(payroll_expense, [0,1,1,1,1,2]),
                     'style': table_style})
payroll_expense.to_excel(writer_temp, 'wages', startrow = 19, startcol = 5, index = False)

#=====

chart = workbook.add_chart({'type':'pie'})
chart.add_series({'categories':['wages', 19, 1, 19 + len(wages_by_wage_type.index) - 2, 1],
                               'values':['wages', 19, 2, 19 + len(wages_by_wage_type.index) - 2, 2]})
chart.set_title({'name':'Wages by Wage Type'})
worksheet4.insert_chart(50, 0, chart)

#=====

chart = workbook.add_chart({'type':'line'})
chart.add_series({'categories':['wages', 8, 0, 8 + len(wages_by_month.index) - 2, 0],
                    'values':['wages', 8, 9, 8 + len(wages_by_month.index) - 2, 9],
                    'line':{'width':5}})
chart.set_legend({'none':True})
chart.set_title({'name':'Total Wages by Month'})
chart.set_x_axis({'name':'Year and Month'})
chart.set_y_axis({'name':'Total Wages',
                'num_format':'$#,##0',
                'min':wages_by_month['total wages'].iloc[:-1].min() - 10000,
                'max':wages_by_month['total wages'].iloc[:-1].max() + 100000})
chart.set_plotarea({'border':{'color':'black','width':2},
                                 'fill':{'color':color04}})
worksheet4.insert_chart(60, 0, chart)

# Worksheet 05 COGS

In [477]:
worksheet5.merge_range(0,0,2,12, workbook_title, worksheet_title01)

worksheet5.merge_range(6, 0, 6, 0 + len(cogs_by_month.columns) - 1, 'COGS by Month', table_title01)
worksheet5.add_table(7, 0, 7 + len(cogs_by_month.index), 0 + len(cogs_by_month.columns) - 1,
                    {'data':cogs_by_month.values.tolist(),
                    'columns':xl_header(cogs_by_month, [0,1,1,2]),
                    'style': table_style})
cogs_by_month.to_excel(writer_temp, 'cogs', startrow = 7, startcol = 0, index = False)

#=====

worksheet5.merge_range(18, 0, 18, 0 + len(cogs_by_vendor.columns) - 1, 'COGS by Vendor', table_title01)
worksheet5.add_table(19, 0, 19 + len(cogs_by_vendor.index), 0 + len(cogs_by_vendor.columns) - 1,
                    {'data':cogs_by_vendor.values.tolist(),
                    'columns':xl_header(cogs_by_vendor, [0,1,0,2]),
                    'style': table_style})
cogs_by_vendor.to_excel(writer_temp, 'cogs', startrow = 19, startcol = 0, index = False)

#=====

chart = workbook.add_chart({'type':'column'})
chart.add_series({'categories':['cogs', 8, 0, 8 + len(cogs_by_month.index) - 2, 0],
                    'values':['cogs', 8, 1, 8 + len(cogs_by_month.index) - 2, 1]})
chart.set_legend({'none':True})
chart.set_title({'name':'Total COGS by Month'})
chart.set_x_axis({'name':'Year and Month'})
chart.set_y_axis({'name':'Total COGS',
                'num_format':'$#,##0'})
chart.set_plotarea({'border':{'color':'black','width':2},
                                 'fill':{'color':color01}})
worksheet5.insert_chart(50, 0, chart)

#=====

chart = workbook.add_chart({'type':'bar'})
chart.add_series({'categories':['cogs', 20, 0, 20 + len(cogs_by_vendor.iloc[:25].index) - 2, 0],
                    'values':['cogs', 20, 1, 20 + len(cogs_by_vendor.iloc[:25].index) - 2, 1]})
chart.set_legend({'none':True})
chart.set_title({'name':'COGS by Top Vendors'})
chart.set_x_axis({'name':'COGS',
                 'num_format':'$#,##0'})
chart.set_y_axis({'name':'Vendor'})
chart.set_plotarea({'border':{'color':'black','width':2},
                                 'fill':{'color':color01}})
worksheet5.insert_chart(50, 0, chart)

# Worksheet 06 Shipping

In [478]:
worksheet6.merge_range(0,0,2,12, workbook_title, worksheet_title01)

worksheet6.merge_range(6, 0, 6, 0 + len(shipping_by_month.columns) - 1, 'Shipping by Month', table_title02)
worksheet6.add_table(7, 0, 7 + len(shipping_by_month.index), 0 + len(shipping_by_month.columns) - 1,
                    {'data':shipping_by_month.values.tolist(),
                    'columns':xl_header(shipping_by_month, [0,1,1,1,1,1,1,2]),
                    'style': table_style})
shipping_by_month.to_excel(writer_temp, 'shipping', startrow = 7, startcol = 0, index = False)

In [482]:
shipping_by_month

Unnamed: 0,year and month,freight in,postage and delivery,shipping logistics and supplies,landing costs-duty/taxes,total shipping,income,total shipping as % of income
0,2017-01,412395.28,30814.78,3062.2,0.0,446272.26,3858780.0,0.11565
1,2017-02,363796.65,28500.65,2233.23,0.0,394530.53,3804450.0,0.1037
2,2017-03,402188.97,38502.53,5302.96,22500.0,468494.46,4353170.0,0.10762
3,2017-04,401183.3,26127.1,15771.48,0.0,443081.88,3554430.0,0.12466
4,2017-05,406432.72,30959.18,9715.79,0.0,447107.69,3544210.0,0.12615
5,2017-06,286016.67,25879.81,25583.38,0.0,337479.86,3815080.0,0.08846
6,2017-07,363478.1,20799.7,19076.51,0.0,403354.31,3579060.0,0.1127
7,2017-08,426630.28,21443.01,9775.1,0.0,457848.39,3884010.0,0.11788
0,YTD,3062121.97,223026.76,90520.65,22500.0,3398169.38,30393200.0,0.11181


# Format workbook

In [480]:
#===== close temp workbook
writer_temp.save()

#===== get all sheets into a dict
my_worksheets = {}
for worksheet in workbook.worksheets():
    my_worksheets[worksheet.get_name()] = worksheet

#===== format each sheet in workbook
xl = pd.ExcelFile(r'/Users/jarad/Desktop/Desktop/The Five Pillars Project/The Five Pillars TEMP.xlsx')

#===== get max column width per column in temp workbook
for sheet in my_worksheets.keys():
    worksheet = my_worksheets[sheet]
    temp_sheet = xl.parse(sheet)
    
    max_row = len(temp_sheet.index)
    max_col = len(temp_sheet.columns)
    
    # round any numbers to 2 sig figs
    for col in range(max_col):
        for row in range(max_row):
            try:
                temp_sheet.iloc[row, col] = np.round(temp_sheet.iloc[row, col], 2)
            except:
                pass
    
    # get max width of each column in temp sheet
    max_length = []
    for col in range(max_col):
        temp = []
        for row in range(max_row):
            temp.append(len(str(temp_sheet.iloc[row, col])))
        max_length.append(int(np.ceil(np.max(temp))) + 3)
    # change column width in real workbook
    for i in range(len(max_length)):
        worksheet.set_column(i, i, max_length[i])
    
    # do other stuff to real workbook
    worksheet.write(4, 0, current_period, current_period_format)
    worksheet.hide_gridlines(2)
    worksheet.set_landscape()
    worksheet.fit_to_pages(1, 1)
    worksheet.set_footer('&L&D')
    worksheet.insert_image(0,0,r'/Users/jarad/Desktop/Desktop/Adafruit Logo.png',
                            {'x_scale': 0.55, 'y_scale': 0.5,'x_offset': 5, 'y_offset': 5})
#===== close and save
workbook.close()        

# Write raw data to CSV for DataStudio

In [481]:
writer = pd.ExcelWriter('Five Pillars for DataStudio.xlsx')
qb.to_excel(writer, 'Profit and Loss', index = False)
excluded.to_excel(writer, 'Excluded', index = False)
#writer.save()