# Load libraries

In [1]:
import sys
sys.path.insert(0,'/Users/jarad')

import pandas as pd
import numpy as np
from db2 import *

import datetime as dt
import calendar

import glob

import matplotlib.pyplot as plt
%matplotlib inline

# pandas display settings: show up to 500 columns/rows and 5 digits of precision
for option_name, option_value in (('display.max_columns', 500),
                                  ('display.max_rows', 500),
                                  ('display.precision', 5)):
    pd.set_option(option_name, option_value)

# Key metrics for dashboard

### Button
* monthly $ - button
* % of total spending - button 

### Charts
* monthly total $
* % of total income 
* % breakdown of main category (top 3? 5?)

# Load formatter

In [2]:
def format_(df, fmt):
    """Return a display-ready copy of ``df``: title-cased headers, formatted values.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to format. It is copied first and never modified.
    fmt : sequence of int
        One code per column, by position:
        0 leaves the column untouched,
        1 renders the values as whole dollars (``'$1,234'``),
        2 renders fractions as percentages with two decimals (``'12.50%'``).
        Any other code is ignored. ``fmt`` may be shorter than the number of
        columns; the remaining columns are left untouched.

    Returns
    -------
    pandas.DataFrame
        The formatted copy. Columns with code 1 or 2 hold strings.

    Raises
    ------
    IndexError
        If a code of 1 or 2 is given for a position past the last column.
    """
    # make a copy so the original is not altered
    dfcpy = df.copy()

    # format headers: 'total_sales' -> 'Total Sales'
    # (str() lets non-string labels such as an integer year through)
    col_list = [str(col).replace('_', ' ').title() for col in dfcpy.columns]

    # Work on positional labels while formatting. Assigning a whole column by a
    # unique label replaces it (and its dtype); writing strings into a numeric
    # column through .iloc is deprecated in recent pandas.
    dfcpy.columns = list(range(dfcpy.shape[1]))

    # format columns
    for ix, f in enumerate(fmt):
        if f == 1:
            dfcpy[ix] = ['${:,.0f}'.format(x) for x in dfcpy.iloc[:, ix]]
        elif f == 2:
            dfcpy[ix] = ['{:,.2f}%'.format(x * 100) for x in dfcpy.iloc[:, ix]]

    dfcpy.columns = col_list

    return dfcpy

# add to pandas module so it can be called as df.format_(fmt)
pd.DataFrame.format_ = format_

# Set constraints

In [3]:
# reporting window (inclusive, ISO dates) used to filter the sales query
date_start, date_end = '2017-01-01', '2017-08-31'

# 'YYYY-MM' month that the buttons and breakdown charts report on
current_year_and_month = '2017-08'

# Get db sales data

In [4]:
# Monthly revenue between date_start and date_end (inclusive).
# Revenue per order line is (quantity - free quantity) * price; orders with
# status 9 or 10 and orders paid as 'Replacement Order' are left out.
# The dates are substituted with .format so the quoted literals carry no
# stray padding (the earlier concatenation produced ' 2017-01-01 ').
# NOTE(review): date_start/date_end are notebook constants; if they ever come
# from user input, switch to the driver's query parameters instead.
# NOTE(review): `sales` is not referenced by any later cell visible here.
sales_query = '''
SELECT
DATE_FORMAT(o.date_purchased, '%Y-%m') AS 'year and month',
SUM((op.products_quantity - op.products_quantity_free) * op.products_price) AS revenue
FROM orders o
JOIN orders_products op ON o.orders_id = op.orders_id
WHERE o.orders_status != 9
AND o.orders_status != 10
AND o.payment_method != 'Replacement Order'
AND DATE(o.date_purchased) BETWEEN '{start}' AND '{end}'
GROUP BY DATE_FORMAT(o.date_purchased, '%Y-%m')
'''.format(start = date_start, end = date_end)

sales = pd.read_sql(sales_query, db)

# Get and clean QuickBooks data

In [5]:
# get all CSVs (sorted so the rows are concatenated in the same order on every run)
path = r'/Users/jarad/Desktop/Desktop/The Five Pillars Project/QB CSVs/'
all_files = sorted(glob.glob(path + '/*.csv'))
if not all_files:
    raise ValueError('No QuickBooks CSVs found in ' + path)

list_ = [pd.read_csv(file_) for file_ in all_files]

# ignore_index gives every row its own label; without it each file restarts at 0
# and any index-aligned operation further down would pair rows across files
qb_main = pd.concat(list_, ignore_index = True)

# work on a new frame (qb_main stays untouched): drop columns of all nulls
# and name the columns that distinguish income/expense and the categories
qb = (qb_main
      .dropna(how = 'all', axis = 1)
      .rename(columns = {'Unnamed: 4':'main cat',
                         'Unnamed: 5':'sub cat',
                         'Unnamed: 3':'income/expense'}))

# fill out income/expense: the label is carried down to the rows below it
qb['income/expense'] = qb['income/expense'].ffill()

# drop "totals" rows, then carry each category label down to the rows below it
qb = qb[~qb['main cat'].fillna('').str.contains('Total')].copy()
qb['main cat'] = qb['main cat'].replace('', np.nan).ffill()

qb = qb[~qb['sub cat'].fillna('').str.contains('Total')].copy()
qb['sub cat'] = qb['sub cat'].replace('', np.nan).ffill().fillna(qb['main cat'])

# drop columns that are not used (errors = 'ignore': a column that was entirely
# null has already been removed above)
qb = qb.drop(['Unnamed: 0','Unnamed: 1','Unnamed: 2','Num','Clr','Split','Balance'],
             axis = 1, errors = 'ignore')

def _account_name(labels):
    """Return the text after the first '·' of each label, or the whole label when there is none."""
    parts = labels.str.split('·')
    return parts.str[1].fillna(parts.str[0])

# clean strings (assigned directly instead of merging back on the index)
qb['main cat'] = _account_name(qb['main cat'])
qb['sub cat'] = _account_name(qb['sub cat'])

# fill nan with main cat
qb['sub cat'] = qb['sub cat'].fillna(qb['main cat'])

# fix numbers: strip thousands separators before converting
qb['Amount'] = pd.to_numeric(qb['Amount'].str.replace(',',''))

# get rid of rows where amount or date is null
qb = qb[qb['Amount'].notnull() & qb['Date'].notnull()].copy()

# fill string nulls
qb['Name'] = qb['Name'].fillna('')
qb['Memo'] = qb['Memo'].fillna('')

# fix date column and add a 'YYYY-MM' column
qb['Date'] = pd.to_datetime(qb['Date'])
qb['year and month'] = qb['Date'].dt.strftime('%Y-%m')

# fix headers
qb.columns = [x.lower() for x in qb.columns]

# normalise text columns
for text_col in ['main cat', 'sub cat']:
    qb[text_col] = qb[text_col].str.strip().str.lower()

for text_col in ['income/expense', 'type', 'memo']:
    qb[text_col] = qb[text_col].str.lower()

# organize columns
qb = qb[['income/expense','date', 'year and month','main cat','sub cat','type','name','amount','memo']]

def string_clean(x):
    """Insert the missing space in the run-together '...contribution' category names.

    Any other value is returned unchanged.
    """
    fixes = {'401kcontribution':'401k contribution',
             'healthcontribution':'health contribution',
             'visioncontribution':'vision contribution',
             'dentalcontribution':'dental contribution'}
    return fixes.get(x, x)

qb['main cat'] = qb['main cat'].apply(string_clean)

# sanity check: report whether any row still contains a null
if qb.isnull().any(axis = 1).any():
    print ('you have nulls')
else:
    print ('no nulls!')

no nulls!


# About some "general journal" entries
* Some entries contain large deductions
* these entries address past reconciliation needs or reclassify amounts from past months
* for the sake of this project we'll exclude these types of entries

In [6]:
# memo keywords that mark the entries to leave out, joined into one regex alternation
general_journal_exclude_list = ['reclassify','reconciliation','a/r','to post','accrued','discrepancy']
general_journal_exclude = '|'.join(general_journal_exclude_list)

# evaluate the pattern once, then split qb into the excluded rows and the rest
memo_matches = qb['memo'].str.contains(general_journal_exclude)

excluded = qb[memo_matches]
qb = qb[~memo_matches]

# Build expenses dict

In [7]:
# Maps each worksheet/pillar name to the 'main cat' values that belong to it.
# NOTE(review): 'insurance' is listed under both 'building expenses' and
# 'employees', so those rows count towards both groups - confirm this is intended.
exp_dict = {}

exp_dict['building expenses'] = ['rent', 'equipment', 'office', 'utilities',
                                 'insurance', 'internet', 'insurance, bus',
                                 'telephone, bus']

exp_dict['shipping'] = ['freight in', 'landing costs-duty/taxes',
                        'postage and delivery', 'shipping logistics and supplies']

exp_dict['employees'] = ['401k contribution', 'contractors', 'dental contribution',
                         'education', 'health contribution', 'insurance',
                         'payroll expenses', 'pension administration - 401k',
                         'tax personal', 'vision contribution']

exp_dict['ad spend'] = ['marketing']

exp_dict['COGS'] = ['cost of goods']

# Get totals

In [8]:
# rows that fall in the reporting month
in_current_month = qb['year and month'] == current_year_and_month

# total income for the reporting month
is_income_row = qb['income/expense'] == 'income'
cur_income = qb.loc[is_income_row & in_current_month, 'amount'].sum()
print ('total income for ' + current_year_and_month + ' is ${:,.0f}'.format(cur_income))

# total expenses for the reporting month
is_expense_row = qb['income/expense'] == 'expense'
cur_exp = qb.loc[is_expense_row & in_current_month, 'amount'].sum()
print ('total expenses for ' + current_year_and_month + ' is ${:,.0f}'.format(cur_exp))

total income for 2017-08 is $3,884,005
total expenses for 2017-08 is $4,181,298


# Build Excel workbook

### Create color palettes
* [Hex colors](http://www.color-hex.com/)
* [color palettes](https://www.w3schools.com/colors/colors_palettes.asp)

In [9]:
# border = top row, last
# face color = top row, first
# the hex category is the face color hex number

In [10]:
def _palette(face_color, bar_colors):
    """Build one palette dict: 'face color' plus 'bar 01', 'bar 02', ... in order."""
    palette = {'face color': face_color}
    for position, bar_color in enumerate(bar_colors, 1):
        palette['bar {:02d}'.format(position)] = bar_color
    return palette

# one palette per worksheet; the source of each palette is noted next to it
my_colors = {
    # http://www.color-hex.com/color-palette/2971
    'color01': _palette('#66c0b7', ['white', '#cceae7', '#99d5cf', '#32ab9f', '#009688']),
    # http://www.color-hex.com/color-palette/809
    'color02': _palette('#ef4f91', ['white', '#363b74', '#c79dd7', '#4d1b7b', '#673888']),
    # http://www.color-hex.com/color-palette/358
    'color03': _palette('#63ace5', ['white', '#2a4d69', '#adcbe3', '#4b86b4', '#e7eff6']),
    # http://www.color-hex.com/color-palette/660
    'color04': _palette('#6b3e26', ['white', '#ffc5d9', '#fdf5c9', '#ffcb85', '#c2f2d0']),
    # http://www.color-hex.com/color-palette/1160
    'color05': _palette('#a19c9c', ['white', '#326ada', '#d4d8d4', '#a19c9c', '#d2d2d2']),
    # http://www.color-hex.com/color-palette/189
    'color06': _palette('#3366ff', ['white', '#99ccff', '#5588ff', '#bbeeff', '#77aaff']),
}

# palette names in definition order
color_list = list(my_colors)

### Set RC Params for charts
* [rc params](https://matplotlib.org/api/matplotlib_configuration_api.html#matplotlib.RcParams)

In [11]:
# chart defaults applied to every figure built below
plt.rcParams.update({
    # font
    'font.family': 'Arial',
    # figure border colour
    'figure.edgecolor': 'black',
    # axis labels and tick labels
    'axes.labelsize': 20,
    'axes.labelweight': 'bold',
    'xtick.major.pad': 15,
    'xtick.labelsize': 20,
    'ytick.labelsize': 20,
    # figure title
    'figure.titlesize': 30,
    'figure.titleweight': 'bold',
})

#plt.rcdefaults() # to restore defaults

### Build workbook

In [19]:
import xlsxwriter
import xlwings as xw
import textwrap
from matplotlib.ticker import FuncFormatter

#===== set title and period
workbook_title = 'The Five Pillars'

# Build the header label from the reporting month so it always matches the
# data shown on the sheets (e.g. '2017-08' -> 'August 2017').
period_year, period_month = current_year_and_month.split('-')
xl_current_period = calendar.month_name[int(period_month)] + ' ' + period_year

# month name and year of the first month in the date range (used in chart titles
# and as the x axis label); characters 5-6 of an ISO date hold the month
start_month_name = calendar.month_name[int(date_start[5:7])]
start_year = date_start[:4]

#===== file locations
logo_path = r'/Users/jarad/Desktop/Desktop/Adafruit Logo.png'
# NOTE(review): xlsxwriter saves '<workbook_title>.xlsx' relative to the working
# directory, while xlwings opens this absolute path - confirm both refer to the
# same file and that it already exists with all six sheets.
xl_book_path = r'/Users/jarad/Desktop/Desktop/The Five Pillars Project/The Five Pillars.xlsx'

#===== chart helpers
def _format_dollar_axis(ax):
    """Show the y axis tick values of ``ax`` as whole dollars, e.g. $1,234."""
    # a formatter keeps labels and tick positions in sync even if the ticks move
    ax.yaxis.set_major_formatter(FuncFormatter(lambda value, pos: '${:,.0f}'.format(value)))

def _label_months(ax, year_months):
    """Place one x tick per 'YYYY-MM' label and show it as a month abbreviation."""
    ax.set_xticks(np.arange(len(year_months)))
    # the last two characters are the month number, so October-December work too
    ax.set_xticklabels([calendar.month_abbr[int(ym[-2:])] for ym in year_months])
    ax.set_xlabel(start_year)

def _add_plot_to_sheet(sht, fig, name, anchor):
    """Insert ``fig`` as picture ``name`` at cell ``anchor`` of ``sht``, scale it down and close it."""
    cell = sht.range(anchor)
    sht_plot = sht.pictures.add(fig,
                                name = name,
                                update = True,
                                left = cell.left,
                                top = cell.top)
    sht_plot.height /= plot_height
    sht_plot.width /= plot_width

    # free the figure; several figures are created per worksheet
    plt.close(fig)

#===== create workbook
workbook = xlsxwriter.Workbook(workbook_title + '.xlsx')

#===== formats
money = workbook.add_format({'num_format': '$#,##0',
                             'valign':'vcenter',
                             'align':'center',
                             'font_size':20})

percent = workbook.add_format({'num_format': '0.00%',
                              'valign':'vcenter',
                              'align':'center',
                              'font_size':20})

number = workbook.add_format({'num_format': '#,##0'})

center = workbook.add_format({'valign':'vcenter',
                              'align':'center'})

current_period_format = workbook.add_format({'font_size':14,
                                             'font_name':'Arial (Bold)'})

# inserted pictures are divided by these factors to fit the sheet
plot_height = 2.10
plot_width = 3.10

#===== add worksheets
worksheet1 = workbook.add_worksheet('overview')
worksheet2 = workbook.add_worksheet('building expenses')
worksheet3 = workbook.add_worksheet('ad spend')
worksheet4 = workbook.add_worksheet('employees')
worksheet5 = workbook.add_worksheet('COGS')
worksheet6 = workbook.add_worksheet('shipping')

#===== create worksheets dict
my_worksheets = {}
for worksheet in workbook.worksheets():
    my_worksheets[worksheet.get_name()] = worksheet

#===== open the Excel book once; each worksheet's charts go to the sheet of the same name
xl_book = xw.Book(xl_book_path)

#===== row filters and series shared by every worksheet
is_expense = qb['income/expense'] == 'expense'
is_income = qb['income/expense'] == 'income'
in_current_period = qb['year and month'] == current_year_and_month

income_over_time = qb[is_income].groupby('year and month')[['amount']].sum()

# create worksheets; i is the colour index
for i, (key, worksheet) in enumerate(my_worksheets.items()):

    # palette for this worksheet (wraps around if there are more sheets than palettes)
    palette = my_colors[color_list[i % len(color_list)]]

    worksheet_title_format = workbook.add_format({'font_size':18,
                                                  'font_color':'white',
                                                  'valign':'vcenter',
                                                  'align':'center',
                                                  'bottom':2,
                                                  'font_name':'Arial (Bold)',
                                                  'bg_color':palette['face color']})

    button_format = workbook.add_format({'font_size':14,
                                         'font_name':'Arial (Bold)',
                                         'valign':'vcenter',
                                         'align':'center',
                                         'bottom':2,
                                         'top':2,
                                         'bg_color':palette['face color'],
                                         'font_color':'white'})

    # title bar, period label, page setup and logo
    worksheet.merge_range(0, 0, 1, 8, key.title(), worksheet_title_format)
    worksheet.write(2, 7, xl_current_period, current_period_format)
    worksheet.hide_gridlines(2)
    worksheet.set_zoom(140)
    worksheet.set_footer('&L&D')
    worksheet.insert_image(0, 0, logo_path,
                           {'x_scale': 0.30, 'y_scale': 0.30,'x_offset': 5, 'y_offset': 5})

    # sheet of the open Excel book that receives the charts
    sht = xl_book.sheets[key]

    if key == 'overview':

        exp_over_time = qb[is_expense].groupby('year and month')[['amount']].sum()

        # buttons: expenses of the reporting month and the average monthly expenses
        overall_total = qb.loc[is_expense & in_current_period, 'amount'].sum()
        worksheet.merge_range(4, 0, 4, 3, 'Monthly Total', button_format)
        worksheet.merge_range(5, 0, 5, 3, overall_total, money)

        monthly_avg = float(exp_over_time['amount'].mean())
        worksheet.merge_range(4, 4, 4, 8, 'Monthly Average', button_format)
        worksheet.merge_range(5, 4, 5, 8, monthly_avg, money)

        # months that have both expenses and income
        totals = pd.merge(exp_over_time.rename(columns = {'amount':'expenses'}),
                          income_over_time.rename(columns = {'amount':'income'}),
                          left_index = True, right_index = True)

        #===== PLOT 04 - expenses and income over time
        fig4 = plt.figure(figsize = (20,5), linewidth = 5)
        ax = fig4.add_subplot(1,1,1)

        x1 = np.arange(len(totals.index))

        ax.plot(x1, totals['expenses'], '--', marker = 'o', linewidth = 5, markersize = 15, label = 'Expenses', color = 'black')
        ax.plot(x1, totals['income'], '--', marker = 'o', linewidth = 5, markersize = 15, label = 'Income', color = 'white')

        _format_dollar_axis(ax)
        # label the ticks from the frame that is plotted so labels and points line up
        _label_months(ax, totals.index)

        # title
        fig4.suptitle('Expenses and Income Since ' + start_month_name + ' ' + start_year,
                      position = [0.5, 0.955])

        # foreground
        ax.set_facecolor(palette['face color'])
        ax.grid(color = 'white')
        ax.legend(fontsize = 15)

        _add_plot_to_sheet(sht, fig4, 'MyPlot04', 'A12')

    else:

        in_category = qb['main cat'].isin(exp_dict[key])

        exp_over_time = qb[is_expense & in_category].groupby('year and month')[['amount']].sum()

        # rows of this pillar in the reporting month
        current_rows = qb[in_category & in_current_period]

        # make buttons
        monthly_total = current_rows['amount'].sum()
        percent_of_total_exp = monthly_total/cur_exp

        worksheet.merge_range(4, 0, 4, 3, 'Monthly Total', button_format)
        worksheet.merge_range(5, 0, 5, 3, monthly_total, money)

        worksheet.merge_range(4, 4, 4, 8, '% of Total Expenses', button_format)
        worksheet.merge_range(5, 4, 5, 8, percent_of_total_exp, percent)

        #===== PLOT 01 - dollar amount over time
        fig1 = plt.figure(figsize = (20,5), linewidth = 5)
        ax = fig1.add_subplot(1,1,1)

        x1 = np.arange(len(exp_over_time.index))

        ax.plot(x1, exp_over_time['amount'], '--o', linewidth = 5, markersize = 15, color = 'white')

        _format_dollar_axis(ax)
        _label_months(ax, exp_over_time.index)

        # title
        fig1.suptitle('Totals Since ' + start_month_name + ' ' + start_year,
                      position = [0.5, 0.955])

        # foreground
        ax.set_facecolor(palette['face color'])
        ax.grid(color = 'white')

        _add_plot_to_sheet(sht, fig1, 'MyPlot', 'A8')

        #===== PLOT 02 - expense next to income
        perc_of_income = pd.merge(exp_over_time.rename(columns = {'amount':'expense'}),
                                  income_over_time.rename(columns = {'amount':'income'}),
                                  left_index = True, right_index = True)

        fig2 = plt.figure(figsize = (20,5), linewidth = 3)
        ax = fig2.add_subplot(1,1,1)

        x1 = np.arange(len(perc_of_income.index))
        bar_width = 0.25

        ax.bar(x1, perc_of_income['expense'], width = bar_width, color = palette['bar 01'], label = key.title(), edgecolor = 'white')
        ax.bar(x1 + bar_width, perc_of_income['income'], width = bar_width, color = palette['bar 02'], label = 'Income', edgecolor = 'white')

        _format_dollar_axis(ax)
        _label_months(ax, perc_of_income.index)

        # title
        fig2.suptitle('Totals Compared to Income',
                      position = [0.5, 0.955])

        # foreground
        ax.grid(color = 'white')
        ax.set_facecolor(palette['face color'])
        ax.legend(fontsize = 15, loc = 'upper right')

        _add_plot_to_sheet(sht, fig2, 'MyPlot02', 'A20')

        #===== PLOT 03 - breakdown of the five largest groups in the reporting month
        fig3 = plt.figure(figsize = (20,5), linewidth = 3)
        ax = fig3.add_subplot(1,1,1)

        # employees are broken down by sub category, every other pillar by name
        if key == 'employees':
            groupby_on = 'sub cat'
        else:
            groupby_on = 'name'

        sub_cat = (current_rows
                   .groupby(groupby_on)[['amount']]
                   .sum()
                   .sort_values('amount', ascending = False)
                   .head())

        sub_cat.plot(kind = 'bar',
                     color = [palette['bar 01'],
                              palette['bar 02'],
                              palette['bar 03'],
                              palette['bar 04'],
                              palette['bar 05']],
                     width = 0.15,
                     legend = False, ax = ax,
                     edgecolor = 'white')

        _format_dollar_axis(ax)

        # x axis: wrap long labels at 20 characters
        wrapped_labels = [textwrap.fill(text, 20) for text in sub_cat.index]
        ax.set_xticklabels(wrapped_labels, rotation = 0)
        ax.set_xlabel('')

        # title
        fig3.suptitle('Breakdown of Top Expenses in ' + key.title(),
                      position = [0.5, 0.955])

        # foreground
        ax.grid(color = 'white')
        ax.set_facecolor(palette['face color'])

        _add_plot_to_sheet(sht, fig3, 'MyPlot03', 'A32')

# close and save workbook
workbook.close()

print ('done')

done


# Write raw data to Excel for DataStudio

In [None]:
# Write the cleaned ledger and the excluded general-journal rows to one workbook,
# one sheet each. The context manager saves and closes the file on exit; before,
# the save call was commented out, so nothing was written to disk.
with pd.ExcelWriter('Five Pillars for DataStudio.xlsx') as writer:
    qb.to_excel(writer, sheet_name = 'Profit and Loss', index = False)
    excluded.to_excel(writer, sheet_name = 'Excluded', index = False)

# [Matplotlib fonts](http://jonathansoma.com/lede/data-studio/matplotlib/list-all-fonts-available-in-matplotlib-plus-samples/)

In [None]:
import matplotlib.font_manager
from IPython.core.display import HTML

def make_html(fontname):
    """Return an HTML paragraph showing ``fontname`` rendered in that font.

    The name appears once as plain text and once inside a 24px span styled
    with the font itself.
    """
    # the span is closed before the paragraph ends so the markup is well formed
    return "<p>{font}: <span style='font-family:{font}; font-size: 24px;'>{font}</span></p>".format(font=fontname)

# unique font names known to matplotlib, in alphabetical order
font_names = sorted({f.name for f in matplotlib.font_manager.fontManager.ttflist})

# one sample paragraph per font, shown in two columns
code = "\n".join(make_html(font) for font in font_names)

HTML("<div style='column-count: 2;'>{}</div>".format(code))