In [27]:
#==========================================================================================
# Load libraries and get dates
#==========================================================================================

import sys
sys.path.insert(0,'/Users/jarad/fake_folder/Python Libraries')

import datetime

from jb_libraries import *
from dhl_data import *
%matplotlib inline

# Reporting window: first day of the month 36 months back, through today.
# Both bounds end up as 'YYYY-MM-DD' strings because they are spliced into the SQL text below.
_today = datetime.datetime.now().date()
date_end = str(_today)

# step back 36 months, then snap to the 1st of that month
_window_start = (pd.Timestamp(_today) - pd.DateOffset(months=36)).date()
d = _window_start.day  # day-of-month before snapping
date_start = str(_window_start.replace(day=1))

#==========================================================================================
# Orders Data
#==========================================================================================

# Order ids that belong to subscriptions of type 'adabox'.
_adabox_sql = '''
SELECT
orders_id
FROM subscriptions_history
WHERE subscriptions_id IN (SELECT subscriptions_id FROM subscriptions WHERE subscriptions_type = 'adabox')
'''
adabox_oids = pd.read_sql(sql=_adabox_sql, con=db)

# col_fix is a project helper; after it runs the column is addressed as 'orders id'
# (presumably it swaps underscores for spaces in place -- confirm in jb_libraries)
col_fix(adabox_oids)

# collapse the one-column frame to a plain Python list of ids
adabox_oids = adabox_oids['orders id'].tolist()

# One row per order placed inside the reporting window, excluding fraud/voided statuses.
o_main = pd.read_sql(
'''
SELECT
DATE(date_purchased) AS date_purchased,
orders_id,
shipping_module_code,
delivery_city,
delivery_state,
delivery_country,
IF(orders_reseller = 0 AND orders_super_reseller = 0,'non reseller','reseller/super') AS customer

FROM orders

# keep in replacements, refunds, returns, etc. since we still pay shipping on those
WHERE orders_status NOT IN (8,9,14,15) #fraud pending/confirmed, voided, fraud-voided
AND DATE(date_purchased) BETWEEN ' '''+ date_start +''' ' AND ' '''+ date_end +''' '
''', db)

col_fix(o_main)

# Work on a copy so o_main stays exactly as pulled from the database.
main = o_main.copy()
main['date purchased'] = pd.to_datetime(main['date purchased'])
main['year and month'] = main['date purchased'].dt.strftime('%Y-%m')

# 'week ending' = the latest purchase date observed within each Monday-Sunday week.
# The week key is a weekly Period rather than "calendar year + ISO week number":
# the latter puts e.g. Dec 31 (ISO week 1 of the next year) in the same bucket as the
# first week of January of the *same* calendar year, which corrupts 'week ending'.
# This also avoids Series.dt.week, which no longer exists in pandas 2.x.
_week_key = main['date purchased'].dt.to_period('W')
main['week ending'] = main['date purchased'].groupby(_week_key).transform('max')

# Subtotal, shipping and DDP totals for every order purchased in the window,
# with the raw class codes relabelled by the query itself.
_ot_sql = (
'''
SELECT
orders_id,
CASE WHEN class = 'ot_shipping' THEN 'service revenue'
WHEN class = 'ot_ddp' THEN 'ddp revenue'
WHEN class = 'ot_subtotal' THEN 'subtotal'
END AS class,
value
FROM orders_total
WHERE class IN ('ot_subtotal','ot_shipping','ot_ddp')
AND orders_id IN
(SELECT orders_id FROM orders WHERE DATE(date_purchased) BETWEEN ' ''' + date_start + ''' ' AND ' ''' + date_end + ''' ')
'''
)
ot_main = pd.read_sql(sql=_ot_sql, con=db)

col_fix(ot_main)

# drop any leftover 'ot_' prefix and surrounding whitespace from the labels
ot_main['class'] = ot_main['class'].str.replace('ot_','').str.strip()

# reshape to one row per order id and one column per class so it can be mapped onto main
df = (
    ot_main.groupby(['orders id', 'class'])['value']
    .sum()
    .unstack('class')
    .fillna(0)
)
df['shipping revenue'] = df[['ddp revenue','service revenue']].sum(axis=1)

# df is indexed by order id, so each column doubles as an id -> value lookup
for col in df.columns:
    main[col] = main['orders id'].map(df[col])

# orders with exactly zero shipping revenue are flagged as free shipping
main['free shipping'] = np.where(main['shipping revenue'].eq(0), 'yes', 'no')

main['delivery country'] = main['delivery country'].str.strip()

# Collapse alternate spellings of a country onto a single label.
d = {'falkland islands (malvinas)':'falkland islands',
    'macedonia, the former republic of':'macedonia',
    'ontario, canada':'canada',
    'great britain':'united kingdom',
    'virgin islands (u.s.)':'virgin islands'}

# Assign the result back instead of calling .replace(..., inplace=True) on
# main['delivery country']: in-place mutation of a column selection is chained
# assignment and is not guaranteed to reach `main` (it does not under Copy-on-Write).
# No replacement value is itself a key, so one dict replace equals the sequential loop.
main['delivery country'] = main['delivery country'].replace(d)
    
#==========================================================================================
# UPS Data
#==========================================================================================    
    
# UPS billing charge descriptions that get classified as 'ddp charge';
# every other description is treated as 'service charge'.
ddp_ls = [
    'Agri Processing', 'Broker Fee', 'Brokerage Fees', 'Brokerage GST',
    'Ca British Columbia Pst', 'Ca Customs Hst', 'Complex Entry', 'Customs Gst',
    'Customs Warehouse', 'DGoods Air Inaccessible', 'Duty Amount', 'Pst Quebec',
    'QST',
]

# All UPS billing lines for 'upsxml' orders purchased in the window.
_ups_sql = (
'''
SELECT
orders_id,
charge_description,
netAmount AS net_amount
FROM ups_billing
WHERE orders_id IN
(SELECT
orders_id 
FROM orders
WHERE DATE(date_purchased) BETWEEN ' ''' + date_start + ''' ' AND ' ''' + date_end + ''' '
AND shipping_module_code = 'upsxml')
'''
)
ups_super_main = pd.read_sql(sql=_ups_sql, con=db)

col_fix(ups_super_main)

# label each billing line, then total per order and per label
_is_ddp = ups_super_main['charge description'].isin(ddp_ls)
ups_super_main['type'] = np.where(_is_ddp, 'ddp charge', 'service charge')

ups_main = (
    ups_super_main.groupby(['orders id', 'type'])['net amount']
    .sum()
    .unstack('type')
    .fillna(0)
)
# total charge = sum of whatever label columns are present for the order
ups_main['shipping charge'] = ups_main.sum(axis=1)
ups_main = ups_main.reset_index()
ups_main['shipping module code'] = 'ups'

#==========================================================================================
# DHL data
#==========================================================================================    

# Take only the id and charge columns from dhl_main (brought in by the dhl_data import)
# and tag the rows with their carrier.
_dhl_cols = ['orders id','service charge','ddp charge','shipping charge']
dhl_for_audit = dhl_main.loc[:, _dhl_cols].copy()
dhl_for_audit.loc[:, 'shipping module code'] = 'dhl'

#==========================================================================================
# USPS data
#==========================================================================================    

# Logged shipping cost for every 'usps' order purchased in the window.
_usps_sql = (
'''
SELECT
orders_id,
sl_cost AS shipping_charge
FROM ship_log
WHERE orders_id IN
(SELECT
orders_id 
FROM orders
WHERE DATE(date_purchased) BETWEEN ' ''' + date_start + ''' ' AND ' ''' + date_end + ''' '
AND shipping_module_code = 'usps')
'''
)
usps_main = pd.read_sql(sql=_usps_sql, con=db)

col_fix(usps_main)

# USPS rows carry no DDP component here, so the whole cost is booked as service charge
usps_main = usps_main.assign(**{
    'ddp charge': 0,
    'service charge': usps_main['shipping charge'],
    'shipping module code': 'usps',
})

#==========================================================================================
# Structure it all
#==========================================================================================    

# Stack the three carrier frames into one table of charges per order.
shipping_main = pd.concat([ups_main, dhl_for_audit, usps_main], sort = True)

ls = ['service charge','ddp charge','shipping charge']

# Lookup keyed by order id. If an id appears on several rows the LAST row wins,
# which is what building a dict from zip(ids, values) does.
_charges_by_oid = (
    shipping_main
    .drop_duplicates(subset='orders id', keep='last')
    .set_index('orders id')
)
for col in ls:
    main[col] = main['orders id'].map(_charges_by_oid[col])

# profit = what the customer paid for shipping minus what the carrier billed
main['shipping profit'] = main['shipping revenue'].sub(main['shipping charge'])
main['profit loss'] = np.where(main['shipping profit'].lt(0), 'yes', 'no')

# flag orders whose id is in the AdaBox id list
main['adabox'] = np.where(main['orders id'].isin(adabox_oids), 'yes', 'no')
        
#==========================================================================================
# Missing or removed orders
#==========================================================================================    

thirty_five_days_ago = str((datetime.datetime.now() - pd.DateOffset(days = 35)).date())

# DHL orders whose DDP bill has probably not arrived yet
dhl_remove = main[(main['shipping module code'] == 'dhlexpress') # if it's DHL
                & (main['date purchased'] >= pd.Timestamp(thirty_five_days_ago)) # and purchased within the last 35 days
                & (main['ddp revenue'] > 0) # and we charged DDP
                & (main['ddp charge'] == 0)].copy() # but the DDP bill from DHL has not yet come

ls1 = list(o_main['orders id'])
ls2 = ['upsxml','dhlexpress','usps'] # module codes, as stored on orders, that have carrier billing data
ls3 = list(shipping_main['orders id'])
ls4 = list(dhl_remove['orders id'])

_is_carrier_order = main['shipping module code'].isin(ls2)

main['missing'] = np.where((main['orders id'].isin(ls1)) # if an OID is in orders table
                        & _is_carrier_order # and is a UPS/DHL/USPS order
                        & ((~main['orders id'].isin(ls3)) | main['orders id'].isin(ls4)), # and is not in shipping_main OR is in dhl_remove
                          'yes','no')

# now, where shipping module code is NOT a UPS/DHL/USPS code, the shipping charge, profit, etc. will be null
# but these are not missing orders, they just don't have any shipping cost associated with them
# so fill these nulls with zero
#
# The test must use the raw module codes in ls2 ('upsxml', 'dhlexpress', 'usps'): at this point
# main['shipping module code'] still holds the values from the orders table. Testing against
# 'ups'/'dhl' matched nothing and zeroed the charges and profit of every UPS and DHL order.
ls = ['service charge','ddp charge','shipping charge','shipping profit']

main.loc[~_is_carrier_order, ls] = 0
    
#==========================================================================================
# Final touches
#==========================================================================================    

# Title-case every text column for presentation.
for col in main.columns:
    if main[col].dtype == 'object':
        # only real strings have .title(); leave None/NaN (e.g. a NULL state or city) untouched
        # instead of raising AttributeError
        main[col] = [x.title() if isinstance(x, str) else x for x in main[col]]

# Friendly labels for the (now title-cased) shipping module codes.
d = {'Upsxml':'UPS',
    'Usps':'USPS',
    'Dhlexpress':'DHL',
    'Free':'Gift Certificate/Software',
    '':'Employee/Other',
    'Resellershipping':'Reseller Shipping',
    '----- No Shipping Selected -----':'Employee/Other'}

# no replacement value is also a key, so a single dict replace is equivalent to
# replacing the pairs one at a time
main['shipping module code'] = main['shipping module code'].replace(d)

# finally title-case the column names themselves ('orders id' -> 'Orders Id', ...)
main.columns = [x.title() for x in main.columns]

#==========================================================================================
# Check totals
#==========================================================================================    

# Sanity check: the number of orders in the database for the window should equal
# the number of distinct order ids that made it into main.
c = pd.read_sql(
'''
SELECT
COUNT(*) AS count
FROM orders 
WHERE orders_status NOT IN (8,9,14,15) #fraud pending/confirmed, voided, fraud-voided
AND DATE(date_purchased) BETWEEN ' '''+ date_start +''' ' AND ' '''+ date_end +''' '
''', db)

c1 = c['count'].values[0]
# main's columns were title-cased just above, so the id column is now 'Orders Id';
# looking up 'orders id' here raises KeyError
c2 = len(set(main['Orders Id']))

if c1 == c2:
    print('match')
else:
    print('mismatch')