In [229]:
import sys
sys.path.insert(0,'/Users/jarad')

import pandas as pd
import numpy as np
from db2 import *

import datetime as dt
import calendar

from scipy import stats

pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 100)

import matplotlib.pyplot as plt
%matplotlib inline

# PLM GitHub
* From [Daily Stats: Customer Types](https://volcano.adafruit.com/volcano/ada_plm.php#/Stats/Sales?focus=Customer%20Types)
* GitHub [here](https://github.com/AdafruitInternalDev/shop/blob/1a30b1e0a95812f46990721c46fe0a89dbf469c3/crons/daily_stats_cron.php): line 50: d_customer

In [230]:
git = pd.read_sql(
'''
SELECT
MONTH(o.date_purchased) AS month,
SUM(ot.value) AS git,
IF(o.orders_reseller = 0 AND o.orders_super_reseller = 0, 'normal', 
IF(o.orders_reseller = 1, 'reseller', 'super reseller')) AS variant
FROM orders o, orders_total ot
WHERE ot.class ='ot_subtotal'
AND o.orders_id = ot.orders_id 
AND o.payment_method != 'Replacement Order'
AND o.orders_status != 9
AND o.orders_status != 10
AND o.date_purchased BETWEEN '2017-01-01'AND '2017-08-31'
GROUP BY month, variant
''', db)

git = git[git['variant'] == 'normal']
git.set_index('month', inplace = True)
git.drop('variant',1,inplace = True)

# OT

In [231]:
ot = pd.read_sql(
'''
SELECT
MONTH(o.date_purchased) AS month,
SUM(ot.value) AS ot
FROM orders o
JOIN orders_total ot ON o.orders_id = ot.orders_id
WHERE DATE(o.date_purchased) BETWEEN '2017-01-01' AND '2017-08-31'
AND o.orders_status != 9
AND o.orders_status != 10
AND o.orders_status != 11
AND o.payment_method != 'Replacement Order'
AND o.orders_reseller = 0
AND o.orders_super_reseller = 0
AND ot.class = 'ot_subtotal'
GROUP BY month
''', db)

ot.set_index('month', inplace = True)

# Just fake

In [232]:
fake = pd.read_sql(
'''
SELECT
MONTH(o.date_purchased) AS month,
SUM((op.products_quantity - op.products_quantity_free) * op.products_price) AS fake
FROM orders_products op
JOIN orders o ON op.orders_id = o.orders_id
WHERE op.part_id NOT IN (1205,2481,2684,2904,2931,2877,2893,2905,2929,3126,3075,3358,3359,3360,2907,2908,3074)
AND DATE(o.date_purchased) BETWEEN '2017-01-01' AND '2017-08-31'
AND o.orders_status != 9
AND o.orders_status != 10
AND o.orders_status != 11
AND o.payment_method != 'Replacement Order'
AND o.orders_reseller = 0
AND o.orders_super_reseller = 0
AND orders_fake = 1
AND LOWER(orders_po_number) IN ('','n/a')
GROUP BY month
''', db)

fake.set_index('month', inplace = True)

# Just POs

In [233]:
po = pd.read_sql(
'''
SELECT
MONTH(o.date_purchased) AS month,
SUM((op.products_quantity - op.products_quantity_free) * op.products_price) AS po
FROM orders_products op
JOIN orders o ON op.orders_id = o.orders_id
WHERE op.part_id NOT IN (1205,2481,2684,2904,2931,2877,2893,2905,2929,3126,3075,3358,3359,3360,2907,2908,3074)
AND DATE(o.date_purchased) BETWEEN '2017-01-01' AND '2017-08-31'
AND o.orders_status != 9
AND o.orders_status != 10
AND o.orders_status != 11
AND o.payment_method != 'Replacement Order'
AND o.orders_reseller = 0
AND o.orders_super_reseller = 0
AND orders_fake = 0
AND LOWER(orders_po_number) NOT IN ('','n/a')
GROUP BY month
''', db)

po.set_index('month', inplace = True)

# Fake orders and POs

In [234]:
fake_and_po = pd.read_sql(
'''
SELECT
MONTH(o.date_purchased) AS month,
SUM((op.products_quantity - op.products_quantity_free) * op.products_price) AS fake_and_po
FROM orders_products op
JOIN orders o ON op.orders_id = o.orders_id
WHERE op.part_id NOT IN (1205,2481,2684,2904,2931,2877,2893,2905,2929,3126,3075,3358,3359,3360,2907,2908,3074)
AND DATE(o.date_purchased) BETWEEN '2017-01-01' AND '2017-08-31'
AND o.orders_status != 9
AND o.orders_status != 10
AND o.orders_status != 11
AND o.payment_method != 'Replacement Order'
AND o.orders_reseller = 0
AND o.orders_super_reseller = 0
AND orders_fake = 1
AND LOWER(orders_po_number) NOT IN ('','n/a')
GROUP BY month
''', db)

fake_and_po.set_index('month', inplace = True)

# Neither Fake nor POs

In [235]:
no_fake_no_po = pd.read_sql(
'''
SELECT
MONTH(o.date_purchased) AS month,
SUM((op.products_quantity - op.products_quantity_free) * op.products_price) AS no_fake_no_po
FROM orders_products op
JOIN orders o ON op.orders_id = o.orders_id
WHERE op.part_id NOT IN (1205,2481,2684,2904,2931,2877,2893,2905,2929,3126,3075,3358,3359,3360,2907,2908,3074)
AND DATE(o.date_purchased) BETWEEN '2017-01-01' AND '2017-08-31'
AND o.orders_status != 9
AND o.orders_status != 10
AND o.orders_status != 11
AND o.payment_method != 'Replacement Order'
AND o.orders_reseller = 0
AND o.orders_super_reseller = 0
AND o.orders_fake = 0
AND LOWER(o.orders_po_number) IN ('','n/a')
GROUP BY month
''', db)

no_fake_no_po.set_index('month', inplace = True)

In [236]:
together = pd.merge(no_fake_no_po, fake, left_index = True, right_index = True)
together = pd.merge(together, po, left_index = True, right_index = True, copy = False)
together = pd.merge(together, fake_and_po, left_index = True, right_index = True, copy = False)
together = pd.merge(together, ot, left_index = True, right_index = True, copy = False)
together = pd.merge(together, git, left_index = True, right_index = True, copy = False)

together['diff_from_git'] = together['git'] - together[['no_fake_no_po','fake','po','fake_and_po']].sum(1)
together['diff_from_ot'] = together['ot'] - together[['no_fake_no_po','fake','po','fake_and_po']].sum(1)

together = together[['no_fake_no_po','fake','po','fake_and_po','git','diff_from_git','ot','diff_from_ot']]

In [237]:
together_format = together.copy()
together_format['no_fake_no_po'] = ['${:,.0f}'.format(x) for x in together_format['no_fake_no_po']]
together_format['fake'] = ['${:,.0f}'.format(x) for x in together_format['fake']]
together_format['po'] = ['${:,.0f}'.format(x) for x in together_format['po']]
together_format['fake_and_po'] = ['${:,.0f}'.format(x) for x in together_format['fake_and_po']]
together_format['git'] = ['${:,.0f}'.format(x) for x in together_format['git']]
together_format['diff_from_git'] = ['${:,.0f}'.format(x) for x in together_format['diff_from_git']]
together_format['ot'] = ['${:,.0f}'.format(x) for x in together_format['ot']]
together_format['diff_from_ot'] = ['${:,.0f}'.format(x) for x in together_format['diff_from_ot']]

In [238]:
together_format

Unnamed: 0_level_0,no_fake_no_po,fake,po,fake_and_po,git,diff_from_git,ot,diff_from_ot
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,"$2,078,796","$7,938","$130,890","$4,146","$2,224,378","$2,608","$2,221,770",$0
2,"$2,058,005","$2,381","$91,490","$16,348","$2,173,859","$5,635","$2,169,135",$911
3,"$2,431,587","$6,561","$102,429","$3,530","$2,545,133","$1,026","$2,544,107",$0
4,"$1,861,331","$7,548","$115,638","$10,616","$1,995,138",$5,"$1,995,138",$5
5,"$1,928,093","$2,632","$121,522","$16,532","$2,068,957",$178,"$2,068,957",$178
6,"$2,124,631","$20,470","$91,663","$29,255","$2,266,341",$323,"$2,266,341",$323
7,"$1,829,679","$4,297","$119,833","$6,756","$1,960,934",$369,"$1,960,934",$369
8,"$2,043,034",$190,"$79,419","$5,895","$2,027,533","$-101,005","$2,129,648","$1,110"


# Got versus OT

In [253]:
git_test = pd.read_sql(
'''
SELECT
o.orders_id,
ot.value AS git_test
FROM orders o, orders_total ot
WHERE ot.class ='ot_subtotal'
AND o.orders_id = ot.orders_id 
AND o.payment_method != 'Replacement Order'
AND o.orders_status != 9
AND o.orders_status != 10
AND o.date_purchased BETWEEN '2017-01-01'AND '2017-08-31'
AND o.orders_reseller = 0
AND o.orders_super_reseller = 0
''', db)

In [265]:
ot_test = pd.read_sql(
'''
SELECT
ot.orders_id,
ot.value AS ot_test
FROM orders o
JOIN orders_total ot ON o.orders_id = ot.orders_id
WHERE DATE(o.date_purchased) BETWEEN '2017-01-01' AND '2017-08-31'
AND o.orders_status != 9
AND o.orders_status != 10
AND o.orders_status != 11
AND o.payment_method != 'Replacement Order'
AND o.orders_reseller = 0
AND o.orders_super_reseller = 0
AND ot.class = 'ot_subtotal'
''', db)

In [266]:
print (len(git_test), len(ot_test))

197345 197272


In [267]:
len(ot_test[~ot_test['orders_id'].isin(git_test['orders_id'].tolist())].index)

0

In [268]:
pd.read_sql(
'''
SELECT
COUNT(orders_id)
FROM orders_total
WHERE orders_id IN '''+ str(tuple(ot_test['orders_id'][~ot_test['orders_id'].isin(git_test['orders_id'].tolist())])) +'''
AND class = 'ot_subtotal'
''', db)

ERROR:root:An unexpected error occurred while tokenizing input
The following traceback may be corrupted or invalid
The error message is: ('EOF in multi-line string', (1, 0))



DatabaseError: Execution failed on sql '
SELECT
COUNT(orders_id)
FROM orders_total
WHERE orders_id IN ()
AND class = 'ot_subtotal'
': (1064, "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near ')\nAND class = 'ot_subtotal'' at line 4")

In [None]:
ot_test[~ot_test['orders_id'].isin(git_test['orders_id'].tolist())].sort_values('date_purchased', ascending = False)