### Settings to change before running

In [1]:
# Set to 'yes' to have the final cell call workbook.close(), i.e. write the Excel workbook.
# Any other value skips that call; every other cell runs regardless of this setting.
close_workbook = 'yes'

### Load libraries

In [2]:
import os
tilde = os.path.expanduser('~')

import sys
sys.path.insert(0, tilde + '/Scripts/Fake Folder/Python Libraries')

from jb_libraries import *
%matplotlib inline

### Get chemical data

In [3]:
# Load one row per (sku, chemical, danger type). The query itself labels blank
# chemicals as 'no chemical listed' and corrects the 'led' typo to 'lead'.
chem_main = pd.read_sql(
'''
SELECT
sku_id,
CASE WHEN chemical = '' THEN 'no chemical listed'
WHEN chemical = 'led' THEN 'lead' ELSE chemical END AS 'chemical',
danger_type
FROM skus_to_chemicals
''', db)

# col_fix is a project helper; presumably it rewrites the column names
# (later cells refer to 'sku id', not 'sku_id') -- TODO confirm
col_fix(chem_main)

# change to lowercase
for col in chem_main.columns:
    try:
        chem_main[col] = chem_main[col].str.lower()
    except AttributeError:
        # the .str accessor raises AttributeError on non-text columns;
        # those are left untouched, any other error is allowed to surface
        continue

### Check the value counts of the chemical and danger type columns

In [4]:
# one value-count table per column, for the last two columns of chem_main
for column_name in chem_main.columns[-2:]:
    print(column_name.upper())
    counts = chem_main[column_name].value_counts()
    # format_ is not a stock pandas method; it comes from the project library
    display(pd.DataFrame(counts).format_(['n0']))
    print('\n')

CHEMICAL


Unnamed: 0,Chemical
lead,15177
di(2-ethylhexyl)phthalate (dehp),506
no chemical listed,419
chromium,240
styrene,228
di-isodecyl phthalate (didp),210
di(2-ethylhexyl)phthalate (dehp),168
di(2-ethylhexyl)phthalate-(dehp),162
1,99
4-dioxane,99




DANGER TYPE


Unnamed: 0,Danger Type
cancer,9255
birth defects,8261






### Get combos

In [5]:
# Combo rows: sku_id is hard-coded to 0 and the contained part is resolved to
# its sku through a LEFT JOIN, so contains_sku_id / sku_status are null when
# the contained part has no matching row in skus.
pts_main = pd.read_sql(
    con = db,
    sql = '''
SELECT
0 AS sku_id,
k.sku_id AS contains_sku_id,
pts.part_id,
pts.contains_part_id,
k.sku_status
FROM products_to_stuff pts
LEFT JOIN skus k ON pts.contains_part_id = k.part_id
WHERE pts.part_id > 0
''')

# project helper; presumably normalises the column names -- TODO confirm
col_fix(pts_main)

### Get skus (no combos)

In [6]:
# Plain sku rows: the two "contains" columns are hard-coded to 0 so the frame
# has the same columns as pts_main.
skus_main = pd.read_sql(
    con = db,
    sql = '''
SELECT
sku_id,
0 AS contains_sku_id,
part_id,
0 AS contains_part_id,
sku_status
FROM skus
''')

# project helper; presumably normalises the column names -- TODO confirm
col_fix(skus_main)

### Create a single dataframe

In [7]:
# Stack the combo rows on top of the sku rows. The sku rows are added WITHOUT
# their 'part id' column, so after the concat that column is null for them.
main = pd.concat([pts_main, skus_main.drop('part id', axis = 1)], sort = False)
main = main.reset_index(drop = True)

# sku rows get part id 0; assign the result back instead of calling
# fillna(inplace=True) on the selected column, which is not guaranteed to
# write through to `main`
main['part id'] = main['part id'].fillna(0)

# cast every column that can be cast to int; columns holding text or nulls
# fail the conversion as a whole and are left unchanged
for col in main.columns:
    try:
        main[col] = main[col].map(int)
    except (ValueError, TypeError, OverflowError):
        continue

### Map chemical data

In [8]:
# if the sku_id = 0, it's a combo, so use the contains_sku_id for the mapping
# if sku_id > 0, it's a part or sku, so use the sku_id for the mapping
main['for mapping'] = np.where(main['sku id'] == 0, main['contains sku id'], main['sku id'])

for col in ['chemical','danger type']:
    # NOTE(review): dict(zip(...)) keeps only the last row per sku id, so a sku
    # listed with several chemicals / danger types ends up with a single one
    # -- confirm that this is intended
    lookup = dict(zip(chem_main['sku id'], chem_main[col]))

    # map the column, then fill nulls; the result is assigned back rather than
    # filled in place on the selected column, which may not write through
    main[col] = main['for mapping'].map(lookup).fillna('no %s listed' % col)

# the helper column is no longer needed (axis given by keyword: the positional
# form drop(label, 1) is rejected by pandas 2.0+)
main = main.drop(columns = 'for mapping')

# reorder columns
new_cols = ['sku id',
            'contains sku id',
            'part id',
            'contains part id',
            'chemical',
            'danger type',
            'sku status']

old_cols = main.columns.tolist()

# columns present in only one of the two lists; must be empty for the reorder to be lossless
s1 = set(new_cols)
s2 = set(old_cols)
s3 = s1.symmetric_difference(s2)

if len(s3) == 0:
    main = main[new_cols]
else:
    print(s3)
    raise ValueError('check ur columns')

### Label parts or skus

In [9]:
def product_type(df):
    """Label one row of `main` as 'sku', 'combo' or 'part'.

    Parameters
    ----------
    df : row-like (despite the name, a single row as passed by
        ``DataFrame.apply(..., axis=1)``); only ``df['part id']`` is read.

    Returns
    -------
    'sku'   when the part id is 0,
    'combo' when the part id appears among the values of pts_main['part id'],
    'part'  when it appears among the values of skus_main['part id'],
    np.nan  when none of the above applies.
    """
    part_id = df['part id']

    if part_id == 0:
        return 'sku'

    # `value in series` tests the Series' index labels, not its data, so the
    # membership checks must run against the underlying values
    if part_id in pts_main['part id'].values:
        return 'combo'
    if part_id in skus_main['part id'].values:
        return 'part'
    return np.nan
    
# run product_type once per row and store the label in a new column
main['product type'] = main.apply(product_type, axis = 'columns')

### Final null check

In [10]:
# collect every row that still has a null in any column
# (axis passed by keyword: the positional form any(1) is rejected by pandas 2.0+)
nulls = main[main.isnull().any(axis = 1)]
if not nulls.empty:
    # show a sample of the offending rows, then stop the run
    display(nulls.head())
    raise ValueError('check ur nulls')

### Send to Excel

In [14]:
# get the date right now
now = str(dt.datetime.now().date())

# set the path and workbook title
path = tilde + '/Scripts/Fake Folder/Accounts and Biz Dev/Ad Hoc/Prop 65 Info for Parts/CSVs/'
t = 'Prop 65 Info for Parts as of %s' % now

# create the workbook
workbook = xlsxwriter.Workbook(path + t + '.xlsx')

# set colors (only colors[2] is used below, for the header background)
colors = ['#343635',
          '#2e4874',
          '#7eaba4',
          '#928c85',
          '#347c83',
          '#bfb9d6']

# create formats
title = workbook.add_format({'font_size':25,
                             'font_name':'Arial (Bold)'})

subtitle = workbook.add_format({'font_size':15,
                                'font_name':'Arial (Bold)'})

col_names = workbook.add_format({'font_name':'Arial (Bold)',
                                 'font_color':'white',
                                 'valign':'vcenter',
                                 'align':'center',
                                 'bg_color':colors[2],
                                 'bottom':1,
                                 'top':1,
                                 'left':1,
                                 'right':1})

center = workbook.add_format({'valign':'vcenter',
                             'align':'center'})

# create worksheet
sht = workbook.add_worksheet('data')

# write sheet header
sht.write(0, 0, t, title)
sht.write(1, 0, 'As of %s' % now, subtitle)
sht.write(2,0, '"chemical" and "danger type" labels are assigned by sku, so if sku_id = 0, they use contains_sku_id; if sku_id > 0, they use sku_id.')

# write data: header row at start_row, data rows from start_row + 1 onwards
start_row = 4
start_col = 0

df = main.copy()

for i, col_name in enumerate(df.columns):

    # pull the column out once instead of doing one .iloc lookup per cell
    col_values = df.iloc[:, i].tolist()

    # write column headers
    sht.write(start_row,
              start_col + i,
              col_name.title(),
              col_names)

    # set cell width: longest rendered value or header name, plus padding
    widest = max([len(str(x)) for x in col_values] + [len(col_name)])

    sht.set_column(start_col + i,
                   start_col + i,
                   widest + 5)

    # write rows
    for j, value in enumerate(col_values):
        sht.write(start_row + 1 + j,
                  start_col + i,
                  value,
                  center)

# add a filter; the last data row sits at start_row + len(df) because the
# header occupies start_row (the previous "- 1" left the final row out)
sht.autofilter(start_row,
               start_col,
               start_row + len(df),
               start_col + len(df.columns)-1)

# xlsxwriter writes the file on close(); skipping it leaves no workbook on disk
if close_workbook == 'yes':
    workbook.close()

In [15]:
# completion marker, printed once execution reaches this last cell
print('done')

done
