In [51]:
import pandas as pd, numpy as np

In [2]:
# Initial Import to get Databricks to start

from pyspark.sql import SparkSession
spark = SparkSession.builder.getOrCreate()

from IPython.core.magic import line_magic, line_cell_magic, Magics, magics_class

@magics_class
class DatabricksConnectMagics(Magics):

    @line_cell_magic
    def sql(self, line, cell=None):
        if cell and line:
            raise ValueError("Line must be empty for cell magic", line)
        try:
            from autovizwidget.widget.utils import display_dataframe
        except ImportError:
            print("Please run `pip install autovizwidget` to enable the visualization widget.")
            display_dataframe = lambda x: x
        return display_dataframe(self.get_spark().sql(cell or line).toPandas())

    def get_spark(self):
        user_ns = get_ipython().user_ns
        if "spark" in user_ns:
            return user_ns["spark"]
        else:
            from pyspark.sql import SparkSession
            user_ns["spark"] = SparkSession.builder.getOrCreate()
            return user_ns["spark"]

ip = get_ipython()
ip.register_magics(DatabricksConnectMagics)

# Data Investigation

| Hive Table Name | Meaning | Key Fields | Issues |
| --------------- | ------- | ---------- | ------ |
| db_enriched.r_shk_rtl_sales_2016 | 2016 Retail Sales | Not using for this analysis | N/A |
| db_enriched.r_shk_rtl_sales_2017 | 2017 Retail Sales | Not using for this analysis | N/A |
| db_enriched.t_shk_rtl_sales_2017 | 2017 Retail Sales | Not using for this analysis | Appears to be a subset of the above<br>Only 25% of the stores and only from pd5 - pd13 |
| db_enriched.t_shk_rtl_sales_2018 | 2018 Retail Sales at dept_section level by store | p2018_01 - p2018_08 | Only 498 stores<br>Only 2 Divisions<br>Only pd01 - pd08 |
| db_enriched.t_shk_rtl_inv | Total Amount of Inventory sold at dept_section level by store | rtl_inv_amt | Only 678 stores<br>Only 2 Divisions<br>Only goes to p2018_08 |
| db_enriched.t_shk_inv_turns_dept_section | Inv Turns, Inv sales, and total sales by dept_section by store | retail_sales_amt, rtl_inv_amt, inv_turn | Only 646 Stores<br>Only 2 Divisions<br>Only up to p2018_08 |
| db_enriched.t_shk_rtl_sales | Not sure how this differs from other sales data | retail_sales_amt, which we already have in other tables | Only 636 Stores<br>Only 2 Divisions<br>Only up to p2018_08 |
| db_enriched.t_shk_rtl_inv_2017_2018 | Inv sales, already have this data in "db_enriched.t_shk_inv_turns_dept_section" I think | p2018_01 - p2018_08 | Only 498 Stores<br>Only 2 Divisions<br>Only up to p2018_08 |
| db_enriched.r_shk_rtl_inv_2016 | 2016 inventories | Not using for this analysis | N/A |
| db_enriched.r_shk_rtl_inv_2017 | 2017 inventories | Not using for this analysis | N/A |
| db_enriched.lu_store_finance_om | This table has TONS of great information regarding store location, size, age, etc. | Too many to list | None |
| db_enriched.lu_dept_section | Links dept_section_name to ID | dept_section_id, dept_section_nm | None |
| db_enriched.item_distress_agg | CAUTION: >900M ROWS. Every recorded distress at a given store. Still >80M rows in 2018 alone | net_amt, item_qty | None |
| db_enriched.store_item_rcvry_cntr_per | CAUTION: 86M ROWS. 9M in 2018 only | In Progress | None Yet |
| db_enriched.F_RTL_OPS_WK_DEPT | CAUTION: 9M ROWS. 1.7M Rows for 2018 | In Progress | None Yet |
| db_enriched.F_RTL_OPS_WK_FAC | Need to Investigate More | In Progress | None Yet |
| db_enriched.F_RTL_OPS_WK_sect | t | TODO | TODO |
| db_enriched.rpt_paxar_cat_wave_index | CAUTION: 143M ROWS. 94M Rows in 2018. | TODO | TODO |
| db_enriched.rpt_paxar_upc_wave_index | CAUTION: 25M ROWS. 16M Rows in 2018 | TODO | TODO |
| db_landing.by_store_2yr_sd_tenure | t | TODO | TODO |
| db_landing.by_store_by_period_garn | t | TODO | TODO |
| db_landing.by_store_by_period_pf_ft | t | TODO | TODO |
| db_landing.by_store_by_period_turnover | t | TODO | TODO |
| db_enriched.chkr_shrink_wkly_a | t | TODO | TODO |
| db_enriched.facility_shrink_wkly_a | t | TODO | TODO |
| db_enriched.fsm_transaction_sub_type | t | TODO | TODO |
| db_enriched.transaction_type | t | TODO | TODO |
| db_enriched.store_demos | t | TODO | TODO |
| shrink_customer_service | t | TODO | TODO |
| db_enriched.bdr_purchase_item | t | TODO | TODO |

In [3]:
def read_table(tbl_name, to_pandas=True, db='enriched'):
    df = spark.read.table('db_{}.{}'.format(db, tbl_name))
    return df.toPandas() if to_pandas else df

In [4]:
def num_stores(df):
    return df['store_id'].nunique()

def num_divs(df):
    return df['division_id'].nunique()

def df_stats(df):
    print('Number of Stores: {}\nNumber of Divisions: {}'.format(num_stores(df), num_divs(df)))

In [5]:
def to_num(df, col_name, downcast='float'):
    df[col_name] = pd.to_numeric(df[col_name], downcast=downcast)
    return df

In [6]:
def disp_all(df):
    with pd.option_context('max_columns', 1000):
        display(df)

## 2016 Retail Sales - NOT USING

## 2017 Retail Sales - NOT USING

## 2017 Retail Sales (subset) - NOT USING

## 2018 Retail Sales

In [77]:
df = read_table('t_shk_rtl_sales_2018')

In [83]:
num_stores(df), num_divs(df)

(498, 2)

In [78]:
df.head()

Unnamed: 0,store_id,comb_store_id,department_id,dept_section_id,p2018_01,p2018_02,p2018_03,p2018_04,p2018_05,p2018_06,p2018_07,p2018_08,batch_id,division_id
0,1711,1711,301,335,195699.23,198832.65,201044.8,203114.58,197601.19,193427.74,200603.07,206622.03,20190718004951,25
1,975,975,333,334,7935.91,7108.29,8247.62,9005.86,9171.68,8804.49,8968.44,7974.42,20190718004951,25
2,1293,1293,336,336,52843.7,52008.84,51188.91,51208.16,49614.75,49338.4,48908.66,46938.57,20190718004951,25
3,2409,2409,301,307,3202.77,4125.43,1697.55,5479.04,6137.2,6558.86,6213.52,6302.43,20190718004951,25
4,1205,1205,311,312,115677.48,68613.26,70421.36,65151.21,52689.81,53573.8,37621.72,67058.01,20190718004951,25


In [79]:
df.sort_values(['store_id', 'department_id'])

Unnamed: 0,store_id,comb_store_id,department_id,dept_section_id,p2018_01,p2018_02,p2018_03,p2018_04,p2018_05,p2018_06,p2018_07,p2018_08,batch_id,division_id
1018,111,111,301,335,41777.10,41799.84,41509.66,39864.61,44840.30,41089.74,40019.32,41070.16,20190718004951,25
1137,111,111,301,313,59532.47,67599.15,74185.69,74172.40,97159.54,96135.37,85714.08,74628.44,20190718004951,25
1160,111,111,301,327,31214.73,28548.51,29503.92,25137.48,28895.99,28846.73,28529.88,30626.50,20190718004951,25
1742,111,111,301,318,21655.53,23036.31,19284.38,14502.99,16313.28,16175.52,16182.03,19360.67,20190718004951,25
2423,111,111,301,302,99906.79,92578.49,84122.58,67239.05,82838.99,79649.06,84539.61,94963.62,20190718004951,25
2478,111,111,301,301,99196.35,95517.22,92104.43,78480.29,87537.88,89781.71,92853.04,95141.07,20190718004951,25
2483,111,111,301,338,57912.80,56203.89,57358.82,56157.07,61828.29,58313.73,60357.63,56879.14,20190718004951,25
2616,111,111,301,307,3994.51,3868.84,3662.35,4608.01,4708.97,4696.71,4622.44,5151.12,20190718004951,25
1610,111,111,303,308,67910.20,67285.67,68575.31,65299.31,72664.15,68418.78,69070.70,72324.32,20190718004951,25
1701,111,111,303,303,59285.36,61025.51,63974.61,81185.08,72184.40,68919.47,66858.09,65152.11,20190718004951,25


In [81]:
df.store_id.nunique(), df.division_id.nunique()

(498, 2)

## Inventory

In [101]:
df = read_table('t_shk_rtl_inv')
df['rtl_inv_amt'] = pd.to_numeric(df['rtl_inv_amt'], downcast='float')
df_stats(df)

Number of Stores: 678
Number of Divisions: 2


In [96]:
df.dtypes

division_id          int32
comb_store_id        int32
department_id        int32
dept_section_id      int32
period_id            int32
rtl_inv_amt        float32
batch_id             int64
store_id             int32
dtype: object

In [97]:
df.sort_values(['store_id', 'period_id', 'department_id', 'dept_section_id'])

Unnamed: 0,division_id,comb_store_id,department_id,dept_section_id,period_id,rtl_inv_amt,batch_id,store_id
524527,29,108,301,301,201601,0.0,20190718005715,108
524424,29,108,303,303,201601,0.0,20190718005715,108
524887,29,108,304,304,201601,0.0,20190718005715,108
524693,29,108,309,309,201601,0.0,20190718005715,108
524839,29,108,311,311,201601,0.0,20190718005715,108
524918,29,108,314,314,201601,0.0,20190718005715,108
524936,29,108,315,315,201601,0.0,20190718005715,108
524580,29,108,316,316,201601,0.0,20190718005715,108
524571,29,108,317,317,201601,0.0,20190718005715,108
524736,29,108,329,329,201601,0.0,20190718005715,108


In [102]:
df = df.pivot_table('rtl_inv_amt', ['store_id', 'department_id', 'dept_section_id'], 'period_id')

In [103]:
df.columns

Int64Index([201601, 201602, 201603, 201604, 201605, 201606, 201607, 201608,
            201609, 201610, 201611, 201612, 201613, 201701, 201702, 201703,
            201704, 201705, 201706, 201707, 201708, 201709, 201710, 201711,
            201712, 201713, 201801, 201802, 201803, 201804, 201805, 201806,
            201807, 201808],
           dtype='int64', name='period_id')

In [108]:
df = df.drop(columns = [col for col in df.columns if str(col)[0:4] != '2018']).reset_index()

In [109]:
df

period_id,store_id,department_id,dept_section_id,201801,201802,201803,201804,201805,201806,201807,201808
0,108,301,301,-244306.000000,-225505.000000,-215893.000000,-220777.000000,-209291.000000,-215925.000000,-210779.000000,-201926.000000
1,108,301,302,-318572.000000,-304130.000000,-300808.000000,-291773.000000,-295685.000000,-283595.000000,-284081.000000,-290271.000000
2,108,301,313,-109788.000000,-112395.000000,-120011.000000,-105697.000000,-123099.000000,-125551.000000,-127972.000000,-118776.000000
3,108,301,318,-100012.000000,-80330.000000,-64847.000000,-78263.000000,-72465.000000,-73043.000000,-111285.000000,-105061.000000
4,108,301,327,-66412.000000,-65705.000000,-65763.000000,-65323.000000,-64733.000000,-63955.000000,-57096.000000,-55156.000000
5,108,301,335,-87827.000000,-80251.000000,-75525.000000,-70216.000000,-75822.000000,-79299.000000,-83162.000000,-78418.000000
6,108,301,338,-95122.000000,-93787.000000,-90751.000000,-84301.000000,-92345.000000,-100435.000000,-99198.000000,-93352.000000
7,108,303,303,-119056.000000,-133681.000000,-119737.000000,-124712.000000,-137656.000000,-135025.000000,-123317.000000,-110373.000000
8,108,303,305,-64908.000000,-72719.000000,-75050.000000,-71442.000000,-80019.000000,-83093.000000,-80863.000000,-71477.000000
9,108,303,308,-102933.000000,-100491.000000,-101954.000000,-106207.000000,-123096.000000,-122656.000000,-158429.000000,-148368.000000


## Inv Turns

In [127]:
df = read_table('t_shk_inv_turns_dept_section')

In [128]:
df.dtypes

store_id             int32
comb_store_id        int32
department_id        int32
dept_section_id      int32
period_id            int32
retail_sales_amt    object
rtl_inv_amt         object
inv_turn            object
batch_id             int64
division_id          int32
dtype: object

In [129]:
for col in ['retail_sales_amt', 'rtl_inv_amt', 'inv_turn']:
    df[col] = pd.to_numeric(df[col], downcast='float')

In [130]:
df.dtypes

store_id              int32
comb_store_id         int32
department_id         int32
dept_section_id       int32
period_id             int32
retail_sales_amt    float32
rtl_inv_amt         float32
inv_turn            float32
batch_id              int64
division_id           int32
dtype: object

In [131]:
df_stats(df)

Number of Stores: 646
Number of Divisions: 2


In [121]:
df

Unnamed: 0,store_id,comb_store_id,department_id,dept_section_id,period_id,retail_sales_amt,rtl_inv_amt,inv_turn,batch_id,division_id
0,708,708,303,305,201804,2.712772e+07,-59229.000000,5954.180176,20190718010257,29
1,1165,1165,303,305,201802,2.625145e+07,-60943.000000,5599.799805,20190718010257,29
2,1735,1735,307,307,201607,4.747000e+03,-9871.500000,6.250000,20190718010257,29
3,3076,3076,303,308,201603,1.126738e+05,-132076.500000,11.090000,20190718010257,29
4,2134,2134,330,330,201711,1.105180e+04,-24854.580078,5.780000,20190718010257,29
5,1394,1394,309,309,201612,1.149319e+05,0.000000,,20190718010257,29
6,1348,1348,301,302,201804,4.360657e+07,-234123.000000,2421.310059,20190718010257,29
7,2147,2147,330,330,201602,1.563590e+04,-37624.449219,5.400000,20190718010257,29
8,1962,1962,301,313,201606,1.178215e+05,-63679.000000,24.049999,20190718010257,29
9,2071,2071,317,317,201805,5.472740e+07,-93465.500000,7611.970215,20190718010257,29


In [132]:
inv_turn_df = df.pivot_table('inv_turn', ['store_id', 'department_id', 'dept_section_id'], 'period_id')
sales_df = df.pivot_table('retail_sales_amt', ['store_id', 'department_id', 'dept_section_id'], 'period_id')
inv_amt_df = df.pivot_table('rtl_inv_amt', ['store_id', 'department_id', 'dept_section_id'], 'period_id')

In [139]:
inv_turn_df = inv_turn_df.drop(columns = [col for col in df.columns if str(col)[0:4] != '2018']).reset_index()
sales_df = sales_df.drop(columns = [col for col in df.columns if str(col)[0:4] != '2018']).reset_index()
inv_amt_df = inv_amt_df.drop(columns = [col for col in df.columns if str(col)[0:4] != '2018']).reset_index()

## t_shk_rtl_sales

In [141]:
df = read_table('t_shk_rtl_sales')

In [142]:
df.dtypes

division_id          int32
comb_store_id        int32
department_id        int32
dept_section_id      int32
period_id            int32
retail_sales_amt    object
batch_id             int64
store_id             int32
dtype: object

In [145]:
df = to_num(df, 'retail_sales_amt')

In [147]:
df.dtypes

division_id           int32
comb_store_id         int32
department_id         int32
dept_section_id       int32
period_id             int32
retail_sales_amt    float32
batch_id              int64
store_id              int32
dtype: object

In [148]:
df.head()

Unnamed: 0,division_id,comb_store_id,department_id,dept_section_id,period_id,retail_sales_amt,batch_id,store_id
0,25,2697,314,323,201609,57179.75,20190718011027,2697
1,25,2697,304,304,201806,61536200.0,20190718011027,2697
2,25,2697,333,334,201612,22026.15,20190718011027,2697
3,25,2697,301,302,201702,211162.5,20190718011027,2697
4,25,2697,301,318,201604,59337.05,20190718011027,2697


In [150]:
df = df[df.period_id > 201800]

In [152]:
df_stats(df)

Number of Stores: 636
Number of Divisions: 2


In [153]:
df.head()

Unnamed: 0,division_id,comb_store_id,department_id,dept_section_id,period_id,retail_sales_amt,batch_id,store_id
1,25,2697,304,304,201806,61536200.0,20190718011027,2697
5,25,2697,311,312,201803,44261148.0,20190718011027,2697
8,25,2697,336,336,201802,23524832.0,20190718011027,2697
13,25,2697,330,330,201801,17631654.0,20190718011027,2697
15,25,2697,301,302,201802,74842248.0,20190718011027,2697


In [154]:
df.period_id.max()

201808

## t_shk_rtl_inv_2017_2018

In [155]:
df = read_table('t_shk_rtl_inv_2017_2018')

In [158]:
df.dtypes

store_id            int32
comb_store_id       int32
department_id       int32
dept_section_id     int32
p2017_07           object
p2017_08           object
p2017_09           object
p2017_10           object
p2017_11           object
p2017_12           object
p2017_13           object
p2018_01           object
p2018_02           object
p2018_03           object
p2018_04           object
p2018_05           object
p2018_06           object
p2018_07           object
p2018_08           object
batch_id            int64
division_id         int32
dtype: object

In [157]:
df.head()

Unnamed: 0,store_id,comb_store_id,department_id,dept_section_id,p2017_07,p2017_08,p2017_09,p2017_10,p2017_11,p2017_12,...,p2018_01,p2018_02,p2018_03,p2018_04,p2018_05,p2018_06,p2018_07,p2018_08,batch_id,division_id
0,975,975,301,318,-101334.0,-100132.0,-87501.0,-61418.0,-75681.0,-77361.0,...,-64169.0,-54407.0,-78401.0,-51950.0,-46985.0,-76418.0,-72887.46,-72887.46,20190718011652,25
1,909,909,309,309,-42883.72,-40782.51,-44952.76,-48254.91,-42983.76,-45692.86,...,-39753.15,-39815.93,-38306.89,-40038.82,-41053.26,-41992.35,-42291.5,-42291.5,20190718011652,25
2,2747,2747,301,302,-353555.0,-346343.0,-416499.0,-424100.0,-448606.0,-399288.0,...,-401791.0,-397859.0,-375501.0,-383879.0,-369250.0,-347195.0,-389518.69,-389518.69,20190718011652,25
3,3092,3092,333,333,-143432.98,-125832.45,-160187.37,-215853.47,-171848.79,-162300.88,...,-180981.68,-170236.1,-189010.6,-217174.74,-167014.74,-135460.48,-167241.19,-167241.19,20190718011652,25
4,2409,2409,336,336,-10769.16,-11919.05,-11919.05,-11919.05,-14446.34,-12280.09,...,-11662.38,-10334.97,-12047.1,-9969.0,-11887.25,-12902.07,-11871.97,-11871.97,20190718011652,25


In [159]:
for col in [col for col in df.columns if "p201" in col]:
    df = to_num(df, col)

In [161]:
df.dtypes

store_id             int32
comb_store_id        int32
department_id        int32
dept_section_id      int32
p2017_07           float32
p2017_08           float32
p2017_09           float32
p2017_10           float32
p2017_11           float32
p2017_12           float32
p2017_13           float32
p2018_01           float32
p2018_02           float32
p2018_03           float32
p2018_04           float32
p2018_05           float32
p2018_06           float32
p2018_07           float32
p2018_08           float32
batch_id             int64
division_id          int32
dtype: object

In [162]:
df.drop(columns=[col for col in df.columns if 'p2017' in col], inplace = True)

In [164]:
df_stats(df)

Number of Stores: 498
Number of Divisions: 2


## lu_store_finance_om

In [213]:
df = read_table('lu_store_finance_om')

In [214]:
df = df[df.columns[df.nunique() > 1]]

In [215]:
date_cols = [col for col in df.columns if "_dt" in col]

In [216]:
disp_all(df)

Unnamed: 0,store_id,store_cd,store_nm,company_id,company_nm,region_id,region_cd,region_nm,parent_op_area_id,parent_op_area_cd,parent_op_area_nm,op_area_finance_id,op_area_finance_cd,op_area_finance_nm,district_finance_id,district_finance_cd,district_finance_nm,store_addr_line1_txt,store_addr_line2_txt,store_city_nm,store_zip5_id,store_state_id,store_country_id,opened_dt,closed_dt,start_eff_dt,end_eff_dt,total_building_size_amt,total_selling_area_amt,store_type_cd,str_ord_start_dt,str_ord_end_dt,tmp_no_str_ord_start_dt,tmp_no_str_ord_end_dt,subsidiary_cd,banner_id,banner_nm,prm_banner_id,prm_banner_nm,convert_dt,parent_store_id,parent_override_ind,dw_create_ts,dw_create_user_id,dw_last_updt_ts,dw_batch_id,district_cd,district_nm,op_area_id,op_area_nm,division_id,division_nm,rog_cd,store_voice_phone_nbr,last_remodel_dt,store_time_zone_cd,dnd_ind,never_dst_ind,store_sub_type_cd,non_lem_ind,store_status_dt,store_status_id,hours_from_host_tm,store_fulfill_type_cd,rog_id
0,2983,2983,02983 FAC SCOTTSDALE AZ,1101,Safeway - U. S. Retail,2,EAST,EAST,8,17,Southwest,435,0016,Southwest ABS Retail Div #16,26674,101,101 METRO PHX,11475 EAST VIA LINDA,,Scottsdale,85259,AZ,US,1998-08-12,2015-05-26,,,58836,0,RT,,,,,ABS LLC,3.0,ALBERTSONS,2.0,Albertsons,,2983,N,2015-08-31 15:40:03,dwadmin,2018-03-21 22:17:23,7751,,,,,,,,,,,,,,,,,,,
1,4157,4157,04157 FAC STORE,1101,Safeway - U. S. Retail,1,WEST,WEST,12,25,Northern California,45,0025,Norcal SWY Retail Div #25,63,014,014 RENO / TAHOE,,,,,,,,1998-01-03,1964-06-24,1998-01-03,18068,11963,RT,,,,,SWY,0.0,,0.0,,,4157,N,2015-08-31 15:40:03,dwadmin,2019-08-19 08:22:41,8266,,,,,,,,,,,,,,,,,,,
2,1168,1168,01168 FAC LOS ANGELES CA,1101,Safeway - U. S. Retail,1,WEST,WEST,2,29,Southern California,441,0028,SoCal ABS Retail Div #28,89,048,048 LOS ANGELES,3443 SOUTH SEPULVEDA BLVD,,Los Angeles,90034,CA,US,1978-07-17,2015-04-28,,,38639,0,RT,,,,,ABS LLC,3.0,ALBERTSONS,2.0,Albertsons,,1168,N,2015-08-31 15:40:03,dwadmin,2018-02-07 02:25:22,7708,,,,,,,,,,,,,,,,,,,
3,3888,3888,03888 FAC PHOENIX AZ,1101,Safeway - U. S. Retail,2,EAST,EAST,8,17,Southwest,435,0016,Southwest ABS Retail Div #16,0,,,4747 EAST GREENWAY ROAD,,Phoenix,85032,AZ,US,1992-09-16,2014-02-08,,,0,0,,,,,,ABS LLC,,ALBERTSONS,,Albertsons,,3888,N,2015-10-06 11:03:12,DWADMIN,2017-07-21 04:35:41,7507,,,,,,,,,,,,,,,,,,,
4,586,0586,00586 FAC PORTLAND OR,1101,Safeway - U. S. Retail,1,WEST,WEST,4,19,Portland,436,0018,Portland ABS Retail Div #18,34,072,072 WEST PORTLAND/NORTH COAST,11070-C S W BARNES RD,,Portland,97225,OR,US,1995-06-21,2015-06-20,,,50000,0,RT,,,,,ABS LLC,3.0,ALBERTSONS,2.0,Albertsons,,586,N,2015-08-31 15:40:03,dwadmin,2018-02-07 02:25:22,7708,,,,,,,,,,,,,,,,,,,
5,685,0685,00685 FAC FRISCO TX,1101,Safeway - U. S. Retail,2,EAST,EAST,11,15,United,434,0015,United Retail Div #15,26690,556,556 DALLAS FT WORTH,4268 LEGACY DRIVE,,FRISCO,75034,TX,US,2018-01-26,9999-12-31,,,0,0,RT,,,,,ABS LLC,19.0,MARKET STREET,2.0,Albertsons,,685,N,2016-12-02 10:02:04,DWADMIN,2019-08-09 09:00:21,8256,,,,,,,,,,,,,,,,,,,
6,483,0483,00483 FAC STORE,1101,Safeway - U. S. Retail,1,WEST,WEST,14,27,Seattle,46,0027,Seattle SWY Retail Div #27,182,130,130 OLYMPIA,,,,,,,,1997-07-12,1972-10-19,1997-07-12,23246,16050,RT,,,,,SWY,0.0,,0.0,,,483,N,2015-08-31 15:40:03,dwadmin,2019-08-19 08:22:41,8266,,,,,,,,,,,,,,,,,,,
7,4117,4117,04117 FAC STORE,1101,Safeway - U. S. Retail,8,ACME/EASTE,ACME/EASTERN,6,35,Eastern,48,0035,Eastern NAI Retail Div #35,103,183,183 SUBURBAN MD,,,,,,,,1997-11-29,1983-08-03,1997-11-29,28510,22357,RT,,,,,NAI,0.0,,0.0,,,4117,N,2015-08-31 15:40:03,dwadmin,2019-08-19 08:22:41,8266,,,,,,,,,,,,,,,,,,,
8,3879,3879,03879 FAC PENDLETON OR,1101,Safeway - U. S. Retail,2,EAST,EAST,5,30,Intermountain,442,0030,Intermtn ABS Retail Div #30,0,,,1300 SW Court Ave,,Pendleton,97801,OR,US,1983-08-17,2014-02-16,,,0,0,,,,,,ABS LLC,3.0,ALBERTSONS,2.0,Albertsons,,3879,N,2015-10-06 11:03:12,DWADMIN,2018-09-17 13:49:08,7931,,,,,,,,,,,,,,,,,,,
9,924,0924,00924 FAC ALBUQUERQUE NM,1101,Safeway - U. S. Retail,2,EAST,EAST,11,15,United,434,0015,United Retail Div #15,26756,557,557 ALBUQUERQUE,8100 Ventura Street NE,,Albuquerque,87122,NM,US,2007-08-01,9999-12-31,1940-01-01,9999-12-31,50186,0,RT,2017-01-01,9999-12-31,9999-12-31,9999-12-31,ABS LLC,3.0,ALBERTSONS,2.0,Albertsons,,924,N,2015-08-31 15:40:03,dwadmin,2019-08-19 08:22:41,8266,,,,,,,,,,,,,,,,,,,


In [217]:
for col in date_cols:
    df[col] = pd.to_datetime(df[col], errors='coerce')

In [221]:
mask = df.closed_dt.isna()
df = df[mask]

In [223]:
df_stats(df)

Number of Stores: 2359
Number of Divisions: 14


In [177]:
disp_all(df)

Unnamed: 0,store_id,store_cd,store_nm,company_id,company_nm,region_id,region_cd,region_nm,parent_op_area_id,parent_op_area_cd,parent_op_area_nm,op_area_finance_id,op_area_finance_cd,op_area_finance_nm,district_finance_id,district_finance_cd,district_finance_nm,store_addr_line1_txt,store_addr_line2_txt,store_city_nm,store_zip5_id,store_state_id,store_country_id,opened_dt,closed_dt,start_eff_dt,end_eff_dt,total_building_size_amt,total_selling_area_amt,store_type_cd,str_ord_start_dt,str_ord_end_dt,tmp_no_str_ord_start_dt,tmp_no_str_ord_end_dt,subsidiary_cd,banner_id,banner_nm,prm_banner_id,prm_banner_nm,convert_dt,parent_store_id,parent_override_ind,dw_create_ts,dw_create_user_id,dw_last_updt_ts,dw_batch_id,district_cd,district_nm,op_area_id,op_area_nm,division_id,division_nm,rog_cd,store_voice_phone_nbr,last_remodel_dt,store_time_zone_cd,dnd_ind,never_dst_ind,store_sub_type_cd,non_lem_ind,store_status_dt,store_status_id,hours_from_host_tm,store_fulfill_type_cd,rog_id
0,2983,2983,02983 FAC SCOTTSDALE AZ,1101,Safeway - U. S. Retail,2,EAST,EAST,8,17,Southwest,435,0016,Southwest ABS Retail Div #16,26674,101,101 METRO PHX,11475 EAST VIA LINDA,,Scottsdale,85259,AZ,US,1998-08-12,2015-05-26,,,58836,0,RT,,,,,ABS LLC,3.0,ALBERTSONS,2.0,Albertsons,,2983,N,2015-08-31 15:40:03,dwadmin,2018-03-21 22:17:23,7751,,,,,,,,,,,,,,,,,,,
1,4157,4157,04157 FAC STORE,1101,Safeway - U. S. Retail,1,WEST,WEST,12,25,Northern California,45,0025,Norcal SWY Retail Div #25,63,014,014 RENO / TAHOE,,,,,,,,1998-01-03,1964-06-24,1998-01-03,18068,11963,RT,,,,,SWY,0.0,,0.0,,,4157,N,2015-08-31 15:40:03,dwadmin,2019-08-19 08:22:41,8266,,,,,,,,,,,,,,,,,,,
2,1168,1168,01168 FAC LOS ANGELES CA,1101,Safeway - U. S. Retail,1,WEST,WEST,2,29,Southern California,441,0028,SoCal ABS Retail Div #28,89,048,048 LOS ANGELES,3443 SOUTH SEPULVEDA BLVD,,Los Angeles,90034,CA,US,1978-07-17,2015-04-28,,,38639,0,RT,,,,,ABS LLC,3.0,ALBERTSONS,2.0,Albertsons,,1168,N,2015-08-31 15:40:03,dwadmin,2018-02-07 02:25:22,7708,,,,,,,,,,,,,,,,,,,
3,3888,3888,03888 FAC PHOENIX AZ,1101,Safeway - U. S. Retail,2,EAST,EAST,8,17,Southwest,435,0016,Southwest ABS Retail Div #16,0,,,4747 EAST GREENWAY ROAD,,Phoenix,85032,AZ,US,1992-09-16,2014-02-08,,,0,0,,,,,,ABS LLC,,ALBERTSONS,,Albertsons,,3888,N,2015-10-06 11:03:12,DWADMIN,2017-07-21 04:35:41,7507,,,,,,,,,,,,,,,,,,,
4,586,0586,00586 FAC PORTLAND OR,1101,Safeway - U. S. Retail,1,WEST,WEST,4,19,Portland,436,0018,Portland ABS Retail Div #18,34,072,072 WEST PORTLAND/NORTH COAST,11070-C S W BARNES RD,,Portland,97225,OR,US,1995-06-21,2015-06-20,,,50000,0,RT,,,,,ABS LLC,3.0,ALBERTSONS,2.0,Albertsons,,586,N,2015-08-31 15:40:03,dwadmin,2018-02-07 02:25:22,7708,,,,,,,,,,,,,,,,,,,
5,685,0685,00685 FAC FRISCO TX,1101,Safeway - U. S. Retail,2,EAST,EAST,11,15,United,434,0015,United Retail Div #15,26690,556,556 DALLAS FT WORTH,4268 LEGACY DRIVE,,FRISCO,75034,TX,US,2018-01-26,9999-12-31,,,0,0,RT,,,,,ABS LLC,19.0,MARKET STREET,2.0,Albertsons,,685,N,2016-12-02 10:02:04,DWADMIN,2019-08-09 09:00:21,8256,,,,,,,,,,,,,,,,,,,
6,483,0483,00483 FAC STORE,1101,Safeway - U. S. Retail,1,WEST,WEST,14,27,Seattle,46,0027,Seattle SWY Retail Div #27,182,130,130 OLYMPIA,,,,,,,,1997-07-12,1972-10-19,1997-07-12,23246,16050,RT,,,,,SWY,0.0,,0.0,,,483,N,2015-08-31 15:40:03,dwadmin,2019-08-19 08:22:41,8266,,,,,,,,,,,,,,,,,,,
7,4117,4117,04117 FAC STORE,1101,Safeway - U. S. Retail,8,ACME/EASTE,ACME/EASTERN,6,35,Eastern,48,0035,Eastern NAI Retail Div #35,103,183,183 SUBURBAN MD,,,,,,,,1997-11-29,1983-08-03,1997-11-29,28510,22357,RT,,,,,NAI,0.0,,0.0,,,4117,N,2015-08-31 15:40:03,dwadmin,2019-08-19 08:22:41,8266,,,,,,,,,,,,,,,,,,,
8,3879,3879,03879 FAC PENDLETON OR,1101,Safeway - U. S. Retail,2,EAST,EAST,5,30,Intermountain,442,0030,Intermtn ABS Retail Div #30,0,,,1300 SW Court Ave,,Pendleton,97801,OR,US,1983-08-17,2014-02-16,,,0,0,,,,,,ABS LLC,3.0,ALBERTSONS,2.0,Albertsons,,3879,N,2015-10-06 11:03:12,DWADMIN,2018-09-17 13:49:08,7931,,,,,,,,,,,,,,,,,,,
9,924,0924,00924 FAC ALBUQUERQUE NM,1101,Safeway - U. S. Retail,2,EAST,EAST,11,15,United,434,0015,United Retail Div #15,26756,557,557 ALBUQUERQUE,8100 Ventura Street NE,,Albuquerque,87122,NM,US,2007-08-01,9999-12-31,1940-01-01,9999-12-31,50186,0,RT,2017-01-01,9999-12-31,9999-12-31,9999-12-31,ABS LLC,3.0,ALBERTSONS,2.0,Albertsons,,924,N,2015-08-31 15:40:03,dwadmin,2019-08-19 08:22:41,8266,,,,,,,,,,,,,,,,,,,


In [178]:
df.company_nm.value_counts()

Safeway - U. S. Retail    3426
Safeway.com                  1
Name: company_nm, dtype: int64

## lu_dept_section

In [224]:
df = read_table('lu_dept_section')

In [225]:
df.head()

Unnamed: 0,dept_section_nm,department_id,feed_cd,last_update_ts,batch_id,dept_section_id
0,RETAIL SECTION 0,0,UPC,2003-02-04 17:12:01,20190716011137,0
1,GROCERY MAIN MEAL,301,UPC,2006-01-11 16:44:30,20190716011137,301
2,MEALS INGREDIENTS,301,UPC,2006-11-27 16:43:39,20190716011137,302
3,PHOTO AND VIDEO,311,UPC,2017-07-09 17:58:05,20190716011137,310
4,GENERAL MERCHANDISE,311,UPC,2003-02-04 17:12:01,20190716011137,311


In [226]:
df.dtypes

dept_section_nm            object
department_id               int32
feed_cd                    object
last_update_ts     datetime64[ns]
batch_id                    int64
dept_section_id             int32
dtype: object

## item_distress_agg

In [235]:
#df = read_table('item_distress_agg')
# TOO BIG FOR PANDAS

## store_item_rcvry_cntr_per

In [237]:
df = read_table('store_item_rcvry_cntr_per')

In [238]:
df.head()

Unnamed: 0,upc_id,period_id,prod_rcvry_cntr_type_cd,vend_nbr,wims_sub_vend_nbr,old_category_id,old_rtl_sect_id,prod_rcvry_cntr_item_qty,prod_rcvry_cntr_ext_cst,prod_rcvry_cntr_rtl_amt,...,prod_rcvry_cntr_remodel_ind,genrt_cic_id,rtl_only_credit_ind,feed_cd,batch_id,last_update_ts,input_facility_cd,unbillable_prod_rcvry_cntr_cst,batchid,store_id


In [239]:
df.dtypes

upc_id                            object
period_id                         object
prod_rcvry_cntr_type_cd           object
vend_nbr                          object
wims_sub_vend_nbr                 object
old_category_id                   object
old_rtl_sect_id                   object
prod_rcvry_cntr_item_qty          object
prod_rcvry_cntr_ext_cst           object
prod_rcvry_cntr_rtl_amt           object
prod_rcvry_cntr_reset_ind         object
prod_rcvry_cntr_udi_txt           object
prod_rcvry_cntr_remodel_ind       object
genrt_cic_id                      object
rtl_only_credit_ind               object
feed_cd                           object
batch_id                          object
last_update_ts                    object
input_facility_cd                 object
unbillable_prod_rcvry_cntr_cst    object
batchid                           object
store_id                          object
dtype: object

In [240]:
df.shape

(0, 22)

In [241]:
import sys
sys.getsizeof(df)

24

## F_RTL_OPS_WK_DEPT

In [28]:
df = spark.sql('SELECT * from db_enriched.F_RTL_OPS_WK_DEPT WHERE wk_id >= "201800" AND wk_id < "201900"')

In [30]:
gb = df.groupBy(['fac_sk', 'dept_nbr'])

In [32]:
ttl_sales_by_dept = gb.agg({'sls_amt' : 'sum'})

In [35]:
ttl_sales_by_dept = ttl_sales_by_dept.toPandas()

In [38]:
ttl_sales_by_dept.sort_values(['fac_sk', 'dept_nbr'])

Unnamed: 0,fac_sk,dept_nbr,sum(sls_amt)
2376,10,301,5857892.11
11176,10,303,1742576.99
13150,10,304,3105788.87
23877,10,306,511745.69
8529,10,309,526037.49
29397,10,311,1345481.86
20743,10,315,271978.50
5504,10,316,610784.81
14633,10,329,1745544.14
14625,10,330,240605.26


In [47]:
# Merge in department names
dept_key = spark.sql('SELECT * FROM db_enriched.lu_dept_section').toPandas()

In [48]:
dept_key.rename(columns={'department_id' : 'dept_nbr'}, inplace=True)

In [None]:
dept_key = dept_key[['']]

In [49]:
dept_key.sort_values('dept_nbr')

Unnamed: 0,dept_section_nm,dept_nbr,feed_cd,last_update_ts,batch_id,dept_section_id
0,RETAIL SECTION 0,0,UPC,2003-02-04 17:12:01,20190716011137,0
37,CANDY,301,UPC,2006-01-11 16:44:30,20190716011137,318
36,SNACKS,301,UPC,2006-01-11 16:44:30,20190716011137,338
15,HOME CARE GROCERY,301,UPC,2006-01-11 16:44:30,20190716011137,335
12,FAMILY CARE GROCERY,301,UPC,2006-01-11 16:44:30,20190716011137,327
21,SOFT DRINKS,301,UPC,2003-09-24 17:48:51,20190716011137,313
2,MEALS INGREDIENTS,301,UPC,2006-11-27 16:43:39,20190716011137,302
1,GROCERY MAIN MEAL,301,UPC,2006-01-11 16:44:30,20190716011137,301
39,WINE,303,UPC,2003-09-24 17:48:51,20190716011137,308
38,BEER,303,UPC,2003-09-24 17:48:51,20190716011137,305


In [50]:
ttl_sales_by_dept.merge(dept_key, how='left')

Unnamed: 0,fac_sk,dept_nbr,sum(sls_amt),dept_section_nm,feed_cd,last_update_ts,batch_id,dept_section_id
0,1059,306,527659.52,FOOD SERVICE,UPC,2003-09-24 17:48:51,2.019072e+13,306.0
1,1994,316,1288375.59,IN-STORE BAKERY,UPC,2003-02-04 17:12:01,2.019072e+13,316.0
2,3298,328,177449.98,COFFEE KIOSK,UPC,2008-07-09 17:29:33,2.019072e+13,328.0
3,3122,306,270963.98,FOOD SERVICE,UPC,2003-09-24 17:48:51,2.019072e+13,306.0
4,2529,328,0.00,COFFEE KIOSK,UPC,2008-07-09 17:29:33,2.019072e+13,328.0
5,1315,328,791195.01,COFFEE KIOSK,UPC,2008-07-09 17:29:33,2.019072e+13,328.0
6,2882,309,324690.94,DELICATESSEN,UPC,2003-09-24 17:48:51,2.019072e+13,309.0
7,332,349,0.00,,,NaT,,
8,1319,311,539862.82,PHOTO AND VIDEO,UPC,2017-07-09 17:58:05,2.019072e+13,310.0
9,1319,311,539862.82,GENERAL MERCHANDISE,UPC,2003-02-04 17:12:01,2.019072e+13,311.0


In [25]:
df.head()

Unnamed: 0,fac_sk,dept_nbr,wk_id,sls_amt,ly_sls_amt,sls_plan_amt,ly_sls_plan_amt,sls_prod_cnt,ly_sls_prod_cnt,sls_trips_cnt,...,plan_tot_shrink_amt,ly_plan_tot_shrink_amt,id_sls_ind,dw_crt_ts,dw_crt_usr_id,dw_lst_updt_ts,dw_lst_updt_usr_id,dw_src_sys_cd,batch_id,dw_batch_id


In [26]:
df.dtypes

fac_sk                    object
dept_nbr                  object
wk_id                     object
sls_amt                   object
ly_sls_amt                object
sls_plan_amt              object
ly_sls_plan_amt           object
sls_prod_cnt              object
ly_sls_prod_cnt           object
sls_trips_cnt             object
ly_sls_trips_cnt          object
wages_amt                 object
ly_wages_amt              object
plan_wages_amt            object
ly_plan_wages_amt         object
labor_manhrs_qty          object
ly_labor_manhrs_qty       object
plan_manhrs_qty           object
ly_plan_manhrs_qty        object
inv_cst_amt               object
ly_inv_cst_amt            object
bk_shrink_amt             object
ly_bk_shrink_amt          object
donate_amt                object
ly_donate_amt             object
tot_shrink_amt            object
ly_tot_shrink_amt         object
plan_tot_shrink_amt       object
ly_plan_tot_shrink_amt    object
id_sls_ind                object
dw_crt_ts 

In [27]:
df.shape

(0, 37)

## F_RTL_OPS_WK_FAC

In [7]:
df = spark.sql('SELECT * from db_enriched.F_RTL_OPS_WK_FAC WHERE wk_id >= "201800" AND wk_id < "201900"').toPandas()

In [8]:
df = df[df.columns[df.nunique() > 1]]

In [9]:
df.shape

(250251, 30)

In [10]:
df.dtypes

fac_sk                    object
fuel_rx_sk                 int32
wk_id                      int32
sls_amt                   object
ly_sls_amt                object
sls_plan_amt              object
ly_sls_plan_amt           object
sls_prod_cnt              object
ly_sls_prod_cnt           object
sls_trips_cnt             object
ly_sls_trips_cnt          object
wages_amt                 object
ly_wages_amt              object
plan_wages_amt            object
ly_plan_wages_amt         object
labor_manhrs_qty          object
ly_labor_manhrs_qty       object
plan_manhrs_qty           object
ly_plan_manhrs_qty        object
inv_cst_amt               object
ly_inv_cst_amt            object
bk_shrink_amt             object
ly_bk_shrink_amt          object
donate_amt                object
ly_donate_amt             object
tot_shrink_amt            object
ly_tot_shrink_amt         object
plan_tot_shrink_amt       object
ly_plan_tot_shrink_amt    object
id_ind                    object
dtype: obj

In [16]:
disp_all(df.head(15))

Unnamed: 0,fac_sk,fuel_rx_sk,wk_id,sls_amt,ly_sls_amt,sls_plan_amt,ly_sls_plan_amt,sls_prod_cnt,ly_sls_prod_cnt,sls_trips_cnt,ly_sls_trips_cnt,wages_amt,ly_wages_amt,plan_wages_amt,ly_plan_wages_amt,labor_manhrs_qty,ly_labor_manhrs_qty,plan_manhrs_qty,ly_plan_manhrs_qty,inv_cst_amt,ly_inv_cst_amt,bk_shrink_amt,ly_bk_shrink_amt,donate_amt,ly_donate_amt,tot_shrink_amt,ly_tot_shrink_amt,plan_tot_shrink_amt,ly_plan_tot_shrink_amt,id_ind
0,2169,2,201840,65355.199219,66653.507812,67209.257812,71160.679688,743,716,453,441,7595.189941,7855.540039,7622.109863,9016.459961,229.289993,280.519989,244.009995,249.339996,0.0,0.0,946.919983,-1159.689941,0.0,0.0,946.919983,-1159.689941,780.73999,-86.879997,1
1,1548,2,201820,45889.511719,49265.359375,48264.730469,66889.46875,692,762,386,436,9370.879883,5502.009766,6012.740234,7333.640137,220.559998,178.990005,155.070007,186.729996,0.0,0.0,-922.780029,1217.310059,0.0,0.0,-922.780029,1217.310059,-317.790009,5244.27002,1
2,1870,2,201822,34148.75,57690.910156,58331.320312,45525.378906,616,699,412,428,8512.009766,7144.879883,8666.790039,5481.52002,214.940002,179.679993,201.199997,127.419998,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
3,73,3,201852,483067.59375,509514.53125,481012.09375,492251.375,138464,145915,17234,16921,53281.621094,49871.671875,47238.269531,52013.238281,3613.879883,3515.780029,3022.370117,3300.300049,0.0,0.0,-67542.75,-75293.453125,-2202.27002,-3673.439941,-69745.023438,-78966.890625,-64396.75,-83874.0,1
4,1429,2,201810,37776.109375,42372.511719,43740.800781,52690.019531,579,545,340,292,5849.430176,6083.799805,5948.25,6117.02002,132.990005,138.570007,131.779999,142.240005,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
5,916,3,201841,422288.625,473272.34375,411493.96875,513350.6875,124463,138027,14644,16896,45712.238281,46733.101562,40719.089844,46527.570312,3270.719971,3487.790039,2756.330078,3393.120117,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
6,1501,3,201801,602928.8125,579113.375,620217.5,590913.9375,161706,159760,14689,14797,52791.589844,48078.191406,50113.808594,48944.890625,3343.669922,3295.679932,3077.120117,2857.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
7,1264,3,201848,186311.5625,252566.3125,238140.375,239061.59375,60585,82922,5634,8208,32560.300781,0.0,31219.109375,30084.949219,2003.119995,0.0,1732.819946,1544.319946,0.0,-11896.0,-65114.230469,-69588.828125,0.0,0.0,-65114.230469,-81484.828125,-57237.179688,-52845.480469,1
8,1665,3,201840,265960.375,252259.546875,255096.484375,266934.3125,69317,72580,10381,10732,26817.119141,27408.779297,26779.949219,28622.279297,1612.180054,1687.280029,1799.75,1713.939941,8050.200195,-17018.529297,-32723.039062,-47021.75,-5022.839844,-6594.569824,-29695.679688,-70634.851562,-42596.058594,-43307.808594,1
9,828,3,201805,274192.21875,246961.703125,260994.265625,237993.140625,81055,72522,10330,10190,36049.191406,35801.71875,35064.210938,29748.689453,2351.709961,2603.0,2137.5,1873.199951,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1


In [12]:
cnt_cols = [col for col in df.columns if "_cnt" in col]
amt_cols = [col for col in df.columns if "_amt" in col]
qty_cols = [col for col in df.columns if "_qty" in col]

for col in cnt_cols:
    df[col] = pd.to_numeric(df[col], downcast='integer')
    
for col in amt_cols:
    df = to_num(df, col)
    
for col in qty_cols:
    df = to_num(df, col)
    


In [13]:
df.dtypes

fac_sk                     object
fuel_rx_sk                  int32
wk_id                       int32
sls_amt                   float32
ly_sls_amt                float32
sls_plan_amt              float32
ly_sls_plan_amt           float32
sls_prod_cnt                int32
ly_sls_prod_cnt             int32
sls_trips_cnt               int32
ly_sls_trips_cnt            int32
wages_amt                 float32
ly_wages_amt              float32
plan_wages_amt            float32
ly_plan_wages_amt         float32
labor_manhrs_qty          float32
ly_labor_manhrs_qty       float32
plan_manhrs_qty           float32
ly_plan_manhrs_qty        float32
inv_cst_amt               float32
ly_inv_cst_amt            float32
bk_shrink_amt             float32
ly_bk_shrink_amt          float32
donate_amt                float32
ly_donate_amt             float32
tot_shrink_amt            float32
ly_tot_shrink_amt         float32
plan_tot_shrink_amt       float32
ly_plan_tot_shrink_amt    float32
id_ind        

In [18]:
df.inv_cst_amt.describe()

count    250251.000000
mean       -353.429443
std        8184.511230
min     -686640.062500
25%           0.000000
50%           0.000000
75%           0.000000
max      755566.000000
Name: inv_cst_amt, dtype: float64

In [20]:
ttl_sales_by_store = df.groupby('fac_sk').agg({'sls_amt' : 'sum'})

In [22]:
ttl_sales_by_store.describe()

Unnamed: 0,sls_amt
count,2446.0
mean,24370950.0
std,12722000.0
min,-533.8
25%,16467740.0
50%,22550380.0
75%,31252370.0
max,92739300.0


## rpt_paxar_cat_wave_index

In [270]:
df = read_table('rpt_paxar_cat_wave_index')

In [271]:
df.shape

(0, 27)

In [272]:
df.head()

Unnamed: 0,store_id,ctgry_director_resp_nbr,category_id,category_nm,dept_id,dept_section_id,vend_nbr,division_id,prc_cd,wave,...,redfacing_discount_qty,nochange_discount_amt,nochange_discount_qty,nochange_reset_discount_amt,nochange_reset_discount_qty,unknown_discount_amt,unknown_discount_qty,net_sales,batch_id,period_id


In [273]:
df.dtypes

store_id                       object
ctgry_director_resp_nbr        object
category_id                    object
category_nm                    object
dept_id                        object
dept_section_id                object
vend_nbr                       object
division_id                    object
prc_cd                         object
wave                           object
total_discount_amt             object
total_discount_qty             object
del_discount_amt               object
del_discount_qty               object
unc_discount_amt               object
unc_discount_qty               object
redfacing_discount_amt         object
redfacing_discount_qty         object
nochange_discount_amt          object
nochange_discount_qty          object
nochange_reset_discount_amt    object
nochange_reset_discount_qty    object
unknown_discount_amt           object
unknown_discount_qty           object
net_sales                      object
batch_id                       object
period_id   

## by_store_2yr_sd_tenure

In [277]:
df = read_table('by_store_2yr_sd_tenure', db='landing')

In [287]:
np.array([np.random.uniform(0, 1) for i in range(0,100)]).mean()

0.4817541329696666

# Sandbox