In [1]:
import pandas as pd
import numpy as np
import itertools
import sys
from ast import literal_eval

In [2]:
# Read the product-number map. The file has no header row, so columns get
# integer labels; file column 4 is used as the row index.
prod_num_map = pd.read_csv(
    '../../../data/cabot_data/sprint_2/prod_num_map.csv',
    index_col=4,
    header=None
)

In [3]:
# load bar helper function
def progress(count, total, status=''):
    """Draw a single-line text progress bar on stdout.

    The line ends with a carriage return (no newline), so each call
    overwrites the previous bar in place.

    Args:
        count: Number of items processed so far.
        total: Number of items expected overall. A total of zero is
            rendered as an empty bar at 0.0% instead of raising
            ZeroDivisionError.
        status: Optional text shown after the percentage.
    """
    bar_len = 60

    if total:
        filled_len = int(round(bar_len * count / float(total)))
        percents = round(100.0 * count / float(total), 1)
    else:
        # Nothing to measure against: show an empty bar rather than crash.
        filled_len = 0
        percents = 0.0

    bar = '=' * filled_len + '-' * (bar_len - filled_len)

    sys.stdout.write('[%s] %s%s ...%s\r' % (bar, percents, '%', status))
    sys.stdout.flush()

In [4]:
# Normalise the product-number map: keep the same index values but give the
# index the name 'prod_num' (done by copying it into a column and promoting
# that column back to the index).
prod_num_map = (
    prod_num_map
    .assign(prod_num=prod_num_map.index)
    .set_index('prod_num')
)

# Column labels for the customer-type table: every product number in the map
# plus one extra label, max(product number) + 1, appended at the end.
prod_cols = prod_num_map.index.tolist()
num_products = max(prod_cols) + 1
prod_cols.append(num_products)

In [5]:
# Distinct values found in each of the four attribute columns (labels 0-3)
# of the product-number map. Judging by the names these are unit type, week,
# day of week and stay length -- confirm against the CSV layout.
units, weeks, dows, lens = (
    prod_num_map[attr_col].unique().tolist() for attr_col in (0, 1, 2, 3)
)

In [6]:
# Empty customer-type table: one row for every combination of
# unit x week x dow x length, labelled 1..num_rows, one column per prod_cols entry.
num_rows = len(units) * len(weeks) * len(dows) * len(lens)
cust_types = pd.DataFrame(columns=prod_cols,
                          index=range(1, num_rows + 1))

# Template row: a 0 under every label in prod_cols.
# NOTE(review): astype(tuple) presumably just forces object dtype so a cell
# can later hold a tuple -- confirm.
blank_type = pd.Series([0] * num_products, index=prod_cols).astype(tuple)

In [7]:
# Fill the table with "independent" customer types: each row holds exactly one
# (unit, week, dow, stay_len) tuple in its first column and 0 everywhere else.
idx_counter = 0
total = len(cust_types)
# itertools.product walks the combinations with the last factor varying
# fastest, i.e. in the same order as four nested for-loops.
for unit, week, dow, stay_len in itertools.product(units, weeks, dows, lens):
    progress(idx_counter, total, status='Filling in types')
    new_row = blank_type.copy()
    new_row.iloc[0] = (unit, week, dow, stay_len)
    # .iloc is positional (0-based) even though the row labels start at 1
    cust_types.iloc[idx_counter] = new_row
    idx_counter += 1



In [8]:
# Grab the first row of the table for inspection; no later active cell reads it.
row = cust_types.iloc[0]

In [227]:
# filter out non-weekend arrivals
# for index, row in cust_types.iterrows():
#     if row[1][2] < 5:
#         cust_types = cust_types.drop(index)
# cust_types_wend = cust_types.reset_index()
# cust_types_wend.index += 1
# cust_types_wend = cust_types_wend.drop('index', axis=1)

In [230]:
# add orderings (currently disabled; only the independent model is used)

# # week orderings
# week_orderings = []
# for i in range(0, len(weeks)-1):
#     week_orderings.append((weeks[i], weeks[i+1])) # normal week ordering
#     week_orderings.append((weeks[i+1], weeks[i])) # reverse week ordering
    
# # unit orderings
# unit_part_orderings = [('CD', 'CK', 'DD', 'DK', 'DKB'),
#                        ('CD', 'DD'),
#                        ('CD', 'DK', 'DKB')]
# unit_orderings = []
# for o in unit_part_orderings:
#     unit_orderings.append(o)
#     unit_orderings.append(o[::-1])
# unit_orderings.append(['2BV'])
# unit_orderings.append(['4BV'])

# # combined orderings using cartesian products
# comb_ordering = []
# for cart_prod_1 in itertools.product(week_orderings, unit_orderings):
        
#     # week priority
#     temp_order = []
#     for cart_prod_2 in itertools.product(cart_prod_1[0], cart_prod_1[1]):
#         temp_order.append(cart_prod_2)
#     comb_ordering.append(temp_order)
    
#     # include unit priority if there's more than 1 unit selected
#     if (len(cart_prod_1[1]) > 1): 
#         temp_order = []
#         for cart_prod_2 in itertools.product(cart_prod_1[1], cart_prod_1[0]):
#             temp_order.append(cart_prod_2[::-1])
#         comb_ordering.append(temp_order)

# # populate cust types (non-independent)
# idx_counter = 1
# for stay_length in lens:
#     for dow in dows:
#         for order in comb_ordering:
#             progress(idx_counter, total, status='Filling in types')
#             # making a series first makes it go faster
#             new_row = blank_type.copy()
#             for col_index, value in enumerate(order):
#                 new_row.loc[col_index+1] = (value[1], value[0], dow, stay_length)
#             cust_types.loc[idx_counter] = new_row
#             idx_counter += 1

In [9]:
# Persist the raw table (first column still holds tuples) to CSV.
cust_types.to_csv(
    '../../../data/cabot_data/sprint_3/types_s3_raw.csv'
)

In [10]:
# Read the raw table back from disk, using the first CSV column as the index.
cust_types_m = pd.read_csv(
    '../../../data/cabot_data/sprint_3/types_s3_raw.csv',
    index_col=[0]
)

In [11]:
# Reload the product-number map, this time keyed the other way round: the
# first four CSV columns form a MultiIndex and the one remaining column is the
# value. The result is a plain dict {(col0, col1, col2, col3): value}.
# Note: this rebinds prod_num_map from a DataFrame to a dict.
prod_num_map = (
    pd.read_csv('../../../data/cabot_data/sprint_2/prod_num_map.csv',
                header=None,
                index_col=[0, 1, 2, 3])
    .iloc[:, 0]
    .to_dict()
)

In [12]:
# Convert the stringified tuples back into tuples and map each one to its
# product number.
#
# Columns are addressed by the string labels "1", "2", ... and processed in
# order; the walk stops at the first column that has no non-zero entry, or
# when there is no column with the next label (the original loop raised
# KeyError in that case).
#
# Series.nonzero() was deprecated in pandas 0.24 and removed in 1.0, so the
# emptiness check goes through the underlying NumPy array instead, which has
# the same semantics.
i = 1
while str(i) in cust_types_m.columns:
    type_col = cust_types_m[str(i)]
    if type_col.values.nonzero()[0].size == 0:
        break
    cust_types_m[str(i)] = [literal_eval(x) for x in type_col]
    cust_types_m[str(i)] = cust_types_m[str(i)].map(prod_num_map)
    i = i + 1

In [13]:
# Final clean-up: entries left as NaN become 0, every cell is cast to int,
# and the index gets the name 'cust_types'.
cust_types_m = cust_types_m.fillna(0)
cust_types_m = cust_types_m.astype(int)
cust_types_m.index.name = 'cust_types'

In [14]:
# Write the integer-coded customer-type table to disk.
cust_types_m.to_csv(
    '../../../data/cabot_data/sprint_3/types_s3.csv'
)

In [15]:
# Re-read the saved table, indexed by its 'cust_types' column.
cust_types_m_view = pd.read_csv(
    '../../../data/cabot_data/sprint_3/types_s3.csv',
    index_col='cust_types'
)

In [238]:
# Small "toy" subset: the first 20 rows of the saved table.
cust_types_toy = cust_types_m_view.iloc[:20]

In [239]:
# Save the toy subset next to the full table.
cust_types_toy.to_csv(
    '../../../data/cabot_data/sprint_3/types_s3_toy.csv'
)