# Supermarket Optimization

In [1]:
from itertools import combinations
from collections import Counter
import csv

#### Open the data file

In [2]:
with open('retail_25k.dat','r') as f:
    data = [line.rstrip().split() for line in f]

In [3]:
[item[:10] for item in data[:10]]

[['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'],
 ['30', '31', '32'],
 ['33', '34', '35'],
 ['36', '37', '38', '39', '40', '41', '42', '43', '44', '45'],
 ['38', '39', '47', '48'],
 ['38', '39', '48', '49', '50', '51', '52', '53', '54', '55'],
 ['32', '41', '59', '60', '61', '62'],
 ['3', '39', '48'],
 ['63', '64', '65', '66', '67', '68'],
 ['32', '69']]

#### Set the model parameters

In [4]:
item_set_size = 3
sigma = 4

#### Find all unique combinations in each list (duplicates allowed between lists)

In [5]:
sets = []
for line in data:
    sets.append(list(set(combinations(line, item_set_size))))

In [6]:
[item[:10] for item in sets[:10]]

[[('5', '21', '25'),
  ('2', '15', '19'),
  ('4', '8', '17'),
  ('4', '26', '28'),
  ('7', '17', '18'),
  ('8', '14', '17'),
  ('3', '4', '8'),
  ('2', '21', '24'),
  ('8', '17', '19'),
  ('14', '25', '27')],
 [('30', '31', '32')],
 [('33', '34', '35')],
 [('38', '43', '46'),
  ('37', '39', '41'),
  ('37', '40', '44'),
  ('37', '43', '46'),
  ('36', '39', '42'),
  ('38', '42', '46'),
  ('43', '45', '46'),
  ('36', '37', '39'),
  ('42', '45', '46'),
  ('37', '38', '44')],
 [('38', '39', '48'),
  ('38', '39', '47'),
  ('38', '47', '48'),
  ('39', '47', '48')],
 [('39', '57', '58'),
  ('49', '51', '53'),
  ('38', '52', '57'),
  ('38', '39', '52'),
  ('38', '49', '57'),
  ('39', '55', '56'),
  ('39', '53', '57'),
  ('38', '49', '53'),
  ('39', '50', '56'),
  ('48', '52', '56')],
 [('59', '60', '61'),
  ('32', '41', '60'),
  ('41', '59', '61'),
  ('41', '59', '60'),
  ('59', '60', '62'),
  ('32', '61', '62'),
  ('32', '59', '62'),
  ('41', '60', '61'),
  ('32', '60', '62'),
  ('32', '59', '

#### Flattened the above **_sets_** list

In [7]:
flattened_list = [elem for sublist in sets for elem in sublist]

In [8]:
flattened_list[:10]

[('5', '21', '25'),
 ('2', '15', '19'),
 ('4', '8', '17'),
 ('4', '26', '28'),
 ('7', '17', '18'),
 ('8', '14', '17'),
 ('3', '4', '8'),
 ('2', '21', '24'),
 ('8', '17', '19'),
 ('14', '25', '27')]

#### Use a **_Counter_** object to count each unique combination

In [9]:
counter = Counter(flattened_list).most_common()

In [10]:
counter[:10]

[(('39', '41', '48'), 3352),
 (('38', '39', '48'), 1580),
 (('32', '39', '48'), 1577),
 (('38', '39', '41'), 1426),
 (('32', '39', '41'), 1168),
 (('38', '41', '48'), 1052),
 (('32', '41', '48'), 995),
 (('38', '39', '170'), 630),
 (('36', '38', '39'), 530),
 (('32', '38', '39'), 524)]

#### Format the output as a list-of-lists to prepare to save as a .csv file

In [11]:
# <item set size (N)>, <co-occurrence frequency>, <item 1 id >, <item 2 id>, …. <item N id>

output = [['item set size (N)', 'co-occurrence frequency']]
output[0].extend(['item {} id'.format(N + 1) for N in range(item_set_size)])

for item_set in counter:
    if item_set[1] >= sigma:
        row = [item_set_size, item_set[1]]
        row.extend(item_set[0])
        output.append(row)

In [12]:
output[:10]

[['item set size (N)',
  'co-occurrence frequency',
  'item 1 id',
  'item 2 id',
  'item 3 id'],
 [3, 3352, '39', '41', '48'],
 [3, 1580, '38', '39', '48'],
 [3, 1577, '32', '39', '48'],
 [3, 1426, '38', '39', '41'],
 [3, 1168, '32', '39', '41'],
 [3, 1052, '38', '41', '48'],
 [3, 995, '32', '41', '48'],
 [3, 630, '38', '39', '170'],
 [3, 530, '36', '38', '39']]

#### Save the final output to **_output.csv_**

In [13]:
with open("output.csv", "w") as f:
    writer = csv.writer(f)
    writer.writerows(output)