In [14]:
import pandas as pd

In [15]:
dataset = pd.read_csv('supermarkets.csv', sep=',')

In [16]:
dataset.head()

Unnamed: 0,FacilityName,Municipality,LONGITUDE,LATITUDE,Address,GSmartID
0,Sobeys,St. Catharines,-79.229554,43.187516,400 Scott Street,11502418
1,Sobeys,Welland,-79.287023,43.004156,609 South Pelham Road,11381353
2,Real Canadian Superstore,Grimsby,-79.592071,43.20525,361 South Service Road,11469036
3,Outlet Collection at Niagara- Davids Tea,Niagara-on-the-Lake,-79.172006,43.158877,300 Taylor Road,11504411
4,No Frills - Supermarket,Fort Erie,-78.943567,42.907641,1135 Thompson Road,11490330


In [17]:
dataset.shape

(55, 6)

In [18]:
dataset.isna().any().values

array([False, False, False, False, False, False])

In [19]:
dataset['FacilityName'] = dataset['FacilityName'].str.strip()

In [20]:
basket = dataset[dataset['FacilityName']=='Sobeys']
basket

Unnamed: 0,FacilityName,Municipality,LONGITUDE,LATITUDE,Address,GSmartID
0,Sobeys,St. Catharines,-79.229554,43.187516,400 Scott Street,11502418
1,Sobeys,Welland,-79.287023,43.004156,609 South Pelham Road,11381353
16,Sobeys,Port Colborne,-79.266437,42.904973,287 West Side Road,11322964
18,Sobeys,Pelham,-79.27872,43.047074,110 Highway 20 East,11279813
29,Sobeys,St. Catharines,-79.215968,43.13724,343 Glendale Avenue,11335072
35,Sobeys,Grimsby,-79.568409,43.197482,34 Livingston Avenue,11492689
49,Sobeys,Lincoln,-79.478732,43.181197,4610 Ontario Street,11358340
52,Sobeys,Niagara Falls,-79.098628,43.118691,3714 Portage Road,11340101


In [21]:
basket.shape

(8, 6)

In [22]:
from itertools import combinations

combs = []
for _, row in basket.iterrows():
    elements = row.tolist()
    combs_row = list(combinations(elements,2))
    combs.extend(combs_row)

In [23]:
combs

[('Sobeys', 'St. Catharines'),
 ('Sobeys', -79.2295542208932),
 ('Sobeys', 43.18751636685452),
 ('Sobeys', '400 Scott Street'),
 ('Sobeys', 11502418),
 ('St. Catharines', -79.2295542208932),
 ('St. Catharines', 43.18751636685452),
 ('St. Catharines', '400 Scott Street'),
 ('St. Catharines', 11502418),
 (-79.2295542208932, 43.18751636685452),
 (-79.2295542208932, '400 Scott Street'),
 (-79.2295542208932, 11502418),
 (43.18751636685452, '400 Scott Street'),
 (43.18751636685452, 11502418),
 ('400 Scott Street', 11502418),
 ('Sobeys', 'Welland'),
 ('Sobeys', -79.28702307007116),
 ('Sobeys', 43.00415606496388),
 ('Sobeys', '609 South Pelham Road'),
 ('Sobeys', 11381353),
 ('Welland', -79.28702307007116),
 ('Welland', 43.00415606496388),
 ('Welland', '609 South Pelham Road'),
 ('Welland', 11381353),
 (-79.28702307007116, 43.00415606496388),
 (-79.28702307007116, '609 South Pelham Road'),
 (-79.28702307007116, 11381353),
 (43.00415606496388, '609 South Pelham Road'),
 (43.00415606496388, 1138

In [24]:
sup_threshold = 0.019
n_rows = len(basket)
combinations_support = {}
for combination in combs:
    count = basket.apply(lambda row: all(elem in row.tolist() for elem in combination), axis=1).sum()
    support = count / n_rows
    if support >= sup_threshold:
        combinations_support[combination] = support
combinations_support

{('Sobeys', 'St. Catharines'): 0.25,
 ('Sobeys', -79.2295542208932): 0.125,
 ('Sobeys', 43.18751636685452): 0.125,
 ('Sobeys', '400 Scott Street'): 0.125,
 ('Sobeys', 11502418): 0.125,
 ('St. Catharines', -79.2295542208932): 0.125,
 ('St. Catharines', 43.18751636685452): 0.125,
 ('St. Catharines', '400 Scott Street'): 0.125,
 ('St. Catharines', 11502418): 0.125,
 (-79.2295542208932, 43.18751636685452): 0.125,
 (-79.2295542208932, '400 Scott Street'): 0.125,
 (-79.2295542208932, 11502418): 0.125,
 (43.18751636685452, '400 Scott Street'): 0.125,
 (43.18751636685452, 11502418): 0.125,
 ('400 Scott Street', 11502418): 0.125,
 ('Sobeys', 'Welland'): 0.125,
 ('Sobeys', -79.28702307007116): 0.125,
 ('Sobeys', 43.00415606496388): 0.125,
 ('Sobeys', '609 South Pelham Road'): 0.125,
 ('Sobeys', 11381353): 0.125,
 ('Welland', -79.28702307007116): 0.125,
 ('Welland', 43.00415606496388): 0.125,
 ('Welland', '609 South Pelham Road'): 0.125,
 ('Welland', 11381353): 0.125,
 (-79.28702307007116, 43.004

In [25]:
confidence_threshold = 0.25
strong_rules = []
for combination, support in combinations_support.items():
    itemset_count = basket.apply(lambda row: all(elem in row.tolist() for elem in combination), axis=1).sum()
    for item in combination:
        antecedent = (item,)
        consequent = tuple([elem for elem in combination if elem != item])
        antecedent_count = basket.apply(lambda row: all(elem in row.tolist() for elem in antecedent), axis=1).sum()
        consequent_count = basket.apply(lambda row: all(elem in row.tolist() for elem in consequent), axis=1).sum()
        confidence = itemset_count / antecedent_count
        lift = itemset_count / (antecedent_count*consequent_count)
        if confidence >= confidence_threshold and lift>=1:
            strong_rules.append((antecedent, consequent, confidence, lift))


In [26]:
for rule in strong_rules:
    antecedent = rule[0]
    consequent = rule[1]
    confidence = rule[2]
    lift = rule[3]
    print(f"Rule: {antecedent} -> {consequent} (Confidenza: {confidence} Lift: {lift})")

Rule: (-79.2295542208932,) -> (43.18751636685452,) (Confidenza: 1.0 Lift: 1.0)
Rule: (43.18751636685452,) -> (-79.2295542208932,) (Confidenza: 1.0 Lift: 1.0)
Rule: (-79.2295542208932,) -> ('400 Scott Street',) (Confidenza: 1.0 Lift: 1.0)
Rule: ('400 Scott Street',) -> (-79.2295542208932,) (Confidenza: 1.0 Lift: 1.0)
Rule: (-79.2295542208932,) -> (11502418,) (Confidenza: 1.0 Lift: 1.0)
Rule: (11502418,) -> (-79.2295542208932,) (Confidenza: 1.0 Lift: 1.0)
Rule: (43.18751636685452,) -> ('400 Scott Street',) (Confidenza: 1.0 Lift: 1.0)
Rule: ('400 Scott Street',) -> (43.18751636685452,) (Confidenza: 1.0 Lift: 1.0)
Rule: (43.18751636685452,) -> (11502418,) (Confidenza: 1.0 Lift: 1.0)
Rule: (11502418,) -> (43.18751636685452,) (Confidenza: 1.0 Lift: 1.0)
Rule: ('400 Scott Street',) -> (11502418,) (Confidenza: 1.0 Lift: 1.0)
Rule: (11502418,) -> ('400 Scott Street',) (Confidenza: 1.0 Lift: 1.0)
Rule: ('Welland',) -> (-79.28702307007116,) (Confidenza: 1.0 Lift: 1.0)
Rule: (-79.28702307007116,) 