# Laboratorium 1 - analiza koszykowa

## Przygotowanie

 * pobierz i wypakuj dataset: https://kaggle.com/datasets/rashikrahmanpritom/groceries-dataset-for-market-basket-analysismba?resource=download&select=basket.csv
   * alternatywnie, pobierz plik `basket.csv` z Teamsów
 * [opcjonalnie] utwórz i aktywuj wirtualne środowisko (Linux/macOS):
 `python3 -m venv ./recsyslab1 && source ./recsyslab1/bin/activate`
 * zainstaluj potrzebne biblioteki (notebook importuje `more_itertools` oraz `colorama`):
 `pip install more-itertools colorama`

In [1]:
import colorama

from dataclasses import dataclass
from functools import cache, reduce
from itertools import groupby
from statistics import mean
from typing import Literal

from more_itertools import powerset

In [2]:
@dataclass(repr=False, frozen=True)
class Recommendation:
	"""Immutable record describing one product suggested for a basket.

	The generated dataclass ``__repr__`` is disabled in favour of a
	hand-written, ANSI-coloured, three-line representation.
	"""

	# Product being suggested.
	product: str
	# Basket the suggestion refers to; ``__repr__`` removes ``product`` from it
	# before listing the remaining items.
	basket: frozenset[str]
	# Confidence score attached to the suggestion.
	confidence: float
	# Lift score attached to the suggestion.
	lift: float

	def __repr__(self) -> str:
		"""Return the coloured text: basket line, confidence line, lift line."""
		fore = colorama.Fore

		# Items of the basket other than the suggested product itself.
		others = self.basket - frozenset((self.product,))

		headline = ''.join((
			'Basket with: ',
			fore.GREEN, ', '.join(others), ' + ',
			fore.LIGHTBLUE_EX, self.product, fore.RESET,
		))
		confidence_line = ''.join((
			fore.BLUE, 'confidence: ', fore.RESET, f'{self.confidence:.2f}',
		))
		lift_line = ''.join((
			fore.BLUE, 'lift: ', fore.RESET, f'{self.lift:.2f}', fore.RESET,
		))

		return '\n'.join((headline, confidence_line, lift_line))

In [3]:
class Database:
	SUPPORT_EPSILON = 1e-3
	MAX_SUBBASKET_CARDINALITY = 4

	products: list[str]
	baskets: list[frozenset[str]]

	total_baskets: int

	def __init__(self, baskets: list[frozenset[str]], products: list[str]) -> None:
		self.baskets = baskets
		self.products = products
		self.total_baskets = len(baskets)

	@staticmethod
	def load(path: str) -> 'Database':
		with open(path) as file:
			raw_data = file.read()
		
		baskets = [
			frozenset((y.lower() for y in x.split(',') if len(y) > 0))
			for x in raw_data.split('\n')[1:]
			if len(x) > 0
		]

		products = list(reduce(frozenset.union, baskets, frozenset()))

		return Database(baskets, products)
	
	@cache
	def support(self, basket: frozenset[str]) -> float:
		count = 0.0

		for historic_basket in self.baskets:
			if basket.issubset(historic_basket):
				count += 1.0

		return count / self.total_baskets + self.SUPPORT_EPSILON
	
	@cache
	def confidence(self, basket: frozenset[str], recommended_product: str) -> float:
		recommended_basket = basket | frozenset((recommended_product,))
		return self.support(recommended_basket) / self.support(basket)
	
	@cache
	def lift(self, basket: frozenset[str], recommended_product: str) -> float:
		basket_with_recommended_product = frozenset((recommended_product,))
		recommended_basket = basket | basket_with_recommended_product
		
		return self.support(recommended_basket) / (
			self.support(basket) * 
			self.support(basket_with_recommended_product)
		)
	
	def __recommend_basic(self, basket: frozenset[str]) -> list[Recommendation]:
		candidates: list[Recommendation] = []

		for subbasket in map(frozenset, powerset(basket)):
			if len(subbasket) > self.MAX_SUBBASKET_CARDINALITY:
				break

			for product in self.products:
				candidate = basket | frozenset((product,))

				if candidate == basket:
					continue

				confidence = self.confidence(subbasket, product)
				lift = self.lift(subbasket, product)

				if lift <= 1.0:
					continue

				candidates.append(Recommendation(product, candidate, confidence, lift))

		return sorted(
			candidates,
			key=lambda c: c.confidence,
			reverse=True
		)

	def __recommend_advanced(self, basket: frozenset[str]) -> list[Recommendation]:
		candidates: list[Recommendation] = []

		for subbasket in map(frozenset, powerset(basket)):
			if len(subbasket) > self.MAX_SUBBASKET_CARDINALITY:
				break
	
			for product in self.products:
				candidate_basket = basket | frozenset((product,))

				if candidate_basket == basket:
					continue

				confidence = self.confidence(subbasket, product)
				lift = self.lift(subbasket, product)

				if lift <= 1.0:
					continue

				candidates.append(Recommendation(product, candidate_basket, confidence, lift))

		candidates = [
			Recommendation(
				(group := list(grouper))[0].product,
				group[0].basket,
				mean(map(lambda candidate: candidate.confidence, group)),
				mean(map(lambda candidate: candidate.lift, group))
			)
			for _, grouper in groupby(candidates, lambda candidate: candidate.product)
		]

		return sorted(
			candidates,
			key=lambda c: c.confidence * c.lift,
			reverse=True
		)

	@cache
	def recommend(
			self,
			basket: frozenset[str],
			*,
			max_candidates: int = 5,
			strategy: Literal['basic', 'advanced'] = 'basic'
	) -> list[Recommendation]:
		match strategy:
			case 'basic':
				return self.__recommend_basic(basket)[:max_candidates]

			case 'advanced':
				return self.__recommend_advanced(basket)[:max_candidates]


In [4]:
def show_recommendations(db: Database, basket: frozenset[str], *, strategy: Literal['basic', 'advanced'], max_candidates: int = 5) -> None:
	"""Print a coloured header for ``basket`` followed by the recommendations.

	Args:
		db: database queried through ``db.recommend``.
		basket: products currently in the basket.
		strategy: strategy name forwarded to ``db.recommend``.
		max_candidates: limit forwarded to ``db.recommend``.
	"""
	recommendations = db.recommend(basket, strategy=strategy, max_candidates=max_candidates)

	fore = colorama.Fore
	header = ''.join((
		'For ',
		fore.GREEN, ', '.join(basket),
		fore.RESET, ' in basket, system with strategy ',
		fore.BLUE, '"', strategy, '"',
		fore.RESET, ' recommends:\n',
		fore.RESET,
	))

	print(header)
	# Entries are separated by a blank line.
	print(*recommendations, sep='\n\n')

In [5]:
# Load the historic baskets; the relative path is resolved against the kernel's working directory.
db = Database.load('basket.csv')

In [6]:
# Show up to five suggestions for the historic basket at index 5 using the 'basic' strategy.
show_recommendations(db, db.baskets[5], strategy='basic', max_candidates=5)

For [32mrolls/buns, whole milk, sausage[39m in basket, system with strategy [34m"basic"[39m recommends:
[39m
Basket with: [32mrolls/buns, whole milk, sausage + [94myogurt[39m
[34mconfidence: [39m0.62
[34mlift: [39m7.19[39m

Basket with: [32mrolls/buns, whole milk, sausage + [94mother vegetables[39m
[34mconfidence: [39m0.59
[34mlift: [39m4.82[39m

Basket with: [32mrolls/buns, whole milk, sausage + [94mpastry[39m
[34mconfidence: [39m0.56
[34mlift: [39m10.66[39m

Basket with: [32mrolls/buns, whole milk, sausage + [94mcurd[39m
[34mconfidence: [39m0.56
[34mlift: [39m16.20[39m

Basket with: [32mrolls/buns, whole milk, sausage + [94msoda[39m
[34mconfidence: [39m0.56
[34mlift: [39m5.73[39m


In [7]:
# Same basket as the previous cell, this time with the 'advanced' strategy.
show_recommendations(db, db.baskets[5], strategy='advanced', max_candidates=5)

For [32mrolls/buns, whole milk, sausage[39m in basket, system with strategy [34m"advanced"[39m recommends:
[39m
Basket with: [32mrolls/buns, whole milk, sausage + [94mpreservation products[39m
[34mconfidence: [39m0.47
[34mlift: [39m438.81[39m

Basket with: [32mrolls/buns, whole milk, sausage + [94mkitchen utensil[39m
[34mconfidence: [39m0.47
[34mlift: [39m438.81[39m

Basket with: [32mrolls/buns, whole milk, sausage + [94mbaby cosmetics[39m
[34mconfidence: [39m0.47
[34mlift: [39m389.95[39m

Basket with: [32mrolls/buns, whole milk, sausage + [94mbags[39m
[34mconfidence: [39m0.47
[34mlift: [39m369.39[39m

Basket with: [32mrolls/buns, whole milk, sausage + [94mfrozen chicken[39m
[34mconfidence: [39m0.47
[34mlift: [39m350.88[39m
