# Lista 6

## Algorytm Apriori

In [18]:
import numpy as np

class Apriori:

    """klasa generująca reguły asocjacyjne"""

    def __init__(self, min_supp, products, n_products):
        self.min_supp = min_supp
        self.products = products
        self.n_baskets = len(products)
        self.products_in_sets = self.make_sets()
        self.n_products = n_products

        ones_data = self.generate_ones()

        self.good_sets = ones_data
        self.sets = ones_data
        self.n_sets = len(self.sets)
        self.all_rules = []

    def make_sets(self):

        """zmiana tablicy tablic produktów
        na tablicę zbiorów produktów"""

        products = []

        for basket in self.products:
            products.append(set(basket))

        return products

    def generate_ones(self):

        """znajdowanie zbiorów częstych
        1-elementowych"""

        ones = []

        for product in range(1, self.n_products+1):
            count_baskets = 0

            for basket in self.products:
                if product in basket:
                    count_baskets += 1

            if count_baskets / self.n_baskets >= self.min_supp:
                ones.append([product])

        return ones

    def count_supp(self, candidate_set):

        """wyliczenie wskaźnika support
        dla zbioru"""

        count_candidate_set = 0
        candidate = set(candidate_set)

        for basket in self.products_in_sets:
            if candidate.issubset(basket):
                count_candidate_set += 1

        return (count_candidate_set / self.n_baskets)

    def find_new_sets(self):

        """pojedyncza iteracja znajdywania zbiorów częstych"""

        new_sets = []

        for first_set_index in range(self.n_sets):
            for second_set_index in range(first_set_index + 1,
                                          self.n_sets):

                if self.sets[first_set_index][:-1] == \
                        self.sets[second_set_index][:-1]:

                    new_set = self.sets[first_set_index] + \
                              [self.sets[second_set_index][-1]]

                    # kandydat

                    if self.count_supp(new_set) >= self.min_supp:
                        # właściwy kandydat
                        new_sets.append(new_set)

        self.sets = new_sets
        self.n_sets = len(self.sets)
        self.good_sets += self.sets

    def find_all_sets(self):

        """znalezienie wszystkich zbiorów częstych"""

        while self.sets:
            self.find_new_sets()

    def count_confidence(self, set1, set2):

        """obliczenie wiarygodności reguły"""

        return self.count_supp(set1 + set2) / self.count_supp(set1)

    def find_single_rules(self, set1, set2, confidence):

        """znalezienie regul asocjacyjnych
        ze zbioru częstego"""

        d = len(set1)

        for element_index in range(d):
            new_set1 = set1[:element_index] + set1[element_index + 1:]
            new_set2 = set2 + [set1[element_index]]

            if self.count_confidence(new_set1, new_set2) \
                    >= confidence:
                self.all_rules.append([new_set1, new_set2])

                self.find_single_rules(new_set1, new_set2, confidence)

    def find_all_rules(self, confidence):

        """znalezienie reguł asocjacyjnych
        ze wszystkich zbiorów częstych"""

        for good_set in self.good_sets:
            self.find_single_rules(good_set, [], confidence)
            
    def count_lift(self, set1, set2):
        
        """obliczanie wskaźnika lift"""
        
        return self.count_supp(set1 + set2) / self.count_supp(set2)
    
    def count_leverage(self, set1, set2):
        
        """obliczanie wskaźnika leverage"""
        
        return self.count_supp(set1 + set2) - \
               self.count_supp(set1) * self.count_supp(set2)
        

    def print_rules(self):

        """przejrzyste wypisanie reguł asocjacyjnych"""

        for rule in self.all_rules:
            if rule[0] and rule[1]:
                print(str(rule[0]) + '    ' + str(rule[1]) + 
                     '    ' + str(self.count_lift(rule[0], rule[1])) +
                     '    ' + str(self.count_leverage(rule[0], rule[1])))
                

### test na przykładowych danych

In [19]:
apriori = Apriori(0.5, [[1], [1, 2, 3, 4], [2, 3], [4], [1, 2, 4], [1, 4]], 4)
apriori.find_all_sets()
apriori.find_all_rules(0.7)
apriori.print_rules()

[4]    [1]    0.75    0.05555555555555558
[1]    [4]    0.75    0.05555555555555558


## Retails

In [3]:
with open('retail.dat', 'r') as data:
    transactions = []
    for line in data:
        transactions.append(list(map(int, line.split())))    

### największy identyfikator

In [4]:
max([max(transaction) for transaction in transactions])

16469

In [20]:
apriori = Apriori(0.01, transactions[:1000], 16469)
apriori.find_all_sets()
apriori.find_all_rules(0.5)
apriori.print_rules()

[32]    [39]    0.11184210526315791    -0.00556799999999999
[36]    [38]    0.16803278688524592    0.030264000000000003
[36]    [39]    0.049342105263157895    0.003248000000000001
[37]    [38]    0.0778688524590164    0.014364
[37]    [39]    0.01973684210526316    0.0004480000000000005
[37]    [48]    0.02288329519450801    0.0016970000000000006
[38]    [39]    0.2532894736842105    0.005648000000000014
[55]    [38]    0.04098360655737705    0.007560000000000001
[105]    [38]    0.045081967213114756    0.008072
[110]    [38]    0.1639344262295082    0.029996000000000002
[170]    [38]    0.21721311475409835    0.039824
[286]    [38]    0.06147540983606557    0.011096
[371]    [38]    0.045081967213114756    0.008316
[41]    [39]    0.30756578947368424    0.041688
[45]    [39]    0.01644736842105263    0.00392
[48]    [39]    0.5263157894736842    0.05430400000000002
[39]    [48]    0.7322654462242563    0.05430400000000002
[49]    [39]    0.02138157894736842    0.003271999999999999
[6

In [21]:
apriori = Apriori(0.01, transactions, 16469)
apriori.find_all_sets()
apriori.find_all_rules(0.5)
apriori.print_rules()

[32]    [39]    0.16684755796743955    -0.00298203936376798
[32]    [48]    0.19067283730865078    0.008907252795242723
[36]    [38]    0.1788920235957938    0.025755051659543816
[36]    [39]    0.040197335964479525    0.003963209061567403
[37]    [38]    0.06706847909720441    0.009709485579985008
[38]    [39]    0.20414405525407006    0.015658795901577288
[38]    [48]    0.18853684585261657    0.005560791937352594
[110]    [38]    0.1747242882790459    0.025302702904268913
[170]    [38]    0.19434470377019747    0.028161587775906993
[286]    [38]    0.0715568094383175    0.010284764319005903
[41]    [39]    0.22523926985693143    0.03202855813442558
[48]    [39]    0.5750764676862358    0.05584094479497037
[39]    [48]    0.6916340334638661    0.05584094479497037
[60]    [39]    0.019398125308337445    0.0014420219815132005
[65]    [39]    0.054997533300444004    0.0024559408266322316
[79]    [39]    0.021924025653675384    0.0021702025092023482
[89]    [39]    0.05424765663542181   

[48, 170]    [38, 39]    0.11532141130981151    0.011459589931095873
[39, 170]    [38, 48]    0.15017623363544816    0.011427485746829861
[38, 48, 170]    [39]    0.0235421805624075    0.0035045329009512696
[48, 170]    [39, 38]    0.11532141130981151    0.011459589931095873
[38, 39, 170]    [48]    0.028313753411653018    0.002586889941071868
[39, 170]    [48, 38]    0.15017623363544816    0.011427485746829861


In [22]:
apriori = Apriori(0.02, transactions, 16469)
apriori.find_all_sets()
apriori.find_all_rules(0.6)
apriori.print_rules()

[36]    [38]    0.1788920235957938    0.025755051659543816
[36]    [39]    0.040197335964479525    0.003963209061567403
[38]    [39]    0.20414405525407006    0.015658795901577288
[110]    [38]    0.1747242882790459    0.025302702904268913
[170]    [38]    0.19434470377019747    0.028161587775906993
[41]    [39]    0.22523926985693143    0.03202855813442558
[48]    [39]    0.5750764676862358    0.05584094479497037
[65]    [39]    0.054997533300444004    0.0024559408266322316
[89]    [39]    0.05424765663542181    0.006164956864250844
[170]    [39]    0.04063147508633448    0.003150030560338534
[225]    [39]    0.04639368524913666    0.005431994758287869
[237]    [39]    0.03806610754810064    0.002112295557075424
[310]    [39]    0.03654662062160829    0.004094553542347293
[41]    [48]    0.2140263438946244    0.021271988095733316
[89]    [48]    0.06640560104426249    0.010936617914439943
[32, 38]    [39]    0.036309817464232855    0.002400220418909013
[32, 41]    [39]    0.0465515540

In [23]:
apriori = Apriori(0.03, transactions, 16469)
apriori.find_all_sets()
apriori.find_all_rules(0.7)
apriori.print_rules()

[36]    [38]    0.1788920235957938    0.025755051659543816
[110]    [38]    0.1747242882790459    0.025302702904268913
[170]    [38]    0.19434470377019747    0.028161587775906993
[41]    [39]    0.22523926985693143    0.03202855813442558
[89]    [39]    0.05424765663542181    0.006164956864250844
[89]    [48]    0.06640560104426249    0.010936617914439943
[38, 41]    [39]    0.0602072027627035    0.009199284037643134
[38, 48]    [39]    0.120414405525407    0.017420605695253243
[41, 48]    [39]    0.14535767143561915    0.02475563785723845


## Kosarak

In [25]:
with open('kosarak.dat', 'r') as data:
    transactions = []
    for line in data:
        transactions.append(list(map(int, line.split())))

### największy identyfikator

In [26]:
max([max(transaction) for transaction in transactions])

41270

In [27]:
apriori = Apriori(0.01, transactions, 41270)
apriori.find_all_sets()
apriori.find_all_rules(0.5)
apriori.print_rules()

[1]    [6]    0.21968525410144105    0.01225129117726552
[90]    [1]    0.05613045635422889    0.006752775297401092
[2]    [6]    0.049252212433527225    0.003578893345136249
[3]    [6]    0.44095687542195044    -0.008272808360009987
[4]    [6]    0.07545554014639809    -0.0020836407231653864
[7]    [6]    0.1224030303937317    0.021034349629518757
[11]    [6]    0.5387878425073249    0.10390152154429044
[6]    [11]    0.889986678203068    0.10390152154429044
[14]    [6]    0.017208924895323045    0.003331669713880836
[25]    [6]    0.02031680784337201    0.0045685343460725595
[27]    [6]    0.09880373943668999    0.015757947473181287
[32]    [6]    0.01679487307399389    0.0034102621052427536
[40]    [6]    0.037021886546475244    0.007712565369054474
[49]    [6]    0.020301842114890237    0.004538581672480791
[55]    [6]    0.0646436327476745    -0.000868018468159773
[56]    [6]    0.02519397246971103    0.005638868201264531
[64]    [6]    0.06601050261567677    0.010485211595265932


[3, 218]    [6]    0.05650061359486776    0.010592091938856806
[3, 316]    [6]    0.01655542141828546    0.002483718159847109
[3, 7]    [11]    0.07013308063120596    0.011430759070654379
[3, 27]    [7]    0.20518308821837097    0.015170486010422118
[3, 27]    [11]    0.05666295853762377    0.008940726556154547
[3, 64]    [11]    0.0371307321494788    0.005788947797922215
[3, 77]    [11]    0.03703184870833505    0.006455378028443205
[3, 83]    [11]    0.02905525112273907    0.005433189134322308
[3, 148]    [11]    0.06649087388241111    0.013185220641234077
[3, 218]    [11]    0.07408841827695603    0.012880101209823169
[3, 218]    [148]    0.3627327593604302    0.0228601537533922
[3, 148]    [218]    0.2862705704417707    0.022877413316347062
[4, 11]    [6]    0.0405870556425785    0.007822104783627964
[4, 6]    [11]    0.06704297309546373    0.007798959725628291
[7, 11]    [6]    0.09284571664222266    0.0213793055115444
[7]    [6, 11]    0.1723233327057865    0.027671227852083487
[

[6, 303]    [11]    0.04273687390987873    0.00868224864428185
[303]    [11, 6]    0.04801967822278736    0.008692405364799496
[11, 314]    [6]    0.018032039961820765    0.0042096351227712235
[314]    [6, 11]    0.03346779295892449    0.0060468899054094355
[6, 314]    [11]    0.02978588988230124    0.00602021281801159
[314]    [11, 6]    0.03346779295892449    0.0060468899054094355
[11, 316]    [6]    0.028785747305337447    0.006541363820194988
[316]    [6, 11]    0.053426868675022304    0.008278218308562785
[6, 316]    [11]    0.04754920137887465    0.009140704147306411
[316]    [11, 6]    0.053426868675022304    0.008278218308562785
[11, 364]    [6]    0.017177330579639292    0.004070251416986131
[364]    [6, 11]    0.03188143685592862    0.0064120284637706705
[6, 364]    [11]    0.028374054083748778    0.006067863030545992
[364]    [11, 6]    0.03188143685592862    0.0064120284637706705
[11, 438]    [6]    0.027543591841349975    0.006533549000224289
[438]    [6, 11]    0.05112140

[987]    [11, 148]    0.204523036639825    0.010506282998752475
[11, 987]    [148]    0.16309602128085582    0.01057810408801704
[987]    [148, 11]    0.204523036639825    0.010506282998752475
[27, 218]    [148]    0.15470095248991733    0.009967697347085639
[27, 148]    [218]    0.12209079211720354    0.009844736431797113
[1, 3, 11]    [6]    0.06276127667641103    0.01341645150984426
[1, 3, 6]    [11]    0.10367104775246178    0.016653294978966705
[1, 3, 148]    [6]    0.01946376131991074    0.004358985244771957
[1, 3, 218]    [6]    0.021716934885778236    0.004645924548638103
[1, 3, 148]    [11]    0.028961861206103303    0.006131727457743842
[1, 3, 218]    [11]    0.03069781495062695    0.006115233971685554
[1, 3, 218]    [148]    0.15567346471782845    0.010001284102514997
[1, 3, 148]    [218]    0.12285830379918283    0.009895253458582515
[1, 7, 11]    [6]    0.029989657018760375    0.007037673625940509
[1, 7]    [6, 11]    0.05566134692126551    0.010343462390416856
[1, 6, 7]  

[7, 27]    [6, 11]    0.09433880739353051    0.017574388821905894
[7, 11]    [6, 27]    0.5144400686660607    0.027415630686657297
[6, 11, 27]    [7]    0.3517572326175516    0.026892731134248685
[11, 27]    [7, 6]    0.4152560793370466    0.027413162685201626
[6, 27]    [7, 11]    0.5355678592704208    0.027415630686657297
[6, 7, 27]    [11]    0.08396028181780725    0.016984374115713255
[7, 27]    [11, 6]    0.09433880739353051    0.017574388821905894
[6, 27]    [11, 7]    0.5355678592704208    0.027415630686657297
[6, 7, 11]    [27]    0.42375301522167075    0.026766333281515294
[7, 11]    [27, 6]    0.5144400686660607    0.027415630686657297
[7, 11, 83]    [6]    0.021552311872478695    0.005125759597971116
[11, 83]    [6, 7]    0.1760766200244532    0.011283607049149018
[7, 83]    [6, 11]    0.04000148142204171    0.007745914728238643
[6, 11, 83]    [7]    0.1491518792147115    0.010974554968593436
[11, 83]    [7, 6]    0.1760766200244532    0.011283607049149018
[6, 7, 83]    [11]

[6, 205]    [11, 27]    0.24560223846604345    0.010520939023609207
[205]    [11, 27, 6]    0.25205351379026336    0.010457720594209383
[6, 11, 205]    [27]    0.1569717470263676    0.010283844240318002
[11, 205]    [27, 6]    0.19056514860816587    0.010476154239758501
[205]    [27, 6, 11]    0.25205351379026336    0.010457720594209383
[6, 205]    [27, 11]    0.24560223846604345    0.010520939023609207
[205]    [27, 11, 6]    0.25205351379026336    0.010457720594209383
[11, 27, 218]    [6]    0.01922597252292251    0.004557533345729744
[27, 218]    [6, 11]    0.035683753429646345    0.007236951125032749
[6, 27, 218]    [11]    0.03175806518066829    0.006837584840461873
[27, 218]    [11, 6]    0.035683753429646345    0.007236951125032749
[11, 64, 218]    [6]    0.016583690016528815    0.003932989877914858
[64, 218]    [6, 11]    0.030779629212408146    0.006132748380054978
[6, 64, 218]    [11]    0.02739345995907324    0.00577003367681212
[64, 218]    [11, 6]    0.030779629212408146  

[1, 27]    [11, 6, 7]    0.18531387122772452    0.009323917576011736
[1, 6, 27]    [11, 7]    0.1812909556014998    0.009413323278443183
[1, 27]    [11, 7, 6]    0.18531387122772452    0.009323917576011736
[1, 6, 7, 11]    [27]    0.14344137300024953    0.009124148426013402
[1, 7, 11]    [27, 6]    0.1741391497526002    0.009346921457150945
[1, 11, 148, 218]    [6]    0.03732120111611077    0.00886384250499768
[1, 148, 218]    [6, 11]    0.0692688256335394    0.014317632608017142
[1, 218]    [6, 11, 148]    0.4063733478182147    0.020860267392365763
[1, 148]    [6, 11, 218]    0.3701797789873    0.020935528122870004
[1, 11, 218]    [6, 148]    0.3466254826254826    0.020966859870575495
[1, 218]    [6, 148, 11]    0.4063733478182147    0.020860267392365763
[1, 11, 148]    [6, 218]    0.2889475378178307    0.020741276275342294
[1, 148]    [6, 218, 11]    0.3701797789873    0.020935528122870004
[1, 6, 148, 218]    [11]    0.06164833202862125    0.01349236748614507
[1, 148, 218]    [11, 6]

[7, 11, 87]    [27, 6]    0.16875357635733282    0.00935227284865025
[11, 87]    [27, 6, 7]    0.26812311147953044    0.009600333787001125
[87]    [27, 6, 7, 11]    0.32803350017993266    0.009551792471037249
[7, 87]    [27, 6, 11]    0.2232041493221735    0.009372031796466278
[87]    [27, 6, 11, 7]    0.32803350017993266    0.009551792471037249
[6, 11, 87]    [27, 7]    0.24921088604448863    0.00956601735795633
[11, 87]    [27, 7, 6]    0.26812311147953044    0.009600333787001125
[87]    [27, 7, 6, 11]    0.32803350017993266    0.009551792471037249
[6, 87]    [27, 7, 11]    0.32514024449560625    0.00959732145961497
[87]    [27, 7, 11, 6]    0.32803350017993266    0.009551792471037249
[6, 7, 87]    [27, 11]    0.21749126954861941    0.009402217137213068
[7, 87]    [27, 11, 6]    0.2232041493221735    0.009372031796466278
[87]    [27, 11, 6, 7]    0.32803350017993266    0.009551792471037249
[6, 87]    [27, 11, 7]    0.32514024449560625    0.00959732145961497
[87]    [27, 11, 7, 6]    

In [28]:
apriori = Apriori(0.03, transactions, 41270)
apriori.find_all_sets()
apriori.find_all_rules(0.7)
apriori.print_rules()

[7]    [6]    0.1224030303937317    0.021034349629518757
[11]    [6]    0.5387878425073249    0.10390152154429044
[27]    [6]    0.09880373943668999    0.015757947473181287
[64]    [6]    0.06601050261567677    0.010485211595265932
[77]    [6]    0.06261328225031346    0.011371680930617318
[83]    [6]    0.04985749300767908    0.010521098882161354
[148]    [6]    0.10767010213278261    0.022501038228546696
[218]    [6]    0.12916255109133418    0.0240973127972094
[148]    [11]    0.15315671652040155    0.030349197433993228
[148]    [218]    0.6639314657215739    0.053096345299724755
[1, 11]    [6]    0.1431588329392358    0.030584313031321357
[1, 218]    [6]    0.05018840189299837    0.010774263034257003
[3, 7]    [6]    0.05469973760089396    0.009506744900777644
[3, 11]    [6]    0.23892286663540493    0.04617087803605581
[3, 218]    [6]    0.05650061359486776    0.010592091938856806
[7, 11]    [6]    0.09284571664222266    0.0213793055115444
[6, 7]    [11]    0.15336547045170504    

In [29]:
apriori = Apriori(0.04, transactions, 41270)
apriori.find_all_sets()
apriori.find_all_rules(0.9)
apriori.print_rules()

[148]    [6]    0.10767010213278261    0.022501038228546696
[1, 11]    [6]    0.1431588329392358    0.030584313031321357
[7, 11]    [6]    0.09284571664222266    0.0213793055115444
[11, 27]    [6]    0.07470060228742846    0.0170887120218672
[11, 148]    [6]    0.09183968711650321    0.021575055766445125
[11, 218]    [6]    0.10081912420556925    0.023411298038124813
[148, 218]    [6]    0.09451356393858065    0.021319280124033703
[11, 148, 218]    [6]    0.08292011294136428    0.019630371741301278
[6, 11, 148]    [218]    0.5628343754938034    0.045376994693316505
