# Example 9

Copyright 2023 Bernardo C. Rodrigues

This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public
License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later
version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
details. You should have received a copy of the GNU General Public License along with this program. If not, 
see <https://www.gnu.org/licenses/>. 

This notebook replicates the results of Example 9 in [1], concerning the factor coverage achieved by GreConD.

[1] Radim Belohlavek, Vilem Vychodil, "Discovery of optimal factors in binary data via a novel method of matrix
decomposition", Journal of Computer and System Sciences, Volume 76, Issue 1, 2010, Pages 3-20, ISSN 0022-0000,
https://doi.org/10.1016/j.jcss.2009.05.002 (https://www.sciencedirect.com/science/article/pii/S0022000009000415)


In [52]:
import numpy as np

# Symbol table for the Mushroom data set: one inner list per attribute, holding
# every single-letter code that attribute may take. The legend for each
# attribute is given in the comment that precedes its list.
attribute_map = [
    # cap-shape: bell=b, conical=c, convex=x, flat=f, knobbed=k, sunken=s
    ["b", "c", "x", "f", "k", "s"],
    # cap-surface: fibrous=f, grooves=g, scaly=y, smooth=s
    ["f", "g", "y", "s"],
    # cap-color: brown=n, buff=b, cinnamon=c, gray=g, green=r, pink=p,
    #            purple=u, red=e, white=w, yellow=y
    ["n", "b", "c", "g", "r", "p", "u", "e", "w", "y"],
    # bruises?: bruises=t, no=f
    ["t", "f"],
    # odor: almond=a, anise=l, creosote=c, fishy=y, foul=f, musty=m, none=n,
    #       pungent=p, spicy=s
    ["a", "l", "c", "y", "f", "m", "n", "p", "s"],
    # gill-attachment: attached=a, descending=d, free=f, notched=n
    ["a", "d", "f", "n"],
    # gill-spacing: close=c, crowded=w, distant=d
    ["c", "w", "d"],
    # gill-size: broad=b, narrow=n
    ["b", "n"],
    # gill-color: black=k, brown=n, buff=b, chocolate=h, gray=g, green=r,
    #             orange=o, pink=p, purple=u, red=e, white=w, yellow=y
    ["k", "n", "b", "h", "g", "r", "o", "p", "u", "e", "w", "y"],
    # stalk-shape: enlarging=e, tapering=t
    ["e", "t"],
    # stalk-root: bulbous=b, club=c, cup=u, equal=e, rhizomorphs=z, rooted=r,
    #             missing=?
    ["b", "c", "u", "e", "z", "r", "?"],
    # stalk-surface-above-ring: fibrous=f, scaly=y, silky=k, smooth=s
    ["f", "y", "k", "s"],
    # stalk-surface-below-ring: fibrous=f, scaly=y, silky=k, smooth=s
    ["f", "y", "k", "s"],
    # stalk-color-above-ring: brown=n, buff=b, cinnamon=c, gray=g, orange=o,
    #                         pink=p, red=e, white=w, yellow=y
    ["n", "b", "c", "g", "o", "p", "e", "w", "y"],
    # stalk-color-below-ring: brown=n, buff=b, cinnamon=c, gray=g, orange=o,
    #                         pink=p, red=e, white=w, yellow=y
    ["n", "b", "c", "g", "o", "p", "e", "w", "y"],
    # veil-type: partial=p, universal=u
    ["p", "u"],
    # veil-color: brown=n, orange=o, white=w, yellow=y
    ["n", "o", "w", "y"],
    # ring-number: none=n, one=o, two=t
    ["n", "o", "t"],
    # ring-type: cobwebby=c, evanescent=e, flaring=f, large=l, none=n,
    #            pendant=p, sheathing=s, zone=z
    ["c", "e", "f", "l", "n", "p", "s", "z"],
    # spore-print-color: black=k, brown=n, buff=b, chocolate=h, green=r,
    #                    orange=o, purple=u, white=w, yellow=y
    ["k", "n", "b", "h", "r", "o", "u", "w", "y"],
    # population: abundant=a, clustered=c, numerous=n, scattered=s, several=v,
    #             solitary=y
    ["a", "c", "n", "s", "v", "y"],
    # habitat: grasses=g, leaves=l, meadows=m, paths=p, urban=u, waste=w,
    #          woods=d
    ["g", "l", "m", "p", "u", "w", "d"],
]


In [53]:
# Convert the raw Mushroom data into a boolean (one-hot) numpy matrix.
#
# Every line of the data file is a comma-separated record. The first field is
# skipped (presumably the class label -- it has no entry in attribute_map) and
# each remaining field is expanded into one boolean column per symbol listed
# for that attribute in attribute_map.

# Starting column of each attribute's one-hot group, plus the total width.
column_offsets = []
n_columns = 0
for symbols in attribute_map:
    column_offsets.append(n_columns)
    n_columns += len(symbols)

rows = []

with open("mushroom/agaricus-lepiota.data") as fp:
    for line_number, line in enumerate(fp, start=1):
        line = line.strip()
        if not line:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        attributes = line.split(",")[1:]
        if len(attributes) != len(attribute_map):
            raise ValueError(
                f"line {line_number}: expected {len(attribute_map)} attributes, "
                f"found {len(attributes)}"
            )
        row = np.zeros(n_columns, dtype=bool)
        for offset, symbols, attribute in zip(column_offsets, attribute_map, attributes):
            # list.index raises ValueError for a symbol missing from the table.
            row[offset + symbols.index(attribute)] = True
        rows.append(row)

dataset = np.array(rows, dtype=bool)

print(dataset.shape)

(8124, 126)


In [54]:
from lib.BinaryDataset import BinaryDataset

# Wrap the boolean matrix in the project's BinaryDataset class; this wrapped
# object is what gets passed to GreConD further down.
binary_dataset = BinaryDataset(dataset)

In [55]:
from lib.FormalConceptAnalysis import GreConD

# Coverage levels to sweep: 0.00, 0.01, ... generated with a 0.01 step and an
# exclusive upper bound of 1.01.
coverages = np.arange(0, 1.01, 0.01)


def _factor_count(target_coverage):
    """Return how many concepts GreConD yields for the requested coverage."""
    factors, _ = GreConD(binary_dataset, coverage=target_coverage)
    return len(factors)


# One entry per coverage level, in the same order as `coverages`.
number_of_factors = [_factor_count(level) for level in coverages]

print(number_of_factors)

[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 6, 7, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 15, 16, 16, 17, 18, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 28, 29, 31, 32, 34, 35, 37, 38, 40, 42, 44, 46, 49, 52, 55, 58, 62, 66, 70, 75, 86, 119]


In [56]:
import plotly.express as px

# Express coverage as a percentage for the plot. A new array is derived instead
# of scaling `coverages` in place, so re-running this cell does not multiply
# the values by 100 a second time and `coverages` keeps its original scale.
coverage_percent = coverages * 100

fig = px.line(
    x=number_of_factors,
    y=coverage_percent,
    markers=True,
    title='Relationship between the number of factors and the approximation degree.',
    labels={
        "x": "Number of Factors on Algorithm 2",
        "y": "Coverage",
    },
)
fig.show()
