# Importações Necessárias

In [15]:
import numpy as np;
import pandas as pd;
from scipy.stats import chisquare;

# Parâmetros de Entrada

In [16]:
baseDados = "BPressureNishiBook.dat";
baseRegras = "BPressureNishiBook.txt";

# Pré-Processamento Base Transacional

In [17]:
mt = pd.read_table(baseDados, delim_whitespace=True, dtype="str", header=None);
mtBinaria = pd.get_dummies(mt);
dados = mtBinaria.astype('int').to_numpy();
dadosNeg = abs(dados-1);
print(dados);

[[1 0 0 0 0 1 0 0 1 0 0 1 1 0 0 1 0 0]
 [1 0 0 0 0 1 1 0 0 0 0 1 0 1 0 0 0 1]
 [0 0 1 0 0 1 0 0 1 0 0 1 1 0 0 0 0 1]
 [0 0 1 0 0 1 0 0 1 0 0 1 1 0 0 1 0 0]
 [0 1 0 1 0 0 0 1 0 0 1 0 0 0 1 0 1 0]
 [0 1 0 1 0 0 0 1 0 0 0 1 0 0 1 1 0 0]
 [0 1 0 0 1 0 0 1 0 1 0 0 1 0 0 0 0 1]
 [1 0 0 0 0 1 1 0 0 0 0 1 1 0 0 0 0 1]
 [0 1 0 0 1 0 0 1 0 1 0 0 1 0 0 0 1 0]
 [1 0 0 0 0 1 0 1 0 0 1 0 1 0 0 0 0 1]
 [0 1 0 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0]
 [0 1 0 0 1 0 0 0 1 0 0 1 0 1 0 0 1 0]
 [0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 1 0 0]
 [1 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0]
 [0 0 1 0 0 1 0 0 1 0 0 1 1 0 0 0 1 0]]


# Pré-processamento RAs

In [18]:
regras = pd.read_table(baseRegras, sep="#", names=("AR", "sup", "cnf"));
regras["antc"], regras["cons"] =  regras["AR"].str.split("==>").str;
del regras["AR"]
regras["sup"] = regras["sup"].str.replace("SUP:", "").astype('float');
regras["cnf"] = regras["cnf"].str.replace("CONF:", "").astype('float');
regras["cons"] = regras["cons"].str.split();
regras["antc"] = regras["antc"].str.split();
print(regras);

      sup       cnf  antc                cons
0     5.0  0.555556   [6]                 [1]
1     5.0  1.000000   [1]                 [6]
2     3.0  1.000000   [4]                 [2]
3     3.0  0.500000   [2]                 [4]
4     3.0  1.000000   [5]                 [2]
...   ...       ...   ...                 ...
1369  1.0  0.333333  [15]   [3, 6, 9, 12, 16]
1370  1.0  0.100000  [12]   [3, 6, 9, 15, 16]
1371  1.0  0.166667   [9]  [3, 6, 12, 15, 16]
1372  1.0  0.111111   [6]  [3, 9, 12, 15, 16]
1373  1.0  0.250000   [3]  [6, 9, 12, 15, 16]

[1374 rows x 4 columns]


# Suporte

In [19]:
def intersect(itemset, base):
    tmp = base[:,np.array(itemset)-1]
    return tmp.all(axis=1)

def abSupp(itemset1, itemset2=[], negativo=False):
    itemset = itemset1 + itemset2;
    base=np.copy(dados)
    if negativo:
        base = abs(base-1);    
    return np.sum(intersect(itemset, base), axis=0);
    
    
def relSupp(itemset1, itemset2=[], negativo=False):
    return abSupp(itemset1, itemset2, negativo) / dados.shape[0];

print(abSupp([6]));      
print("12 6 ==> 13 #SUPP: " + str(abSupp([12, 6], [13]))) #5
print("12 17 14 ==> 2 #SUPP: " + str(abSupp([12, 17, 14], [2]))) #2

9
12 6 ==> 13 #SUPP: 5
12 17 14 ==> 2 #SUPP: 2


# Confiança

In [20]:
def conf(antc,consq):
    return abSupp(antc,consq)/abSupp(antc);

print(conf([12, 17, 14], [2]));# 1.0
print(conf([6, 1, 18], [13])); # .666
print(conf([6, 13], [1, 18])); # .2857142857142857

1.0
0.6666666666666666
0.2857142857142857


# Added Value

In [21]:
def addedValue(antc, consq):
    return conf(antc, consq)-reSupp(consq);

# All-Confidence

In [32]:
def allConf(itemset):
    print(itemset)
    maxSup = max([relSupp(list(i)) for i in itemset])    
    return relSupp(itemset)/maxSup

def allConf2(itemset):
    maxSup = relSupp(np.array(itemset))
    relSupp(itemset)/maxSup
    
print(allConf([1, 6]))
print(allConf2([1, 6]))

[1, 6]


TypeError: 'int' object is not iterable

# Casual Support

In [9]:
def casualSupp(ant, cons):
    return relSupp(ant, cons) + relSupp(ant, cons, negativo=True);

# Casual Confidence

In [10]:
def casualConf(ant, cons):
    conf1 = conf(ant,cons)
    conf2 = abSupp(ant,cons, negativo=True) / abSupp(ant, negativo=True)
    return (conf1 + conf2) * .5

print(casualConf([1], [6]))

0.8


# Certainty Factor (CF)

In [11]:
def certFactor(ant, cons):
    return conf(ant,cons)-relSupp(cons)/relSupp(cons, negativo=True);

# Chi-Squared

http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.81.4523&rep=rep1&type=pdf

ftp://ftp.cse.buffalo.edu/users/azhang/disc/disc01/cd1/out/websites/kdd_explorations_full/ahmed.pdf

https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.stats.chisquare.html

https://stackoverflow.com/questions/9330114/chi-squared-test-in-python

In [12]:
def chiSqrd(antc, cons):
    #    l1 = [5,15,5]
    #    l2 = [50,10,15]
    #    l3 = [55,25,20]    
    #    a= np.matrix([l1, l2, l3])
    
    lin = [intersect(antc, dados), intersect(antc, dadosNeg)]
    col = [intersect(cons, dados), intersect(cons, dadosNeg)]

    tbCtg = np.matrix([[np.sum(i*j) for i in lin] for j in col])
    df = tbCtg.shape[1] - 1
    observado = tbCtg.getA1()
    
    calcEsp = lambda i, j : np.sum(tbCtg[i,:])*np.sum(tbCtg[:,j])/np.sum(tbCtg)    
    esperado = [calcEsp(i,j) for i in range(tbCtg.shape[0]) for j in range(tbCtg.shape[0])]
    
    chi, p = chisquare(f_obs=observado, f_exp=esperado, ddof=df)    
    return chi

print(chiSqrd([1],[6]))

5.0


# Cross-Support Ratio

In [13]:
def crossSuppRatio(itemset):
    suportes = [abSupp(i) for i in itemset]
    return min(suportes)/max(suportes)

# Collective Strength ????
http://www09.sigmod.org/disc/disc99/disc/sigmod_papers/slipp_a_new_framework_for_/slides.pdf

In [14]:
def collectiveStrength(itemset):
    violation = dados.shape[0]-abSupp(itemset)-abSupp(itemset, negativo=True)
    violation /= dados.shape[0]

    sup = relSupp(itemset)
    
    return ((1-violation)/1-sup) * ((sup)/violation)
    
print(collectiveStrength([6,1]))    



0.5000000000000001


# Conviction

In [15]:
def conviction(ant, cons):
    return (1-relSupp(cons))/1-conf(ant, cons)

# Cosine

In [16]:
def cosine(antc, cons):
    return(relSupp(antc,cons)/(relSupp(antc), relSup(cons))**0.5)

# Coverage

In [17]:
def coverage(antc, cons):
    return relSupp(antc)