In [8]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import collections
import Materials_Substitution as MS

# Transactions: each entry is iterated as a collection of indices into List_of_HG
# (see the cell that builds Transaction_items).
Transactions_id = np.load("transaction_ofm1d0.npy") # Each T contains index of pair HG
# Records addressed by those indices; each record is read via its "Guest_Atoms" field.
# NOTE(review): "HG" presumably stands for host/guest pairs -- confirm with the data owner.
# NOTE(review): if this file stores Python objects, recent NumPy versions refuse to load it
# unless allow_pickle=True is passed, and unpickling is only safe for trusted files -- confirm.
List_of_HG = np.load("RTX7600_ofm_HG.npy")

In [69]:
# One transaction per entry of Transactions_id: the first guest atom of every
# HG record that the transaction references.
Transaction_items = [
    [ List_of_HG[pair_id]["Guest_Atoms"][0] for pair_id in transaction ]
    for transaction in Transactions_id
]

def transaction_vectorization(D=None):
    """
    Encode a transaction dataset as binary item-membership vectors.

    param:
    - D: dataset contains transactions Ts, each T is a list of items
    return:
    - D_vectors: array with one row per transaction, built from
      MS.List_atom_vectorization; every entry larger than 1 is clipped to 1,
      so an item repeated inside a transaction counts once
    - itemset: array of the distinct items found in D, in the iteration
      order of a collections.Counter built from all items
    - itemset_vectors: array whose i-th row is ref[itemset[i]], where ref is
      the object returned by MS.Element_onehot_encode
      (presumably one one-hot vector per item -- TODO confirm against MS)
    """
    # Flatten every transaction into one list to discover the distinct items.
    all_items = []
    for transaction in D:
        all_items.extend( transaction )
    # list(...) is required on Python 3, where dict keys are a view object and
    # np.array(view) would produce a useless 0-d object array; on Python 2 the
    # result is the same as the former .keys() call.
    itemset = np.array( list( collections.Counter( all_items ) ) )

    ref = MS.Element_onehot_encode( list_elements=itemset )

    D_vectors = np.array(
        [ MS.List_atom_vectorization( Atoms=transaction, ref=ref ) for transaction in D ]
    )
    # Only membership matters downstream, not multiplicity.
    D_vectors[D_vectors>1] = 1

    itemset_vectors = np.array( [ ref[item] for item in itemset ] )
    return D_vectors, itemset, itemset_vectors

def check_one_hot(X=None):
    """
    Binarise an array in place and return it.

    param:
    - X: numeric array; it is modified in place
    return:
    - X: the same array object, with entries >= 1 set to 1 and entries < 1
      set to 0
    """
    at_least_one = X >= 1
    below_one = X < 1
    X[at_least_one] = 1
    X[below_one] = 0
    return X

def sort_k_itemsets( L_k=None ):
    """
    Binarise a stack of itemset vectors and order its rows.

    Rows are ordered as if each row were a binary number whose most
    significant bit is column 0, largest value first.

    param:
    - L_k: 2-D array, one itemset vector per row (binarised in place by
      check_one_hot); an empty array is returned unchanged
    return:
    - the rows of L_k in descending order
    raise:
    - ValueError if L_k is non-empty but not 2-D
    """
    L_k = check_one_hot(L_k)
    if L_k.size == 0:
        # Nothing to order; also avoids an axis error on an empty 1-D array.
        return L_k
    if L_k.ndim != 2:
        raise ValueError( "L_k must be a 2-D array of itemset vectors" )

    # Comparing rows column by column gives the same order as comparing the
    # binary numbers they spell, without building 2**column weights that
    # overflow 64-bit integers once there are more than 63 columns.
    # np.lexsort uses its LAST key as the primary one, hence the reversal.
    ascending = np.lexsort( L_k.T[::-1] )
    return L_k[ascending[::-1]]

def apriori_algorithm(L_k_1=None):
    """
    Build candidate itemsets by joining the rows of L_k_1.

    The rows are first ordered with sort_k_itemsets. A row with more than one
    item is joined with every later row that contains all of its items except
    the last one; a row with at most one item is joined with every later row.
    A join is the element-wise sum of the two rows, which sort_k_itemsets
    binarises again at the end.

    param:
    - L_k_1: 2-D array, one binary itemset vector per row
    return:
    - 2-D array of candidate vectors ordered by sort_k_itemsets; an empty
      array of shape (0, number_of_columns) when no two rows can be joined
    """
    L_k_1 = sort_k_itemsets( L_k=L_k_1 )

    candidates = []
    for i in range( len(L_k_1) ):
        current = L_k_1[i]
        item_ids = np.where( current==1 )[0]
        if len(item_ids)>1:
            prefix = item_ids[:-1]
            # Rows from i onwards that contain every item of the prefix.
            shares_prefix = np.sum( L_k_1[i:, prefix], axis=1 )==len(prefix)
            partners = L_k_1[i:][shares_prefix]
            # partners[0] is row i itself (it always contains its own prefix),
            # so only the remaining partners produce new candidates.
            if len(partners)>1:
                candidates += list( current + partners[1:] )
        else:
            candidates += list( current + L_k_1[(i+1):] )

    if not candidates:
        # np.array([]) would be 1-D and make sort_k_itemsets fail on its
        # row-wise reduction; hand back a well-formed empty result instead.
        n_items = L_k_1.shape[1] if L_k_1.ndim > 1 else 0
        return np.zeros( (0, n_items), dtype=L_k_1.dtype )
    return sort_k_itemsets( np.array( candidates ) )

def check_frequent( D=None, itemsets=None, s_threshold=None ):
    """
    Keep the itemsets that occur in enough transactions.

    param:
    - D: 2-D array, one transaction vector per row
    - itemsets: iterable of itemset vectors; the positions equal to 1 are the
      items of the itemset
    - s_threshold: minimum number of transactions that must contain all items
      of an itemset for it to be kept
    return:
    - array of the kept itemset vectors, in their original order
    """
    def _support(candidate):
        # Number of transactions whose entries sum to the item count over
        # the candidate's item columns.
        members = np.where( candidate==1 )[0]
        covered = np.sum( D[:,members], axis=1 )==len(members)
        return len(D[covered])

    return np.array( [ candidate for candidate in itemsets if _support(candidate) >= s_threshold ] )

def association_rule_mining(transactions=None, k_max=None, s_threshold=None):
    """
    Collect frequent itemsets of increasing size from a list of transactions.

    The transactions are vectorised, the frequent single items are selected
    with check_frequent, and then candidates are repeatedly generated with
    apriori_algorithm and filtered with check_frequent.

    param:
    - transactions: dataset of transactions, each a list of items
    - k_max: controls the number of rounds; at most k_max-2 candidate
      generation rounds are run, so the largest itemsets have k_max-1 items
      (NOTE(review): confirm that stopping one size short of k_max is intended)
    - s_threshold: minimum number of transactions that must contain an itemset
    return:
    - array stacking every frequent itemset vector found, smallest size first

    The number of itemsets feeding each round is printed as progress output.
    """
    D_vectors, _, itemset_vectors = transaction_vectorization(D=transactions)

    L1 = check_frequent(D=D_vectors, itemsets=itemset_vectors, s_threshold=s_threshold)
    L = list(L1)
    for k in range(k_max-2):
        # At least two itemsets are needed to join anything.
        if len(L1)<=1:
            break
        # Parenthesised form prints the same text on Python 2 and is also
        # valid syntax on Python 3.
        print(len(L1))
        L_k = apriori_algorithm(L_k_1=L1)
        L_k = check_frequent( D=D_vectors, itemsets=L_k, s_threshold=s_threshold )
        L += list(L_k)
        L1 = L_k
    return np.array(L)
# Mine the guest-atom transactions: an itemset is kept when at least s_threshold=200
# transactions contain it, and k_max=5 allows k_max-2 = 3 candidate-generation rounds.
# %time reports how long the call takes.
%time results = association_rule_mining(transactions=Transaction_items, k_max=5, s_threshold=200)

32
354
2926
[u'Pr' u'La' u'Na' u'Nd']
[u'Pr' u'La' u'Na' u'Li']
[u'Pr' u'La' u'Na' u'Lu']
[u'Pr' u'La' u'Na' u'Tb']
[u'Pr' u'La' u'Na' u'Pm']
[u'Pr' u'La' u'Na' u'Yb']
[u'Pr' u'La' u'Na' u'Ge']
[u'Pr' u'La' u'Na' u'Dy']
[u'Pr' u'La' u'Na' u'Ho']
[u'Pr' u'La' u'Na' u'Mg']
[u'Pr' u'La' u'Na' u'B']
[u'Pr' u'La' u'Na' u'K']
[u'Pr' u'La' u'Na' u'Si']
[u'Pr' u'La' u'Na' u'Sm']
[u'Pr' u'La' u'Na' u'Y']
[u'Pr' u'La' u'Na' u'Ca']
[u'Pr' u'La' u'Na' u'Al']
[u'Pr' u'La' u'Na' u'Ce']
[u'Pr' u'La' u'Na' u'As']
[u'Pr' u'La' u'Na' u'Gd']
[u'Pr' u'La' u'Na' u'Ga']
[u'Pr' u'La' u'Na' u'Eu']
[u'Pr' u'La' u'Na' u'Tm']
[u'Pr' u'La' u'Na' u'Er']
[u'Pr' u'La' u'Nd' u'Li']
[u'Pr' u'La' u'Nd' u'Lu']
[u'Pr' u'La' u'Nd' u'Tb']
[u'Pr' u'La' u'Nd' u'Pm']
[u'Pr' u'La' u'Nd' u'Yb']
[u'Pr' u'La' u'Nd' u'Ge']
[u'Pr' u'La' u'Nd' u'Dy']
[u'Pr' u'La' u'Nd' u'Ho']
[u'Pr' u'La' u'Nd' u'Mg']
[u'Pr' u'La' u'Nd' u'B']
[u'Pr' u'La' u'Nd' u'K']
[u'Pr' u'La' u'Nd' u'Si']
[u'Pr' u'La' u'Nd' u'Sm']
[u'Pr' u'La' u'Nd' u'Y']
[u'Pr'

In [2]:
# Interactive scratch cell: read one line from the user, parse it as an
# integer and echo it back.
a = int( raw_input("please enter a random number:") )
print(a)

please enter a random number:32
32
