In [13]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder

In [14]:
col_names = ['timestamp', 'acade-year', 'label', 'scale', 'gender', 'age', 'where', 'status', 'finan', 'copeup', 'fam', 'pressure', 'result', 'livingplace', 'support', 'smedia', 'infer', 'meal', 'sick', 'hobby', 'sleep']
# load dataset
df = pd.read_csv("depression_dataset.csv",names=col_names)

In [15]:
df.drop([0], axis=0, inplace=True)
df.drop(['timestamp'], axis=1, inplace=True)
df.head()

Unnamed: 0,acade-year,label,scale,gender,age,where,status,finan,copeup,fam,pressure,result,livingplace,support,smedia,infer,meal,sick,hobby,sleep
1,4th year,Normal,65,Male,22,Home,Single,Yes,3,Good,Yes,No,Yes,Family,Yes,Yes,Yes,No,No,8
2,4th year,Good,75,Male,22,Home,Single,Yes,3,Normal,Yes,No,Yes,Family,No,No,Yes,Yes,No,6
3,2nd year,Bad,25,Male,22,Home,Single,Yes,2,Normal,Yes,No,No,No one,Yes,Yes,Yes,Yes,Yes,6
4,3rd year,Bad,25,Male,22,Home,Single,Yes,1,Good,Yes,Yes,No,Family,Yes,Yes,Yes,Yes,Yes,6
5,1st year,Normal,50,Male,22,Home,Single,Yes,3,Good,Yes,No,Yes,No one,Yes,Yes,Yes,Yes,Yes,6


In [59]:
df['scale'] = df['scale'].astype('int')

In [60]:
basket = (df.groupby(['scale','label'])['scale']
          .sum().unstack().reset_index().fillna(0)
          .set_index('scale'))
basket.head()

label,Bad,Good,Normal,Very bad,Very good
scale,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,0.0,0.0,0.0,1.0,0.0
6,0.0,0.0,0.0,6.0,0.0
8,0.0,0.0,0.0,8.0,0.0
9,0.0,0.0,9.0,9.0,0.0
10,0.0,0.0,0.0,880.0,0.0


In [61]:
def encode_units(x):
    if x <= 0:
        return 0
    if x >= 1:
        return 1
basket_sets = basket.applymap(encode_units)
basket_sets.head()

label,Bad,Good,Normal,Very bad,Very good
scale,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,0,0,0,1,0
6,0,0,0,1,0
8,0,0,0,1,0
9,0,0,1,1,0
10,0,0,0,1,0


In [62]:
def APRIORI_MY(data, min_support=0.04,  max_length = 4):
    # Collecting Required Library
    import numpy as np
    import pandas as pd
    from itertools import combinations
    # Step 1:
    # Creating a dictionary to stored support of an itemset.
    support = {} 
    L = list(data.columns)
    
    # Step 2: 
    #generating combination of items with len i in ith iteration
    for i in range(1, max_length+1):
        c = set(combinations(L,i))
        
    # Reset "L" for next ith iteration
        L =set()     
    # Step 3: 
        #iterate through each item in "c"
        for j in list(c):
            #print(j)
            sup = data.loc[:,j].product(axis=1).sum()/len(data.index)
            if sup > min_support:
                #print(sup, j)
                support[j] = sup
                
                # Appending frequent itemset in list "L", already reset list "L" 
                L = list(set(L) | set(j))
        
    # Step 4: data frame with cols "items", 'support'
    result = pd.DataFrame(list(support.items()), columns = ["Items", "Support"])
    return(result)

In [63]:
my_freq_itemset = APRIORI_MY(basket_sets, 0.04, 3)
my_freq_itemset.sort_values(by = 'Support', ascending = False)

Unnamed: 0,Items,Support
1,"(Normal,)",0.285714
3,"(Good,)",0.265306
4,"(Bad,)",0.265306
2,"(Very good,)",0.22449
0,"(Very bad,)",0.163265
5,"(Good, Normal)",0.122449


In [64]:
def ASSOCIATION_RULE_MY(df, min_threshold=0.5):
    import pandas as pd
    from itertools import permutations
    
    # STEP 1:
    #creating required varaible
    support = pd.Series(df.Support.values, index=df.Items).to_dict()
    data = []
    L= df.Items.values
    
    # Step 2:
    #generating rule using permutation
    p = list(permutations(L, 2))
    
    # Iterating through each rule
    for i in p:
        
        # If LHS(Antecedent) of rule is subset of RHS then valid rule.
        if set(i[0]).issubset(i[1]):
            conf = support[i[1]]/support[i[0]]
            #print(i, conf)
            if conf > min_threshold:
                #print(i, conf)
                j = i[1][not i[1].index(i[0][0])]
                lift = support[i[1]]/(support[i[0]]* support[(j,)])
                leverage = support[i[1]] - (support[i[0]]* support[(j,)])
                data.append([i[0], (j,), support[i[0]], support[(j,)], support[i[1]], conf, lift, leverage])

         
    # STEP 3:
    result = pd.DataFrame(data, columns = ["antecedents", "consequents", "antecedent support", "consequent support",
                                        "support", "confidence", "Lift", "Leverage"])
    return(result)

In [65]:
my_rule = ASSOCIATION_RULE_MY(my_freq_itemset, 0.2)
my_rule

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,Lift,Leverage
0,"(Normal,)","(Good,)",0.285714,0.265306,0.122449,0.428571,1.615385,0.046647
1,"(Good,)","(Normal,)",0.265306,0.285714,0.122449,0.461538,1.615385,0.046647


In [66]:

my_rule.sort_values(by='Lift', ascending= False).head(10)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,Lift,Leverage
0,"(Normal,)","(Good,)",0.285714,0.265306,0.122449,0.428571,1.615385,0.046647
1,"(Good,)","(Normal,)",0.265306,0.285714,0.122449,0.461538,1.615385,0.046647
