# Association Rule Mining

## 1. Load Libraries

In [25]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

## 2. Load Datasets

In [26]:
final_df = pd.read_csv('../final dataset/Ventolin_df.csv', index_col= 'Unnamed: 0')

In [27]:
ventolin_df = final_df.copy()

## 3. Create Age-Specific Dataframes

In [29]:
# Step 3: Create separate DataFrames for each age category
VentolinChildren_df = ventolin_df[ventolin_df['age_category'] == 'Children & Adolescents']
VentolinYoungAdults_df = ventolin_df[ventolin_df['age_category'] == 'Young Adults']
VentolinAdults_df = ventolin_df[ventolin_df['age_category'] == 'Adults']
VentolinSeniors_df = ventolin_df[ventolin_df['age_category'] == 'Seniors']
VentolinElderly_df = ventolin_df[ventolin_df['age_category'] == 'Elderly']


## 4. Associate Rule Mining Function

In [None]:
# Function to create transactions and perform association rule mining
def perform_association_rule_mining(data, min_support=0.01, metric = 'confidence'):
    '''
    transactions: all 'pt' events collated by primaryid. 
    frequent itemsets: through apriori calculation
    rules: assocation rules calculated from frequent itemsets by the confidence metric.
    '''

    # Create transactions by pt (adverse events)
    transactions = data.groupby(['primaryid']).apply(lambda x: x['pt'].dropna().tolist()).to_list()   

    # Perform one-hot encoding of transactions
    te = TransactionEncoder()
    te_ary = te.fit(transactions).transform(transactions)
    df = pd.DataFrame(te_ary, columns=te.columns_)
    
    # Generate frequent itemsets
    min_support = min_support
    frequent_itemsets = apriori(df, min_support=min_support, use_colnames=True)
    
    # Generate association rules
    metric = metric
    rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.1)
    
    return rules

### 4.1 Assocation Rules Mining by Cateogry

### Associate Rule Mining for entire dataset

In [None]:
Ventolin_rules = perform_association_rule_mining(ventolin_df, min_support=0.01, metric='confidence')
Ventolin_rules.sort_values(['confidence'],ascending=False).head(10)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
3,(Chest discomfort),(Dyspnoea),0.017592,0.087411,0.010445,0.59375,6.792649,0.008908,2.246374,0.868053
13,(Vomiting),(Nausea),0.02309,0.055525,0.013194,0.571429,10.291372,0.011912,2.203775,0.92417
4,(Cough),(Dyspnoea),0.031886,0.087411,0.012095,0.37931,4.339406,0.009307,1.470283,0.7949
9,(Product substitution issue),(Drug ineffective),0.030236,0.035184,0.010995,0.363636,10.335227,0.009931,1.516139,0.931406
6,(Dizziness),(Nausea),0.042881,0.055525,0.015393,0.358974,6.465093,0.013012,1.473381,0.883195
0,(Asthenia),(Fatigue),0.030786,0.051127,0.010995,0.357143,6.985407,0.009421,1.476025,0.884061
8,(Drug ineffective),(Product substitution issue),0.035184,0.030236,0.010995,0.3125,10.335227,0.009931,1.410565,0.936182
7,(Nausea),(Dizziness),0.055525,0.042881,0.015393,0.277228,6.465093,0.013012,1.324234,0.895019
10,(Headache),(Nausea),0.051127,0.055525,0.013194,0.258065,4.647716,0.010355,1.272988,0.827129
11,(Nausea),(Headache),0.055525,0.051127,0.013194,0.237624,4.647716,0.010355,1.244626,0.830981


### Association Rules by Age Categories

In [None]:
Adult_rules = perform_association_rule_mining(VentolinAdults_df, min_support=0.01, metric='confidence')
Child_rules = perform_association_rule_mining(VentolinChildren_df, min_support=0.01,metric='confidence')
YoungAdult_rules = perform_association_rule_mining(VentolinYoungAdults_df, min_support=0.01,metric='confidence')
Elderly_rules = perform_association_rule_mining(VentolinElderly_df, min_support=0.01,metric='confidence')
Senior_rules = perform_association_rule_mining(VentolinSeniors_df, min_support=0.01,metric='confidence')

  transactions = data.groupby(['primaryid']).apply(lambda x: x['pt'].dropna().tolist()).to_list()
  transactions = data.groupby(['primaryid']).apply(lambda x: x['pt'].dropna().tolist()).to_list()
  transactions = data.groupby(['primaryid']).apply(lambda x: x['pt'].dropna().tolist()).to_list()
  transactions = data.groupby(['primaryid']).apply(lambda x: x['pt'].dropna().tolist()).to_list()
  transactions = data.groupby(['primaryid']).apply(lambda x: x['pt'].dropna().tolist()).to_list()


#### Association Rules by Adults

In [None]:
Adult_rules.sort_values(['confidence'],ascending=False).head(10)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
427,"(Sleep disorder, Hypoaesthesia oral)","(Convulsion, Drug interaction)",0.010076,0.012594,0.010076,1.0,79.4,0.009949,inf,0.997455
413,"(Paralysis, Hypoaesthesia oral)","(Convulsion, Drug interaction)",0.010076,0.012594,0.010076,1.0,79.4,0.009949,inf,0.997455
923,"(Paralysis, Hypoaesthesia oral, Gait disturbance)","(Sleep disorder, Convulsion)",0.010076,0.012594,0.010076,1.0,79.4,0.009949,inf,0.997455
432,"(Convulsion, Drug interaction)","(Sleep disorder, Hypoaesthesia oral)",0.012594,0.010076,0.010076,0.8,79.4,0.009949,4.949622,1.0
431,"(Hypoaesthesia oral, Drug interaction)","(Sleep disorder, Convulsion)",0.010076,0.012594,0.010076,1.0,79.4,0.009949,inf,0.997455
430,"(Hypoaesthesia oral, Convulsion)","(Sleep disorder, Drug interaction)",0.010076,0.012594,0.010076,1.0,79.4,0.009949,inf,0.997455
429,"(Sleep disorder, Drug interaction)","(Hypoaesthesia oral, Convulsion)",0.012594,0.010076,0.010076,0.8,79.4,0.009949,4.949622,1.0
428,"(Sleep disorder, Convulsion)","(Hypoaesthesia oral, Drug interaction)",0.012594,0.010076,0.010076,0.8,79.4,0.009949,4.949622,1.0
866,"(Hypoaesthesia oral, Convulsion, Drug interact...","(Sleep disorder, Paralysis)",0.010076,0.012594,0.010076,1.0,79.4,0.009949,inf,0.997455
924,"(Sleep disorder, Paralysis, Hypoaesthesia oral)","(Gait disturbance, Convulsion)",0.010076,0.012594,0.010076,1.0,79.4,0.009949,inf,0.997455


#### Association Rules by Elderly

In [None]:
Elderly_rules.sort_values(['confidence'],ascending=False).head(10)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
15,(Feeling abnormal),(Dizziness),0.018248,0.051095,0.014599,0.8,15.657143,0.013666,4.744526,0.953532
14,(Dizziness),(Feeling abnormal),0.051095,0.018248,0.014599,0.285714,15.657143,0.013666,1.374453,0.986538
28,(Nausea),(Vomiting),0.047445,0.025547,0.018248,0.384615,15.054945,0.017036,1.583485,0.980077
29,(Vomiting),(Nausea),0.025547,0.047445,0.018248,0.714286,15.054945,0.017036,3.333942,0.958052
27,(Headache),(Malaise),0.025547,0.029197,0.010949,0.428571,14.678571,0.010203,1.698905,0.956305
26,(Malaise),(Headache),0.029197,0.025547,0.010949,0.375,14.678571,0.010203,1.559124,0.9599
23,(Visual impairment),(Dizziness),0.014599,0.051095,0.010949,0.75,14.678571,0.010203,3.79562,0.945679
22,(Dizziness),(Visual impairment),0.051095,0.014599,0.010949,0.214286,14.678571,0.010203,1.254147,0.982051
11,(Chills),(Dyspnoea),0.014599,0.065693,0.010949,0.75,11.416667,0.00999,3.737226,0.925926
10,(Dyspnoea),(Chills),0.065693,0.014599,0.010949,0.166667,11.416667,0.00999,1.182482,0.976563


#### Association Rules by Seniors

In [None]:
Senior_rules.sort_values(['confidence'], ascending=False).head(10)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(Arthralgia),(Myalgia),0.02746,0.016018,0.010297,0.375,23.410714,0.009858,1.574371,0.984314
1,(Myalgia),(Arthralgia),0.016018,0.02746,0.010297,0.642857,23.410714,0.009858,2.723112,0.972868
10,(Drug ineffective),(Product substitution issue),0.044622,0.028604,0.010297,0.230769,8.067692,0.009021,1.262815,0.916966
11,(Product substitution issue),(Drug ineffective),0.028604,0.044622,0.010297,0.36,8.067692,0.009021,1.492777,0.901845
8,(Dizziness),(Nausea),0.040046,0.051487,0.014874,0.371429,7.213968,0.012812,1.508997,0.897314
9,(Nausea),(Dizziness),0.051487,0.040046,0.014874,0.288889,7.213968,0.012812,1.349936,0.908138
2,(Dyspnoea),(Chest discomfort),0.100686,0.022883,0.01373,0.136364,5.959091,0.011426,1.131398,0.92536
3,(Chest discomfort),(Dyspnoea),0.022883,0.100686,0.01373,0.6,5.959091,0.011426,2.248284,0.851678
6,(Diarrhoea),(Nausea),0.037757,0.051487,0.010297,0.272727,5.29697,0.008353,1.304205,0.843044
7,(Nausea),(Diarrhoea),0.051487,0.037757,0.010297,0.2,5.29697,0.008353,1.202803,0.855247


#### Association Rules by Young Adults

In [None]:
YoungAdult_rules.sort_values(['confidence'], ascending=False).head(10)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
111,(Hypertension),"(Muscle twitching, Convulsion)",0.012658,0.012658,0.012658,1.0,79.0,0.012498,inf,1.0
113,(Muscle twitching),"(Hypertension, Convulsion)",0.012658,0.012658,0.012658,1.0,79.0,0.012498,inf,1.0
108,"(Hypertension, Convulsion)",(Muscle twitching),0.012658,0.012658,0.012658,1.0,79.0,0.012498,inf,1.0
29,(Convulsion),(Muscle twitching),0.012658,0.012658,0.012658,1.0,79.0,0.012498,inf,1.0
28,(Muscle twitching),(Convulsion),0.012658,0.012658,0.012658,1.0,79.0,0.012498,inf,1.0
27,(Convulsion),(Hypertension),0.012658,0.012658,0.012658,1.0,79.0,0.012498,inf,1.0
26,(Hypertension),(Convulsion),0.012658,0.012658,0.012658,1.0,79.0,0.012498,inf,1.0
109,"(Hypertension, Muscle twitching)",(Convulsion),0.012658,0.012658,0.012658,1.0,79.0,0.012498,inf,1.0
110,"(Muscle twitching, Convulsion)",(Hypertension),0.012658,0.012658,0.012658,1.0,79.0,0.012498,inf,1.0
56,(Hypertension),(Muscle twitching),0.012658,0.012658,0.012658,1.0,79.0,0.012498,inf,1.0


#### Association Rules by Children & Adolescents

In [None]:
Child_rules.sort_values(['confidence'], ascending=False).head(10)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
627,(Pain in extremity),"(Headache, Anxiety, Suicidal ideation, Aggress...",0.017241,0.017241,0.017241,1.0,58.0,0.016944,inf,1.0
385,(Pain in extremity),"(Headache, Suicidal ideation)",0.017241,0.017241,0.017241,1.0,58.0,0.016944,inf,1.0
462,"(Headache, Aggression)","(Pain in extremity, Anxiety)",0.017241,0.017241,0.017241,1.0,58.0,0.016944,inf,1.0
463,"(Pain in extremity, Anxiety)","(Headache, Aggression)",0.017241,0.017241,0.017241,1.0,58.0,0.016944,inf,1.0
465,"(Aggression, Pain in extremity)","(Headache, Anxiety)",0.017241,0.017241,0.017241,1.0,58.0,0.016944,inf,1.0
468,(Pain in extremity),"(Headache, Aggression, Anxiety)",0.017241,0.017241,0.017241,1.0,58.0,0.016944,inf,1.0
484,"(Mood swings, Aggression, Anxiety)",(Irritability),0.017241,0.017241,0.017241,1.0,58.0,0.016944,inf,1.0
487,"(Aggression, Anxiety, Irritability)",(Mood swings),0.017241,0.017241,0.017241,1.0,58.0,0.016944,inf,1.0
488,"(Mood swings, Anxiety)","(Aggression, Irritability)",0.017241,0.017241,0.017241,1.0,58.0,0.016944,inf,1.0
489,"(Mood swings, Aggression)","(Anxiety, Irritability)",0.017241,0.017241,0.017241,1.0,58.0,0.016944,inf,1.0
