In [1]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpgrowth, association_rules
import pickle

In [2]:
df = pd.read_pickle("../data/processed.pkl")
categorical_columns = df.select_dtypes(include=['category']).columns

In [3]:
with open("../data/data_element_details.pkl", "rb") as file:
    data_element_details = pickle.load(file)

In [4]:
keep_cols = []
for value in data_element_details.values():
    column_name = f"{value['Label']} ({value['SAS Variable Name']})"
    if value['Section Name'] in ["Social Determinants and Health Equity", "Adverse Childhood Experiences"] and column_name in df.columns:
        keep_cols.append(column_name)
df = df[keep_cols]

In [5]:
df

Unnamed: 0,"Live With Anyone Depressed, Mentally Ill, Or Suicidal? (ACEDEPRS)",Live With a Problem Drinker/Alcoholic? (ACEDRINK),Live With Anyone Who Used Illegal Drugs or Abused Prescriptions? (ACEDRUGS),Live With Anyone Who Served TIme in Prison or Jail? (ACEPRISN),Were Your Parents Divorced/Seperated? (ACEDIVRC),How Often Did Your Parents Beat Each Other Up? (ACEPUNCH),How Often Did A Parent Physically Hurt You In Any Way? (ACEHURT1),How Often Did A Parent Swear At You? (ACESWEAR),How Often Did Anyone Ever Touch You Sexually? (ACETOUCH),How Often Did Anyone Make You Touch Them Sexually? (ACETTHEM),...,Satisfaction with life (LSATISFY),How often get emotional support needed (EMTSUPRT),How often do you feel socially isolated from others? (SDHISOLT),Have you lost employment or had hours reduced? (SDHEMPLY),During the past 12 months have you received food stamps (FOODSTMP),"How often did the food that you bought not last, and you didnt have money to get more? (SDHFOOD1)",Were you not able to pay your bills? (SDHBILLS),Were you not able to pay utility bills or threatened to lose service? (SDHUTILS),"Has a lack of reliable transportation kept you from appointments, meetings, work, or getting things needed (SDHTRNSP)",How often have you felt this kind of stress? (SDHSTRE1)
0,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,...,1 - Very satisfied,1 - Always,5 - Never,2 - No,2 - No,5 - Never,2 - No,2 - No,2 - No,4 - Rarely
1,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,...,1 - Very satisfied,1 - Always,5 - Never,2 - No,2 - No,5 - Never,2 - No,2 - No,2 - No,5 - Never
2,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,...,2 - Satisfied,2 - Usually,3 - Sometimes,2 - No,2 - No,5 - Never,2 - No,2 - No,2 - No,5 - Never
3,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,...,1 - Very satisfied,1 - Always,3 - Sometimes,2 - No,2 - No,5 - Never,2 - No,2 - No,2 - No,5 - Never
4,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,...,1 - Very satisfied,1 - Always,5 - Never,2 - No,2 - No,5 - Never,2 - No,2 - No,2 - No,5 - Never
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
445127,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,...,7 - Dont know/Not sure,3 - Sometimes,3 - Sometimes,1 - Yes,1 - Yes,3 - Sometimes,2 - No,2 - No,2 - No,2 - Usually
445128,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,...,1 - Very satisfied,1 - Always,5 - Never,2 - No,1 - Yes,5 - Never,2 - No,2 - No,2 - No,5 - Never
445129,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,...,9 - Refused,5 - Never,1 - Always,2 - No,2 - No,5 - Never,2 - No,2 - No,1 - Yes,1 - Always
445130,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,BLANK - Not asked or Missing,...,1 - Very satisfied,2 - Usually,5 - Never,2 - No,2 - No,5 - Never,2 - No,2 - No,2 - No,4 - Rarely


In [6]:
# df = df[categorical_columns]
transactions = []
for _, row in df.iterrows():
    transaction = [f"{col}:{val}" for col, val in zip(df.columns, row) if pd.notna(val) and not col.startswith("BLANK")]
    transactions.append(transaction)

In [7]:
# Encode the transactions
te = TransactionEncoder()
te_ary = te.fit_transform(transactions)
encoded_df = pd.DataFrame(te_ary, columns=te.columns_)

In [8]:
# Apply FP-Growth algorithm
frequent_itemsets = fpgrowth(encoded_df, min_support=0.05, use_colnames=True, max_len=5)

In [9]:
# Generate association rules
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.6)

In [16]:
pd.set_option('max_colwidth', 400)
rules[rules['consequents'] == frozenset({"Satisfaction with life (LSATISFY):1 - Very satisfied"})]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
365336,(How often get emotional support needed (EMTSUPRT):1 - Always),(Satisfaction with life (LSATISFY):1 - Very satisfied),0.265117,0.25667,0.171408,0.646536,2.518939,0.103360,2.102986,0.820549
430775,"(How often get emotional support needed (EMTSUPRT):1 - Always, Has a lack of reliable transportation kept you from appointments, meetings, work, or getting things needed (SDHTRNSP):2 - No)",(Satisfaction with life (LSATISFY):1 - Very satisfied),0.253527,0.25667,0.166456,0.656562,2.558002,0.101383,2.164380,0.815930
430780,"(Were you not able to pay utility bills or threatened to lose service? (SDHUTILS):2 - No, How often get emotional support needed (EMTSUPRT):1 - Always)",(Satisfaction with life (LSATISFY):1 - Very satisfied),0.252601,0.25667,0.165665,0.655837,2.555178,0.100830,2.159822,0.814342
430785,"(Were you not able to pay your bills? (SDHBILLS):2 - No, How often get emotional support needed (EMTSUPRT):1 - Always)",(Satisfaction with life (LSATISFY):1 - Very satisfied),0.247706,0.25667,0.163960,0.661914,2.578854,0.100382,2.198644,0.813819
430790,"(How often get emotional support needed (EMTSUPRT):1 - Always, Have you lost employment or had hours reduced? (SDHEMPLY):2 - No)",(Satisfaction with life (LSATISFY):1 - Very satisfied),0.245159,0.25667,0.161321,0.658025,2.563703,0.098396,2.173641,0.808036
...,...,...,...,...,...,...,...,...,...,...
771782,"(Were Your Parents Divorced/Seperated? (ACEDIVRC):BLANK - Not asked or Missing, How often get emotional support needed (EMTSUPRT):1 - Always, How often have you felt this kind of stress? (SDHSTRE1):5 - Never, How often do you feel socially isolated from others? (SDHISOLT):5 - Never)",(Satisfaction with life (LSATISFY):1 - Very satisfied),0.092847,0.25667,0.069964,0.753539,2.935828,0.046133,3.016011,0.726868
771790,"(Live With Anyone Who Served TIme in Prison or Jail? (ACEPRISN):BLANK - Not asked or Missing, How often have you felt this kind of stress? (SDHSTRE1):5 - Never, How often do you feel socially isolated from others? (SDHISOLT):5 - Never, How often get emotional support needed (EMTSUPRT):1 - Always)",(Satisfaction with life (LSATISFY):1 - Very satisfied),0.092847,0.25667,0.069964,0.753539,2.935828,0.046133,3.016011,0.726868
771798,"(Live With Anyone Who Used Illegal Drugs or Abused Prescriptions? (ACEDRUGS):BLANK - Not asked or Missing, How often have you felt this kind of stress? (SDHSTRE1):5 - Never, How often do you feel socially isolated from others? (SDHISOLT):5 - Never, How often get emotional support needed (EMTSUPRT):1 - Always)",(Satisfaction with life (LSATISFY):1 - Very satisfied),0.092847,0.25667,0.069964,0.753539,2.935828,0.046133,3.016011,0.726868
771806,"(Live With a Problem Drinker/Alcoholic? (ACEDRINK):BLANK - Not asked or Missing, How often get emotional support needed (EMTSUPRT):1 - Always, How often have you felt this kind of stress? (SDHSTRE1):5 - Never, How often do you feel socially isolated from others? (SDHISOLT):5 - Never)",(Satisfaction with life (LSATISFY):1 - Very satisfied),0.092847,0.25667,0.069964,0.753539,2.935828,0.046133,3.016011,0.726868


In [14]:
with open('../data/frequent_itemsets.pickle', 'wb') as handle:
    pickle.dump(frequent_itemsets, handle, protocol=pickle.HIGHEST_PROTOCOL)
with open('../data/association_rules.pickle', 'wb') as handle:
    pickle.dump(rules, handle, protocol=pickle.HIGHEST_PROTOCOL)