In [2]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

In [7]:
# Load the (PMID, MH) pairs. The file has no header row, so column names are
# supplied here; lines that pandas cannot parse are skipped (on_bad_lines='skip').
df = pd.read_csv(
    'mh_data_processed.csv',
    header=None,
    names=['PMID', 'MH'],
    on_bad_lines='skip',
)

# One entry per PMID: the list of all MH values recorded for that PMID.
grouped = df.groupby('PMID')['MH'].apply(list)

# Collapse each list into a single comma-separated string, one row per PMID.
mh_list_df = grouped.map(','.join).rename('MH_terms').reset_index()

# Persist only the joined strings: no index column and no header line.
mh_list_df['MH_terms'].to_csv(
    "./JClose/mh/mh_pubmed_data_processed.csv",
    index=False,
    header=False,
)

# Display the per-PMID lists as the cell output.
grouped

PMID
10000       [MH-*Alleles, MH-Binding Sites, MH-Drug Stabil...
30000       [MH-Adrenergic beta-Antagonists, MH-Animals, M...
48000       [MH-Adolescent, MH-Adult, MH-Apgar Score, MH-B...
49000       [MH-Dihydrostreptomycin Sulfate, MH-Kinetics, ...
63000       [MH-Adult, MH-Electrocardiography, MH-Humans, ...
                                  ...                        
37861000    [MH-Humans, MH-*Pyroptosis, MH-Oxygen, MH-Gluc...
37899000    [MH-Humans, MH-*Health Personnel, MH-Qualitati...
37949000    [MH-Humans, MH-Fibrin, MH-Plasminogen Activato...
38007000    [MH-Male, MH-Humans, MH-Adult, MH-Monkeypox vi...
38059000    [MH-Humans, MH-*Melanoma, MH-Immunogenic Cell ...
Name: MH, Length: 1877, dtype: object

In [3]:
# Minimum fraction of transactions an itemset must appear in to be reported.
MIN_SUPPORT = 0.1

# `grouped` is a Series indexed by PMID whose values are lists of MH terms, so
# the transactions are simply its values. Indexing it with 'MH_terms' raises
# KeyError on a fresh kernel: that column only exists on `mh_list_df`, where it
# holds comma-joined strings rather than lists.
transactions = grouped.tolist()

# One-hot encode: one row per transaction, one boolean column per distinct term.
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
# NOTE: this rebinds `df`, replacing the raw (PMID, MH) frame loaded earlier.
df = pd.DataFrame(te_ary, columns=te.columns_)

# Mine frequent itemsets; use_colnames=True reports term names instead of
# column indices.
frequent_itemsets = apriori(df, min_support=MIN_SUPPORT, use_colnames=True)
frequent_itemsets


Unnamed: 0,support,itemsets
0,0.189132,(MH-Adult)
1,0.117741,(MH-Aged)
2,0.251465,(MH-Animals)
3,0.362813,(MH-Female)
4,0.716036,(MH-Humans)
5,0.345765,(MH-Male)
6,0.176345,(MH-Middle Aged)
7,0.148641,"(MH-Female, MH-Adult)"
8,0.189132,"(MH-Humans, MH-Adult)"
9,0.139584,"(MH-Male, MH-Adult)"


In [4]:
# Derive association rules from the frequent itemsets, using confidence as the
# filtering metric with a threshold of 0.5.
rules = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=0.5,
)

# Show only the rule sides and the headline quality metrics.
rules.loc[:, ['antecedents', 'consequents', 'support', 'confidence', 'lift', 'conviction']]

Unnamed: 0,antecedents,consequents,support,confidence,lift,conviction
0,(MH-Adult),(MH-Female),0.148641,0.785915,2.166172,2.976334
1,(MH-Adult),(MH-Humans),0.189132,1.0,1.396577,inf
2,(MH-Adult),(MH-Male),0.139584,0.738028,2.134482,2.49735
3,(MH-Middle Aged),(MH-Adult),0.114012,0.646526,3.418391,2.293995
4,(MH-Adult),(MH-Middle Aged),0.114012,0.602817,3.418391,2.073741
5,(MH-Aged),(MH-Humans),0.117741,1.0,1.396577,inf
6,(MH-Female),(MH-Humans),0.322323,0.888399,1.240719,2.544465
7,(MH-Male),(MH-Female),0.245605,0.710324,1.957823,2.199651
8,(MH-Female),(MH-Male),0.245605,0.676946,1.957823,2.025156
9,(MH-Middle Aged),(MH-Female),0.147576,0.836858,2.306582,3.90572
