In [373]:
from pyrca.analyzers.bayesian import BayesianNetwork
import pandas as pd
import json

In [374]:
# Read the disease knowledge base from disk.
with open('data/data.json', encoding="utf-8") as f:
    data = json.load(f)

# Keep only the first 20 entries (in file order) for the rest of the notebook.
data = dict(list(data.items())[:20])

In [375]:
# Every distinct entry found under keys -> common_keys across all diseases.
symptoms = {
    symptom
    for entry in data.values()
    for symptom in entry['keys']['common_keys']
}

In [376]:
# Node set of the graph: every symptom plus every disease name.
nodes = list(symptoms | data.keys())

# df:       disease x symptom indicator table.
# graph_df: node x node matrix; a 1 at [disease, symptom] links the two
#           (presumably read by the model as a directed edge disease -> symptom).
df = pd.DataFrame(0, index=list(data.keys()), columns=list(symptoms))
graph_df = pd.DataFrame(0, index=nodes, columns=nodes)

# Fill both tables in a single pass over the disease -> symptom pairs.
for disease, info in data.items():
    for symptom in info['keys']['common_keys']:
        df.at[disease, symptom] = 1
        graph_df.at[disease, symptom] = 1

In [377]:
# Show the node x node matrix; a 1 marks a (disease row, symptom column) pair.
graph_df

Unnamed: 0,产后抑郁,精神状态异常,12岁之前起病,自发、令人苦恼、不愉悦的生殖器充血以及感觉到即将发生性高潮（PGAD/GPD）,偏执,愤怒和易激惹,严重的功能缺损,高热＞39.5°C(＞103°F),在休闲活动中不能保持安静,女性性功能障碍,...,无最近的伤恸病史,常常说太多的话,冲动行为,精力下降或疲劳感增加,5-羟色胺综合征,无法遵照指令或难以完成职责和分配的任务（非误解或对抗行为所引起）,感官现象,回避、厌恶或不愿参加需要持续脑力劳动的任务,呼吸迟缓,疲劳
产后抑郁,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
精神状态异常,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
12岁之前起病,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
自发、令人苦恼、不愉悦的生殖器充血以及感觉到即将发生性高潮（PGAD/GPD）,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
偏执,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
无法遵照指令或难以完成职责和分配的任务（非误解或对抗行为所引起）,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
感官现象,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
回避、厌恶或不愿参加需要持续脑力劳动的任务,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
呼吸迟缓,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [378]:
# Wrap the node x node matrix in the network's config object, then build the model from it.
bn_config = BayesianNetwork.config_class(graph=graph_df)
model = BayesianNetwork(config=bn_config)

In [379]:
# Build an all-zero square table with one column per graph node. The size is
# derived from `nodes` instead of a hard-coded 128, so changing how many
# diseases are loaded no longer breaks the DataFrame construction (the column
# count must equal len(nodes)).
n_nodes = len(nodes)
test_data = [[0] * n_nodes for _ in range(n_nodes)]
data_df = pd.DataFrame(test_data, columns=nodes)

# Row i is the observation for the i-th disease: each of its common symptoms
# and the disease's own column are set to 1. The disease flag is set inside
# the symptom loop, so a disease with no symptoms keeps an all-zero row.
# Rows beyond the last disease stay all-zero.
for i, (disease, info) in enumerate(data.items()):
    for symptom in info['keys']['common_keys']:
        data_df.at[i, symptom] = 1
        data_df.at[i, disease] = 1

In [380]:
# from pyrca.applications.example.rca import RCAEngine
# engine = RCAEngine()
# engine.build_causal_graph(
#     df=graph_df,
#     run_pdag2dag=True,
#     max_num_points=5000000,
#     verbose=True
# )
# bn = engine.train_bayesian_network(dfs=[data_df])
# bn.print_probabilities()

In [381]:
# Train the Bayesian network on the observation table (passed as a one-element list).
model.train([data_df])



In [382]:
from difflib import SequenceMatcher

In [383]:
def sim_ratio(a, b):
    """Return the difflib similarity ratio between sequences ``a`` and ``b``.

    No junk filter is applied. The value is ``SequenceMatcher.ratio()``:
    1.0 for identical sequences and 0.0 when nothing matches.
    """
    matcher = SequenceMatcher(isjunk=None, a=a, b=b)
    return matcher.ratio()

In [384]:
def format_root_causes(results):
    """Render root-cause results as one ``name: probability`` line per node.

    Args:
        results: Mapping with a ``'root_cause_nodes'`` entry holding
            ``(node_name, probability)`` pairs.

    Returns:
        A newline-joined string with each probability formatted to four
        decimal places; an empty string when there are no root-cause nodes.
    """
    output = []
    for node, prob in results['root_cause_nodes']:
        # Strip everything up to and including the first underscore (e.g. a
        # "ROOT_" prefix). Splitting only once keeps names that themselves
        # contain underscores intact, and taking the last piece leaves a name
        # without any underscore unchanged instead of raising IndexError.
        disease = node.split('_', 1)[-1]
        output.append(f"{disease}: {prob:.4f}")
    return "\n".join(output)

In [385]:
def find_matched_nodes(symp_list):
    """Map each free-text symptom to the most similar known symptom node.

    Similarity is measured with ``sim_ratio`` against every entry of the
    notebook-level ``symptoms`` set.

    Args:
        symp_list: Iterable of symptom descriptions to look up.

    Returns:
        A list aligned with ``symp_list``. Each element is the best-matching
        symptom node, or ``None`` when no candidate has a similarity above 0.
    """
    # Iterate candidates in sorted order so that ties between equally similar
    # nodes are resolved the same way on every run; iterating the raw set
    # would make the winner depend on per-process string hashing.
    candidates = sorted(symptoms)
    matched_nodes = []
    for symp in symp_list:
        best_ratio = 0
        best_node = None
        for node in candidates:
            ratio = sim_ratio(symp, node)
            # Strict comparison: the first candidate reaching the top score wins.
            if ratio > best_ratio:
                best_ratio, best_node = ratio, node
        matched_nodes.append(best_node)
    return matched_nodes

In [386]:
def diagnose(symp_list, model):
    """Return formatted root causes for a list of free-text symptoms.

    Args:
        symp_list: Iterable of symptom descriptions.
        model: Object exposing ``find_root_causes(nodes)`` whose result has a
            ``to_dict()`` method containing ``'root_cause_nodes'``.

    Returns:
        The string produced by ``format_root_causes`` for the model's result.
    """
    # find_matched_nodes yields None for a symptom that resembles no known
    # node; drop those so only real node names are handed to the model.
    # NOTE(review): how the model treats an empty list is not visible here.
    matched_nodes = [
        node for node in find_matched_nodes(symp_list) if node is not None
    ]
    results = model.find_root_causes(matched_nodes).to_dict()
    return format_root_causes(results)
    

In [387]:
# Example free-text symptoms; each is fuzzy-matched to a known symptom node before inference.
symp_list = ['瞳孔小', '思维奇怪', '容易愤怒']

In [388]:
# Run the end-to-end lookup for the example symptoms and print the formatted result.
print(diagnose(symp_list, model))





Finding Elimination Order: : 100%|██████████| 1/1 [00:00<00:00, 1002.94it/s]
Eliminating: 阿片类药物过量: 100%|██████████| 1/1 [00:00<00:00, 1003.18it/s]
Finding Elimination Order: : 100%|██████████| 1/1 [00:00<?, ?it/s]
Eliminating: 人格障碍: 100%|██████████| 1/1 [00:00<00:00, 999.36it/s]
Finding Elimination Order: : 100%|██████████| 1/1 [00:00<00:00, 1003.18it/s]
Eliminating: 广泛性焦虑障碍: 100%|██████████| 1/1 [00:00<00:00, 499.98it/s]
Finding Elimination Order: : 100%|██████████| 1/1 [00:00<?, ?it/s]
Eliminating: 阿片类药物过量: 100%|██████████| 1/1 [00:00<?, ?it/s]
Finding Elimination Order: : 100%|██████████| 1/1 [00:00<00:00, 1000.79it/s]
Eliminating: ROOT_阿片类药物过量: 100%|██████████| 1/1 [00:00<?, ?it/s]
Finding Elimination Order: : 100%|██████████| 1/1 [00:00<00:00, 996.75it/s]
Eliminating: 人格障碍: 100%|██████████| 1/1 [00:00<?, ?it/s]
Finding Elimination Order: : 100%|██████████| 1/1 [00:00<00:00, 996.51it/s]
Eliminating: ROOT_人格障碍: 100%|██████████| 1/1 [00:00<00:00, 999.83it/s]
Finding Elimination Order

阿片类药物过量: 0.9802
人格障碍: 0.9802
广泛性焦虑障碍: 0.9802
