In [1]:
import tensorflow as tf
from tensorflow import keras

import os
from os import path

import tempfile
import time

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np

import pandas as pd
pd.options.mode.chained_assignment = None

import seaborn as sns

import sklearn

from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

Matplotlib created a temporary config/cache directory at /tmp/matplotlib-2w5ejx31 because the default path (/home/juliuswa/installed/apache-tomcat-9.0.30/.config/matplotlib) is not a writable directory; it is highly recommended to set the MPLCONFIGDIR environment variable to a writable directory, in particular to speed up the import of Matplotlib and to better support multiprocessing.


In [2]:
# Locations of the data directory and of the dataset variant used below.
data_path = "../../RoboDocData"
df_name = "95"
df_path = "/".join((data_path, df_name))

# NOTE(review): pickle files can execute arbitrary code when loaded; only
# read dataframes that come from a trusted source.
dataframe_file = f"{df_path}/dataframe_{df_name}.pkl"
df = pd.read_pickle(dataframe_file)

In [5]:
def _load_npy(name):
    """Read ``<data_path>/<name>.npy`` and return what ``np.load`` yields."""
    with open(f"{data_path}/{name}.npy", 'rb') as handle:
        return np.load(handle)


# Raw matrix plus the three lookup tables used to label its columns.
disease_matrix_raw = _load_npy("disease_matrix_raw")
symptoms_dict = _load_npy("symptoms_dict")
patient_history_dict = _load_npy("patient_history_dict")
diagnoses_dict = _load_npy("diagnoses_dict")

In [7]:
# Average of the per-row sums of the raw matrix.
row_totals = disease_matrix_raw.sum(axis=1)
print(row_totals.mean())

11.280858494588722


In [8]:
def index_to_name(i: int):
    """Return a labelled name for position ``i`` of the combined index.

    The combined index consists of three back-to-back ranges:

    * ``[0, lenS)``                          -> entries of ``symptoms_dict``
    * ``[lenS, lenS + lenPH)``               -> entries of ``patient_history_dict``
    * ``[lenS + lenPH, lenS + lenPH + lenD)`` -> entries of ``diagnoses_dict``

    Args:
        i: Position in the combined index.

    Returns:
        ``"Symptom: ..."``, ``"History: ..."`` or ``"Disease: ..."`` built from
        field 1 of the matching table entry, or ``"Invalid index"`` when ``i``
        lies outside all three ranges (including negative values).
    """
    lenS = len(symptoms_dict)
    lenPH = len(patient_history_dict)
    lenD = len(diagnoses_dict)

    # Range bounds are inclusive at the start: the first history entry sits at
    # i == lenS and the first diagnosis at i == lenS + lenPH.
    if 0 <= i < lenS:
        return f"Symptom: {symptoms_dict[i][1]}"
    if lenS <= i < lenS + lenPH:
        return f"History: {patient_history_dict[i - lenS][1]}"
    if lenS + lenPH <= i < lenS + lenPH + lenD:
        return f"Disease: {diagnoses_dict[i - (lenS + lenPH)][1]}"
    return "Invalid index"

def index_to_description(i: int):
    """Return the description text for position ``i`` of the combined index.

    The combined index consists of three back-to-back ranges:

    * ``[0, lenS)``                          -> entries of ``symptoms_dict``
    * ``[lenS, lenS + lenPH)``               -> entries of ``patient_history_dict``
    * ``[lenS + lenPH, lenS + lenPH + lenD)`` -> entries of ``diagnoses_dict``

    Args:
        i: Position in the combined index.

    Returns:
        Field 2 of the matching table entry formatted as a string, or
        ``"Invalid index"`` when ``i`` lies outside all three ranges
        (including negative values).
    """
    lenS = len(symptoms_dict)
    lenPH = len(patient_history_dict)
    lenD = len(diagnoses_dict)

    # Range bounds are inclusive at the start: the first history entry sits at
    # i == lenS and the first diagnosis at i == lenS + lenPH.
    if 0 <= i < lenS:
        return f"{symptoms_dict[i][2]}"
    if lenS <= i < lenS + lenPH:
        return f"{patient_history_dict[i - lenS][2]}"
    if lenS + lenPH <= i < lenS + lenPH + lenD:
        return f"{diagnoses_dict[i - (lenS + lenPH)][2]}"
    return "Invalid index"

def print_disease_frequency(disease_matrix, n):
    """Print the ``n`` columns of ``disease_matrix`` with the largest column sums.

    Args:
        disease_matrix: 2-D array; it is summed along axis 0 and each column
            position is labelled through ``index_to_name``.
        n: Maximum number of ranked lines to print. Values larger than the
            number of columns are clamped instead of raising ``IndexError``.
    """
    diseases_count = np.sum(disease_matrix, axis=0)
    # argsort is ascending; reverse it so the most frequent column comes first.
    index_sorted = np.argsort(diseases_count)[::-1]

    for rank, column in enumerate(index_sorted[:max(n, 0)], start=1):
        print(f"#{rank:03}: {index_to_name(column)}, {int(diseases_count[column])} times")

def print_disease_mortality(death_diseases, alive_diseases, n, lower_bound):
    """Print the ``n`` columns with the highest share of deaths.

    For every column the mortality is ``deaths / (deaths + survivors)``, where
    both counts are column sums of the respective matrix. Columns whose total
    count is not strictly greater than ``lower_bound`` get mortality 0 so that
    rarely occurring columns do not dominate the ranking.

    Args:
        death_diseases: 2-D array summed along axis 0 to obtain death counts.
        alive_diseases: 2-D array summed along axis 0 to obtain survivor
            counts; must have the same number of columns as ``death_diseases``.
        n: Maximum number of ranked entries to print. Values larger than the
            number of columns are clamped instead of raising ``IndexError``.
        lower_bound: Minimum total count (exclusive) for a column to be ranked.
    """
    death_diseases_count = np.sum(death_diseases, axis=0)
    alive_diseases_count = np.sum(alive_diseases, axis=0)
    total_count = death_diseases_count + alive_diseases_count

    # True division always yields floats, so integer or boolean input matrices
    # no longer truncate every ratio to 0. np.maximum(..., 1) guards against
    # division by zero for columns that never occur.
    diseases_mortality = np.where(
        total_count > lower_bound,
        death_diseases_count / np.maximum(total_count, 1),
        0.0,
    )

    # argsort is ascending; reverse it so the deadliest column comes first.
    index_sorted = np.argsort(diseases_mortality)[::-1]

    for rank, column in enumerate(index_sorted[:max(n, 0)], start=1):
        print(f"#{rank:03}: {index_to_name(column)}")
        print(f"\t{index_to_description(column)}")
        print(f"\tkilled {int(death_diseases_count[column])} people, {int(alive_diseases_count[column])} survived")
        print(f"\t=> {diseases_mortality[column]:.2f} mortality")

In [10]:
# Split the matrix rows by the `died_in_hospital` flag of the dataframe.
died_mask = df["died_in_hospital"] == 1
survived_mask = df["died_in_hospital"] == 0

death_diseases = disease_matrix_raw[died_mask]
alive_diseases = disease_matrix_raw[survived_mask]

# Show the top 20 entries, ranking only those with more than 20 cases in total.
print_disease_mortality(death_diseases, alive_diseases, n=20, lower_bound=20)

#001: Disease: Subdural hem-deep coma
	"Subdural hemorrhage following injury without mention of open intracranial wound
	killed 41 people, 3 survived
	=> 0.93 mortality
#002: Disease: Subarach hem-deep coma
	"Subarachnoid hemorrhage following injury without mention of open intracranial wound
	killed 36 people, 4 survived
	=> 0.90 mortality
#003: Disease: Cl skl base fx-deep coma
	"Closed fracture of base of skull with subarachnoid
	killed 35 people, 7 survived
	=> 0.83 mortality
#004: Disease: Brain hem NEC-deep coma
	"Other and unspecified intracranial hemorrhage following injury without mention of open intracranial wound
	killed 23 people, 5 survived
	=> 0.82 mortality
#005: History: Encountr palliative care
	Encounter for palliative care
	killed 731 people, 282 survived
	=> 0.72 mortality
#006: Symptom: Coma
	Coma
	killed 98 people, 51 survived
	=> 0.66 mortality
#007: Disease: Defibrination syndrome
	Defibrination syndrome
	killed 258 people, 181 survived
	=> 0.59 mortality
#008: D