In [53]:
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
from scipy import stats
import threading

In [2]:
filename = 'dataset_diabetes/diabetic_data.csv'
data = pd.read_csv(filename).to_numpy()

In [3]:
# Count number of patients with more than one hospital stay
patient_nums = {}
duplicate_count = 0
for i in data:
    if i[1] in patient_nums:
        patient_nums[i[1]] += 1
    else:
        patient_nums[i[1]] = 1
for i in patient_nums:
    if patient_nums[i] > 1:
        duplicate_count += 1
print(duplicate_count)

16773


In [4]:
# Count steady patients for each medication
num_medications = 24
ind = 24
ids = []
medications = [[] for i in range(num_medications)]
for i in data:
    ids.append(i[0])
    for j in range(num_medications):
        if i[j + ind] == 'Steady':
            medications[j].append(i[0])

In [5]:
# ids to indices
from_id = {}
for i in range(len(ids)):
    from_id[ids[i]] = i

In [None]:
patients = np.zeros((len(ids), len(ids)))
for i in medications:
    for j in range(len(i)):
        for k in range(j + 1, len(i)):
            patients[from_id[i[j]], from_id[i[k]]] += 1
            patients[from_id[i[k]], from_id[i[j]]] += 1

In [6]:
# create network of medical specialties
meds = set()
for i in data:
    meds.add(i[11])
meds.remove('?')
nums_to_meds = list(meds)
meds_to_nums = {}
for ind, m in enumerate(nums_to_meds):
    meds_to_nums[m] = ind
num_meds = len(nums_to_meds)

In [40]:
adjacency = np.zeros((num_meds, num_meds))
counts = {}
for k in range(100000):
    ind = np.random.randint(0, len(data) - 1)
    i = data[ind]
    ind2 = np.random.randint(0, len(data) - 1)
    j = data[ind2]
    if (i[11] != '?') and (j[11] != '?') and (i[11] != j[11]):
        a = meds_to_nums[i[11]]
        b = meds_to_nums[j[11]]
        if i[2] == j[2]:
            adjacency[a, b] += 1
            adjacency[b, a] += 1
            if a in counts:
                counts[a] += 1
            else:
                counts[a] = 1
            if b in counts:
                counts[b] += 1
            else:
                counts[b] = 1
        if i[3] == j[3]:
            adjacency[a, b] += 1
            adjacency[b, a] += 1
            if a in counts:
                counts[a] += 1
            else:
                counts[a] = 1
            if b in counts:
                counts[b] += 1
            else:
                counts[b] = 1
adjacency /= np.mean(adjacency)

In [43]:
for i in range(len(adjacency)):
    try:
        adjacency[i,:] /= counts[i]
    except:
        pass
    try:
        adjacency[:,i] /= counts[i]
    except:
        pass

In [48]:
graph = nx.from_numpy_matrix(adjacency)

In [49]:
# Eigenvector centrality
eigenvector = nx.eigenvector_centrality(graph)
e2 = []
for i in eigenvector:
    e2.append((i, eigenvector[i]))
e2.sort(key = lambda x: x[1], reverse = True)
for i in range(5):
    print(nums_to_meds[e2[i][0]])

InternalMedicine
Emergency/Trauma
Cardiology
Family/GeneralPractice
Surgery-General


In [60]:
def thread_func(start, end, adjacency):
    print('thread from ' + str(start) + ' to ' + str(end) + ' starting')
    for ind in range(start, end):
        for ind2 in range(ind + 1, len(data)):
            i = data[ind]
            j = data[ind2]
            if (i[11] != '?') and (j[11] != '?') and (i[11] != j[11]):
                a = meds_to_nums[i[11]]
                b = meds_to_nums[j[11]]
                if i[2] == j[2]:
                    adjacency[a, b] += 1
                    adjacency[b, a] += 1
                if i[3] == j[3]:
                    adjacency[a, b] += 1
                    adjacency[b, a] += 1
    print('thread from ' + str(start) + ' to ' + str(end) + ' finishing')
num_threads = 200
arrays = [np.zeros((num_meds, num_meds)) for i in range(num_threads)]
start = 0
end = int(len(data) / num_threads)
for i in range(num_threads):
    if i == (num_threads - 1):
        end = len(data)
    x = threading.Thread(target=thread_func, args=(start, end, arrays[i]))
    x.start()
    start = end
    end = start + int(len(data) / num_threads)

thread from 0 to 508 starting
thread from 508 to 1016 starting
thread from 1016 to 1524 starting
thread from 1524 to 2032 starting
thread from 2032 to 2540 starting
thread from 2540 to 3048 starting
thread from 3048 to 3556 starting
thread from 3556 to 4064 startingthread from 4064 to 4572 starting

thread from 4572 to 5080 starting
thread from 5080 to 5588 starting
thread from 5588 to 6096 starting
thread from 6096 to 6604 startingthread from 6604 to 7112 starting
thread from 7112 to 7620 starting
thread from 7620 to 8128 starting

thread from 8128 to 8636 starting
thread from 8636 to 9144 starting
thread from 9144 to 9652 starting
thread from 9652 to 10160 starting
thread from 10160 to 10668 starting
thread from 10668 to 11176 starting
thread from 11176 to 11684 starting
thread from 11684 to 12192 starting
thread from 12192 to 12700 starting
thread from 12700 to 13208 starting
thread from 13208 to 13716 startingthread from 13716 to 14224 starting

thread from 14224 to 14732 starting
