In [218]:
import glob
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from collections import Counter
from collections import defaultdict

In [219]:
raw_data = []
for file in glob.glob('fhir/general/*'):
    raw_data.append(pd.read_json(file))

In [4]:
def dataPipeTuples(raw_data):
    training_data = {}
    for patient in raw_data:
        id = patient['entry'][0]['resource']['id']
        training_data[id] = {}
        #total number of times patient is inpatient for chf
        training_data[id]['total_chf'] = 0
        #first time of discharge for chf and second time admitted for chf
        training_data[id]['chf_dates'] = ()
        
        for entry in patient['entry']:
            resource_type = entry['resource']['resourceType']
            if resource_type == "Encounter":
                class_code = entry['resource']['class']['code']
                if class_code == "IMP" or class_code == "EMER":
                    try:
                        reason_code = entry['resource']['reasonCode'][0]['coding'][0]['code']
                    except:
                        continue
                    start_date = entry['resource']['period']['start'][0:10]
                    if (training_data[id]['total_chf'] == 1):
                        if (pd.date_range(training_data[id]['chf_dates'][0], start_date).shape[0] > 29):
                            if reason_code == "88805009":
                                training_data[id]['total_chf'] += 1
                    else:
                        if reason_code == "88805009":
                            training_data[id]['total_chf'] += 1
                            
                if (training_data[id]['total_chf'] == 1):
                    training_data[id]['chf_dates'] = (entry['resource']['period']['end'][0:10],)
                elif (training_data[id]['total_chf'] == 2):
                    training_data[id]['chf_dates'] = (training_data[id]['chf_dates'][0], start_date)
        if (training_data[id]['total_chf'] == 0):
            del training_data[id]

    return training_data

In [5]:
def dataPipeMatrix(raw_data, tuple_list):
    training_data = {}
    for patient in raw_data:
        id = patient['entry'][0]['resource']['id']
        if id in tuple_list:
            training_data[id] = defaultdict(Counter)
            check_date = tuple_list[id]['chf_dates'][0]
            for entry in patient['entry']:
                resource_type = entry['resource']['resourceType']

                if resource_type == "Encounter":
                    start_date = entry['resource']['period']['start'][0:7]
                    if pd.date_range(start_date, check_date).shape[0] != 0:
                        try:
                            reason_code = entry['resource']['reasonCode'][0]['coding'][0]['code']
                        except:
                            reason_code = entry['resource']['type'][0]['coding'][0]['code']
                        training_data[id][start_date][reason_code] = 1
                elif resource_type == "Observation":
                    start_date = entry['resource']['effectiveDateTime'][0:7]
                    if pd.date_range(start_date, check_date).shape[0] != 0:
                        reason_code = entry['resource']['code']['coding'][0]['code']
                        training_data[id][start_date][reason_code] = 1 
                elif resource_type == "Procedure":
                    start_date = entry['resource']['performedPeriod']['start'][0:7]
                    if pd.date_range(start_date, check_date).shape[0] != 0:
                        reason_code = entry['resource']['code']['coding'][0]['code']
                        training_data[id][start_date][reason_code] = 1
                elif resource_type == "Condition":
                    start_date = entry['resource']['onsetDateTime'][0:7]
                    if pd.date_range(start_date, check_date).shape[0] != 0:
                        reason_code = entry['resource']['code']['coding'][0]['code']
                        training_data[id][start_date][reason_code] = 1
                elif resource_type == "Immunization":
                    start_date = entry['resource']['occurrenceDateTime'][0:7]
                    if pd.date_range(start_date, check_date).shape[0] != 0:
                        reason_code = entry['resource']['vaccineCode']['coding'][0]['code']
                        training_data[id][start_date][reason_code] = 1
                elif resource_type == "MedicationRequest":
                    start_date = entry['resource']['authoredOn'][0:7]
                    if pd.date_range(start_date, check_date).shape[0] != 0:
                        reason_code = entry['resource']['medicationCodeableConcept']['coding'][0]['code']
                        training_data[id][start_date][reason_code] = 1

    return training_data

In [6]:
def addMonth(dt):
    month = int(dt[5:])
    year = int(dt[0:4])
    if month == 12:
        return str(year+1)+"-01"
    else:
        date = str(month+1).zfill(2)
        return dt[0:5]+date
def addYear(dt, num):
    year = int(dt[0:4])
    date = str(year+num)
    return date+dt[4:]

In [7]:
def dataPipeDataFrames(raw_data):
    frames = []
    tuples = dataPipeTuples(raw_data)
    data = dataPipeMatrix(raw_data, tuples)
    for id in data:
        df = pd.DataFrame.from_dict(data[id], orient="index")
        df = df.reset_index().rename(columns={'index':'Month'}).sort_values(by='Month', ascending=False)
        two_year_range = np.arange(np.datetime64(addYear(tuples[id]['chf_dates'][0][0:7], -2)), np.datetime64(addMonth(tuples[id]['chf_dates'][0][0:7]))).astype(str).tolist()
        df = df.set_index('Month').reindex(two_year_range).sort_values(by='Month', ascending=False).fillna(0)
        frames.append(df)
    all_columns = []
    for frame in frames:
        all_columns.extend(x for x in frame.columns.tolist() if not x in all_columns)
    final_frames = []
    for df in frames:
        cols = df.columns.tolist()
        cols.extend(x for x in all_columns if not x in cols)
        df = df.reindex(columns=sorted(cols, reverse=True), fill_value=0)
        final_frames.append(df)
    return (final_frames, tuples)

In [220]:
frames, tuples = dataPipeDataFrames(raw_data)

In [221]:
len(frames)

67

In [10]:
pd.DataFrame.from_dict(tuples, orient="index").shape

(47, 2)

In [222]:
frames[0].shape

(25, 327)

In [248]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data 
from torch.autograd import Variable
from sklearn.model_selection import train_test_split
BATCH_SIZE = 2

In [249]:
labels = pd.DataFrame.from_dict(tuples, orient="index")
labels = labels.drop(columns=['chf_dates'])
labels = labels.mask(labels == 1, 0)
labels = labels.mask(labels > 1, 1)

In [250]:
ff = []
for df in frames:
    ff.append(df.values)
x = torch.Tensor(ff)
x = x.view(-1,1,25,327)
y = torch.Tensor(labels.values).squeeze().type(torch.LongTensor)
train = torch.utils.data.TensorDataset(x, y)
train_size = int(0.8 * len(train))
test_size = len(train) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(train, [train_size, test_size])

In [251]:
x.shape

torch.Size([67, 1, 25, 327])

In [252]:
def fit(model, train_loader):
    optimizer = torch.optim.Adam(model.parameters())#,lr=0.001, betas=(0.9,0.999))
    error = nn.CrossEntropyLoss()
    EPOCHS = 5
    model.train()
    for epoch in range(EPOCHS):
        correct = 0
        for batch_idx, (X_batch, y_batch) in enumerate(train_loader):
            var_X_batch = Variable(X_batch).float()
            #print(y_batch.shape)
            var_y_batch = Variable(y_batch)
            optimizer.zero_grad()
            output = model(var_X_batch)
            loss = error(output, var_y_batch)
            loss.backward()
            optimizer.step()

            # Total correct predictions
            predicted = torch.max(output.data, 1)[1] 
            correct += (predicted == var_y_batch).sum()
            #print(epoch)
            print('Epoch : {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\t Accuracy:{:.3f}%'.format(
                    epoch, batch_idx*len(X_batch), len(train_loader.dataset), 100.*batch_idx / len(train_loader), loss.data, float(correct*100) / float(BATCH_SIZE*(batch_idx+1))))

In [253]:
def evaluate(model, test_loader):
    correct = 0 
    for test_imgs, test_labels in test_loader:
        #print(test_imgs.shape)
        test_imgs = Variable(test_imgs).float()
        output = model(test_imgs)
        predicted = torch.max(output,1)[1]
        correct += (predicted == test_labels).sum()
    print("Test accuracy:{:.3f}% ".format( float(correct) / (len(test_loader)*BATCH_SIZE)))

In [254]:
# Pytorch train and test sets
#train = torch.utils.data.TensorDataset(torch_X_train,torch_y_train)
#test = torch.utils.data.TensorDataset(torch_X_test,torch_y_test)

# data loader
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size = BATCH_SIZE, shuffle = False)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size = BATCH_SIZE, shuffle = False)

In [255]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=(4, 79))
        self.conv2 = nn.Conv2d(32, 32, kernel_size=(4, 79))
        self.conv3 = nn.Conv2d(32,64, kernel_size=(4, 79))
        self.fc1 = nn.Linear(3*3*64, 256)
        self.fc2 = nn.Linear(256, 2)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        #x = F.dropout(x, p=0.5, training=self.training)
        x = F.relu(F.max_pool2d(self.conv2(x), 2))
        x = F.dropout(x, p=0.5, training=self.training)
        x = F.relu(F.max_pool2d(self.conv3(x),2))
        x = F.dropout(x, p=0.5, training=self.training)
        #print(x.shape)
        x = x.view(-1,3*3*64 )
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)
 
cnn = CNN()
print(cnn)

it = iter(train_loader)
X_batch, y_batch = next(it)
print(cnn.forward(X_batch).shape)

CNN(
  (conv1): Conv2d(1, 32, kernel_size=(4, 79), stride=(1, 1))
  (conv2): Conv2d(32, 32, kernel_size=(4, 79), stride=(1, 1))
  (conv3): Conv2d(32, 64, kernel_size=(4, 79), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=2, bias=True)
)
torch.Size([2, 2])


In [256]:
fit(cnn,train_loader)



In [262]:
evaluate(cnn,test_loader)

Test accuracy:0.857% 
