In [None]:
import nn_data
import torch
import torch.nn as nn
import numpy as np
import nn_files

In [None]:
# Locations of the three input files handed to nn_data.LoadedData.
# The paths are relative; presumably they resolve against the notebook's
# working directory — confirm how nn_data opens them.
HPO_PATH = 'data/hp.obo'
LABEVENTS_HPO_PATH = 'data/OUT_LABEVENTS_HPO.csv'
DIAGNOSES_HPO_PATH = 'data/DIAGNOSE_ICD_hpo.csv'


## Dataset loading

In [None]:
# Build one LoadedData object from the three configured files; the same
# object is later passed to both dataset creators.
data = nn_data.LoadedData(HPO_PATH, LABEVENTS_HPO_PATH, DIAGNOSES_HPO_PATH)


In [None]:
# Two creators over the same loaded data: the network inputs come from the
# 'labevents' mode, the targets from the 'diagnoses' mode.
# NOTE(review): enable_parent_nodes=True presumably also activates the
# ancestors of every HPO term — confirm in nn_data.HPODatasetCreator.
input_data_creator = nn_data.HPODatasetCreator(
    data, mode='labevents', enable_parent_nodes=True)
target_data_creator = nn_data.HPODatasetCreator(
    data, mode='diagnoses', enable_parent_nodes=True)
# Disabled alternative: build the targets with ICDDatasetCreator instead.
# target_data_creator = nn_data.ICDDatasetCreator(data, batch = True)


## Model Creation

### Dataset Creation

In [None]:
# Materialise the encoded samples once; the plain lists are reused further
# down to derive the layer sizes (len(input_data[0]), len(target_data[0])).
input_data: list[list[int]] = input_data_creator.data()
target_data: list[list[int]] = target_data_creator.data()

# torch.tensor with an explicit dtype replaces the legacy torch.FloatTensor
# constructor; the result is the same float32 tensor.
input_tensor = torch.tensor(input_data, dtype=torch.float32)
target_tensor = torch.tensor(target_data, dtype=torch.float32)


### Accuracy Functions

In [None]:
def calc_accuracy(output, target) -> float:
    """Return the mean per-row soft hit rate of ``output`` against ``target``.

    For every row, the square roots of the outputs are weighted by the
    target values and summed, then divided by the row's target sum. A small
    constant is added to the divisor so rows whose target sum is zero do not
    divide by zero (such rows contribute 0 to the mean).

    Args:
        output: 2-D array of non-negative scores (``np.sqrt`` is applied
            element-wise).
        target: 2-D array of the same shape as ``output``.

    Returns:
        The average of the per-row ratios.
    """
    eps = .00001
    sqrt_scores = np.sqrt(output)
    hit_mass = (target * sqrt_scores).sum(axis=1)
    positives_per_row = target.sum(axis=1)
    return np.mean(hit_mass / (positives_per_row + eps))


In [None]:
def real_effect(outputs, targets, threshold=0.5):
    """Print a hit/miss summary for the FIRST sample of a batch.

    Only row 0 of ``outputs`` and ``targets`` is inspected. A score counts as
    a positive prediction when it is ``>= threshold``; the same cut-off is
    used for correct diagnoses and for false positives, so a score exactly
    on the threshold is never silently dropped.

    Args:
        outputs: 2-D array/tensor of scores, indexed as ``outputs[0]``.
        targets: 2-D array/tensor of labels with entries 1 (present) and
            0 (absent), same shape as ``outputs``.
        threshold: Decision cut-off for a positive prediction (default 0.5).

    Returns:
        None. The three counts are written to stdout.
    """
    scores = outputs[0]
    labels = targets[0]

    predicted_positive = scores >= threshold
    # Kept as an explicit comparison (not ~predicted_positive) so that NaN
    # scores are counted in neither group.
    predicted_negative = scores < threshold
    is_positive = labels == 1
    is_negative = labels == 0

    correct_diagnosed = int((predicted_positive & is_positive).sum())
    false_negative = int((predicted_negative & is_positive).sum())
    false_positive = int((predicted_positive & is_negative).sum())
    total_to_diagnose = labels.sum()

    print("Correct diagnoses:" f'{correct_diagnosed}/{total_to_diagnose}')
    print("False positives:" f'{false_positive}')
    print("False negatives:" f'{false_negative}\n')


### Model generation

In [None]:
# Pick the device the networks are moved to: CUDA when it is available,
# otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')


### Usage of Autoencoder

In [None]:
# Switch for the whole pipeline: when True an autoencoder is built, trained
# and handed to the FCN model; when False the FCN model receives AE=None.
use_autoencoder = True  # True or False


### Model Architectures

In [None]:
# Settings shared by both configurations (previously repeated in each branch).
enlarging_factor_NN = 1.4
# NOTE(review): the targets look like multi-hot 0/1 vectors (real_effect
# compares entries against 0 and 1). CrossEntropyLoss interprets float
# targets of the output's shape as class probabilities — confirm this is
# intended rather than a per-label loss such as nn.BCEWithLogitsLoss.
loss_func_NN = nn.CrossEntropyLoss()

if use_autoencoder:
    # Autoencoder layer sizes are fractions of the input width.
    reduction_factor_hidden = 0.7
    reduction_factor_latent = 0.5

    input_size_AE = len(input_data[0])
    hidden_size_AE = int(input_size_AE * reduction_factor_hidden)
    latent_size_AE = int(input_size_AE * reduction_factor_latent)

    # Reconstruction loss used when training the autoencoder on its own.
    loss_func_AE = nn.MSELoss()

    AE = nn_files.Autoencoder(input_size_AE, hidden_size_AE, latent_size_AE)
    AE.to(device)

    # With the encoder in front, the classifier's input width is the latent size.
    input_size_NN = latent_size_AE
else:
    AE = None

    # Without the encoder, the classifier's input width is the raw input width.
    input_size_NN = len(input_data[0])

output_size_NN = len(target_data[0])
# The hidden layer is sized from the wider of the input and output layers.
hidden_size_NN = int(max(input_size_NN, output_size_NN) * enlarging_factor_NN)

model = nn_files.FCNModel(input_size_NN, hidden_size_NN,
                    output_size_NN, enlarging_factor_NN, AE)
# Assign the result so the notebook does not echo the module repr.
_ = model.to(device)


### Test with Training Function

### Train Autoencoder

In [None]:
if use_autoencoder:
    # Hyperparameters of the autoencoder training run.
    num_epochs = 60
    learning_rate = 1e-2
    batch_size = 8

    # Choose the optimizer; default: Adam.
    optimizer = torch.optim.Adam(
        AE.parameters(), lr=learning_rate, betas=(0.9, 0.999))

    # The same tensor is used as input and as target of the dataset, i.e.
    # the autoencoder is trained to reproduce its own input.
    dataset_AE = torch.utils.data.TensorDataset(input_tensor, input_tensor)
    dataset_AE_split = nn_files.split_dataset(batch_size, dataset_AE)

    # Train the autoencoder.
    nn_files.training(
        AE,
        device,
        dataset_AE_split,
        num_epochs=num_epochs,
        loss_func=loss_func_AE,
        optimizer=optimizer,
    )


In [None]:
# Run nn_files.test on the autoencoder, using the split created in the
# training cell. input_data_creator is passed as data_creator.
# NOTE(review): presumably used to map output positions back to HPO terms —
# confirm in nn_files.test.
if use_autoencoder:
    nn_files.test(
        AE, device, dataset_AE_split, data_creator=input_data_creator,
    )


### Train NN

In [None]:
# Hyperparameters for training the classifier. Both branches of the former
# `if use_autoencoder` were identical, so the values are set unconditionally;
# re-introduce a branch only if the two modes need different settings.
batch_size = 8
learning_rate = 1e-4
num_epochs = 20

# Choose the optimizer; default: Adam.
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=learning_rate,
    betas=(0.9, 0.999),
)


In [None]:
# Pair every input vector with its target vector, then hand the dataset to
# nn_files.split_dataset together with the batch size set for classifier
# training.
# NOTE(review): presumably returns train/validation/test loaders — confirm.
dataset = torch.utils.data.TensorDataset(input_tensor, target_tensor)
dataset_split = nn_files.split_dataset(batch_size, dataset)


In [None]:
# Train the classifier. In addition to the arguments used for the
# autoencoder run, the two notebook-defined reporting helpers (real_effect,
# calc_accuracy) are passed in as callbacks.
nn_files.training(
    model, device, dataset_split,
    num_epochs=num_epochs,
    optimizer=optimizer, loss_func=loss_func_NN,
    real_effect=real_effect, calc_accuracy=calc_accuracy,
)


### Testing

In [None]:
# Evaluate the trained classifier on the same split object used for training.
# NOTE(review): target_data_creator is passed positionally here, whereas the
# autoencoder test passes its creator as data_creator= — presumably the same
# parameter; confirm against the nn_files.test signature.
nn_files.test(
    model, device, dataset_split,
    target_data_creator,
    real_effect=real_effect, calc_accuracy=calc_accuracy,
    sort_output_by_confidence=True,
)
