In [None]:
import os
from pathlib import Path
import numpy as np
import pandas as pd
from algorithms.autoencoder import autoencoder as ae
import tensorflow as tf
from sklearn.preprocessing import OneHotEncoder as onehot
from imblearn.over_sampling import RandomOverSampler

# Root directory against which the relative dataset paths used below
# (e.g. "CheXpert-v1.0-small/train.csv") are resolved.
path = "/media/jahan/solo/"
# Alternate root, left commented out so it can be swapped in by hand.
# path = "/Volumes/JAHAN/"
# Make `path` the working directory; every relative read in later cells depends on this.
os.chdir(path)

# prep data
def prep(csv, seed=None):
    """Build shuffled, class-balanced path triplets from a CheXpert label CSV.

    Only rows whose "Frontal/Lateral" column equals "Frontal" are kept. A row
    is "normal" when its "No Finding" value equals 1 and "abnormal" otherwise,
    so rows with a missing "No Finding" value are counted as abnormal.

    Every row of the result is a triplet of image paths:
      * a -- the image itself (normals first, then abnormals, before shuffling),
      * b -- an image of the same class (a random permutation of that class,
             so it can coincide with a),
      * c -- an image drawn at random from the opposite class.

    The triplets are oversampled with ``RandomOverSampler`` (fixed
    ``random_state=42``) so both classes have the same number of rows, and
    are finally shuffled row-wise.

    Parameters
    ----------
    csv : str or path-like
        Label CSV containing the columns "Path", "Frontal/Lateral" and
        "No Finding".
    seed : int, optional
        Seed for the pandas sampling and the final shuffle. ``None`` (the
        default) draws from NumPy's global random state, exactly as before
        this parameter existed.

    Returns
    -------
    tuple of four 1-D arrays
        ``(a_paths, b_paths, c_paths, y)`` with ``y`` equal to 0 for normal
        and 1 for abnormal rows. All four are columns of one stacked array,
        so ``y`` shares the dtype of the path columns (presumably ``object``
        -- cast with ``astype(int)`` if an integer array is required).
    """
    labels = pd.read_csv(csv)
    labels = labels[labels["Frontal/Lateral"] == "Frontal"]
    normal = labels[labels["No Finding"] == 1].Path
    abnormal = labels[labels["No Finding"] != 1].Path
    print(len(normal),len(abnormal))

    # A single RandomState drives every draw when a seed is supplied; passing
    # None to pandas makes it fall back to NumPy's global state.
    rng = None if seed is None else np.random.RandomState(seed)

    x_a = pd.concat([normal, abnormal]).values
    x_b = pd.concat([
        normal.sample(frac=1, random_state=rng),
        abnormal.sample(frac=1, random_state=rng),
    ]).values
    x_c = pd.concat([
        # Sampling without replacement raises ValueError when more rows are
        # requested than exist, so replace only when normals outnumber abnormals.
        abnormal.sample(n=len(normal), replace=len(normal) > len(abnormal), random_state=rng),
        normal.sample(n=len(abnormal), replace=True, random_state=rng),
    ]).values

    x = np.stack((x_a, x_b, x_c), axis=-1)

    # Labels follow the row order of x_a: normals (0) first, then abnormals (1).
    y = np.append(np.full(len(normal), 0), np.full(len(abnormal), 1))

    sm = RandomOverSampler(random_state=42)
    X_res, y_res = sm.fit_resample(x, y)

    print(X_res.shape)

    # Append the labels as a fourth column so one row-wise shuffle keeps each
    # triplet aligned with its label.
    X = np.stack((X_res[:,0], X_res[:,1], X_res[:,2], y_res), axis=-1)
    (np.random if rng is None else rng).shuffle(X)

    return X[:,0], X[:,1], X[:,2], X[:,3]

# Training triplets built by prep(): the image itself (a), a same-class image (b),
# an opposite-class image (c), and the 0/1 labels.
x_a, x_b, x_c, y = prep("CheXpert-v1.0-small/train.csv")
print(y)

# TF1-style session whose GPU options set per_process_gpu_memory_fraction to 0.9.
# Alternative kept for reference: config.gpu_options.allow_growth = True
config = tf.ConfigProto(
    gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
)
sess = tf.Session(config=config)

# Location passed to the autoencoder as `save_path` in the next cell.
save_path = "/home/jahan/Documents/CXR_CNN/output/triple"
# save_path = "/Users/jahan/Documents/research/CXR_CNN/output"

# Number of training rows returned by prep() (i.e. after oversampling).
print(x_a.size)

In [None]:
# Distribution strategy under whose scope the model is created and, in the next cell, trained.
strategy = tf.distribute.MirroredStrategy()
# strategy = tf.distribute.experimental.CentralStorageStrategy()
with strategy.scope():
    # `ae` is the project autoencoder class imported from algorithms.autoencoder;
    # it receives `save_path` defined in the first cell.
    encoder = ae(save_path=save_path)
    # The commented-out variant below passes a checkpoint path via `load=`
    # (presumably to resume from saved weights -- confirm in algorithms.autoencoder).
#     encoder.build(load="/triple.ckpt")
    encoder.build()

In [None]:
# Train for 5 epochs on the (a, b, c) path triplets; the labels `y` are not passed to train().
# NOTE(review): x_a / x_b / x_c are reassigned by the validation cell further down, so
# re-running this cell after that one would train on the validation paths instead.
with strategy.scope():
    history = encoder.train(a_paths=x_a, b_paths=x_b, c_paths=x_c, epochs=5)

In [None]:
# Build evaluation triplets from the validation split. Unlike prep(), this cell
# neither oversamples nor shuffles: rows stay ordered normals first, then abnormals.
# NOTE(review): this overwrites the training x_a / x_b / x_c / y from the first cell.
labels = pd.read_csv("CheXpert-v1.0-small/valid.csv")
labels = labels[labels["Frontal/Lateral"] == "Frontal"]
# "No Finding" == 1 is normal; everything else (including missing values) is abnormal.
normal = labels.loc[labels["No Finding"] == 1, "Path"]
abnormal = labels.loc[labels["No Finding"] != 1, "Path"]
print(len(normal),len(abnormal))

# a: the image itself; b: a same-class image (random permutation of the class);
# c: an image drawn at random from the opposite class.
x_a = pd.concat([normal, abnormal]).values
x_b = pd.concat([normal.sample(frac=1), abnormal.sample(frac=1)]).values
x_c = pd.concat([
    # Sampling without replacement raises ValueError when more rows are requested
    # than exist, so replace only when normals outnumber abnormals.
    abnormal.sample(n=len(normal), replace=len(normal) > len(abnormal)),
    normal.sample(n=len(abnormal), replace=True),
]).values

# Labels follow the row order of x_a: 0 for normal, 1 for abnormal.
y = np.append(np.full(len(normal), 0), np.full(len(abnormal), 1))

print(x_a.size)

In [None]:
# Score the validation triplets with the trained encoder. The labels `y` are passed as the
# fourth argument; how predict_proba uses them is defined in algorithms.autoencoder
# (not visible in this notebook).
probs = encoder.predict_proba(x_a,x_b,x_c, y)
print(probs)

In [None]:
from sklearn.metrics import roc_auc_score

# ROC AUC of the predicted scores against the validation labels
# (0 = normal, 1 = abnormal, as built in the validation cell).
auc = roc_auc_score(y, probs)

print(auc)