In [13]:
import tensorflow as tf
import pickle
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Rebuild the trained network: the architecture comes from the JSON
# description, the weights from the saved checkpoint.
with open('../nn_output/model.json', 'r') as architecture_file:
    model_json = architecture_file.read()
model = tf.keras.models.model_from_json(model_json)
model.load_weights('../nn_output/model_checkpoint.h5')

# Restore the pickled feature scaler that predict_df applies before calling
# the model. NOTE: pickle executes code on load, so only open trusted files.
with open('../nn_output/feature_scaler.pickle', 'rb') as scaler_file:
    scalerx = pickle.load(scaler_file)


In [14]:
# Build the analysis cohort BEFORE it is stratified by age group.
records_raw = pd.read_csv('./data/10334-1.csv')

# Keep female patients, parse the dates, order chronologically so that the
# last row per patient id is the most recent record, then discard rows that
# have no measured value.
records_latest = (
    records_raw[records_raw['gender'] == 'F']
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .sort_values(by='date', ascending=True)
    .drop_duplicates(subset='id', keep='last')  # most recent record per patient
    .dropna(subset='value')
)

# Outlier removal: keep only values inside the closed [1st, 99th] percentile
# range, with both percentiles computed on the de-duplicated, non-null cohort.
value_floor = np.quantile(records_latest['value'], 0.01)
value_ceiling = np.quantile(records_latest['value'], 0.99)
df = records_latest[records_latest['value'].between(value_floor, value_ceiling)]

len(df)


19625

In [15]:
def predict_df(df, filename, n_quantiles=500, predictor=None, scaler=None):
    """Export a cohort's values to CSV and run the model on their quantile profile.

    The ``value`` column is written to ``filename``, standardised with its own
    mean and standard deviation, summarised as ``n_quantiles`` evenly spaced
    quantiles, passed through ``scaler.transform`` and then through
    ``predictor.predict``. The prediction is mapped back to the original units
    by multiplying by the cohort standard deviation and adding the cohort mean.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``value`` column convertible to float.
    filename : str
        Destination of the CSV export (written with the default pandas index).
        Missing parent directories are created.
    n_quantiles : int, optional
        Number of quantile features to extract. Presumably has to match the
        input width the scaler and model were fitted with -- confirm before
        changing the default of 500.
    predictor : object with ``predict``, optional
        Defaults to the notebook-level ``model``.
    scaler : object with ``transform``, optional
        Defaults to the notebook-level ``scalerx``.

    Returns
    -------
    The array returned by ``predictor.predict``, rescaled in place.

    Raises
    ------
    ValueError
        If there are no values, or if their standard deviation is zero or not
        finite (standardisation would otherwise silently produce NaNs).
    """
    import os  # local import keeps this cell self-contained

    # Fall back to the objects loaded at the top of the notebook so existing
    # two-argument calls keep working unchanged.
    if predictor is None:
        predictor = model
    if scaler is None:
        scaler = scalerx

    values = np.array(df['value']).astype(float)
    if values.size == 0:
        raise ValueError("predict_df received no values to predict on")

    center = values.mean()
    spread = values.std()
    if not np.isfinite(spread) or spread == 0:
        raise ValueError(
            "predict_df cannot standardise values whose standard deviation is "
            "zero or not finite"
        )

    # Export the raw (unstandardised) values for downstream tools.
    out_dir = os.path.dirname(filename)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    pd.DataFrame({'value': values}).to_csv(filename)

    # standardize
    standardized = (values - center) / spread

    # feature extraction: fixed-length quantile profile of the distribution
    features = np.quantile(standardized, np.linspace(0, 1, n_quantiles))

    # feature scaling (transform expects a 2-D array holding a single sample)
    features = scaler.transform(features.reshape(1, -1))[0]

    # predict, then undo the standardisation; in-place arithmetic keeps the
    # dtype of the array returned by the predictor
    p = predictor.predict(features.reshape(1, -1))
    p *= spread
    p += center

    return p
    

In [16]:
# Numeric lower bound of each age-group label (e.g. "5-9" -> 5, "10-14" -> 10).
# The leading run of digits is parsed instead of slicing a fixed number of
# characters, so one-, two- and three-digit lower bounds are all handled.
# A label without leading digits makes the int64 cast fail loudly.
# `assign` returns a new frame, avoiding a column write onto a filtered slice.
df = df.assign(
    age_group_start=(
        df['age-group']
        .astype(str)
        .str.extract(r'^\s*(\d+)', expand=False)
        .astype('int64')
    )
)


In [17]:
# Run the model once per age group, in ascending order of the group's lower
# bound; each call also writes that group's values to its own CSV file.
ages = sorted(set(df['age_group_start']))
ris = []
for age_start in ages:
    print(age_start)
    cohort = df.loc[df['age_group_start'] == age_start]
    csv_path = './data/age_binned/value_' + str(age_start) + '.csv'
    ris.append(predict_df(cohort, csv_path))
    

5
10
15
20
25
30
35
40
45
50
55
60
65
70
75
80
85


In [18]:
# Persist the per-age-group predictions and, in the same order, the age-group
# lower bounds they belong to.
np.save('./predictions_by_age.npy', np.asarray(ris))
np.save('./prediction_age_groups.npy', np.asarray(ages))


Once the per-age-group CSVs have been written by the step above, refineR predictions can be generated from the same data by running `../../../refineR/cancer_ag125/predict_age.R`.
