In [1]:
%matplotlib inline
%timeit

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import glob
import os
import time
import pickle
import datetime
import re
import pyActigraphy

import sys
sys.path.append('/home/ngrav/project/')
from wearables.scripts import utils as wearutils
from wearables.scripts import data as weardata
from wearables.scripts import train as weartrain
from wearables.scripts import eval_ as weareval
from wearables.scripts import model as wearmodels
from wearables.scripts import DTW as weardtw

import torch
import torch.nn as nn
import torch.nn.functional as F

from scipy.spatial.distance import pdist, squareform
import fastdtw
import umap

# Global figure styling, applied once for every plot in this notebook.
plt.rcParams.update({
    'font.size': 9,
    'font.family': 'sans serif',
    # Font type 42 embeds TrueType fonts in exported PDF/PS files.
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
    'legend.frameon': False,
    'axes.grid': False,
    'legend.markerscale': 1,
    'savefig.dpi': 600,
})
# Applied last, after the rcParams above, matching the original ordering.
sns.set_style("ticks")

In [2]:
# Locations of the results directory (metadata tables) and the model zoo
# (pickled model / trainer artifacts).
pfp = '/home/ngrav/project/wearables/results/'
mfp = '/home/ngrav/project/wearables/model_zoo'

# Paths to the model pickle and the trainer pickle passed to merge_out2md.
bst_modelpkl, bst_trainer = (
    os.path.join(mfp, fname)
    for fname in ('213-itv52_InceptionTime_GA5.pkl',
                  'trainer_itv52_InceptionTime_GA5.pkl')
)

# Metadata table; the first CSV column becomes the index.
md = pd.read_csv(os.path.join(pfp, 'md_v52_220111.csv'), index_col=0)

In [3]:
def merge_out2md(md, bst_trainerfp, bst_modelfp, return_embeds=True, out_file=None, verbose=False):
    """Run the trained model on every split and join its predictions onto `md`.

    For each of the 'train', 'val' and 'test' splits, `weareval.eval_trained`
    is called with the unpickled trainer and the model file. The targets,
    predictions, split label and signed error (``yhat - y``) are gathered into
    one table indexed by `evaluation.id`, which is then left-merged onto `md`
    by index, so rows of `md` without a prediction get NaN in the new columns.

    Parameters
    ----------
    md : pandas.DataFrame
        Metadata table whose index is matched against `evaluation.id`.
    bst_trainerfp : str
        Path to the pickled trainer object.
    bst_modelfp : str
        Path of the model file, forwarded to `eval_trained` as `modelpkl`.
    return_embeds : bool, default True
        If True, also collect the second model output (`evaluation.out2`),
        which this function treats as embeddings, and return it.
    out_file : str or None, default None
        If given, the merged metadata is written there as CSV. When
        `return_embeds` is also True, the embeddings are written to
        'embeds_v52.csv' in the same directory.
    verbose : bool, default False
        If True, print per-split and cumulative timing.

    Returns
    -------
    pandas.DataFrame or tuple of (pandas.DataFrame, pandas.DataFrame)
        The merged metadata, plus the embeddings table when `return_embeds`
        is True.
    """
    # NOTE(security): unpickling can execute arbitrary code; only load trainer
    # files that come from a trusted source.
    with open(bst_trainerfp, 'rb') as f:
        trainer = pickle.load(f)

    total_t = time.time()
    # Collect one frame per split and concatenate once at the end.
    # DataFrame.append was removed in pandas 2.0 and re-copied the whole
    # frame on every call.
    pred_frames = []
    embed_frames = []
    for split in ['train', 'val', 'test']:
        tic = time.time()
        if verbose:
            print('Starting inference for {} set...'.format(split))

        evaluation = weareval.eval_trained(trainer, split=split,
                                           modelpkl=bst_modelfp,
                                           two_outputs=True)
        pred_frames.append(pd.DataFrame({
            'y': evaluation.y.numpy(),
            'yhat': evaluation.yhat.numpy(),
            'split': [split] * evaluation.y.shape[0],
            'error': (evaluation.yhat - evaluation.y).numpy(),
        }, index=evaluation.id))
        if return_embeds:
            embed_frames.append(pd.DataFrame(evaluation.out2.numpy(), index=evaluation.id))
        if verbose:
            print('  inference for {} set done in {:.0f}-s\t{:.2f}-min elapsed'.format(split, time.time()-tic, (time.time()-total_t)/60))

    dt = pd.concat(pred_frames)
    md = md.merge(dt, left_index=True, right_index=True, how='left')
    if out_file is not None:
        md.to_csv(out_file)
    if not return_embeds:
        return md

    embeds = pd.concat(embed_frames)
    if out_file is not None:
        # The embeddings file name is fixed; it is written next to `out_file`.
        embeds.to_csv(os.path.join(os.path.split(out_file)[0], 'embeds_v52.csv'))
    return md, embeds

In [4]:
# Run inference on every split and attach the predictions to the metadata.
# The merged table is written to `merged_csv`; merge_out2md also writes the
# embeddings CSV into the same directory.
merged_csv = os.path.join(pfp, 'md_v52_220112.csv')
md, embeds = merge_out2md(
    md,
    bst_trainer,
    bst_modelpkl,
    out_file=merged_csv,
    verbose=True,
)

Starting inference for train set...


  return F.conv1d(input, weight, bias, self.stride,
  return torch.max_pool1d(input, kernel_size, stride, padding, dilation, ceil_mode)


  inference for train set done in 554-s	9.23-min elapsed
Starting inference for val set...




  inference for val set done in 115-s	11.14-min elapsed
Starting inference for test set...


  return F.mse_loss(input, target, reduction=self.reduction)


  inference for test set done in 216-s	14.74-min elapsed


In [5]:
# Preview the first five rows of the merged metadata, including the
# y / yhat / split / error columns added by merge_out2md.
md.head(n=5)

Unnamed: 0,record_id,age_enroll,marital,gestage_by,insur,ethnicity,race,bmi_1vis,prior_ptb_all,fullterm_births,...,ave_logpseudocount_sleep,ave_logpseudocount_wknd,ave_logpseudocount_wkday,ave_logpseudocount_day,ave_logpseudocount_night,Pre-term birth,y,yhat,split,error
1001_14,1001,26.0,0.0,1.0,1.0,0.0,1.0,34.820812,0.0,1.0,...,0.170627,1.146411,0.637768,1.010842,0.555347,False,14.0,17.139236,train,3.139236
1001_25,1001,26.0,0.0,1.0,1.0,0.0,1.0,34.820812,0.0,1.0,...,0.018178,0.021256,0.025278,0.024005,0.024252,False,25.0,18.259083,train,-6.740917
1001_32,1001,26.0,0.0,1.0,1.0,0.0,1.0,34.820812,0.0,1.0,...,0.001696,0.002719,0.003989,0.00408,0.003172,False,32.0,16.724869,train,-15.275131
1002_6,1002,22.0,0.0,1.0,1.0,0.0,1.0,23.355223,0.0,0.0,...,0.048014,0.034127,0.83202,0.820863,0.387239,False,6.0,19.194839,test,13.194839
1003_14,1003,23.0,0.0,1.0,1.0,0.0,1.0,27.193642,1.0,1.0,...,0.117282,0.148441,0.358667,0.405664,0.19154,False,14.0,30.649376,val,16.649376


In [18]:
# Sanity check: count index labels that repeat an earlier label
# (0 means every index label is unique).
dup_mask = md.index.duplicated()
dup_mask.sum()

0

In [16]:
# Re-read the original metadata table from disk. Note that this rebinds `md`,
# replacing the merged frame returned by merge_out2md.
raw_md_path = os.path.join(pfp, 'md_v52_220111.csv')
md = pd.read_csv(raw_md_path, index_col=0)

In [17]:
# Report (rows, columns) of whatever `md` currently refers to.
n_rows, n_cols = md.shape
(n_rows, n_cols)

(2463, 122)