In [1]:
import os
import re
from pathlib import Path
import numpy as np
import pandas as pd
import holoviews as hv
import hvplot.pandas
from holoviews import opts,dim
import matplotlib.pyplot as plt
import cv2
from IPython.display import display_png
opts.defaults(opts.Curve(width=600, framewise=True))
%matplotlib inline
hv.extension('bokeh')

### Data

In [2]:
data_path = Path('../data')

In [3]:
# List the data directory once and select files by their real extension
# (a substring test such as '.txt' in name would also match 'x.txt.bak').
data_files = sorted(os.listdir(data_path))
sig_files = [data_path/file for file in data_files if file.endswith('.txt')]
info_files = [data_path/file for file in data_files if file.endswith('.hea')]
# Record locations without extension, as strings (e.g. '../data/1001').
file_names = [str(path.with_suffix('')) for path in sig_files]

In [4]:
sig_files[0]

PosixPath('../data/1001.txt')

In [5]:
# Build the label path from data_path so the data location is configured in one place.
label_df = pd.read_csv(data_path/'label'/'label_df.csv')

In [6]:
label_df.head()

Unnamed: 0,rec_id,step1,step2,step3,step4
0,1001,1,1,2,1
1,1002,1,2,3,2
2,1003,1,2,3,2
3,1004,2,2,3,2
4,1005,1,2,2,1


In [7]:
# Select the label row(s) whose rec_id is 1001.
rec_labels = label_df[label_df.rec_id==1001]
# Take the first matching row and skip its first column, leaving the label values.
rec_labels.values[0][1:]

array([1, 1, 2, 1])

### Record

In [8]:
class Record:
    """One recording: FHR and UC signals, header metadata and labels.

    ``location`` is the record path without extension; the signals are read
    from ``<location>.txt`` and the metadata from ``<location>.hea``.

    Attributes set by ``__init__``:
        rec_id        -- integer taken from the last path component of ``location``
        fhr, uc       -- signal arrays (file values divided by 100), unmodified
        info, freq    -- header key/value strings and the sampling frequency
        labels        -- label values looked up in the module-level ``label_df``
        pos2stage     -- sample index where stage 2 begins (header key 'Pos')
        pH, delType   -- header values for 'pH' and 'Deliv', kept as strings
        fhr_pro       -- ``fhr`` with all zero samples removed
        pos2stage_pro -- ``pos2stage`` re-indexed onto ``fhr_pro``
        vline, vline_pro -- vertical marker elements at the two positions
    """

    # Signed decimal or integer; the sign applies to both alternatives.
    _NUMBER_RE = re.compile(r"[-+]?(?:\d*\.\d+|\d+)")

    def __init__(self, location):
        #'fhr' is unmodified data, 'fhr_pro' is zero-removed data
        # Path(...).name is separator-agnostic, unlike splitting on '/'.
        self.rec_id = int(Path(location).name)
        self.fhr,self.uc = self.read_signals(location) #fhr original
        self.info,self.freq = self.read_info(location)
        self.labels = self.get_labels()
        self.pos2stage = int(self.info['Pos'])         #beginning of stage 2
        self.vline = hv.VLine(self.pos2stage).opts(color='black',line_dash = 'dotted')
        self.pH = self.info['pH']
        self.delType = self.info['Deliv']                   #Delivery Type
        self.fhr_pro,self.pos2stage_pro = self.preprocess_fhr()          #fhr processed, pos2stage processed
        self.vline_pro = hv.VLine(self.pos2stage_pro).opts(color='green',line_dash = 'dotted')

    def __str__(self):
        return str(self.rec_id)

    def __repr__(self):
        return self.__str__()

    def read_signals(self,location):
        """Read the signal file ``<location>.txt``.

        Each non-blank line is tab-separated; column 1 is the FHR value and
        column 2 the UC value, both stored as integers scaled by 100.

        Returns:
            (fhr, uc): two float arrays with the values divided by 100.
        """
        fhr_l = []
        uc_l = []
        # 'with' guarantees the file is closed even if a line fails to parse.
        with open(str(location)+'.txt') as f:
            for line in f:
                if not line.strip():
                    continue  # tolerate blank lines (e.g. a trailing newline)
                fields = line.split('\t')
                fhr_l.append(int(fields[1]))
                uc_l.append(int(fields[2]))
        fhr = np.asarray(fhr_l)/100
        uc = np.asarray(uc_l)/100
        return fhr,uc

    def read_info(self,location):
        """Read the header file ``<location>.hea``.

        From line 8 onwards, every line containing '#' contributes one entry:
        the key is the first word on the line and the value is the first
        number found *after* that word, kept as a string. Lines without a
        number after the key are skipped.

        Returns:
            (info, freq): the key -> value-string dict and the integer taken
            from the third space-separated field of the first line.
        """
        info = dict()
        with open(str(location)+'.hea') as f:
            lines = f.readlines()
        for line in lines[7:]:
            if '#' not in line:
                continue
            key = re.search(r'\w+', line)
            if key is None:
                continue
            # Start after the key so digits inside the key itself
            # (e.g. 'Apgar1') are not mistaken for the value.
            value = self._NUMBER_RE.search(line, key.end())
            if value:
                info[key.group()] = value.group()
        freq = int(lines[0].split(' ')[2]) #Frequency
        return info,freq

    def get_info_df(self):
        """Return ``self.info`` as a DataFrame with one row per header key."""
        df = pd.DataFrame.from_dict(self.info,orient = 'index')
        return df

    def get_labels(self):
        """Return the label values for this record from the global ``label_df``.

        Takes the first row whose ``rec_id`` matches and drops its first column.

        Raises:
            IndexError: if ``label_df`` has no row for this record.
        """
        rec_labels = label_df[label_df.rec_id==self.rec_id]
        if rec_labels.empty:
            raise IndexError('no labels found for record {}'.format(self.rec_id))
        return rec_labels.values[0][1:]

    def preprocess_fhr(self):
        """Remove zero samples from ``fhr`` and re-index ``pos2stage``.

        Only zeros located before ``pos2stage`` shift that position, so only
        those are subtracted from it.

        Returns:
            (fhr_without_zeros, corrected_pos2stage)
        """
        signal = np.asarray(self.fhr)
        corr_sig = signal[signal != 0]   #corrected_signal
        # Zeros at or after pos2stage do not move the marker.
        zeros_before = int(np.count_nonzero(signal[:max(self.pos2stage, 0)] == 0))
        new_pos2stage = self.pos2stage - zeros_before  #corrected pos2stage
        return corr_sig,new_pos2stage

    def plot_fhr(self,orignal = True):
        """Plot the FHR curve with the stage-2 marker and guide lines at 160 and 110.

        Args:
            orignal: if True plot the unmodified signal, otherwise the
                zero-removed one together with its corrected marker.
        """
        if orignal:
            fhr = self.fhr
            vline = self.vline
        else:
            fhr = self.fhr_pro
            vline = self.vline_pro
        # line_width (a style option) is what sets the stroke thickness;
        # 'width' is the plot-size option and does not style the line.
        hline_1  = hv.HLine(160,).opts(color='red',line_dash = 'dotted',line_width = 1,)
        hline_2  = hv.HLine(110,).opts(color='red',line_dash = 'dotted',line_width = 1)
        fhr_plot = hv.Curve(fhr,'Time','FHR')*vline * hline_1 * hline_2
        fhr_plot.opts(
            opts.Curve( height=400, width=700,xaxis=None, line_width=1, tools=['hover'],line_alpha = 1))
        return fhr_plot

    def plot_uc(self):
        """Plot the UC curve overlaid with the stage-2 marker."""
        uc_plot = hv.Curve(self.uc,'Time','UC')*self.vline
        uc_plot.opts(
            opts.Curve( height=250, width=700,xaxis=None, line_width=1, tools=['hover'],color = 'orange'))
        return uc_plot

    def plot_labels(self):
        """Plot the labels as bars with dotted guide lines at 1, 2 and 3."""
        label_plot = hv.Bars(self.labels)
        label_plot.opts(
             opts.Bars(height = 250, width = 250,color = 'green'))
        l_1 = hv.HLine(1).opts(color='blue',line_dash = 'dotted')
        l_2 = hv.HLine(2).opts(color='blue',line_dash = 'dotted')
        l_3 = hv.HLine(3).opts(color='blue',line_dash = 'dotted')
        return label_plot * l_1 * l_2 * l_3

In [9]:
r = Record(data_path/'1004')
r

1004

In [10]:
r.uc

array([46., 43., 43., ...,  0.,  0.,  0.])

In [11]:
# r.info
# r.info['pH']

In [12]:
df = r.get_info_df()
# df.loc['CK']

### Dynamic Plot of Original Data

In [13]:
def load_signals(location, **kwargs):
    """Build a single-column layout of the FHR, UC and label plots for one record.

    ``location`` is the record path without extension, as accepted by
    ``Record``. Extra keyword arguments are accepted and ignored.
    """
    record = Record(location)
    panels = record.plot_fhr() + record.plot_uc() + record.plot_labels()
    panels.cols(1)  # stack the three panels vertically
    return panels

In [14]:
# Interactive view: the 'FHR' dimension is restricted to the record paths in
# file_names, and load_signals is the callback that renders the chosen one.
dmap = hv.DynamicMap(load_signals, kdims='FHR').redim.values(FHR=file_names)
dmap

The FHR usually lies between 110 and 160; these bounds are the red dotted lines in the FHR plot.

### Moving Average

In [15]:
r = Record(data_path/'1001')

In [16]:
def ma(x, period, type_ ='simple'):
    """Moving average of ``x`` over a window of ``period`` samples.

    Args:
        x: 1-D sequence of numbers.
        period: window length in samples (must be >= 1).
        type_: 'simple' for equal weights; any other value uses the kernel
            ``np.exp(np.linspace(-1, 0, period))``.

    Returns:
        Float array of the same length as ``x``. The first ``period`` samples
        are replaced by the value at index ``period`` (or by the last value
        when ``x`` is shorter than that), because the zero-padded start of
        the convolution averages over fewer real samples.

    Raises:
        ValueError: if ``period`` is smaller than 1.
    """
    x = np.asarray(x)
    if period < 1:
        raise ValueError('period must be a positive integer')
    if x.size == 0:
        return x.astype(float)  # nothing to smooth; np.convolve rejects empty input

    # Compare the argument, not the builtin ``type``.
    if type_ == 'simple':
        weights = np.ones(period)
    else:
        # NOTE(review): with np.convolve the last (largest) kernel entry
        # multiplies the oldest sample in the window -- confirm this
        # weighting direction is intended.
        weights = np.exp(np.linspace(-1., 0., period)) #exponential MA

    weights /= weights.sum()

    a = np.convolve(x, weights, mode='full')[:len(x)]
    # Clamp so a window longer than the signal does not index out of range.
    warmup = min(period, len(a) - 1)
    a[:period] = a[warmup]
    return a

In [17]:
period = 100
# 'S' is not the string 'simple', so this call does not select ma's
# equal-weight branch; it uses the exponential-weight kernel.
val = ma(r.fhr, period,'S')

In [18]:
fhr_plot = r.plot_fhr()

In [19]:
ema_plot = hv.Curve(val,'FHR_ema').opts(color = 'orange')

In [20]:
fhr_plot*ema_plot

### Fill by rolling mean

In [21]:
def previous_points(data,from_point,num_of_points):
    """Return up to ``num_of_points`` items of ``data`` that precede ``from_point``.

    The item at ``from_point`` itself is excluded. When fewer than
    ``num_of_points`` items exist before ``from_point``, only those available
    are returned.
    """
    # Clamp at 0: a negative start index would wrap around to the end of data.
    start = max(from_point - num_of_points, 0)
    return data[start:from_point]

def pct_zero(signal):
    """Return the percentage (0-100) of samples in ``signal`` equal to zero.

    An empty signal yields 0.0 rather than a division by zero.
    """
    values = np.asarray(signal)
    if values.size == 0:
        return 0.0
    return int(np.count_nonzero(values == 0)) / values.size * 100

def fill_mean(rec,window_size):
    """Replace zero FHR samples with the mean of the preceding window.

    Works on a copy of ``rec.fhr``. Scanning starts at index
    ``window_size + 1``; samples before that index are never modified.
    Each zero found is replaced by the mean of the ``window_size`` values
    before it in the corrected signal, so earlier fills feed later ones.

    Returns:
        The corrected copy of the signal.
    """
    raw = rec.fhr.copy()      # read-only reference copy of the input
    filled = raw.copy()       # output, updated as zeros are replaced
    first = window_size + 1
    for idx in range(first, len(raw)):
        if raw[idx] == 0:
            filled[idx] = np.mean(previous_points(filled, idx, window_size))
    return filled

In [22]:
def load_signals(location, **kwargs):
    """Build the single-column layout for one record with the filled signal overlaid.

    Top panel: the original FHR plot, the ``fill_mean`` result (window 50) and
    the ``ma`` output of that result (period 100, ``type_='simple'``).
    Middle panel: the UC plot with dotted guide lines at 20, 60 and 100.
    Bottom panel: the label bars. Extra keyword arguments are ignored.
    """
    record = Record(location)
    fhr_panel = record.plot_fhr()
    label_panel = record.plot_labels()
    uc_panel = record.plot_uc()

    filled = fill_mean(record,window_size = 50)
    filled_curve = hv.Curve(filled).opts(color = 'orange',line_width =1,line_alpha = 0.9)

    smoothed = ma(filled, period = 100,type_ = 'simple')
    smoothed_curve = hv.Curve(smoothed,'FHR_ema').opts(color = 'red',line_width =1)

    # Overlay the three UC guide lines one after another, lowest level first.
    uc_overlay = uc_panel
    for level in (20, 60, 100):
        uc_overlay = uc_overlay * hv.HLine(level,).opts(line_dash = 'dotted',line_width = 1,color='red',)

    layout = fhr_panel * filled_curve * smoothed_curve + uc_overlay + label_panel
    layout.cols(1)
    return layout

# Rebuild the interactive view with the load_signals defined in this cell,
# which adds the mean-filled and moving-average curves to the FHR panel.
dmap = hv.DynamicMap(load_signals, kdims='FHR').redim.values(FHR=file_names)
dmap

### Missing values

In [23]:
# missing = [pct_zero(Record(name).fhr) for name in file_names]

In [24]:
# max(missing),min(missing),np.mean(missing)

In [25]:
# m = pd.DataFrame(missing,columns=['m_val'])

In [26]:
# len(m[m.m_val<1]),len(m[m.m_val<10]),len(m[m.m_val<20])