In [1]:
import getpass
import os
import pickle
import statistics
from datetime import date, datetime, timedelta
from glob import glob
from urllib.parse import quote_plus

import numpy as np
import pandas as pd
import sqlalchemy as sql
from tqdm import tqdm_notebook

## Flatline Algorithm Using Data on Big Mama

### Functions

In [33]:
# Round folder used to build the pickle/CSV paths in the data-preparation cells below.
# NOTE(review): the "Run algorithm" section later reassigns `folder` to '16k_round_2';
# confirm which round is intended before running the notebook top to bottom.
folder = '16k_round_3'

Load list of dsns in this round

In [55]:
# dsns that have rollups in this round. The context manager closes the file
# handle as soon as the pickle has been read.
with open(f'{folder}/rollups_dsns.p', 'rb') as dsn_file:
    list_of_dsns = pickle.load(dsn_file)

Pickle the inclusion dsns (the only exclusion not applied yet is devices with no data)

    *Remember that we are only doing babies w/ bdays in a certain range and that are the first to use the device*

In [56]:
# Load every lookup needed to decide which dsns are included.
# Each file is opened in a context manager so the handle is closed right away.
with open('all_dsns_with_info.p', 'rb') as f:
    dsns_with_info = pickle.load(f) # includes cc dsns
with open('16k_reg_and_cc_bdays4.p', 'rb') as f:
    baby_info = pickle.load(f)
with open('/Users/brodriguez/Documents/Owlet-code/V2_monitoring_data/v2_devices.p', 'rb') as f:
    v2_devices = pickle.load(f)

# A dsn is included only if it appears in ALL of: this round's rollups, the
# dsns with info, the v2 devices and the birthday table.
inclusion_dsns = set(list_of_dsns).intersection(dsns_with_info, v2_devices, baby_info.dsn.values)

# Drop devices that reported more than 5 birthdays.
with open('dsns_more_than_5_bdays.p', 'rb') as f:
    more_than_5_bdays = set(pickle.load(f)).intersection(inclusion_dsns)
inclusion_dsns -= more_than_5_bdays

with open(f'/Users/brodriguez/Documents/Owlet-code/{folder}/inclusion_dsns.p', 'wb') as f:
    pickle.dump(inclusion_dsns, f)


Get locations for inclusion dsns

In [57]:
location_data = pd.read_csv('/Users/brodriguez/Documents/Owlet-code/GPS_locations_Mar_2019.csv', compression='gzip')

def dsn_in_16000(x):
    '''Tell whether a location row belongs to one of the inclusion dsns

    Args:
      x (Series): row of the location dataframe (must contain 'dsn')

    Returns:
      boolean: True if the row's dsn is in inclusion_dsns
    '''
    return x['dsn'] in inclusion_dsns

# Location info for only the 16000 dsns (otherwise the df is too big and it slows everything down).
# isin() performs the same membership test as dsn_in_16000 but on the whole
# column at once instead of calling a Python function per row; it also keeps
# working when the file has no rows.
in_16000 = location_data['dsn'].isin(inclusion_dsns)
location_data = location_data.loc[in_16000].sort_values(by='created_at')

with open(f'/Users/brodriguez/Documents/Owlet-code/{folder}/16000_location_data.p', 'wb') as f:
    pickle.dump(location_data, f)

Find critical events (load 10-minute rollups with a query on Big Mama)

In [10]:
def binary(x):
    '''Return the positions of the bits that are set in an integer bit mask

    Args:
      x (int): bit mask

    Returns:
      ndarray: bit positions (0 = least significant bit) whose value is 1
    '''
    # bin() gives '0b...' with the most significant bit first: strip the
    # prefix and flip the digits so that position i in the array is bit i.
    lsb_first = np.array(list(bin(x)[2:])[::-1])
    (set_positions,) = np.where(lsb_first == "1")
    return set_positions

def at_risk(df):
    '''Flag devices that never accumulated 24 hours of base state 7 data

    A row counts towards the 24 hours when bit 7 of its BaseStateMask is set.
    144 rows of 10 minute rollups make up 24 hours.

    Args:
      df (Dataframe): all 10 minute rollup data for a given device

    Returns:
      boolean: False if at least 144 rows have bit 7 set, otherwise True
    '''
    required_windows = 144

    # Fewer rows than 24 hours of use: cannot possibly qualify
    if df.shape[0] < required_windows:
        return True

    monitored = 0
    for row in df.itertuples():
        if 7 in binary(int(row.BaseStateMask)):
            monitored += 1
            if monitored >= required_windows:
                # Enough successful monitoring, no need to look further
                return False

    # Less than 24 hours of base state 7 (successful monitoring)
    return True
    
def find_last_day(dsn, df_all, dsn_bdays, dsns_1_bday):
    '''Find the last day that the first baby used the device

    Args:
      dsn (str)            : device number
      df_all (Dataframe)   : 10 minute rollup data for the given device,
                             sorted by TimeWindowStartTime
      dsn_bdays (Dataframe): reported birthdays for this device (one row per
                             report, with a created_at column)
      dsns_1_bday (list)   : list of dsns that have 1 reported birthday

    Returns:
      Timestamp : start time of the last rollup window attributed to the
                  first baby, or -1 when there is no such data
    '''
    if df_all.shape[0] == 0:
        # No usage data at all: same sentinel as "no data for the first baby"
        return -1

    report_times = list(dsn_bdays.sort_values(by='created_at').created_at)

    # With a single baby (or no second report to bound the window) the very
    # last day of use belongs to the first baby.
    if (dsn in dsns_1_bday) or (len(report_times) < 2):
        return df_all.TimeWindowStartTime.iloc[-1]

    # Otherwise take the last use before the second birthday was reported
    first_report, second_report = report_times[0], report_times[1]
    starts = df_all.TimeWindowStartTime
    first_baby = df_all.loc[(starts >= first_report) & (starts < second_report)]
    if first_baby.shape[0] == 0:
        # No data between 1st 2 reported bdays
        return -1
    return first_baby.TimeWindowStartTime.iloc[-1]
    

def baby_age(day, baby_df, no_reg):
    '''Work out how old the baby was on the given day

    The birthday used is the one from the most recent report made on or
    before `day`.

    Args:
      day (Timestamp)    : The last day the device was used
      baby_df (Dataframe): birthday information for a specific device
                           (created_at and birthDate columns)
      no_reg (boolean)   : True if there is no record of registration for the device

    Returns:
      timedelta : age of the baby on `day`; Timedelta of 1 day when no_reg is
                  True, Timedelta of -1 day when nothing was reported by `day`
      str       : Baby's birthday as 'YYYYMMDD' ('' in the two cases above)
    '''
    if no_reg:
        return pd.Timedelta(days=1), ''

    # Only reports made before (or at) the last use are relevant
    reported_by_then = baby_df.loc[baby_df.created_at <= str(day)]
    if reported_by_then.shape[0] == 0:
        return pd.Timedelta(days=-1), ''

    # Use the last reported bday
    latest_report = max(reported_by_then.created_at.values)
    is_latest = reported_by_then.created_at == latest_report
    bday = reported_by_then.loc[is_latest].birthDate.values[0]

    # diff between the day of the incident and the birthday
    age = day.date() - date(int(bday[:4]), int(bday[4:6]), int(bday[6:]))
    return age, bday

def in_US(day, location_df, no_reg):
    '''Decide whether the device counts as a US device on the given day

    Args:
      day (Timestamp)        : The last day the device was used
      location_df (Dataframe): location information for a specific device
                               (created_at and cc columns)
      no_reg (boolean)       : True if there is no record of registration for the device

    Returns:
      boolean : True when no_reg is set, or when the last location row
                created on or before `day` has country code 'US'
    '''
    if no_reg:
        return True

    seen_by_then = location_df.loc[location_df.created_at <= str(day)]
    if seen_by_then.shape[0] == 0:
        # Don't know the location
        return False
    if seen_by_then.cc.iloc[-1] != 'US':
        return False
    return True

def get_df_day(df_all, day):
    '''Select the rollup rows from the 2 hours leading up to a timestamp

    Args:
      df_all (Dataframe): 10 minute rollup data for a specific device
      day (Timestamp)   : The last day (timestamp) the device was used

    Returns:
      Dataframe : rows whose TimeWindowStartTime lies in [day - 2 hours, day],
                  both ends included; may hold fewer rows (or none) when
                  there is little data in that window
    '''
    window_start = day - pd.Timedelta(hours=2)
    starts = df_all.TimeWindowStartTime
    in_window = starts.between(window_start, day)  # inclusive on both ends
    return df_all[in_window]


def possible_flatline(df, low_HR_thres, high_HR_thres, low_O2_thres, valid_thres):
    '''Look for critical events (high or low heart rate or low oxygen) in the rows given

    A row is critical when enough of its samples are valid
    (ValidSamples / TotalSamples >= valid_thres) and at least one of these holds:
      * HeartRateRawMin < low_HR_thres while OxygenRawMin < 90
        (low HR is only alerted on when O2 is low as well)
      * OxygenRawMin < low_O2_thres
      * HeartRateRawMax > high_HR_thres

    Args:
      df (Dataframe)     : rollup rows for the end of the last day of use
      low_HR_thres (int) : threshold for low heart rate
      high_HR_thres (int): threshold for high heart rate
      low_O2_thres (int) : threshold for low oxygen
      valid_thres (float): minimum fraction of valid samples in a row

    Returns:
      str : 'False' when no row is critical, otherwise 'True' optionally
            followed by a description ('True, instantaneous',
            'True, many instantaneous' or 'True, low baseline')
    '''
    valid_fraction = df.ValidSamples / df.TotalSamples

    low_hr_with_low_o2 = (df.HeartRateRawMin < low_HR_thres) & (df.OxygenRawMin < 90)
    low_o2 = df.OxygenRawMin < low_O2_thres
    high_hr = df.HeartRateRawMax > high_HR_thres
    flagged = (low_hr_with_low_o2 | low_o2 | high_hr) & (valid_fraction >= valid_thres)

    if not any(flagged):
        return 'False'

    hr_never_high = all(df.HeartRateRawMax < 220)
    avg_o2_always_ok = all(df.OxygenAvgMin >= 90)

    # Low HR won't be valid if the O2 is not actually dropping
    if hr_never_high and avg_o2_always_ok:
        # Many raw O2 dips below 60 with a healthy average point at
        # Oxygen Noise Index (bad hardware) rather than a real event
        if (df.OxygenRawMin < 60).sum() >= 5:
            return 'True, many instantaneous'
        return 'True, instantaneous'

    # this category is just a "nice to know"
    if hr_never_high and all(df.OxygenAvgMin < 90):
        return 'True, low baseline'

    return 'True'
    

def last_vitals(df):
    '''Describe the vitals at the very end of the data given

    Only rows with ValidSamples > 0 are considered. Heart rate minimum and
    oxygen values come from the last 3 such rows (30 min or less); the heart
    rate maximum comes from the last 2 of them only.

    Args:
      df (Dataframe): data for a device's last 2 hours of use

    Returns:
      str : 'not valid' when df has no rows, otherwise one of
            'low HR', 'high HR', 'low O2' or 'Good vitals'
    '''
    if df.shape[0] == 0:
        return 'not valid'

    recent = df.loc[df.ValidSamples > 0].iloc[-3:]  # last 30 min or less
    lowest_hr = recent.HeartRateAvgMin.min()
    highest_hr = recent.iloc[-2:].HeartRateAvgMax.max()  # dont want 30 min
    lowest_avg_o2 = recent.OxygenAvgMin.min()
    lowest_raw_o2 = recent.OxygenRawMin.min()

    # TODO O2 threshold here may be too low
    if lowest_hr < 60 and lowest_avg_o2 < 80:
        return 'low HR'
    if highest_hr > 220:
        return 'high HR'
    # if o2 < 70 make extra low o2 category? Avg min? < 90?
    if lowest_raw_o2 < 80 and lowest_avg_o2 < 85:
        return 'low O2'
    return 'Good vitals'
    

def find_critical_events(dsn_list, location_data, baby_info, dsns_1_bday, conn, low_HR_thres=60, high_HR_thres=220, O2_thres=70, valid_thres=.4, no_reg=False):
    '''Find all last use cases and classify critical events

    For every dsn the 10 minute rollups are loaded from the `tenminsock`
    table, the last day of use of the first baby is determined and, for
    babies born in the expected range, the final 2 hours are classified.

    Args:
      dsn_list (list)          : all dsns to analyze
      location_data (Dataframe): location information for all devices
      baby_info (Dataframe)    : birthday information for all devices
      dsns_1_bday (list)       : list of dsns that have 1 reported birthday
      conn (Connection)        : connection to big mama
      low_HR_thres (int)       : threshold for low heart rate
      high_HR_thres (int)      : threshold for high heart rate
      O2_thres (int)           : threshold for low oxygen
      valid_thres (float)      : threshold for valid data
      no_reg (boolean)         : True if there is no record of registration for the device

    Returns:
      Dataframe : columns describing last use of all inclusion devices

    NOTE(review): with no_reg=True, baby_age returns an empty birthday string,
    which never passes the birthday range check below, so no rows are
    produced. Confirm whether that is intended.
    '''
    count_total = 0
    count = 0
    classifications = []
    # dsns with connected care. The context manager closes the file handle.
    # NOTE: pickle can execute arbitrary code, so only load trusted files.
    with open('cc_dsns.p', 'rb') as cc_file:
        cc_dsns = pickle.load(cc_file)

    for dsn in tqdm_notebook(dsn_list):
        # sorted/duplicate timestamps have been dropped (rollup algorithm)
        df_all = pd.read_sql('select * from tenminsock where dsn = %(d)s', conn, params={'d':dsn})
        df_all = df_all.drop_duplicates()
        df_all = df_all.sort_values(by='TimeWindowStartTime')

        at_risk_or_hardware = at_risk(df_all)
        # > 2 so we don't miss possible cases, but also dont base critical event on 1 reading
        valid_df = df_all.loc[df_all.ValidSamples > 2]
        if valid_df.shape[0] == 0:
            # Nothing usable for this device
            continue

        dsn_location = location_data.loc[location_data.dsn == dsn]
        dsn_baby_info = baby_info.loc[baby_info.dsn == dsn]

        last_day = find_last_day(dsn, valid_df, dsn_baby_info, dsns_1_bday)
        if last_day == -1:
            # No data for the 1st baby
            continue

        cc = 1 if dsn in cc_dsns else 0

        # Calculate age of baby and dont check for critical events if they are older than 1
        age, bday = baby_age(last_day, dsn_baby_info, no_reg)

        # CHECK FOR BIRTHDAY IN THE EXPECTED RANGE (Feb-oct 2017)
        if not (bday > '20170222' and bday <= '20171017'):
            continue
        count_total += 1

        if (age < pd.Timedelta(days=365)) and (age >= pd.Timedelta(days=0)):
            count += 1
            # Only devices used in the US are classified
            if in_US(last_day, dsn_location, no_reg):
                df_day = get_df_day(valid_df, last_day)
                flatline = possible_flatline(df_day, low_HR_thres, high_HR_thres, O2_thres, valid_thres)
                vitals = last_vitals(df_day)
                classifications.append((dsn, last_day.date(), flatline, vitals, cc, at_risk_or_hardware, age, bday))
        elif age >= pd.Timedelta(days=365):
            # add row for older babies so we know the actual last day of use
            classifications.append((dsn, last_day.date(), 'False', 'Good vitals', cc, at_risk_or_hardware, age, bday))

    print('total babies with last day', count_total)
    print('total babies < 1 on last day', count)
    df_columns = ['dsn', 'date', 'critical_event', 'last_10_minutes', 'cc', 'at_risk_or_issues', 'baby_age', 'birthday']
    df_classified = pd.DataFrame(classifications, columns=df_columns)

    return df_classified

Sock-off classifications. How do we access the 2-second data? The same way as for SVT?

In [12]:
def load_2sec(dsn, day, time_index=False):
    '''Read the 2 second data file of one device for one day

    The file location depends on the notebook-level `folder`; round 1 files
    are zipped, later rounds are gzipped. Column names come from the
    notebook-level `column_names`.

    Args:
      dsn (str)           : device number
      day (str)           : last day of use, as it appears in the directory name
      time_index (boolean): make timestamp the index or not

    Returns:
      Dataframe : 2 second data sorted by timestamp, without duplicate
                  timestamps and without rows where ble_rssi is 0
    '''
    if folder == '16k_round_1':
        df_2sec = pd.read_csv(f'/Users/brodriguez/Documents/Owlet-code/{folder}/16000_2sec/{day}/{dsn}.csv.zip',names=column_names)
    else:
        df_2sec = pd.read_csv(f'/Users/brodriguez/Documents/Owlet-code/{folder}/16000_2sec/{day}/{dsn}.csv.gz',compression='gzip',names=column_names)

    # timestamps are stored as seconds since the epoch
    df_2sec.timestamp = pd.to_datetime(df_2sec.timestamp, unit='s')
    df_2sec = df_2sec.sort_values(by=['timestamp']).drop_duplicates('timestamp')

    # drop rows where ble_rssi is 0, then renumber the rows from 0
    df_2sec = df_2sec.loc[df_2sec.ble_rssi != 0].reset_index(drop=True)

    if time_index:
        df_2sec = df_2sec.set_index('timestamp')

    return df_2sec

def lost_signal(df):
    '''Find the index at which the signal is lost

    The signal is considered present at a row when at least 15 of the 16
    readings ending at that row have notification_mask equal to 0; the last
    such row is reported.

    Args:
      df (Dataframe): non charging data

    Returns:
      int : index label of the last row with signal, 0 if there is none
    '''
    valid_in_window = df.notification_mask.eq(0).rolling(window=16).sum().fillna(0)
    with_signal = valid_in_window[valid_in_window >= 15]
    if with_signal.shape[0] == 0:
        return 0
    return with_signal.index[-1]

def multiple_sock_off(df, thresh=-.05):
    '''Find the indices at which the sock is taken off

    A sock off shows up as a sustained drop of skin_temperature: the 300
    sample rolling mean of the sample-to-sample difference falls below
    `thresh`. For every separate stretch below the threshold, the start of
    the drop is searched for in the 275 samples before it.

    Args:
      df (Dataframe): non charging data with a 0..n-1 integer index
      thresh (float): rolling mean of the temperature difference below which
                      the temperature is considered to be dropping

    Returns:
      list : list of indices when the sock came off (empty if it never did)
    '''
    diff_new = df.skin_temperature.diff().fillna(0)
    # .bfill() replaces the deprecated fillna(method='bfill'); it fills the
    # first 299 rows, which have no complete window yet.
    diff_rolling = diff_new.rolling(300).mean().bfill() # different window?
    off_indices = []

    # Series.min() (unlike the builtin min) returns NaN for an empty series,
    # so empty input simply yields no sock offs.
    if diff_rolling.min() < thresh: # < must match one for less_than
        less_than = diff_rolling[diff_rolling < thresh]

        # 0 accounts for first time it is below threshold, > 30 accounts for multiple sock offs
        drop_indices = [0] + list(np.where(np.diff(less_than.index.values) > 30)[0] + 1)
        drop_indices = less_than.index.values[drop_indices]

        for i in drop_indices:
            if i == 0:
                off_indices.append(0)
            else:
                # Non-zero temperature changes in the 275 samples before the drop was detected
                off = diff_new.iloc[max(0,(i-275)):i]
                off = off.loc[off != 0]

                if off.shape[0] == 0:
                    # No temperature change found before the drop: fall back to the detection index
                    off_indices.append(i)
                else:
                    # Start of the densest run of 4 consecutive -1 changes.
                    # - 3 because window = 4, max(0, ) because if idxmax < 3 you get a negative index
                    off_index_reset = max(0, off.eq(-1).reset_index().skin_temperature.rolling(window=4).sum().fillna(0).idxmax() - 3)
                    count_neg_1 = off.iloc[off_index_reset:].eq(-1).value_counts()

                    # if there are no False or no True:
                    if len(count_neg_1) == 1:
                        # Only accept when every change from there on is -1
                        if count_neg_1.index[0]:
                            off_index = off.index[off_index_reset]
                            off_indices.append(off_index)
                    elif count_neg_1[True]/(count_neg_1[False] + count_neg_1[True]) >= .7: # What threshhold?
                        off_index = off.index[off_index_reset] # This is the index where the sock came off
                        off_indices.append(off_index)

    return off_indices

def mvmt(x):
    '''Extract the movement flag from a notifications bit mask

    The flag is bit 1 (the second least significant bit) of the mask. A mask
    of 0 is reported as 1, as in the original implementation.
    NOTE(review): confirm that an all-clear mask should count as movement.

    Args:
      x (int): notifications bit mask

    Returns:
      int : 1 if there was movement, else 0
    '''
    if x == 0:
        return 1
    # Read the bit arithmetically: indexing bin(x)[-2] hits the 'b' of the
    # '0b' prefix for x == 1 and raises ValueError.
    return int((abs(x) >> 1) & 1)

def signal_sock(x):
    '''Determine if the sock came off before the signal was lost

    Loads the 2 second data for the row's dsn and date, locates where the
    signal was lost and where the sock came off, and classifies how the
    session ended.

    Args:
      x (Series): Row of a dataframe (needs 'dsn' and 'date')

    Returns:
      str : one of 'Need data' (no 2 second file found), 'sock off',
            'data cut off', 'unknown', 'signal lost before sock off',
            'battery died'
    '''
    five_min = pd.Timedelta(minutes=5) #  is 5 min right choice
    try:
        df = load_2sec(x['dsn'], str(x['date']).replace('-',''))
    except FileNotFoundError:
        return 'Need data'

    # Keep readings with base_state > 3 and a non-zero raw heart rate, renumbered from 0.
    # NOTE(review): if no such reading exists, values[-1] below raises IndexError.
    df_non_charging = df.loc[(df.base_state > 3) & (df.heart_rate_raw != 0)].reset_index(drop=True)

    # A skin temperature strictly between 0 and 120 is treated as a broken sensor
    broken_sensor = any((df_non_charging.skin_temperature < 120) & (df_non_charging.skin_temperature > 0))

    time_values = df_non_charging.timestamp
    last_reading = df_non_charging.timestamp.values[-1]

    loss = lost_signal(df_non_charging)
    sock = multiple_sock_off(df_non_charging)

    # does the sock come off within 15 minutes of signal loss?
    last_base_state = df.loc[df.timestamp <= time_values[loss] + pd.Timedelta(15,'m')].iloc[-1].base_state

    # make timestamp the index (time_values keeps the positional index taken above)
    df_non_charging = df_non_charging.set_index('timestamp')
    after_loss = (df_non_charging.index > time_values[loss]) & (df_non_charging.index < last_reading)
    mvmt_flag = df_non_charging.loc[after_loss].notification_mask.apply(mvmt) # are thresholds good?

    # if lost signal is within 1 min of last reading (before charging) return 'data cut off'
    if abs(time_values[loss] - last_reading) < pd.Timedelta(minutes=1): # is 1 min good?
        if last_base_state == 3:
            return 'sock off'
        else:
            return 'data cut off'

    elif broken_sensor:
        if last_base_state == 3:
            return 'sock off'
        else:
            return 'unknown'

    # if sock comes off w/in ~5 min of lost signal return 'sock off'
    # sock off has to be at the end not beginning***
    elif len(sock) > 0:
        for sock_off in sock:
            # Ignore sock offs far from the signal loss (the sock came off before going on again).
            # NOTE(review): 1800 is a number of rows; the original comment says
            # "30 minutes", which at 2 seconds per row would be 900 rows. Confirm.
            if abs(sock_off - loss) > 1800:
                pass
            elif (sock_off < loss) or (abs(time_values[sock_off] - time_values[loss]) <= five_min): # is 5 minutes good?
                return 'sock off'
            else:
                # if time btw loss and sock off has movement > 20% of the time => cut off
                # .bfill() replaces the deprecated fillna(method='bfill')
                loss_to_off = mvmt_flag.loc[mvmt_flag.index < (time_values[sock_off] - pd.Timedelta(minutes=5))].rolling(window=100).sum().bfill()
                if loss_to_off.min() < 15: # 20% or more?
                    return 'signal lost before sock off'
                else:
                    return 'sock off' # sock off or data cut off?

    elif dead_battery(df_non_charging):
        # Check for dead battery after checking for sock off (if battery dies after sock off we don't care)
        return 'battery died'

    # Reached when no sock off was found and the battery did not die, or when
    # every sock off was too far from the signal loss.
    # if time btw loss and last reading has movement > 20% => cut off
    loss_to_last = mvmt_flag.rolling(window=100).sum().bfill()
    if loss_to_last.min() > 15: # 20% or more?
        if last_base_state == 3:
            return 'sock off'
        else:
            return 'data cut off'

    if last_base_state == 3:
        return 'sock off'
    return 'signal lost before sock off'


### Run algorithm

In [3]:
DB_USER = 'brodriguez'
# getpass does not echo the password, so it cannot end up in the saved cell output
DB_PASSWORD = getpass.getpass('Database password: ')
DB_CONN = 'localhost'
DB_NAME = 'owletsock'
# SQL login. The password is URL-escaped so that characters such as '@' or
# '/' do not break the connection string.
s = f'mysql+pymysql://{DB_USER}:{quote_plus(DB_PASSWORD)}@{DB_CONN}/{DB_NAME}'
engine = sql.create_engine(s)
conn = engine.connect()

[password input redacted]


In [4]:
# NOTE(review): the data-preparation cells at the top of the notebook use
# '16k_round_3'; confirm which round these inputs should come from.
folder = '16k_round_2'

# Inputs of the algorithm. Files are opened in context managers so the
# handles are closed once each pickle has been read.
with open(f'/Users/brodriguez/Documents/Owlet-code/{folder}/inclusion_dsns.p', 'rb') as f:
    inclusion_dsns = pickle.load(f)
with open(f'/Users/brodriguez/Documents/Owlet-code/{folder}/16000_location_data.p', 'rb') as f:
    location_data = pickle.load(f)
with open('dsns_1_bday.p', 'rb') as f:
    dsns_1_bday = pickle.load(f)
with open('16k_reg_and_cc_bdays4.p', 'rb') as f:
    baby_info = pickle.load(f)

In [5]:
# Keep only the dsns that also appear in the birthday table
inclusion_dsns = inclusion_dsns.intersection(baby_info.dsn.values)

In [11]:
# NOTE(review): the [:1] slice classifies a single dsn only — presumably a smoke test;
# drop the slice to classify every inclusion dsn.
df_classified = find_critical_events(list(inclusion_dsns)[:1], location_data, baby_info, dsns_1_bday, conn) 

HBox(children=(IntProgress(value=0, max=1), HTML(value='')))


total babies with last day 1
total babies < 1 on last day 1


In [32]:
# Release the database connection.
# NOTE(review): any cell that still queries through `conn` has to run before this one
# (or open a new connection from `engine`).
conn.close()

In [47]:
# Save the classifications. The context manager flushes and closes the file.
with open('round_2_classified_test.p', 'wb') as f:
    pickle.dump(df_classified, f)
# To reload a previous run instead of recomputing:
# df_classified = pickle.load(open('round_2_classified_test.p','rb'))

In [59]:
# Critical events on devices that are not at risk and whose final vitals were not good
true = ['True', 'True, low baseline', 'True, instantaneous', 'True, many instantaneous']
is_critical = df_classified.critical_event.isin(true)
not_at_risk = df_classified.at_risk_or_issues == False
bad_final_vitals = df_classified.last_10_minutes != "Good vitals"
df_classified.loc[is_critical & not_at_risk & bad_final_vitals]

Unnamed: 0,dsn,date,critical_event,last_10_minutes,cc,at_risk_or_issues,baby_age,birthday
74,AC000W001060959,2018-02-12,"True, low baseline",low O2,0,False,251 days,20170606
600,AC000W001087323,2017-03-13,"True, low baseline",low O2,0,False,21 days,20170220
978,AC000W001041852,2018-02-26,"True, low baseline",low O2,1,False,324 days,20170408
1038,AC000W001205802,2018-11-05,True,low O2,1,False,165 days,20180524
1092,AC000W001066401,2018-07-11,True,low O2,0,False,186 days,20180106
1117,AC000W001052267,2017-10-20,True,low O2,1,False,9 days,20171011
1442,AC000W000505417,2017-05-01,"True, low baseline",low O2,1,False,2 days,20170429
1743,AC000W001067140,2017-11-09,True,low O2,0,False,10 days,20171030
2607,AC000W002575870,2018-08-16,True,low O2,1,False,241 days,20171218
2608,AC000W002450261,2018-11-28,True,high HR,1,False,245 days,20180328


#### Pull 2 sec data

In [53]:
# Build the sftp commands that pull the 2 second data of every critical event
true = ['True', 'True, low baseline', 'True, instantaneous', 'True, many instantaneous']
# .copy() so that columns can be added to df_critical later without touching df_classified
df_critical = df_classified.query('critical_event in @true and at_risk_or_issues == False').copy()
critical_dsns = df_critical.dsn.unique().tolist()
# Query through the engine so this cell does not depend on `conn` still being open
df_fullpath = pd.read_sql("""select * from devices where dsn in %(d)s""", engine, params={'d':critical_dsns})
df_fullpath = df_fullpath[['dsn','fullpath','masknum']]

# One directory per day, named YYYYMMDD
directories = [str(day).replace('-','') for day in df_critical.date.unique().tolist()]

# import os
# for file in directories:
#     os.system(f"mkdir ~/Documents/Owlet-code/{folder}/{file}")

# First fullpath listed for each dsn
fullpath_by_dsn = df_fullpath.drop_duplicates('dsn').set_index('dsn').fullpath

commands = []
for dsn, day in tqdm_notebook(zip(df_critical.dsn.tolist(),df_critical.date.tolist()), total=df_critical.shape[0]):
    # Not named `date`: that would shadow datetime.date, which baby_age relies on
    day_str = str(day).replace('-','')
    path = fullpath_by_dsn[dsn]

    # ***Change destination to round folder/16000_2sec
    line = f'get {path}/{dsn}_{day_str}* ~/Documents/Owlet-code/{folder}/16000_2sec/{day_str}/{dsn}.csv.gz' 
    commands.append(line)

# Paste these commands in sftp bigmama

# Paste these commands in sftp bigmama

HBox(children=(IntProgress(value=0, max=1), HTML(value='')))




HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




#### Once I have 2 second data:

In [None]:
# Classify sock off / signal loss for the critical events only, not the whole df
# (the 2 second data has to be on this computer first).
# assign() builds a new frame instead of writing into a possible slice of df_classified.
if df_critical.shape[0] != 0:
    df_critical = df_critical.assign(signal_sock=df_critical.apply(signal_sock, axis=1))
else:
    # Still create the column so that the breakdown cells below work on an empty result
    df_critical = df_critical.assign(signal_sock=pd.Series(dtype=object))

## Breakdown of classifications: `**`, `*` and `?` cases

In [None]:
# ** cases
# Not at risk, bad final vitals, not an instantaneous event, and the session
# ended in something other than a sock off or a data cut off.

not_at_risk = df_critical.at_risk_or_issues == False
bad_final_vitals = df_critical.last_10_minutes != 'Good vitals'
not_instantaneous = ~df_critical.critical_event.isin(['True, instantaneous', 'True, many instantaneous'])
unexplained_end = ~df_critical.signal_sock.isin(['sock off', 'data cut off'])
df_critical.loc[not_at_risk & bad_final_vitals & not_instantaneous & unexplained_end]

In [None]:
# * cases
# Not at risk, bad final vitals, not a single instantaneous event, and the
# session did not end in a lost signal, an unknown state or a dead battery.

not_at_risk = df_critical.at_risk_or_issues == False
bad_final_vitals = df_critical.last_10_minutes != 'Good vitals'
not_single_instantaneous = df_critical.critical_event != 'True, instantaneous'
explained_end = ~df_critical.signal_sock.isin(['signal lost before sock off', 'unknown', 'battery died'])
df_critical.loc[not_at_risk & bad_final_vitals & not_single_instantaneous & explained_end]

In [None]:
# ? cases
# Not at risk and either a single instantaneous event with bad final vitals
# that did not end in a sock off, or any event with many instantaneous drops.

not_at_risk = df_critical.at_risk_or_issues == False
bad_final_vitals = df_critical.last_10_minutes != 'Good vitals'
instantaneous_no_sock_off = ((df_critical.critical_event == 'True, instantaneous') &
                             (df_critical.signal_sock != 'sock off'))
many_instantaneous = df_critical.critical_event == 'True, many instantaneous'
df_critical.loc[not_at_risk & ((bad_final_vitals & instantaneous_no_sock_off) | many_instantaneous)]

2 second data still needed:

In [None]:
# Critical events whose 2 second file was not found locally
df_critical.loc[df_critical.signal_sock.eq('Need data')]