# Code for loading and modifying datasets

In [2]:
# Utilities
from tqdm import tqdm_notebook as tqdm
import os
import pdb
import glob

# Numerical
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Machine Learning (Q1)
from sklearn.linear_model import Ridge as Ridge
from sklearn.linear_model import LinearRegression as OLS
from sklearn.metrics import mean_squared_error as mse

# Machine Learning (Q2)
from sklearn.linear_model import SGDClassifier as SGD
from sklearn.linear_model import LogisticRegression as LR # 

# Plots
%matplotlib notebook

In [7]:
# Directory holding the OSCAR pulse-oximetry exports. Set the OSCAR_DATA_PATH
# environment variable to point somewhere else; when it is unset the original
# location is used, so existing setups keep working unchanged.
data_path = os.environ.get(
    "OSCAR_DATA_PATH",
    "C:/Users/allen/Documents/OSCAR_Data-pulseox-data",
)
print(os.listdir(data_path))
# Single session export used as the worked example by the later cells.
example = os.path.join(data_path, 'OSCAR_allen_Details_2019-10-24.csv')

['OSCAR_allen_Details_2019-10-08.2.xlsx', 'OSCAR_allen_Details_2019-10-08.3.csv', 'OSCAR_allen_Details_2019-10-24.csv', 'OSCAR_allen_Details_2019-10-24.xlsx', 'OSCAR_allen_Details_2019-10-24_labels.txt', 'Preferences.xml', 'Profiles', 'profiles.xml']


In [43]:
def day_to_df(path):
    '''
    Split one OSCAR csv export into separate SPO2 and Pulse tables.

    @args:
        path: location of the OSCAR csv file (anything pd.read_csv accepts).
              The file must provide 'Session', 'DateTime' and 'Event' columns.
    @return:
        tuple(sp02, pulse, start_time),
        sp02 = pd.DataFrame with the rows whose Event is 'SPO2'
        pulse = pd.DataFrame with the rows whose Event is 'Pulse'
        start_time = 'Session' value of the first row (described by the
                     original author as the UTC start time in s)

        Both frames have the 'Session' and 'Event' columns removed, a fresh
        0..n-1 index, and 'DateTime' expressed relative to start_time.
    '''
    raw = pd.read_csv(path)
    session_start = raw['Session'][0]

    # Work on a copy without the Session column and rebase every timestamp so
    # the session begins at 0.
    events = raw.drop(columns='Session')
    events['DateTime'] -= session_start

    def _rows_for(event_name):
        # Keep one event type, discard the now-constant label, renumber rows.
        matching = events[events['Event'] == event_name]
        return matching.drop(columns='Event').reset_index(drop=True)

    return _rows_for('SPO2'), _rows_for('Pulse'), session_start

In [64]:
sp02, pulse, start_time = day_to_df(example)

# Overlay both signals against the session-relative DateTime values and save
# the figure into the current working directory, named after the source csv.
plt.clf()  # start from an empty figure so re-running the cell does not stack lines
plt.plot(sp02['DateTime'].values, sp02['Data/Duration'].values, label='SPO2')
plt.plot(pulse['DateTime'].values, pulse['Data/Duration'].values, label='Pulse')
plt.xlim(20, 1000)  # zoom in on an early window of the session
# Labels, title and legend so the saved image can be read on its own.
plt.xlabel('DateTime (offset from session start)')
plt.ylabel('Data/Duration')
plt.title(os.path.split(example)[-1])
plt.legend()
plt.savefig(os.path.split(example)[-1] + '.png')
# 74720 time units for 32 minutes


In [74]:
# Pre-FFT preparation: scatter the SPO2 samples into a dense array with one
# slot per DateTime offset (0 .. max offset). Slots that have no sample stay 0.
# No transform is computed in this cell; it only builds and plots the input.
sp02_fft = np.zeros(np.max(sp02['DateTime']) + 1)
sp02_fft[sp02['DateTime'].values] = sp02['Data/Duration'].values

# Plot the dense series over the same window as the raw-signal figure.
plt.clf()
plt.plot(np.arange(len(sp02_fft)), sp02_fft)
plt.xlim(20, 1000)
plt.savefig('pre-fft.png')
