# Analyzing OR Blood Pressure Measurements

In [None]:
%matplotlib inline

In [None]:
import pandas as pd
import os
import time
import datetime
import numpy as np
# Folder containing the CSV files, located under the current user's home directory.
DATADIR = os.path.join(os.path.expanduser('~'), *["DATA", "TimeSeries", "UofUData"])
# Final expression of the cell: echoes whether that folder exists on this machine.
os.path.exists(DATADIR)

In [None]:
# Load only the first 1000 rows of data_all.csv from DATADIR.
data = pd.read_csv(os.path.join(DATADIR,"data_all.csv"), nrows=1000)

In [None]:
# Preview the first rows of the loaded frame.
data.head()

In [None]:
# Show the type of the first "noninvDIA" entry alongside np.nan
# (presumably to compare it with how missing values are represented).
# A notebook cell only echoes its final expression, so the type is printed
# explicitly; otherwise its value would be computed and silently discarded.
print(type(data["noninvDIA"][0]))
np.nan

In [None]:
# Keep the first raw timestamp as a sample for the parsing experiments.
t0 = data["VirtualDateTime"][0]
# Show the sample and its Python type, one per line.
print(t0, type(t0), sep="\n")


## Convert ``VirtualDateTime`` from string to datetime

### Define parse string

In [None]:
# strptime/strftime pattern: 4-digit year, month, day, then 24-hour
# hours, minutes and seconds (e.g. "2016-03-04 05:06:07").
parse_str = "%Y-%m-%d %H:%M:%S"

### Test parse string

In [None]:
# Format the current local time with the pattern to see what it produces;
# strftime uses localtime() on its own when no time tuple is supplied.
print(time.strftime(parse_str))
# Parse the sample timestamp with the same pattern; the resulting
# struct_time is echoed as the cell output.
time.strptime(t0, parse_str)

### Create datetime

In [None]:
# Parse the sample timestamp into a datetime object and show both the
# parsed value and its type, one per line.
print(
    datetime.datetime.strptime(t0, parse_str),
    type(datetime.datetime.strptime(t0, parse_str)),
    sep="\n",
)

In [None]:
def datestring_to_datetime(s, parse_str):
    """Parse a date string into a ``datetime.datetime``.

    Parameters
    ----------
    s : value to parse; expected to be a string laid out as ``parse_str``.
    parse_str : ``strptime`` format string describing ``s``.

    Returns
    -------
    The parsed ``datetime.datetime``, or ``np.nan`` when ``s`` does not match
    ``parse_str`` or is not a string (for example a missing value).
    """
    try:
        return datetime.datetime.strptime(s, parse_str)
    except (ValueError, TypeError):
        # ValueError: the text does not match the format.
        # TypeError: the input is not a string (e.g. NaN or None).
        # Unrelated errors such as KeyboardInterrupt now propagate.
        return np.nan

In [None]:
# Check the helper on the sample timestamp: the cell echoes the returned type.
type(datestring_to_datetime(t0, parse_str))

### Modify DataFrame

In [None]:
# Parse every raw timestamp with the notebook's datestring_to_datetime helper
# so that a missing or malformed entry becomes NaN instead of aborting the
# whole conversion. Mapping over the single column also avoids handing a full
# row object to the function for every record.
data["VirtualDateTime Parsed"] = data["VirtualDateTime"].apply(
    lambda s: datestring_to_datetime(s, parse_str)
)
data.head()

In [None]:
# Echo the first parsed timestamp to inspect the stored value.
data["VirtualDateTime Parsed"][0]

### Alternatively we can do simple date conversions using Pandas ``to_datetime`` function

In [None]:
# The timestamps are year-first (see parse_str), so a day-first hint does not
# describe them. Passing the known format makes the conversion unambiguous and
# keeps it consistent with the strptime-based column.
data["VirtualDateTime Parsed2"] = pd.to_datetime(data["VirtualDateTime"], format=parse_str)
print(type(data["VirtualDateTime Parsed2"][0]))
data.head()

In [None]:
# List the distinct case identifiers present in the loaded rows.
data["VirtualCaseID"].unique()

In [None]:
# Keep the first rows that have no missing value in any column
# (dropna() defaults to how="any").
# NOTE(review): the name looks like a typo of "subdata"; confirm before renaming.
sumbdata = data.dropna().head()  # plotting of ["invDIA"] left disabled

In [None]:
# Plot the invSYS, invMAP and invDIA columns against the parsed timestamp,
# restricted to the rows of case 10349.
data.loc[data["VirtualCaseID"] == 10349].plot(
    x="VirtualDateTime Parsed",
    y=["invSYS", "invMAP", "invDIA"],
)

### Do we need to drop data?
#### Explore ``dropna`` with different values for ``how``

In [None]:
# Every selected row has a non-null VirtualCaseID (it equals 10349), so
# how="all" evaluated over all columns can never drop a row. Restricting the
# check to the plotted columns makes the "all"/"any" comparison meaningful:
# a row is removed only when all three pressure readings are missing.
data[data["VirtualCaseID"]==10349].dropna(
    how="all", subset=["invSYS", "invMAP", "invDIA"]
).plot(x="VirtualDateTime Parsed",
       y=["invSYS", "invMAP", "invDIA"])

## Data Cleansing Filters
#### Consider the following two criteria for considering a measurement as spurious

1. x changes by more than 100 (in either direction) from one sample to the next
    * $|x_{i}-x_{i-1}| > 100$
1. x is lower than 10
    * $x_i < 10$
    
### Analysis

* The second condition should be easy for us to implement. We've already performed multiple examples of Boolean filtering.
* The first condition is more challenging because it requires taking differences between rows, and to date we've only computed on single rows.

### Approaches to Computing Differences

1. We could use the Pandas DataFrame [``shift``](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.shift.html) method
1. We could use the Pandas DataFrame [``diff``](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.diff.html) method

#### Shift Approach

In [None]:
# shift(-1) moves every value up one row, so row i holds x_i - x_{i+1}
# (the forward difference); the last row is NaN because it has no successor.
# shift(1) would give x_i - x_{i-1}, the form used in the criterion above.
data["invSYS"] - data["invSYS"].shift(-1)

## Exercise: 
### Use the shift method to implement the maximum difference filter

#### Diff Method

In [None]:
# diff(-1) subtracts the following row from each row: x_i - x_{i+1}, with NaN
# in the last row. diff() with the default period of 1 gives x_i - x_{i-1},
# the form used in the criterion above.
data["invSYS"].diff(-1)

## Exercise: 
### Use the diff method to implement the maximum difference filter

## Built-in Pandas Computational Tools
### Pandas provides a number of functions for smoothing data that might be of value
#### [Window Functions](http://pandas.pydata.org/pandas-docs/stable/computation.html)
