# The purpose of this notebook is to analyze and make predictions using chest compression data

In [None]:
import pandas as pd
import numpy as np

import matplotlib
%matplotlib inline
import matplotlib.pyplot as plt

from scipy import signal

In [None]:
# Load data
# Read the five raw recordings from CSV and echo each frame's (rows, columns)
# immediately after it is loaded. Naming used below: `dfg_*` frames are later
# labelled as George's recordings and `dfr_*` frames as Reed's.
df_mat = pd.read_csv('data/air_mat_comp_600.csv')
print(df_mat.shape)
dfg_pil = pd.read_csv('data/Pillow-CPR.csv')
print(dfg_pil.shape)
dfg_ted = pd.read_csv('data/TeddyBear-CPR.csv')
print(dfg_ted.shape)
dfr_pil = pd.read_csv('data/pillow_reed.csv')
print(dfr_pil.shape)
dfr_ted = pd.read_csv('data/teddy_reed.csv')
print(dfr_ted.shape)

## Absolute truth: Number of compressions per data set
- Mattress ------> 600 (30 per set)
- Pillow George -> 150 (30 per set)
- Pillow Reed ---> 150 (30 per set)
- Teddy George --> 90 (30 per set) *** QUESTIONABLE
- Teddy Reed ----> 150 (30 per set)

#### Total: 1,140

### Add labels to each of the dataframes to indicate their compression object

In [None]:
# Tag every frame (in place) with the object/operator it was recorded on so the
# rows stay identifiable once the frames are stacked.
for frame, label in (
    (df_mat, "mattress"),
    (dfg_pil, "pillow_george"),
    (dfg_ted, "teddy_george"),
    (dfr_pil, "pillow_reed"),
    (dfr_ted, "teddy_reed"),
):
    frame['object'] = label

In [None]:
# Rename columns in df_mat: drop the "Linear " prefix from the three per-axis
# acceleration columns.
df_mat = df_mat.rename(columns={
    f'Linear Acceleration {axis} (m/s^2)': f'Acceleration {axis} (m/s^2)'
    for axis in ('x', 'y', 'z')
})

### Create time delta column

In [None]:
def time_delta(data):
    """Add previous-timestamp and elapsed-time columns to ``data``.

    Two columns are written onto the frame that is passed in (it is modified
    in place and also returned):

    * ``prev_time``  - the ``'Time (s)'`` value of the preceding row
    * ``time_delta`` - ``'Time (s)'`` minus ``prev_time``

    The first row has no predecessor, so both new columns are NaN there.

    Parameters:
    data (DataFrame): Frame with a ``'Time (s)'`` column.

    Returns:
    DataFrame: The same ``data`` object, with the two columns added.
    """
    timestamps = data['Time (s)']
    earlier = timestamps.shift(1)
    data['prev_time'] = earlier
    data['time_delta'] = timestamps - earlier
    return data

In [None]:
# Add the prev_time / time_delta columns to each recording separately, so no
# delta is ever computed across two different recordings.
df_mat, dfg_pil, dfg_ted, dfr_pil, dfr_ted = map(
    time_delta, (df_mat, dfg_pil, dfg_ted, dfr_pil, dfr_ted)
)

In [None]:
# Preview the George pillow frame, which now carries the 'object',
# 'prev_time' and 'time_delta' columns added above.
dfg_pil.head()

In [None]:
# Reed teddy: full, untrimmed recording
dfr_ted.plot(x='Time (s)', y='Absolute acceleration (m/s^2)')
plt.title("Teddy Reed Compressions over Time")

In [None]:
# Clean pillow reed: preview the recording with the first 150 and last 500
# rows left out, before committing to the cut.
dfr_pil.iloc[150:-500].plot(x='Time (s)', y='Absolute acceleration (m/s^2)')

In [None]:
# Cut rows: keep everything except the first 150 and the last 500 rows.
# NOTE: re-running this cell trims the already-trimmed frame again.
dfr_pil = dfr_pil.iloc[150:-500]

In [None]:
# Clean teddy Reed: preview the recording with the first 150 and last 1800
# rows left out, before committing to the cut.
dfr_ted.iloc[150:-1800].plot(x='Time (s)', y='Absolute acceleration (m/s^2)')

In [None]:
# Keep everything except the first 150 and the last 1800 rows.
# NOTE: re-running this cell trims the already-trimmed frame again.
dfr_ted = dfr_ted.iloc[150:-1800]

### Stack all dataframes

In [None]:
# Stack the five recordings into one frame, printing the row count before and
# after so a lost or duplicated row is visible at a glance.
frames = [df_mat, dfg_pil, dfg_ted, dfr_pil, dfr_ted]
print("expected row count:", sum(each.shape[0] for each in frames))
df = pd.concat(frames, ignore_index=True, sort=False)
print('actual row count  :', df.shape[0])

In [None]:
# Summarize the stacked frame (columns, dtypes, non-null counts).
df.info()

### Plot data:
- Are peaks easy to visualize?

In [None]:
# df[df['object'] == 'mattress'].plot(x='Time (s)', y=['Acceleration x (m/s^2)', 
#                                                      'Acceleration y (m/s^2)',
#                                                      'Acceleration z (m/s^2)', 
#                                                      'Absolute acceleration (m/s^2)'], kind="bar", figsize=(9, 8))

In [None]:
# Mattress: acceleration magnitude over the whole recording
df.loc[df['object'] == 'mattress'].plot(x='Time (s)', y='Absolute acceleration (m/s^2)')
# Per-axis alternatives, left disabled:
#df.loc[df['object'] == 'mattress'].plot(x='Time (s)', y='Acceleration x (m/s^2)')
#df.loc[df['object'] == 'mattress'].plot(x='Time (s)', y='Acceleration y (m/s^2)')
#df.loc[df['object'] == 'mattress'].plot(x='Time (s)', y='Acceleration z (m/s^2)')
plt.title("Mattress Magnitude over Time")
plt.show()

In [None]:
# George pillow: acceleration magnitude over the whole recording
df.loc[df['object'] == 'pillow_george'].plot(x='Time (s)', y='Absolute acceleration (m/s^2)')
plt.title("Pillow George Compressions over Time")

In [None]:
# George teddy: acceleration magnitude over the whole recording
df.loc[df['object'] == 'teddy_george'].plot(x='Time (s)', y='Absolute acceleration (m/s^2)')
plt.title("Teddy George Magnitude Over Time")
plt.show()

In [None]:
# Reed pillow: acceleration magnitude, skipping the first 105 rows of this
# object's (already trimmed) recording
df.loc[df['object'] == 'pillow_reed'].iloc[105:].plot(x='Time (s)', y='Absolute acceleration (m/s^2)')
plt.title("Pillow Reed Magnitude over Time")
plt.show()

In [None]:
# Reed teddy: acceleration magnitude of the trimmed recording
df.loc[df['object'] == 'teddy_reed'].plot(x='Time (s)', y='Absolute acceleration (m/s^2)')
plt.title("Teddy Reed Compressions over Time")

## Notes: 
- All compression data seems to be well spaced
- George Teddy data seems to be 90 compressions
    - Confirm with George

### Create the following:
- Half second increment column
  - Ascending in value
- Rolling std
  - std across half second increment blocks

In [None]:
# Preview the stacked frame before the derived columns are added.
df.head()

In [None]:
# Roughly how many rows in .1 seconds?
# Sum the time_delta of the first 11 rows. The very first value is NaN (no
# previous row) and is skipped by sum(), so this is the time spanned by the
# first 10 sampling intervals.
df['time_delta'].iloc[:11].sum()

In [None]:
# A compression may occur in a fraction of a second, so take the standard
# deviation over a rolling window of ten rows. The window is applied per
# object so it never mixes rows from two different recordings.
# (An earlier attempt assigned groupby(...).rolling(10).std() directly;
# transform is used so the result lines up with df's own index.)
accel_by_object = df.groupby('object')['Absolute acceleration (m/s^2)']
df['rolling_stdev'] = accel_by_object.transform(
    lambda series: series.rolling(10).std()
)

In [None]:
# Inspect the first 20 rows; rolling_stdev stays NaN until a full 10-row
# window is available.
df.head(20)

In [None]:
# Mattress: rolling standard deviation over the whole recording
df.loc[df['object'] == 'mattress'].plot(x='Time (s)', y='rolling_stdev')
plt.title("Mattress: Rolling Standard Dev over Time")

In [None]:
# George pillow: rolling standard deviation over the whole recording
df.loc[df['object'] == 'pillow_george'].plot(x='Time (s)', y='rolling_stdev')
plt.title("Pillow George: Rolling Standard Dev over Time")

In [None]:
# Range of the rolling stdev across all objects, printed as "min - max"
stdev_values = df['rolling_stdev']
print(stdev_values.min(), '-', stdev_values.max())

### Use thresholding to label data
- Total 1,140 compressions

In [None]:
# Define a function that calculates the threshold for the number of compressions
def find_compression_threshold(data, object_name, true_compression_count,
                               column='Absolute acceleration (m/s^2)',
                               num_thresholds=4000):
    """
    Find the acceleration threshold whose above-threshold sample count best matches a target.

    Candidate thresholds are ``num_thresholds`` evenly spaced values between the
    object's minimum and maximum reading. For each candidate, the number of
    *samples* (rows) strictly greater than it is counted; the candidate whose
    count is closest to ``true_compression_count`` is returned. On ties the
    lowest such candidate wins. Note that this counts rows above the threshold,
    not distinct threshold crossings.

    Parameters:
    data (DataFrame): The dataframe containing the sensor data; must have an
        'object' column and the column named by ``column``.
    object_name (str): The value of 'object' whose rows are analysed.
    true_compression_count (int): The target number of above-threshold samples.
    column (str): Name of the column to threshold. Defaults to the absolute
        acceleration column.
    num_thresholds (int): Number of candidate thresholds to evaluate; larger
        gives a finer search. Defaults to 4000.

    Returns:
    float: The candidate threshold that best matches the target count.

    Raises:
    ValueError: If ``num_thresholds`` is less than 1, or if ``object_name`` has
        no non-missing readings in ``column`` (previously this case silently
        produced a NaN threshold).
    """
    if num_thresholds < 1:
        raise ValueError("num_thresholds must be at least 1")

    # Readings for the requested object; NaNs are dropped because they can never
    # exceed a threshold and would otherwise sort to the end of the array.
    readings = data.loc[data['object'] == object_name, column].to_numpy(dtype=float)
    readings = readings[~np.isnan(readings)]
    if readings.size == 0:
        raise ValueError(
            "no readings found for object {!r} in column {!r}".format(object_name, column)
        )

    candidates = np.linspace(readings.min(), readings.max(), num=num_thresholds)

    # With the readings sorted, the number of values <= t is the right-hand
    # insertion point of t, so the number strictly above t is size - that index.
    # This evaluates every candidate at once instead of rescanning the data
    # once per candidate.
    ordered = np.sort(readings)
    counts_above = ordered.size - np.searchsorted(ordered, candidates, side='right')

    # argmin returns the first minimum, i.e. the lowest threshold among ties.
    best_index = np.argmin(np.abs(counts_above - true_compression_count))
    return candidates[best_index]

In [None]:
# Create a dictionary of object and compression counts
object_comp_dct = {'mattress':600,\
                   'pillow_george':150,\
                   'teddy_george':90,\
                   'pillow_reed':150,\
                   'teddy_reed':150}

In [None]:
# Test the function for one object to see if it works as expected
comp_dict = {}
for obj,val in object_comp_dct.items():
    test_object_name = obj
    test_true_compression_count = val
    #print('{} threshold:'.format(obj), find_compression_threshold(df, obj, val))
    comp_dict[obj] = find_compression_threshold(df, obj, val)

In [None]:
# Show the threshold chosen for each object.
comp_dict

In [None]:
# First create a label column, set them all to 0
df['is_compression'] = 0
# Using the threshold above, create compression labels in the dataframe
for obj,val in comp_dict.items():
    df['is_compression'] = np.where((df['object'] == obj) & \
                                    (df['Absolute acceleration (m/s^2)'] > val),1,df['is_compression'])

In [None]:
# Total rows labelled as compressions across all objects
# (the target stated earlier in the notebook is 1,140).
df['is_compression'].sum()

### Export data to csv

In [None]:
# Write the labelled frame to disk without the index column.
df.to_csv(path_or_buf='data/labeled_comp.csv', index=False)

In [None]:
#df[(df['object']=='teddy_reed')&(df['Absolute acceleration (m/s^2)'] > 16.14450353764216)].shape

### 4.7285 is the threshold for all objects at which we get 1,140 compressions
- But what is the threshold for just the mattress?

### Thresholding methodology doesn't work across objects
- Mattress threshold:
- Pillow Reed:
- Teddy Reed:

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
#df[df['object'] == 'mattress'].head()