# Get data from only the time when the surfer is in the water
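Use the temperature readings from the ocean dataframe to trim the motion and ocean data recorded before the surfer enters the water and after the surfer leaves it, i.e. the leading and trailing readings whose temperature is more than half a standard deviation away from the median temperature.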

## import ride data

In [1]:
from smartfin_ride_api_v2 import Ride

# Fetch the data recorded for one Smartfin ride, identified by its ride id.
ride_id = '12181'
ride = Ride()
data = ride.get_ride_data(ride_id)

ride initialized
fetching ride from: https://surf.smartfin.org/ride/12181
fetching ocean data from: https://surf.smartfin.org/media/201710/google_115648017259601508520_000666D32296_171016174700_Ocean.CSV
fetching motion data from: https://surf.smartfin.org/media/201710/google_115648017259601508520_000666D32296_171016174700_Motion.CSV


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  odf['Time'] = [time / 1000 for time in odf['Time']]


IndexError: index 0 is out of bounds for axis 0 with size 0

In [None]:
# Pull the motion and ocean dataframes out of the fetched ride data.
mdf, odf = data['motion_data'], data['ocean_data']

In [None]:
# Notebook display: show the motion dataframe as this cell's output.
mdf

In [None]:
# Notebook display: show the ocean dataframe as this cell's output.
odf

## plot imu and temperature values
As you can see here, some readings were obviously recorded while the surfer was out of the water, which gives us bad temperature and IMU values.

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
# One figure per signal, each plotted against its own dataframe's time column:
# first the calibrated temperature (ocean data), then IMU A2 (motion data).
for frame, column in ((odf, 'Calibrated Temperature 1'), (mdf, 'IMU A2')):
    plt.plot(frame['Time'], frame[column])
    plt.show()

## Remove readings before entrance in the water

In [None]:
# remove readings from ocean and motion dataframes where surfer is on land before entering the water
def remove_before_entrance(mdf, odf, threshold, med):
    """Trim both dataframes to start where the surfer enters the water.

    The entrance row is located in the ocean dataframe from its temperature
    readings; the motion dataframe is then cut at the row whose 'Time' is
    closest to the entrance time.

    Args:
        mdf: motion dataframe; must contain a 'Time' column.
        odf: ocean dataframe; must contain 'Time' and
            'Calibrated Temperature 1' columns.
        threshold: maximum distance from ``med`` for a temperature reading to
            count as an in-water reading.
        med: reference temperature the readings are compared against.

    Returns:
        Tuple ``(mdf, odf)`` holding only the rows from the entrance onwards.
    """
    # get temperature series
    temps = odf['Calibrated Temperature 1']
    enter_index = get_water_entrance_index(temps, threshold, med)

    # get the time where the surfer enters the water in the ocean dataframe
    start_time = odf.iloc[enter_index]['Time']
    trimmed_odf = odf.iloc[enter_index:]

    # Without motion rows there is nothing to align; avoid failing on the
    # nearest-time lookup below.
    if mdf.empty:
        return mdf, trimmed_odf

    # Label of the motion row closest in time to the entrance. idxmin() is a
    # single pass and picks the first nearest row on ties, whereas sorting
    # the whole column only to take its first element is O(n log n).
    start_label = (mdf['Time'] - start_time).abs().idxmin()
    return mdf.loc[start_label:], trimmed_odf
   

# calculate the index in ocean dataframe that the surfer enters the water
def get_water_entrance_index(temps, threshold, med, min_consecutive=10):
    """Return the position in ``temps`` at which the surfer is in the water.

    The surfer is assumed to be in the water once more than
    ``min_consecutive`` readings in a row lie strictly within ``threshold``
    of ``med``. The first in-range reading that directly follows an
    out-of-range reading is not counted towards the run, so a single
    in-range blip after a spike does not start the count.

    Args:
        temps: temperature readings; anything exposing ``.items()`` that
            yields ``(label, reading)`` pairs, e.g. a pandas Series.
        threshold: maximum distance from ``med`` for a reading to be
            considered in range.
        med: reference temperature the readings are compared against.
        min_consecutive: run length that has to be exceeded (default 10).

    Returns:
        Zero-based position of the reading that completes the run.

    Raises:
        ValueError: if no sufficiently long run of in-range readings exists.
            (Previously this case fell through to an undefined name and
            surfaced as a NameError.)
    """
    previous_outside = False
    run_length = 0

    for position, (_, reading) in enumerate(temps.items()):
        if abs(reading - med) < threshold:
            # Skip counting the reading that immediately follows an
            # out-of-range one; the run starts with the next reading.
            if not previous_outside:
                run_length += 1

            # enough consecutive in-range readings: the surfer is in the water
            if run_length > min_consecutive:
                return position

            previous_outside = False
        else:
            previous_outside = True
            run_length = 0

    raise ValueError(
        f"no run of more than {min_consecutive} consecutive temperature "
        f"readings within {threshold} of {med} was found"
    )

## remove entries after the surfer exits the water

In [None]:
# remove readings from ocean and motion dataframes where surfer is on land after exiting the water
def remove_after_exit(mdf, odf, threshold, med):
    """Trim both dataframes to end where the surfer exits the water.

    The exit row is located in the ocean dataframe from its temperature
    readings; the motion dataframe is then cut at the row whose 'Time' is
    closest to the exit time.

    Args:
        mdf: motion dataframe; must contain a 'Time' column.
        odf: ocean dataframe; must contain 'Time' and
            'Calibrated Temperature 1' columns.
        threshold: maximum distance from ``med`` for a temperature reading to
            count as an in-water reading.
        med: reference temperature the readings are compared against.

    Returns:
        Tuple ``(mdf, odf)``. The ocean dataframe excludes the exit row; the
        motion dataframe keeps rows up to and including the row nearest to
        the exit time. When no exit is detected, both are returned untrimmed.
    """
    # get the temperature series
    temps = odf['Calibrated Temperature 1']

    # get the index where surfer exits the water
    exit_index = get_water_exit_index(temps, threshold, med)

    # The recording ends with in-water readings (or is empty): there is no
    # on-land tail, so keep everything instead of discarding the whole ride.
    if exit_index >= len(odf):
        return mdf, odf

    # get the time where the surfer exits the water in the ocean dataframe
    end_time = odf.iloc[exit_index]['Time']
    trimmed_odf = odf.head(exit_index)

    # Without motion rows there is nothing to align; avoid failing on the
    # nearest-time lookup below.
    if mdf.empty:
        return mdf, trimmed_odf

    # Label of the motion row closest in time to the exit (first one on ties).
    end_label = (mdf['Time'] - end_time).abs().idxmin()
    return mdf.loc[:end_label], trimmed_odf


# calculate the index in ocean dataframe at which the surfer exits the water
def get_water_exit_index(temps, threshold, med):
    """Return the position where the trailing out-of-range run begins.

    A reading is out of range when it differs from ``med`` by more than
    ``threshold``. The exit is the start of the last unbroken run of
    out-of-range readings that extends to the end of ``temps``.

    Args:
        temps: temperature readings; anything exposing ``.items()`` and
            ``len()``, e.g. a pandas Series.
        threshold: maximum distance from ``med`` for a reading to be
            considered in range.
        med: reference temperature the readings are compared against.

    Returns:
        Zero-based position of the first reading of the trailing
        out-of-range run, or ``len(temps)`` when the series is empty or its
        last reading is in range (no exit observed). Returning 0 for that
        case, as before, made the caller drop every row.
    """
    no_exit = len(temps)
    exit_position = no_exit
    was_outside = False

    for position, (_, reading) in enumerate(temps.items()):
        is_outside = abs(reading - med) > threshold
        if not is_outside:
            # back in range: any exit candidate seen so far is void
            exit_position = no_exit
        elif not was_outside:
            # transition from in range to out of range: new exit candidate
            exit_position = position
        was_outside = is_outside

    return exit_position

## putting it all together

In [None]:
def get_water_data(mdf, odf):
    """Return the motion and ocean dataframes trimmed to the in-water part.

    Readings count as in-water when the calibrated temperature is within
    half a standard deviation of the median temperature of ``odf``. Rows
    before the detected entrance are removed first, then rows after the
    detected exit.
    """
    temperature = odf['Calibrated Temperature 1']
    median_temp = temperature.median()
    half_std = temperature.std() / 2

    entered = remove_before_entrance(mdf, odf, half_std, median_temp)
    return remove_after_exit(*entered, half_std, median_temp)


# Re-read the untrimmed dataframes from the ride and keep only the in-water rows.
mdf, odf = get_water_data(data['motion_data'], data['ocean_data'])

In [None]:
# Notebook display: show the trimmed motion dataframe as this cell's output.
mdf

In [None]:
# Notebook display: show the trimmed ocean dataframe as this cell's output.
odf

## plots
As you can see, the sections of data before and after the ride, where the temperature and IMU values were way off, are pretty much all gone now.

In [None]:
# Same two figures as before, now drawn from the trimmed dataframes:
# calibrated temperature first, then IMU A2, each against its own time column.
for frame, column in ((odf, 'Calibrated Temperature 1'), (mdf, 'IMU A2')):
    plt.plot(frame['Time'], frame[column])
    plt.show()