In [None]:
import pandas as pd
from os.path import join
import os
import numpy as np
import matplotlib.pyplot as plt

import plotly
import plotly.graph_objects as go

In [None]:
# Load the seven accelerometer section recordings (files numbered 1-7) in one
# pass. The generator yields the frames in file order, so each name below is
# bound to the section with the matching number.
(
    data_sec_1,
    data_sec_2,
    data_sec_3,
    data_sec_4,
    data_sec_5,
    data_sec_6,
    data_sec_7,
) = (
    pd.read_csv(f"./data/accelerometer_data_section_{section}.csv")
    for section in range(1, 8)
)

# Vehicle records; later cells read its 'Timestamp' and 'Direction' columns.
vehicle_timestamps = pd.read_csv("./data/vehicle_timestamps.csv")

In [None]:
## Parse the 'Timestamp' column of every frame to datetimes.
## The parsed values overwrite the existing column (no new column is added).
for frame in (
    data_sec_1,
    data_sec_2,
    data_sec_3,
    data_sec_4,
    data_sec_5,
    data_sec_6,
    data_sec_7,
    vehicle_timestamps,
):
    # Column assignment mutates the frame the tuple refers to, so the
    # module-level variables see the converted column.
    frame['Timestamp'] = pd.to_datetime(frame['Timestamp'], infer_datetime_format=True)


In [None]:
# Stack the seven section frames into one. DataFrame.append was removed in
# pandas 2.0; pd.concat is the replacement and, like append, keeps each
# frame's original row index (ignore_index is left at its default).
data = pd.concat([
    data_sec_1,
    data_sec_2,
    data_sec_3,
    data_sec_4,
    data_sec_5,
    data_sec_6,
    data_sec_7,
])
# The section frames already carry parsed timestamps from the conversion cell
# above, so this is only a safeguard that the combined column is datetime.
# The deprecated infer_datetime_format flag is not needed for that.
data['Timestamp'] = pd.to_datetime(data['Timestamp'])

In [None]:
## One hot encode direction:
# get_dummies replaces 'Direction' with one indicator column per value
# (the cells below read 'Direction_Clifton' and 'Direction_Leigh_Woods').
# Because the result is assigned back to the same name, the guard lets this
# cell be re-run without a KeyError on the already-removed column.
# dtype is pinned to uint8 so the indicators are numeric 0/1 on every pandas
# version (pandas >= 2.0 would otherwise produce booleans).
if 'Direction' in vehicle_timestamps.columns:
    vehicle_timestamps = pd.get_dummies(
        vehicle_timestamps,
        columns=['Direction'],
        dtype=np.uint8,
    )

In [None]:
# Keep only the rows whose 'Direction_Clifton' indicator equals 1.
clifton_timestamps = vehicle_timestamps.loc[
    lambda frame: frame['Direction_Clifton'].eq(1)
]

In [None]:
# Keep only the rows whose 'Direction_Leigh_Woods' indicator equals 1.
leigh_woods_timestamps = vehicle_timestamps.loc[
    lambda frame: frame['Direction_Leigh_Woods'].eq(1)
]

In [None]:
# vehicle_timestamps_group = vehicle_timestamps.groupby('Timestamp').agg({
#     'Direction': '<br>'.join,
#     'Timestamp':'sum'
# })

# data_group = data.groupby('Timestamp').agg({
#     'Acceleration': '<br>'.join,
#     'Timestamp':'sum'
# })

In [None]:
# time_index = pd.date_range(data['Timestamp'].min(), data['Timestamp'].max())
# # print(time_index)

# vehicle_timestamps_hourly = pd.DataFrame(vehicle_timestamps['Direction'])
# vehicle_timestamps_hourly = vehicle_timestamps_hourly.reindex(time_index, fill_value=np.nan)
# vehicle_timestamps_hourly = vehicle_timestamps_hourly.fillna(method='ffill')

# # df_monthly = \
# # pd.DataFrame(data['Timestamp'].resample(rule='1M').mean())

In [None]:
# # Generate dataset
from scipy.interpolate import interp1d
from sklearn.metrics import mean_squared_error
# Reference series: the first 100 rows of the a10 dataset, indexed by date.
df_orig = pd.read_csv('https://raw.githubusercontent.com/selva86/datasets/master/a10.csv', parse_dates=['date'], index_col='date').head(100)
# Series to impute, read from a local file.
# NOTE(review): presumably the same rows with some values blanked out; the
# MSE calls below require it to align row-for-row with df_orig -- confirm.
df = pd.read_csv('datasets/a10_missings.csv', parse_dates=['date'], index_col='date')

# One panel per method on a shared date axis. Panels 0-4 are drawn here;
# axes[5] and axes[6] are filled by the KNN and seasonal code further down.
fig, axes = plt.subplots(7, 1, sharex=True, figsize=(10, 12))
plt.rcParams.update({'xtick.bottom' : False})

## 1. Actual -------------------------------
# The reference is drawn first (red) and the gappy series on top (green);
# the legend call then relabels the two lines in that drawing order.
df_orig.plot(title='Actual', ax=axes[0], label='Actual', color='red', style=".-")
df.plot(title='Actual', ax=axes[0], label='Actual', color='green', style=".-")
axes[0].legend(["Missing Data", "Available Data"])

## 2. Forward Fill --------------------------
df_ffill = df.ffill()
error = np.round(mean_squared_error(df_orig['value'], df_ffill['value']), 2)
df_ffill['value'].plot(title=f"Forward Fill (MSE: {error})", ax=axes[1], label='Forward Fill', style=".-")

## 3. Backward Fill -------------------------
df_bfill = df.bfill()
error = np.round(mean_squared_error(df_orig['value'], df_bfill['value']), 2)
df_bfill['value'].plot(title=f"Backward Fill (MSE: {error})", ax=axes[2], label='Back Fill', color='firebrick', style=".-")

## 4. Linear Interpolation ------------------
# Interpolate over the row position rather than the date index: fit on the
# rows that have a value, then evaluate at every row position.
df['rownum'] = np.arange(df.shape[0])
df_nona = df.dropna(subset = ['value'])
f = interp1d(df_nona['rownum'], df_nona['value'])
df['linear_fill'] = f(df['rownum'])
error = np.round(mean_squared_error(df_orig['value'], df['linear_fill']), 2)
# Label corrected: this series was previously labelled 'Cubic Fill'.
df['linear_fill'].plot(title=f"Linear Fill (MSE: {error})", ax=axes[3], label='Linear Fill', color='brown', style=".-")

## 5. Cubic Interpolation --------------------
f2 = interp1d(df_nona['rownum'], df_nona['value'], kind='cubic')
df['cubic_fill'] = f2(df['rownum'])
error = np.round(mean_squared_error(df_orig['value'], df['cubic_fill']), 2)
df['cubic_fill'].plot(title=f"Cubic Fill (MSE: {error})", ax=axes[4], label='Cubic Fill', color='red', style=".-")

# Interpolation References:
# https://docs.scipy.org/doc/scipy/reference/tutorial/interpolate.html
# https://docs.scipy.org/doc/scipy/reference/interpolate.html

## 6. Mean of Nearest Neighbors (both sides) ------
def knn_mean(ts, n):
    """
    Fill every NaN in `ts` with the mean of the non-NaN values around it.

    ts: 1D array-like of the time series
    n: Nominal neighbourhood size. For a NaN at position i the window is
       ts[i - ceil(n/2) : i + ceil(n/2)], clipped to the ends of the series:
       ceil(n/2) values before i and ceil(n/2) - 1 values after it.

    Returns a NumPy copy of `ts` with the NaNs replaced; `ts` itself is not
    modified. Means are taken from the original values only, never from
    already-filled ones. A NaN whose whole window is NaN stays NaN.
    """
    values = np.asarray(ts, dtype=float)
    out = np.copy(ts)
    half_window = int(np.ceil(n / 2))  # loop-invariant, so computed once
    for i in np.flatnonzero(np.isnan(values)):
        lower = max(0, i - half_window)
        upper = min(len(values), i + half_window)
        window = values[lower:upper]
        # Skip windows with no usable value: np.nanmean would return NaN
        # anyway, but only after emitting a RuntimeWarning.
        if not np.isnan(window).all():
            out[i] = np.nanmean(window)
    return out

# Fill the gaps with the neighbourhood mean (n=8), score against the
# reference series, and draw the result in the sixth panel.
df['knn_mean'] = knn_mean(df['value'].to_numpy(), 8)
error = np.round(
    mean_squared_error(df_orig['value'], df['knn_mean']),
    decimals=2,
)
df['knn_mean'].plot(
    ax=axes[5],
    title="KNN Mean (MSE: " + str(error) + ")",
    label='KNN Mean',
    color='tomato',
    alpha=0.5,
    style=".-",
)

## 7. Seasonal Mean ----------------------------
def seasonal_mean(ts, n, lr=0.7):
    """
    Fill every NaN in `ts` with a scaled mean of values spaced `n` apart.

    ts: 1D array-like of the time series
    n: Seasonal window length of the time series (step between samples)
    lr: Multiplier applied to the mean before it is written

    For a NaN at position i the mean is taken over positions i-1, i-1-n,
    i-1-2n, ... (earlier values only). If none of those holds a value, the
    forward positions i+n, i+2n, ... are used instead. A NaN with no usable
    value in either direction stays NaN.

    Returns a NumPy copy of `ts` with the NaNs replaced; `ts` itself is not
    modified.

    NOTE(review): the backward samples start at i-1 while the forward ones
    are in phase with i -- confirm the one-step offset is intended.
    """
    values = np.asarray(ts, dtype=float)
    out = np.copy(ts)
    for i in np.flatnonzero(np.isnan(values)):
        # Position 0 has no earlier samples. Without this guard the slice
        # would start at index -1 and wrap around to the END of the series.
        seasonal = values[i - 1::-n] if i > 0 else values[:0]
        if np.isnan(seasonal).all():  # also true for an empty slice
            seasonal = values[i::n]  # fall back to forward seasons
        # Skip when nothing usable exists: np.nanmean would give NaN anyway,
        # but only after emitting a RuntimeWarning.
        if not np.isnan(seasonal).all():
            out[i] = np.nanmean(seasonal) * lr
    return out

# Fill the gaps with the seasonal mean (window 12, scaled by 1.25), score
# against the reference series, and draw the result in the last panel.
df['seasonal_mean'] = seasonal_mean(df['value'], n=12, lr=1.25)
error = np.round(
    mean_squared_error(df_orig['value'], df['seasonal_mean']),
    decimals=2,
)
df['seasonal_mean'].plot(
    ax=axes[6],
    title="Seasonal Mean (MSE: " + str(error) + ")",
    label='Seasonal Mean',
    color='blue',
    alpha=0.5,
    style=".-",
)

In [None]:
# Plot data: the combined acceleration signal as a line, with one marker
# trace per vehicle direction drawn on the same axes.
#
# NOTE(review): each marker trace takes its y-values from the *other*
# direction's one-hot column (Clifton rows use 'Direction_Leigh_Woods' and
# vice versa), so presumably every marker sits at y = 0. Confirm this is
# intended as a baseline placement rather than a column mix-up.
fig = go.Figure(data=[
    go.Scatter(
        name='Acceleration',
        mode='lines',
        x=data['Timestamp'],
        y=data['Acceleration'],
    ),
    go.Scatter(
        name='Clifton Direction',
        mode='markers',
        x=clifton_timestamps['Timestamp'],
        y=clifton_timestamps['Direction_Leigh_Woods'],
    ),
    go.Scatter(
        name='Leigh Woods Direction',
        mode='markers',
        x=leigh_woods_timestamps['Timestamp'],
        y=leigh_woods_timestamps['Direction_Clifton'],
    ),
])

# The initial x-range spans the Clifton timestamps only.
fig.update_xaxes(
    range=[
        clifton_timestamps['Timestamp'].min(),
        clifton_timestamps['Timestamp'].max(),
    ]
)

fig.show()

In [None]:
# Plot subplot data

from plotly.subplots import make_subplots

# One subplot row per accelerometer section, stacked in section order.
section_frames = [
    data_sec_1,
    data_sec_2,
    data_sec_3,
    data_sec_4,
    data_sec_5,
    data_sec_6,
    data_sec_7,
]

fig = make_subplots(rows=len(section_frames), cols=1)

# add_trace(row=, col=) replaces the deprecated append_trace; subplot rows
# are 1-based, hence start=1.
for row, section in enumerate(section_frames, start=1):
    fig.add_trace(
        go.Scatter(
            x=section['Timestamp'],
            y=section['Acceleration'],
        ),
        row=row,
        col=1,
    )

fig.update_layout(height=1080, width=1920, title_text="Stacked Subplots")
fig.show()