# Time Series and Gaps
What do we do when we have missing data in a time series?

In [None]:
import pandas as pd
# Read the whitespace-delimited table; "*" marks a missing value in the file.
# sep=r"\s+" is the supported spelling of the deprecated delim_whitespace=True.
df = pd.read_csv("datasets/ewcitmeas.txt", sep=r"\s+", dtype=float, na_values="*")

# pd.to_datetime can assemble dates from columns named year / month / day,
# so rename the file's DD / MM / YY columns accordingly (no in-place mutation).
df = df.rename(columns={"DD": "day", "MM": "month", "YY": "year"})

# The file stores years as offsets from 1900.
df["year"] = df["year"] + 1900

# Use the assembled date as the index and drop the now-redundant date parts.
date_columns = ["year", "month", "day"]
df = df.set_index(pd.to_datetime(df[date_columns])).drop(date_columns, axis="columns")
df.head()

In [None]:
%matplotlib inline
import matplotlib as mpl
# Default size for every figure in this notebook, as (width, height).
mpl.rcParams["figure.figsize"] = (12, 8)

In [None]:
# Draw the full London series as individual points (no connecting line).
london = df["London"]
london.plot(style=".")

In [None]:
# Should we explore plotting?

In [None]:
# Let's make some holes! Keep only 500 randomly chosen observations.
# A fixed random_state makes the sample (and every plot below) reproducible
# across "Restart & Run All"; sort_index() restores chronological order,
# which sample() does not preserve, so head() shows the earliest points.
df2 = df["London"].sample(500, random_state=0).sort_index()
df2.plot(style=".")
display(df2.head())

In [None]:
# How do we fill the holes?

In [None]:
# We could resample to a daily frequency and forward-fill the gaps:
# every missing day takes the value of the most recent observed day.
# Resampler.fillna(method="ffill") is deprecated; .ffill() is the equivalent.
df_ffill = df2.resample("D").ffill()
df_ffill.plot(style=".")

In [None]:
# What if we applied a rolling window?
import numpy as np
# Put the sample on a daily grid (missing days become NaN), then take the
# NaN-ignoring mean over a 10-row window; one valid value is enough.
daily = df2.resample("D").asfreq()
windowed_mean = daily.rolling(10, min_periods=1).apply(np.nanmean)
windowed_mean.dropna().plot(style=".")

In [None]:
# Maybe we could write our own rolling window?
def rock(ary):
    """Return the right-most non-NaN value in a window.

    Intended as a callback for ``Rolling.apply``: it carries the most recent
    valid observation forward across the window.

    Parameters
    ----------
    ary : array-like of float
        The window contents (a NumPy array, a pandas Series, or a list).

    Returns
    -------
    float
        The last element that is not NaN, or NaN if the window is empty or
        contains only NaN.
    """
    # Convert once so positional access works for Series and lists alike.
    values = np.asarray(ary, dtype=float)
    # NaN never compares equal to anything (``x != np.nan`` is always True),
    # so missing values must be detected with np.isnan, not a comparison.
    valid = values[~np.isnan(values)]
    return valid[-1] if valid.size else np.nan

# Apply the custom window function over a 50-row window on the daily grid.
# raw=True hands the callback a plain NumPy array, so the positional
# indexing inside rock() does not rely on integer lookups into a
# datetime-indexed Series; it is also faster.
daily = df2.resample("D").asfreq()
daily.rolling(50, min_periods=1).apply(rock, raw=True).dropna().plot(style=".")

In [None]:
# Fill the daily gaps with pandas' "time" interpolation method.
filled_time = df2.resample("D").interpolate(method="time")
filled_time.plot(style=".")

In [None]:
# Same daily grid, filled with the "linear" interpolation method.
filled_linear = df2.resample("D").interpolate(method="linear")
filled_linear.plot(style=".")

In [None]:
# Same daily grid, filled with a polynomial of order 3.
filled_poly = df2.resample("D").interpolate(method="polynomial", order=3)
filled_poly.plot(style=".")

In [None]:
# What does interpolate work on? Let's try it on a second dataset.
import pandas as pd
import numpy as np
# Load the run data with default CSV parsing and preview the first rows.
# Note: this rebinds `df`, replacing the frame loaded earlier in the notebook.
df = pd.read_csv(filepath_or_buffer="datasets/run.csv")
df.head()

In [None]:
# Cut a hole in the data: keep rows 0-9 and 20-99, dropping rows 10-19.
# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
# .iloc makes it explicit that the slices are positional.
df = pd.concat([df.iloc[0:10], df.iloc[20:100]])

# Index by the parsed timestamps so the frame can be resampled by time.
df = df.set_index(pd.to_datetime(df["timestamp"]))
df.head(20)

In [None]:
# Plot the heart-rate column as points; the removed rows show up as a gap.
heart_rate = df["heart_rate"]
heart_rate.plot(style=".")

In [None]:
# Resample to one row per second, fill the new rows with the "time"
# interpolation method, and preview the first 20 rows.
per_second = df.resample("1s")
per_second.interpolate(method="time").head(20)

In [None]:
# Plot the heart rate after per-second resampling and "time" interpolation.
interpolated = df.resample("1s").interpolate(method="time")
interpolated["heart_rate"].plot(style=".")

In [None]:
import matplotlib.pyplot as plt
# Compare three gap-filling strategies on the per-second grid, one figure
# each. Resampler.fillna(method=...) is deprecated; the dedicated .ffill()
# and .nearest() methods are the direct equivalents.
per_second = df.resample("1s")

# Titles let the three otherwise similar figures be told apart when skimming.
per_second.interpolate(method="time")["heart_rate"].plot(
    style="b.", title="heart_rate: time interpolation"
)
plt.figure()  # new figure so the next plot does not draw over the previous one
per_second.ffill()["heart_rate"].plot(style="r.", title="heart_rate: forward fill")
plt.figure()
per_second.nearest()["heart_rate"].plot(style="g.", title="heart_rate: nearest value")