In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Plot styling shared by every figure in the notebook.
sns.set_style("darkgrid")
sns.set_context("talk")

# The plotting cells save their figures under figs/; create the directory up
# front so plt.savefig does not raise FileNotFoundError on a fresh checkout.
Path("figs").mkdir(parents=True, exist_ok=True)

In [None]:
def fig_number_gen(start: int = 0):
    """Yield consecutive integers forever, beginning at ``start``.

    The notebook uses the yielded numbers as a running prefix for the file
    names of saved figures.

    Parameters
    ----------
    start : int, default 0
        First number to yield.

    Yields
    ------
    int
        ``start``, ``start + 1``, ``start + 2``, ... without end.
    """
    num = start
    while True:
        yield num
        num += 1


# Shared counter for figure file names. Every next(fig_num) advances it, so
# re-running a plotting cell saves the figure again under a new number.
fig_num = fig_number_gen()

In [None]:
# loading time-series data (fetched over the network from the Prophet repo);
# the cells below rely on its "ds" (date) and "y" (value) columns
df = pd.read_csv(
    "https://raw.githubusercontent.com/facebook/prophet/master/examples/example_wp_log_peyton_manning.csv"
)
print(df.head())
# DataFrame.info() prints its report itself and returns None, so wrapping it in
# print() would only add a stray "None" line to the output.
df.info()

# set ds as the time index
df.index = pd.to_datetime(df["ds"])
df = df.drop(columns="ds")

print(df.head())
df.info()

In [None]:
# Plot the complete series and save it as a numbered PNG.
plt.figure(figsize=(16, 10))
# plt.plot_date is deprecated in Matplotlib; plt.plot handles the datetime
# index directly. The "y" column is selected explicitly, as in the later cells.
plt.plot(df.index, df["y"], "-", label="input")
plt.legend()
plt.ylim(top=plt.ylim()[1] * 1.1)  # stretching the top of the plot by 10%
# NOTE: the single quotes are part of the format spec, so they are rendered
# literally around each date in the title.
plt.title(
    f"full time series from {df.index[0]:'%Y-%m-%d'} to {df.index[-1]:'%Y-%m-%d'}"
)
plt.savefig(f"figs/{next(fig_num)}_full_time_series.png")
plt.show();

In [None]:
# subsetting the data
# ISO dates (YYYY-MM-DD) are unambiguous, unlike "01-01-2011" / "31-12-2014",
# which mix month-first and day-first readings.
# Label slices include both end points, so the held-out part starts on the day
# after the subset ends; otherwise 2014-12-31 would sit in both frames.
df_sub = df.loc["2011-01-01":"2014-12-31"]
df_test = df.loc["2015-01-01":]

In [None]:
# Plot only the subset and save it as a numbered PNG.
plt.figure(figsize=(16, 10))
# plt.plot replaces the deprecated plt.plot_date. Selecting "y" explicitly keeps
# the figure unchanged if this cell is re-run after df_sub has gained further
# columns (a later cell adds "month").
plt.plot(df_sub.index, df_sub["y"], "-")

plt.ylim(top=plt.ylim()[1] * 1.1)  # stretching the top of the plot by 10%
plt.title(
    f"subset of time series from {df_sub.index[0]:'%Y-%m-%d'} to {df_sub.index[-1]:'%Y-%m-%d'}"
)
plt.savefig(f"figs/{next(fig_num)}_subset_time_series.png")
plt.show();

In [None]:
# convert the datetime to floats
time_scaler = MinMaxScaler()

# shape the inputs
# Fit on the subset only: df_test is plotted later as "out-of-sample", so its
# rows must not take part in fitting the scaler or the regression.
X = time_scaler.fit_transform(df_sub.index.values.reshape(-1, 1))
# Select "y" explicitly (as an (n, 1) column) so extra columns added to df_sub
# by later cells cannot leak into the regression on a re-run.
y = df_sub[["y"]].values

# fit the lin reg
reg = LinearRegression().fit(X, y)

In [None]:
# Build one timestamp per day from the first date of the subset to the last
# date of the full series, then run those dates through the fitted time scaler
# and the fitted regression.
predict_time_range = pd.date_range(df_sub.index[0], df.index[-1], freq="D")
predict_dates_column = predict_time_range.values.reshape(-1, 1)
X_predict = time_scaler.transform(predict_dates_column)
y_predict = reg.predict(X_predict)

In [None]:
# plot the subset, the held-out data and the regression line in one figure
plt.figure(figsize=(16, 10))
# plt.plot replaces the deprecated plt.plot_date; the datetime x values are
# handled directly.
plt.plot(df_sub.index, df_sub["y"], "-", label="input time series")
plt.plot(df_test.index, df_test["y"], "-", label="out-of-sample time series")
plt.plot(predict_time_range, y_predict, "--", label="linear regression")
plt.legend()
plt.ylim(top=plt.ylim()[1] * 1.1)  # stretching the top of the plot by 10%
plt.title("simple linear regression")
plt.savefig(f"figs/{next(fig_num)}_subset_time_series_w_linreg.png")
plt.show();

In [None]:
# Show the fitted intercept and coefficient(s), one per line.
print(reg.intercept_, reg.coef_, sep="\n")

In [None]:
# Add a "month" column holding the month number of each row's index timestamp.
df_sub = df_sub.assign(month=lambda frame: frame.index.month.values)

In [None]:
# Peek at six random rows; the fixed random_state makes the displayed sample
# identical on every run of the notebook.
df_sub.sample(n=6, random_state=0)

In [None]:
# One-hot encode the month: get_dummies replaces "month" with one boolean
# month_<n> column per month value present in df_sub.
df_month = pd.get_dummies(df_sub, columns=["month"], dtype="bool")
# Fixed random_state so the printed sample is the same on every run.
print(df_month.sample(n=6, random_state=0))
# DataFrame.info() prints its report itself and returns None, so wrapping it in
# print() would only add a stray "None" line to the output.
df_month.info()

In [None]:
# plot the subset together with the monthly dummy variables, adding one month
# per step and saving a numbered snapshot of the figure after each addition
plt.figure(figsize=(16, 10))
# plt.plot replaces the deprecated plt.plot_date.
plt.plot(df_month.index, df_month["y"], "-", label="input time series")
for i in range(1, 13):
    plt.plot(df_month.index, df_month[f"month_{i}"], "-", label=f"month {i}")

    # Rebuild the legend after each new line; a single legend() call before the
    # loop would leave the month lines out of every saved figure.
    plt.legend()
    plt.title(f"dummy variable for month 1 to {i}")
    plt.savefig(f"figs/{next(fig_num)}_subset_time_series_w_month_{i}.png")
plt.show();

In [None]:
# Scaler for mapping values onto the (0, 1) range. It is only instantiated
# here; nothing is fitted or transformed in this cell.
month_scaler = MinMaxScaler(feature_range=(0, 1))


In [None]:
# Display every column of df_month except "y".
is_not_y = df_month.columns != "y"
df_month.loc[:, is_not_y]