### Import Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.linear_model import LinearRegression
from statsmodels.tsa.deterministic import DeterministicProcess

### Import Dataset

In [2]:
# Load the book-sales CSV; the "Date" column is parsed as datetimes and
# becomes the frame's index.
df = pd.read_csv(
    "../input/book-sales/book_sales.csv",
    parse_dates=["Date"],
    index_col="Date",
)
df.head(n=5)

**Create Days Column**

In [3]:
# Time dummy: one consecutive integer per row, starting at 0.
df["Days"] = np.arange(df.shape[0])
df.head(n=5)

**Regression Plot of Hardcover Book Sales Data**

In [4]:
# Scatter of "Hardcover" against the "Days" counter with a fitted regression
# line; ci=None turns off the confidence band around the line.
fig, ax = plt.subplots(figsize=(18, 7))
sns.regplot(
    data=df,
    x="Days",
    y="Hardcover",
    ci=None,
    color="red",
    scatter_kws={"color": "blue"},
    ax=ax,
)

ax.set_title("Hardcover Book Sales by Days", size=20, color="blue")
ax.set_xlabel("Days", size=20, color="blue")
ax.set_ylabel("Hardcover", size=20, color="blue")
plt.xticks(size=15)
plt.yticks(size=15)
plt.show()

**Last Day Sales Column (Shift)**

In [5]:
# Lag-1 feature: every row receives the previous row's "Hardcover" value.
# The first row has no predecessor, so its lag is NaN.
df["Last Day Hardcover Sale"] = df["Hardcover"].shift(periods=1)
df.head(n=5)

**Lag Plot of Hardcover Sales**

In [6]:
# Lag plot: the previous day's sales (the lag feature) go on the x-axis and
# the current day's sales (the target) on the y-axis, matching the
# orientation used for the tunnel-traffic lag plot later in the notebook.
# The title previously read "Hardcover Book Sales by Days", copied from the
# time plot; it now describes this figure.
# Rows whose lag is NaN are skipped by regplot's default dropna=True.
fig, ax = plt.subplots(figsize=(18, 7))
sns.regplot(
    x="Last Day Hardcover Sale",
    y="Hardcover",
    data=df,
    ci=None,  # no confidence band around the fitted line
    color="red",
    scatter_kws=dict(color="blue"),
    ax=ax,
)
ax.set_title("Lag Plot of Hardcover Book Sales", size=20, color="blue")
ax.set_xlabel("Last Day Hardcover Sale", size=20, color="blue")
ax.set_ylabel("Hardcover", size=20, color="blue")
plt.xticks(size=15)
plt.yticks(size=15)
plt.show()

### Tunnel Traffic

In [7]:
# Load the tunnel-traffic CSV; the "Day" column is parsed as datetimes and
# becomes the frame's index.
tt = pd.read_csv(
    "../input/tunnel/tunnel.csv",
    parse_dates=["Day"],
    index_col="Day",
)
tt.head(n=5)

In [8]:
# Time dummy for the tunnel data: one consecutive integer per row, from 0.
tt["Days"] = np.arange(tt.shape[0])
tt.head(n=5)

**Linear Regression**

In [9]:
# Fit a straight-line trend: NumVehicles as a linear function of the day counter.
X = tt[["Days"]]  # double brackets keep X two-dimensional, as the estimator expects
y = tt["NumVehicles"]

model = LinearRegression()
model.fit(X, y)

# In-sample fitted values, one per row of tt.
pred = model.predict(X)

# Spot-check ten rows of actual vs. fitted values. The seed is fixed so that
# the same rows are shown on every Restart & Run All.
pd.DataFrame({"Actual": y, "Predicted": pred}).sample(10, random_state=0)

In [10]:
# Observed traffic (blue points) with the fitted linear trend `pred`
# (red line) drawn over the same "Days" axis.
fig, ax = plt.subplots(figsize=(18, 7))
sns.scatterplot(data=tt, x="Days", y="NumVehicles", s=70, color="blue", ax=ax)
sns.lineplot(data=tt, x="Days", y=pred, linewidth=6, color="red", ax=ax)

ax.set_title(" Tunnel Traffic by Days", size=20, color="blue")
ax.set_xlabel("Days", size=20, color="blue")
ax.set_ylabel(" Tunnel Traffic", size=20, color="blue")
plt.xticks(size=15)
plt.yticks(size=15)
plt.show()

**Lag Feature**

In [11]:
# Lag-1 feature: the previous row's NumVehicles. The first row gets NaN.
tt["Lag"] = tt["NumVehicles"].shift(periods=1)
tt.head(n=5)

In [12]:
# Remove every row that has a missing value in any column (this includes the
# first row, whose "Lag" is NaN). Note that `tt` itself is rebound, so all
# later cells see the shortened frame.
tt = tt.dropna(axis=0, how="any")
tt.head(n=3)

In [13]:
# Regress today's traffic on yesterday's: NumVehicles as a linear function of Lag.
X = tt[["Lag"]]  # double brackets keep X two-dimensional, as the estimator expects
y = tt["NumVehicles"]

model = LinearRegression()
model.fit(X, y)

# In-sample fitted values, one per row of tt.
pred = model.predict(X)

# Spot-check ten rows of actual vs. fitted values. The seed is fixed so that
# the same rows are shown on every Restart & Run All.
pd.DataFrame({"Actual": y, "Predicted": pred}).sample(10, random_state=0)

In [14]:
# Lag plot for the tunnel data: observed NumVehicles against its lag (blue
# points) with the lag-regression fit `pred` (red line).
fig, ax = plt.subplots(figsize=(18, 7))
sns.scatterplot(data=tt, x="Lag", y="NumVehicles", s=70, color="blue", ax=ax)
sns.lineplot(data=tt, x="Lag", y=pred, linewidth=6, color="red", ax=ax)

ax.set_title(" Tunnel Traffic by Lags", size=20, color="blue")
ax.set_xlabel("Lag", size=20, color="blue")
ax.set_ylabel(" Tunnel Traffic", size=20, color="blue")
plt.xticks(size=15)
plt.yticks(size=15)
plt.show()

**Moving Average**

In [15]:
# Centered 365-row rolling mean of NumVehicles. min_periods=183 lets the
# windows near both ends, which hold fewer than 365 rows, still yield a value.
moving_average = (
    tt["NumVehicles"]
    .rolling(window=365, min_periods=183, center=True)
    .mean()
)

# Raw observations (blue points) with the moving average (red line) on top,
# both drawn against the frame's index.
fig, ax = plt.subplots(figsize=(18, 7))
sns.scatterplot(data=tt, x=tt.index, y="NumVehicles", color="blue", s=100, ax=ax)
sns.lineplot(x=tt.index, y=moving_average, linewidth=6, color="red", ax=ax)

ax.set_title("Tunnel Traffic - 365-Day Moving Average", size=20, color="blue")
ax.set_xlabel("Days", size=20, color="blue")
ax.set_ylabel("Tunnel Traffic", size=20, color="blue")
plt.xticks(size=15)
plt.yticks(size=15)
plt.show()

**Forecasting for the Next 60 Days**

In [16]:
# Fit a linear trend on the day counter, then extrapolate it beyond the end
# of the observed data.
X = tt[["Days"]]
y = tt["NumVehicles"]

model = LinearRegression()
model.fit(X, y)

# Number of future days to forecast.
FORECAST_HORIZON = 60

# Start the forecast right after the last observed row. These values were
# previously hard-coded as day 747 and '2005-11-17', which silently breaks if
# the data gains or loses rows.
# NOTE(review): assumes tt has a daily DatetimeIndex without gaps and that
# "Days" increases by 1 per row — confirm against the dataset.
first_fore_day = int(tt["Days"].iloc[-1]) + 1
first_fore_date = tt.index[-1] + pd.Timedelta(days=1)

fore_X = pd.DataFrame(
    {"Days": np.arange(first_fore_day, first_fore_day + FORECAST_HORIZON)},
    index=pd.date_range(first_fore_date, periods=FORECAST_HORIZON, freq="D"),
)

# In-sample fitted values and out-of-sample forecast from the same model.
pred_y = model.predict(X)
fore_y = model.predict(fore_X)

In [17]:
# Observed traffic (blue points), in-sample linear trend `pred_y` (red line)
# and the out-of-sample forecast `fore_y` (black line).
# The title previously read "365-Day Moving Average", copied from the
# moving-average figure; this plot shows the linear trend fit and forecast.
fig, ax = plt.subplots(figsize=(18, 7))
sns.scatterplot(x=tt.index, y="NumVehicles", data=tt, s=100, color="blue", ax=ax)
sns.lineplot(x=tt.index, y=pred_y, color="red", linewidth=6, ax=ax)
sns.lineplot(x=fore_X.index, y=fore_y, color="black", linewidth=6, ax=ax)

ax.set_title("Tunnel Traffic - Linear Trend Forecast", size=20, color="blue")
ax.set_xlabel("Days", size=20, color="blue")
ax.set_ylabel("Tunnel Traffic", size=20, color="blue")
plt.xticks(size=15)
plt.yticks(size=15)
plt.show()

### US Retail Sales

In [18]:
# Load the US retail-sales CSV with default parsing. Note that this rebinds
# `df`, which previously held the book-sales frame.
df = pd.read_csv(filepath_or_buffer="../input/us-retail-sales/us-retail-sales.csv")
df.head(n=5)

In [19]:
# Parse "Month" into datetimes and use it as the index. drop=False keeps the
# "Month" column in the frame as well, so it is still present afterwards.
df = (
    df.assign(Month=pd.to_datetime(df["Month"]))
      .set_index("Month", drop=False)
)
df.head(n=5)

In [20]:
# Remove the "Month" column now that the same values live in the index.
# errors="ignore" makes the cell safe to re-run: `del df["Month"]` raised
# KeyError on a second execution because the column was already gone.
df = df.drop(columns="Month", errors="ignore")

**US Foods and Beverages Sales**

In [21]:
# Scatter of the "FoodAndBeverage" series against the frame's index.
# Fixes the y-axis label typo ("Milions" -> "Millions").
fig, ax = plt.subplots(figsize=(18, 7))
sns.scatterplot(x=df.index, y="FoodAndBeverage", data=df, s=100, color="blue", ax=ax)

ax.set_title("US Foods and Beverages Sales", size=20, color="red", weight="bold")
ax.set_xlabel("Months", size=20, color="red", weight="bold")
ax.set_ylabel("Millions of Dollars", size=20, color="red", weight="bold")
plt.xticks(size=15)
plt.yticks(size=15)
plt.show()

**Moving Average**

In [23]:
# Centered 12-row rolling mean of "FoodAndBeverage". min_periods=6 lets the
# shorter windows at both ends of the series still yield a value.
trend = df["FoodAndBeverage"].rolling(
    window=12,
    center=True,
    min_periods=6,
).mean()

# Raw observations (blue points) with the moving-average trend (red line).
# Fixes the y-axis label typo ("Milions" -> "Millions").
fig, ax = plt.subplots(figsize=(18, 7))
sns.scatterplot(x=df.index, y="FoodAndBeverage", data=df, s=100, color="blue", ax=ax)
sns.lineplot(x=trend.index, y=trend, linewidth=6, color="red", ax=ax)

ax.set_title("US Foods and Beverages Sales by Moving Average", size=20, color="red", weight="bold")
ax.set_xlabel("Months", size=20, color="red", weight="bold")
ax.set_ylabel("Millions of Dollars", size=20, color="red", weight="bold")
plt.xticks(size=15)
plt.yticks(size=15)
plt.show()