# Time Series Machine Learning Part 1 Assignment

In [1]:
%reload_ext nb_black
import numpy as np
import pandas as pd
import plotly.express as px
from sklearn.ensemble import AdaBoostRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error

<IPython.core.display.Javascript object>

In [20]:
def iscatter(df, x, y, color=None, size=None, title=""):
    fig = px.scatter(df, x=x, y=y, color=color, size=size, title=title, template="none")

    fig.update_traces(marker_line_color="black", marker_line_width=1)

    fig.show()

<IPython.core.display.Javascript object>

### Import the Netflix stock price data set (NFLX_data.csv).

In [2]:
netflix = pd.read_csv(
    "https://tf-assets-prod.s3.amazonaws.com/tf-curric/data-science/Data%20Sets%20Time%20Series%20Analysis/Time%20Series%20-%20Day%205/NFLX_data.csv"
)

<IPython.core.display.Javascript object>

In [3]:
netflix.head()

Unnamed: 0,date,open,high,low,close,volume,Name
0,2013-02-08,25.9635,26.28,25.7157,25.8528,25649820,NFLX
1,2013-02-11,25.5685,26.0071,24.9714,25.4128,29321782,NFLX
2,2013-02-12,25.8085,26.2228,25.1014,25.4214,34388802,NFLX
3,2013-02-13,25.8428,26.6285,25.6657,26.6098,40799094,NFLX
4,2013-02-14,26.7557,27.1214,26.3844,26.7714,31968685,NFLX


<IPython.core.display.Javascript object>

### Transform the data by shifting the series and creating features that will allow us to forecast the price 30 days into the future from 90 days of daily history.

In [4]:
history = 90
future = 30

shifts = [x + future for x in list(range(1, history + 1))]

for shift in shifts:
    netflix["t-" + str(shift)] = netflix["close"].shift(shift)

netflix.dropna(inplace=True)

<IPython.core.display.Javascript object>

In [5]:
netflix.head()

Unnamed: 0,date,open,high,low,close,volume,Name,t-31,t-32,t-33,...,t-111,t-112,t-113,t-114,t-115,t-116,t-117,t-118,t-119,t-120
120,2013-08-01,35.22,35.7142,34.9385,35.5885,14205926,NFLX,32.69,32.7471,30.57,...,25.6943,26.7357,26.7314,28.0643,27.0731,26.7714,26.6098,25.4214,25.4128,25.8528
121,2013-08-02,35.62,35.62,34.9714,35.1685,13879663,NFLX,33.1871,32.69,32.7471,...,25.6171,25.6943,26.7357,26.7314,28.0643,27.0731,26.7714,26.6098,25.4214,25.4128
122,2013-08-05,34.8443,36.2714,34.4828,36.2628,22967000,NFLX,31.9314,33.1871,32.69,...,26.2985,25.6171,25.6943,26.7357,26.7314,28.0643,27.0731,26.7714,26.6098,25.4214
123,2013-08-06,36.2785,36.8,35.7285,36.5571,20415983,NFLX,30.9857,31.9314,33.1871,...,26.3314,26.2985,25.6171,25.6943,26.7357,26.7314,28.0643,27.0731,26.7714,26.6098
124,2013-08-07,36.3371,36.7814,35.5228,35.6014,16330398,NFLX,30.8,30.9857,31.9314,...,26.8685,26.3314,26.2985,25.6171,25.6943,26.7357,26.7314,28.0643,27.0731,26.7714


<IPython.core.display.Javascript object>

### Split the data into a training set and a testing set. Make the test set size 20%.

In [13]:
X = netflix.drop(columns=["date", "open", "high", "low", "close", "volume", "Name"])
y = netflix["close"]

<IPython.core.display.Javascript object>

In [14]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=13
)

<IPython.core.display.Javascript object>

### Instantiate an AdaBoost model and fit it to the training set.

In [15]:
model = AdaBoostRegressor()
model.fit(X_train, y_train)

AdaBoostRegressor(base_estimator=None, learning_rate=1.0, loss='linear',
                  n_estimators=50, random_state=None)

<IPython.core.display.Javascript object>

### Generate predictions for the test set.

In [16]:
predictions = model.predict(X_test)

<IPython.core.display.Javascript object>

### Evaluate the results using R-Squared, Mean Absolute Error, and Root Mean Squared Error metrics.

In [19]:
print("R-Squared:" + str(r2_score(y_test, predictions)))
print("Mean Absolute Error:" + str(mean_absolute_error(y_test, predictions)))
print(
    "Root Mean Squared Error:" + str(np.sqrt(mean_squared_error(y_test, predictions)))
)

R-Squared:0.9610021438778232
Mean Absolute Error:8.131869280879384
Root Mean Squared Error:9.754164405796708


<IPython.core.display.Javascript object>

### Visually examine the results by creating a scatter plot where the x axis represents the observed results and the y axis represents the predictions.

In [25]:
results = pd.DataFrame([list(y_test), predictions], index=["Actual", "Predicted"]).T

<IPython.core.display.Javascript object>

In [27]:
iscatter(results, y_test, predictions)

<IPython.core.display.Javascript object>