# Challenge: Backtest on Other Datasets

## Download data from `yfinance`

In [1]:
import yfinance as yf

In [2]:
# Fetch the Netflix (NFLX) price history from Yahoo Finance.
# NOTE(review): no start/end is passed, so the rows returned presumably depend
# on the day this cell is run — confirm if a fixed window is needed.
df_netflix = yf.download('NFLX')

[*********************100%%**********************]  1 of 1 completed


In [3]:
# Show the five most recent rows to sanity-check the download.
df_netflix.iloc[-5:]

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-12-20,492.0,500.890015,488.390015,489.269989,489.269989,4563700
2023-12-21,492.660004,494.399994,486.769989,491.609985,491.609985,2756200
2023-12-22,494.0,496.019989,485.450012,486.76001,486.76001,2701100
2023-12-26,489.390015,491.480011,486.380005,491.190002,491.190002,2034500
2023-12-27,491.23999,494.019989,489.25,491.790009,491.790009,2559900


## Preprocess the data

### Filter the date range

- Keep at least the most recent year of data

In [4]:
# Keep every row from 2022-12-28 onward; copy so later column assignments
# do not touch (or warn about) the original download.
df_netflix_1year = df_netflix.loc['2022-12-28':].copy()

### Create the target variable

#### Percentage change

- Percentage change of `Adj Close` from today to tomorrow

In [5]:
# Tomorrow's return measured against today's price: (P[t+1] / P[t] - 1) * 100.
# The earlier `pct_change(-1) * -1` form divided by tomorrow's price instead,
# which keeps the sign but distorts the magnitude of the change.
# The last row has no "tomorrow" and is therefore NaN.
df_netflix_1year['change_tomorrow'] = (
    df_netflix_1year['Adj Close'].shift(-1) / df_netflix_1year['Adj Close'] - 1
) * 100

#### Drop rows with any missing data

In [6]:
# Discard every row that contains at least one missing value.
df_netflix_1year = df_netflix_1year.dropna(axis=0, how='any').copy()

#### Change sign

Did the stock go up or down?

In [8]:
import numpy as np
# Label each day by the sign of tomorrow's change: strictly positive is 'UP',
# everything else (including exactly zero) is 'DOWN'.
df_netflix_1year['change_tomorrow_direction'] = np.where(
    df_netflix_1year['change_tomorrow'].gt(0), 'UP', 'DOWN'
)

## Compute Machine Learning model

Proposal: a Random Forest classifier from the `ensemble` module of the `sklearn` library

In [10]:
# Target: the UP/DOWN label. Features: every remaining column, with both
# target-derived columns removed so they cannot leak into the inputs.
target = df_netflix_1year['change_tomorrow_direction']
explanatory = df_netflix_1year.drop(
    columns=['change_tomorrow', 'change_tomorrow_direction']
)

In [9]:
from sklearn.ensemble import RandomForestClassifier

In [11]:
# Fix the seed so the fitted forest — and the backtest report built on its
# predictions — is identical on every Restart & Run All. The value itself is
# arbitrary; any fixed integer works.
model_dt = RandomForestClassifier(random_state=42)

In [12]:
# Train the classifier on the feature columns against the UP/DOWN labels.
model_dt.fit(explanatory, target)

## Backtesting

### Create the Strategy

In [13]:
from backtesting import Backtest, Strategy

In [14]:
class ClassificationUP(Strategy):
    """Trade on the classifier's UP/DOWN forecast for the next day.

    On each bar the latest row of the backtest data is fed to the model.
    An 'UP' forecast triggers a buy when no buy is outstanding; a 'DOWN'
    forecast triggers a sell when a buy is outstanding. Any other
    combination leaves the position untouched.
    """

    def init(self):
        # Keep a reference to the fitted model on the strategy instance so
        # `next` does not have to reach for the notebook-level global.
        self.model = model_dt
        # Tracks whether the last order issued by this strategy was a buy.
        self.already_bought = False

    def next(self):
        # Double brackets keep the last row as a one-row DataFrame, which is
        # the 2-D input shape `predict` is given here.
        explanatory_today = self.data.df.iloc[[-1], :]
        forecast_tomorrow = self.model.predict(explanatory_today)[0]

        if forecast_tomorrow == 'UP' and not self.already_bought:
            self.buy()
            self.already_bought = True
        elif forecast_tomorrow == 'DOWN' and self.already_bought:
            # NOTE(review): per the backtesting.py docs, sell() places a new
            # short order; with exclusive_orders=True that closes the long and
            # opens a short. If the intent is only to exit the long, use
            # self.position.close() instead — confirm.
            self.sell()
            self.already_bought = False

### Run the Backtest

In [15]:
# Configure the simulation: 10,000 starting cash and a commission of .002
# (presumably 0.2% per trade — see the backtesting.py docs).
# NOTE: `explanatory` is the same frame the model was fitted on, so this is an
# in-sample backtest and its returns will look far better than they would on
# unseen data.
bt = Backtest(
    explanatory,
    ClassificationUP,
    cash=10000,
    commission=0.002,
    exclusive_orders=True,
)

### Show the report in a DataFrame

In [19]:
# Execute the backtest and keep what it returns for the report cell below.
results = bt.run()

In [20]:
# Turn the results into a one-column table and keep only the rows up to and
# including 'Return [%]'.
results.to_frame('Values').loc[:'Return [%]', :]

Unnamed: 0,Values
Start,2022-12-28 00:00:00
End,2023-12-26 00:00:00
Duration,363 days 00:00:00
Exposure Time [%],99.2
Equity Final [$],126518.993654
Equity Peak [$],127442.170147
Return [%],1165.189937


## Plot the backtest report

> Don't worry about this new tool just yet; a future chapter explains how to interpret the following chart.

In [21]:
# Draw the backtest chart, passing 'backtest_report.html' as the output filename.
bt.plot(filename='backtest_report.html')

## How to invest based on the numerical increase?

> Instead of the direction (UP or DOWN)

Next chapter → [Backtesting with Regression Models]()

Classification Model | Regression Model
-|-
![](src/pred_classification.png) | ![](src/pred_regression.png)

Classification Strategy | Regression Strategy
-|-
![](src/res_classification.png) | ![](src/res_regression.png)