# Introduction to Support Vector Machines with a Trading Example

This notebook shows how to implement a Support Vector Machine (SVM) model on a financial-markets example. It follows [this blog post on 'Towards Data Science'](https://towardsdatascience.com/intro-to-support-vector-machines-with-a-trading-example-1d4a7997ced6).

In [1]:
from bs4 import BeautifulSoup
import datetime, json
import numpy as np
import pandas as pd
import requests, time, warnings
warnings.simplefilter('ignore')

In [2]:
# requires the TA-lib package
import talib as ta
from talib import MA_Type

In [3]:
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn import metrics, svm

In [4]:
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter
import matplotlib.ticker as ticker

In [5]:
# Download the Yahoo Finance page that lists the index components.
# A timeout keeps a stalled connection from hanging the kernel, and
# raise_for_status() stops here on an HTTP error instead of letting an
# error page be parsed downstream.
Dow_Page = requests.get(
    'https://finance.yahoo.com/quote/%5EDJI/components?p=%5EDJI',
    timeout=30,
)
Dow_Page.raise_for_status()
Dow_Content = Dow_Page.content

In [11]:
# Parse the downloaded page. The parser is named explicitly so the result does
# not depend on which optional parsers (lxml, html5lib) happen to be installed,
# and so bs4 does not emit its "no parser was explicitly specified" warning.
soup = BeautifulSoup(Dow_Content, "html.parser")

# Collect every <td> whose class attribute is exactly this string.
# find_all is the current name of the legacy findAll alias.
data = list(soup.find_all("td", {"class": "Py(10px) Ta(start) Pend(10px)"}))

In [12]:
# Build the list of ticker symbols from the scraped table cells.
# Only cells whose markup contains the text "title" contribute; for those,
# the symbol is the text between the first ">" and the first "<" that follow
# the "title" occurrence.
Ticker_List = []
for cell in data:
    markup = str(cell)
    title_pos = markup.find("title")
    if title_pos == -1:
        # no "title" in this cell -> nothing to extract
        continue
    tail = markup[title_pos:]
    Ticker_List.append(tail[tail.find(">") + 1:tail.find("<")])

In [15]:
# Download window as integer epoch seconds: 28 October, 04:00, of 2014 and 2019.
# time.mktime interprets the tuple as local time; the final 0 is the DST flag.
Start_Date, End_Date = (
    int(time.mktime((year, 10, 28, 4, 0, 0, 0, 0, 0))) for year in (2014, 2019)
)

In [13]:
def ScrapeYahoo(data_df, ticker, start, end, timeout=10):
    """Download daily price bars for one ticker and store them in ``data_df``.

    Queries the Yahoo Finance v8 chart endpoint with ``interval=1d`` and builds
    a DataFrame with columns ``Open``, ``High``, ``Low``, ``Close`` and
    ``Volume``, indexed by ``DateTime`` (ISO ``YYYY-MM-DD`` strings).

    Parameters
    ----------
    data_df : dict
        Mapping updated in place: ``data_df[ticker]`` is set to the new frame.
    ticker : str
        Symbol appended to the endpoint URL.
    start, end : int
        Sent as the ``period1`` / ``period2`` query parameters (the notebook
        passes integer epoch seconds).
    timeout : float, optional
        Seconds to wait for the HTTP response before giving up.

    Raises
    ------
    requests.HTTPError
        If the endpoint answers with an HTTP error status.
    ValueError
        If the response carries no chart result for ``ticker``.
    """
    # form the request; requests encodes the query string from `params`
    Base_Url = 'https://query1.finance.yahoo.com/v8/finance/chart/'
    query = {"period1": start, "period2": end, "interval": "1d"}

    # get data from page, failing fast on a hung connection or an HTTP error
    r = requests.get(Base_Url + ticker, params=query, timeout=timeout)
    r.raise_for_status()
    Page_Data = r.json()

    # A missing or empty result list would otherwise surface as an opaque
    # TypeError/IndexError below; raise an error that names the ticker instead.
    chart = Page_Data.get('chart') or {}
    results = chart.get('result')
    if not results:
        raise ValueError(
            "No chart data returned for ticker %r (error: %r)"
            % (ticker, chart.get('error'))
        )
    result = results[0]
    quote = result["indicators"]["quote"][0]

    # fromtimestamp() without a tz converts using the machine's local timezone
    dates = [
        datetime.datetime.fromtimestamp(stamp).date().isoformat()
        for stamp in result['timestamp']
    ]

    # compile data into a DataFrame in one step
    Stock_df = pd.DataFrame(
        {column: quote[column.lower()]
         for column in ("Open", "High", "Low", "Close", "Volume")},
        index=pd.Index(dates, name="DateTime"),
    )

    # add data to a dictionary containing all values
    data_df[ticker] = Stock_df

In [22]:
# Fetch the price history for every symbol in Ticker_List.
# ScrapeYahoo stores each resulting DataFrame in Stock_Data under its ticker.
Stock_Data = {}

for symbol in Ticker_List:
    ScrapeYahoo(data_df=Stock_Data, ticker=symbol, start=Start_Date, end=End_Date)

In [23]:
# Display the collected data: a dict mapping each ticker to its
# Open/High/Low/Close/Volume DataFrame.
# NOTE(review): this prints the repr of every DataFrame in the dict; consider
# previewing a single frame with .head() to keep the output short.
Stock_Data

{'V':                   Open        High         Low       Close    Volume
 DateTime                                                            
 2014-10-28   53.652500   54.215000   53.514999   54.177502  10497600
 2014-10-29   54.077499   54.599998   53.437500   53.665001  16095200
 2014-10-30   57.132500   59.820000   57.005001   59.162498  50857600
 2014-10-31   59.322498   60.625000   59.322498   60.357498  24956000
 2014-11-03   60.244999   60.799999   59.750000   60.384998  12716000
 ...                ...         ...         ...         ...       ...
 2019-10-22  177.000000  177.789993  170.779999  170.860001  10094700
 2019-10-23  170.990005  172.169998  170.179993  171.320007   7122300
 2019-10-24  173.419998  176.869995  173.399994  176.160004   7872600
 2019-10-25  174.929993  179.149994  173.500000  177.850006  10125100
 2019-10-28  178.929993  180.179993  178.350006  179.839996  10084200
 
 [1259 rows x 5 columns],
 'MCD':                   Open        High         Low   