# Machine Learning to predict the Dow Jones Industrial Average

This simple example shows how to predict the ^DJI price (the `High` column) from its past averages (the `Avg005` … `Avg365` columns).

## Setup

In [1]:
# Update TuriCreate. Last updated November 4, 2020

# !pip install --upgrade pip
# !pip install Turicreate

In [2]:
import turicreate as tc

In [3]:
# Relative path of the pre-processed ^DJI CSV file that the notebook loads.
data_path = "./DATA/processed/^DJI.csv"

In [4]:
# Read the processed CSV into an SFrame, then preview the ten rows starting at
# index 363. In the recorded output, Avg365 is still 0.0 on the first of these
# rows and is populated from the next row onwards.
data = tc.SFrame(data_path)
data[363:373]

------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[int,str,float,float,float,float,float,float]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


Day,Date,High,Avg005,Avg030,Avg090,Avg180,Avg365
725033,1986-01-27,-125.0,-125.2,-125.1,-125.66,-126.33,0.0
725034,1986-01-28,-125.0,-125.0,-125.1,-125.63,-126.32,-126.98
725035,1986-01-29,-125.0,-125.0,-125.1,-125.61,-126.31,-126.97
725036,1986-01-30,-125.0,-125.0,-125.1,-125.59,-126.29,-126.96
725037,1986-01-31,-125.0,-125.0,-125.1,-125.57,-126.28,-126.96
725038,1986-02-01,-125.0,-125.0,-125.1,-125.54,-126.27,-126.95
725039,1986-02-02,-125.0,-125.0,-125.1,-125.52,-126.26,-126.94
725040,1986-02-03,-125.0,-125.0,-125.1,-125.5,-126.25,-126.93
725041,1986-02-04,-125.0,-125.0,-125.1,-125.48,-126.24,-126.92
725042,1986-02-05,-125.0,-125.0,-125.1,-125.46,-126.23,-126.92


## Select the data to train and test

In [5]:
# Drop the initial year of rows: their long-window averages are not complete yet.
# NOTE(review): 13063 looks like the row count of the CSV when the notebook was
# written - confirm, or slice to the end of the data instead.
# NOTE: this cell overwrites `data`; re-run the load cell before re-running it,
# otherwise the slice is applied a second time.
FIRST_COMPLETE_ROW = 365
END_ROW = 13063
data = data[FIRST_COMPLETE_ROW:END_ROW]

# Make an 80/20 train-test split. The fixed seed keeps the split - and therefore
# every prediction and metric reported below - reproducible across kernel restarts.
SPLIT_SEED = 42
train_data, test_data = data.random_split(0.8, seed=SPLIT_SEED)

## Create the model

In [14]:
# tc.regression.create automatically picks a suitable model for the data.
#   target   - the column holding the number to be predicted.
#   features - the columns the model uses to find patterns leading to the prediction.
feature_columns = ['Avg005', 'Avg030', 'Avg090', 'Avg180', 'Avg365']
model = tc.regression.create(train_data, target='High', features=feature_columns)

## Predict values on data that was NOT used in training

In [9]:
# Display the held-out test rows. Uncomment the next line to use the SFrame's
# explore() view instead of the plain table.
#test_data.explore()
test_data

Day,Date,High,Avg005,Avg030,Avg090,Avg180,Avg365
725039,1986-02-02,-125.0,-125.0,-125.1,-125.52,-126.26,-126.94
725048,1986-02-11,-125.0,-125.0,-125.1,-125.36,-126.16,-126.87
725052,1986-02-15,-124.0,-124.2,-124.93,-125.27,-126.09,-126.82
725070,1986-03-05,-124.0,-124.0,-124.27,-124.87,-125.79,-126.64
725073,1986-03-08,-124.0,-124.0,-124.17,-124.8,-125.74,-126.6
725074,1986-03-09,-124.0,-124.0,-124.13,-124.78,-125.73,-126.59
725086,1986-03-21,-123.0,-123.0,-123.63,-124.49,-125.47,-126.43
725092,1986-03-27,-123.0,-123.0,-123.43,-124.36,-125.33,-126.35
725095,1986-03-30,-123.0,-123.0,-123.33,-124.29,-125.27,-126.31
725102,1986-04-06,-123.0,-123.0,-123.1,-124.13,-125.11,-126.21


In [7]:
# Run the trained model on the held-out test rows and save the predictions to an SArray.
predictions = model.predict(test_data)
# Uncomment to display the predictions:
#predictions

In [8]:
# Print example predictions from the beginning to the end of the test set,
# taking every 100th row, next to the actual value and the difference.
# The range is driven by len(predictions) instead of a hard-coded row count:
# the size of the test set produced by random_split is not fixed, so a literal
# bound can either run past the end (IndexError) or stop short of it.
SAMPLE_STEP = 100
actual_high = test_data["High"]
for row in range(0, len(predictions), SAMPLE_STEP):
    predicted = round(predictions[row], 2)
    actual = actual_high[row]
    print( "predicted ", predicted, ", but actual value was ", actual , " difference is ", round(actual - predicted, 2) )

predicted  -124.99 , but actual value was  -125.0  difference is  -0.01
predicted  -117.94 , but actual value was  -118.0  difference is  -0.06
predicted  -119.99 , but actual value was  -120.0  difference is  -0.01
predicted  -116.01 , but actual value was  -116.0  difference is  0.01
predicted  -112.58 , but actual value was  -113.0  difference is  -0.42
predicted  -111.11 , but actual value was  -111.0  difference is  0.11
predicted  -103.96 , but actual value was  -104.0  difference is  -0.04
predicted  -100.14 , but actual value was  -100.0  difference is  0.14
predicted  -84.57 , but actual value was  -85.0  difference is  -0.43
predicted  -58.89 , but actual value was  -59.0  difference is  -0.11
predicted  -38.92 , but actual value was  -39.0  difference is  -0.08
predicted  -44.09 , but actual value was  -44.0  difference is  0.09
predicted  -45.74 , but actual value was  -46.0  difference is  -0.26
predicted  -52.09 , but actual value was  -52.0  difference is  0.09
predicted

In [15]:
# Evaluate the model on the held-out test data and save the metrics into a dictionary
# (the recorded output contains the keys 'max_error' and 'rmse').
results = model.evaluate( test_data ) # evaluates the whole test set, not a fixed-size slice
results

{'max_error': 10.599606912176988, 'rmse': 0.9890225923076993}

## Save the model for future use in macOS, iOS, etc. applications

In [12]:
# Export to Core ML. The file is written with the .mlmodel extension (not .csv)
# so that it is recognizable as a Core ML model rather than a data file.
model.export_coreml('./DATA/models/^DJI.mlmodel')