# Prediction Model using TuriCreate in Julia

<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Set-Up" data-toc-modified-id="Set-Up-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Set Up</a></span></li><li><span><a href="#Fetch-Data-(SFrame)-from-Uber-CSV" data-toc-modified-id="Fetch-Data-(SFrame)-from-Uber-CSV-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Fetch Data (SFrame) from Uber CSV</a></span></li><li><span><a href="#Split-Data" data-toc-modified-id="Split-Data-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Split Data</a></span></li></ul></div>

## Set Up

In [1]:
## uncomment for the first run
# import Pkg
# Pkg.add("PyCall")
# Pkg.add("Conda")
# ENV["PYTHON"] = "/opt/anaconda3/envs/turi/bin/python"
# /opt/anaconda3/envs/turi/lib/python3.6/site-packages/
# Pkg.build("PyCall")

In [2]:
using PyCall
tc = pyimport("turicreate")

PyObject <module 'turicreate' from '/opt/anaconda3/envs/turi/lib/python3.6/site-packages/turicreate/__init__.py'>

## Fetch Data (SFrame) from Uber CSV

In [16]:
data_path="../DATA/processed/uber.csv"
data = tc.SFrame(data_path)
println()

Finished parsing file /Users/uki/REPOS/MarketIndicators.jl/src/DATA/processed/uber.csv
Parsing completed. Parsed 100 lines in 0.034016 secs.
------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[int,str,float,float,float,float,float,float,float,float,float,float,float,float,float,float,float,float,float,float,float,float,float,float,float,float,float,float,float,float,float,float,float]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------
Finished parsing file /Users/uki/REPOS/MarketIndicators.jl/src/DATA/processed/uber.csv
Parsing completed. Parsed 1820 lines in 0.008586 secs.



In [34]:
# Do not take initial year data as averages are not complete
row = get(data, 1743)

record_count = size(data)[1]
feature_number = row.count
day = row["Day"]
date = row["Date"] # "2020-10-20"

println( record_count )
println( feature_number )
println( day )
println( date )

1820
33
737718
2020-10-20


## Split Data

In [6]:
# Make a train-test split
train_data, test_data = data.random_split(0.8)
println()




In [15]:
println( size(train_data) )
println( size(test_data)  )

(1490,)
(330,)


In [8]:
column_to_predict = "DJIA_Original"

model = tc.regression.create( 
    train_data, 
    target = column_to_predict, 
    features = [
                 #"DJIA_Avg005"
                "DJIA_Avg030"
                ,"DJIA_Avg090"
                ,"DJIA_Avg180"        
                ,"DJIA_Avg365"

                ,"ISM_MFC_EMP_Avg090"
                ,"ISM_MFC_EMP_Avg180"
                ,"ISM_MFC_EMP_Avg365"

                ,"HOUSE_SRT_MM_Value"
                ,"HOUSE_SRT_MM_Avg090"
                ,"HOUSE_SRT_MM_Avg180"
                ,"HOUSE_SRT_MM_Avg365"

                ,"MFC_MPI_Value"
                ,"MFC_MPI_Avg090"
                ,"MFC_MPI_Avg180"
                ,"MFC_MPI_Avg365"
        
                ,"VIX_Avg005"
                ,"VIX_Avg030"
                ,"VIX_Avg090"
                ,"VIX_Avg180"        
                ,"VIX_Avg365"
            ], 
    validation_set="auto", 
    verbose=true
)

LoadError: PyError ($(Expr(:escape, :(ccall(#= /Users/uki/.julia/packages/PyCall/BcTLp/src/pyfncall.jl:43 =# @pysym(:PyObject_Call), PyPtr, (PyPtr, PyPtr, PyPtr), o, pyargsptr, kw))))) <class 'turicreate.toolkits._main.ToolkitError'>
ToolkitError("Input data does not contain the following columns: ['HOUSE_SRT_MM_Value', 'VIX_Avg005', 'MFC_MPI_Avg090', 'HOUSE_SRT_MM_Avg180', 'HOUSE_SRT_MM_Avg365', 'MFC_MPI_Avg365', 'MFC_MPI_Value', 'ISM_MFC_EMP_Avg365', 'MFC_MPI_Avg180', 'HOUSE_SRT_MM_Avg090', 'ISM_MFC_EMP_Avg180', 'ISM_MFC_EMP_Avg090']",)
  File "/opt/anaconda3/envs/turi/lib/python3.6/site-packages/turicreate/toolkits/regression/_regression.py", line 108, in create
    dataset, validation_set = _validate_data(dataset, target, features, validation_set)
  File "/opt/anaconda3/envs/turi/lib/python3.6/site-packages/turicreate/toolkits/_internal_utils.py", line 742, in _validate_data
    dataset = _toolkits_select_columns(dataset, features + [target])
  File "/opt/anaconda3/envs/turi/lib/python3.6/site-packages/turicreate/toolkits/_internal_utils.py", line 573, in _toolkits_select_columns
    + "{}".format(missing_features)


In [9]:
## Save predictions to an SArray
predictions = model.predict(test_data)
#predictions

LoadError: UndefVarError: model not defined

In [10]:
initio = 1                        # latin: start
gradus = 30                       # latin: step
finem  = size(predictions)[1] -1  # latin: end

println(column_to_predict, " ", finem)

x_axis_ids        = Vector{Int64}() # Array{Int64,1}
y_axis_preditions = Vector{Float64}()
y_axis_actuals    = Vector{Float64}()

for id in initio:finem
    push!(x_axis_ids, id )
    
    a = get(predictions, id)
    append!(y_axis_preditions, round(a, digits=1))
    
    b = get(test_data, column_to_predict )
    b = get(b, id )
    append!(y_axis_actuals, round(b, digits=1))
    #println( "predicted ", round(a, digits=1) , "\t but actual value was \t", round(b, digits=1) , "\t difference is ",  round(b-a, digits=2)  ) # dict
end


for id in initio:gradus:finem
    a = y_axis_preditions[id]
    b = y_axis_actuals[id]
    println( "predicted ", a , "\t but actual value was \t", b , "\t difference is ",  round(b-a, digits=2)  ) # dict
end

LoadError: UndefVarError: predictions not defined

In [11]:
using Plots

#dates = format_dates( df[rows,2] , "m/d/yy")

gr()
plot(    x_axis_ids,
        [  y_axis_preditions y_axis_actuals
        ], # y-axis
    label    = 
        [ "DJIA preditions" "DJIA actuals" ""  ],
    legend   =:topleft, 
              # :right, :left, :top, :bottom, :inside, :best, :legend, :topright, :topleft, :bottomleft, :bottomright
    xlabel   = "samples",
    ylabel   = "indicators",
    size     = (980, 400), # width, height
    layout = (1, 1) # number of graphs: vertically, horizontally
    )

LoadError: UndefVarError: y_axis_preditions not defined

In [12]:
#TODO: write this in a loop to select the best model
# Evaluate the model and save the results into a dictionary
results = model.evaluate( test_data ) #test_data[0:2531]
results

LoadError: UndefVarError: model not defined

In [13]:
rmse = results["rmse"]
rmse = round(rmse, digits=2)

max_error = results["max_error"]
max_error = round(max_error, digits=2)

println( "max_error: ", max_error, ", rmse: ", rmse )
# max_error: 1069.27, rmse: 184.6

LoadError: UndefVarError: results not defined

In [14]:
# Export to Core ML
model.export_coreml("../DATA/models/^DJI.mlmodel")

LoadError: UndefVarError: model not defined