In [None]:
# Load the package manager and update the packages of the active environment.
using Pkg
Pkg.update()

[32m[1m   Updating[22m[39m registry at `~/.julia/registries/General`
######################################################################### 100.0%


In [None]:

# Packages this notebook depends on. Every name appears exactly once.
pkgList = ["MarketData","Sockets","DataFramesMeta","CSV","Flux","HTTP","JSON","DataFrames","Plots","TimeSeries","Hyperopt"]
# Install everything in a single call so the environment is resolved once
# instead of once per package.
Pkg.add(pkgList)

In [None]:
using TimeSeries
using DataFrames,DataFramesMeta
using Dates
using Statistics
using Flux
using Flux: @epochs
using Plots
using Hyperopt
using JSON
using HTTP
using MarketData
using Random

In [None]:
"""
    toPL(PL_ts)

Label every row of `PL_ts` with 1 when its second value exceeds its first and 0
otherwise, and return the labels as a single column named `PL`.

The labelled column is read back through the name `:Open`, so `PL_ts` is expected
to have exactly two columns with `:Open` first (presumably Open and Close prices).
"""
function toPL(PL_ts)
    # Two-element row: the label, plus a zero so the row has as many entries as
    # the two-column input it is written back into.
    label_row(row) = [row[2] - row[1] > 0 ? 1 : 0, 0]
    labelled = TimeSeries.map((t, v) -> (t, label_row(v)), PL_ts)
    PL = TimeSeries.rename(labelled[:Open], Symbol("PL"))
    PL = TimeSeries.map((t, v) -> (t, Int(v)), PL)
end

In [None]:
"""
    toReturn(price_ts)

Return the 5-sample moving average of the percentage change of `price_ts`,
scaled by 100, in a column named `return`.
"""
function toReturn(price_ts)
    change = TimeSeries.rename(percentchange(price_ts), [:return])
    smoothed = moving(mean, change, 5)
    return TimeSeries.map((t, v) -> (t, 100*v), smoothed)
end  

In [None]:
"""
    toRSI(price_ts, loopback)

Compute a relative-strength-index style series from `price_ts` over a window of
`loopback` samples and return it as one column named `RSI-<loopback>`.

The index is `100 - 100 / (1 + gain / loss)`, where `gain` and `loss` are the
moving means of the positive changes and of the magnitudes of the negative
changes. With floating-point values, a window with zero average loss yields 100
and a window where both averages are zero yields NaN.
"""
function toRSI(price_ts,loopback)
    change = percentchange(price_ts)
    # Gains keep non-negative changes (negatives become 0); losses keep the
    # magnitude of negative changes (everything else becomes 0).
    gains = map((t, v) -> (t, v[1] < 0 ? [0] : v), change)
    losses = map((t, v) -> (t, v[1] < 0 ? abs.(v) : [0]), change)
    avg_gain = moving(mean, gains, loopback)
    avg_loss = moving(mean, losses, loopback)
    # The second entry is padding so the row matches the two-column input.
    rsi_row(row) = [100 - (100 / (1 + row[1]/row[2]) ), 0]
    paired = TimeSeries.rename(TimeSeries.merge(avg_gain, avg_loss), [:up,:down])
    scored = TimeSeries.map((t, v) -> (t, rsi_row(v)), paired)
    return TimeSeries.rename(scored[:up], Symbol("RSI-",loopback))
end

In [None]:
"""
    toORSI(price_ts, day0, day1)

Return the `day0`-sample RSI of `price_ts` minus its own `day1`-sample moving
average, as a column named `ORSI-<day0>-<day1>`.
"""
function toORSI(price_ts,day0,day1)
    rsi = toRSI(price_ts, day0)
    rsi_avg = TimeSeries.rename(moving(mean, rsi, day1), Symbol("RSIMA-",day0,"-",day1))
    return TimeSeries.rename(rsi .- rsi_avg, Symbol("ORSI-",day0,"-",day1))
end

In [None]:
"""
    toMACD(price_ts, day0, day1)

Return the `day0`-sample moving mean of `price_ts` minus its `day1`-sample moving
mean, as a column named `MACD-<day0>-<day1>`. Both averages are simple
(unweighted) means.
"""
function toMACD(price_ts,day0,day1)
    first_avg = moving(mean, price_ts, day0)
    second_avg = moving(mean, price_ts, day1)
    return TimeSeries.rename(first_avg .- second_avg, Symbol("MACD-",day0,"-",day1))
end


In [None]:
"""
    toUpBB(price_ts, day0, day1)

Return the `day0`-sample moving mean of `price_ts` minus the upper band
`mean + 2 * std` computed over `day1` samples, as a column named
`UpBB-<day0>-<day1>`.
"""
function toUpBB(price_ts,day0,day1)
    first_avg = moving(mean, price_ts, day0)
    band_center = moving(mean, price_ts, day1)
    band_sigma = moving(std, price_ts, day1)
    return TimeSeries.rename(first_avg .- (band_center .+ (2 .* band_sigma)), Symbol("UpBB-",day0,"-",day1))
end


In [None]:
"""
    toDownBB(price_ts, day0, day1)

Return the `day0`-sample moving mean of `price_ts` minus the lower band
`mean - 2 * std` computed over `day1` samples, as a column named
`DownBB-<day0>-<day1>`.
"""
function toDownBB(price_ts,day0,day1)
    first_avg = moving(mean, price_ts, day0)
    band_center = moving(mean, price_ts, day1)
    band_sigma = moving(std, price_ts, day1)
    return TimeSeries.rename(first_avg .- (band_center .- (2 .* band_sigma)), Symbol("DownBB-",day0,"-",day1))
end

In [None]:
"""
    catFeatures(price_ts, day0, day1, delta)

Build the feature table for `price_ts`.

The first column is `toReturn(price_ts)`. It is followed, for each of `toORSI`,
`toMACD`, `toUpBB` and `toDownBB` in that order, by one column per pair `(i, j)`
with `i` in `day0:day1` and `j` in `day0+delta:day1+delta` (`j` varying fastest).
Columns are inner-joined, so only timestamps present in every feature remain.
"""
function catFeatures(price_ts,day0,day1,delta)
    table = toReturn(price_ts)
    indicators = [ toORSI, toMACD, toUpBB, toDownBB ]
    # A multi-range `for` iterates exactly like the equivalent nested loops:
    # the right-most range varies fastest.
    for indicator in indicators, i in day0:day1, j in day0+delta:day1+delta
        table = merge(table, indicator(price_ts, i, j), method=:inner)
    end
    return table
end

In [None]:
"""
    toMatrix(data, featureIdx, targetIdx)

Split `values(data)` into a 4-D feature array and a target vector.

Columns `featureIdx:end` are the features. Their count must equal `4 * k^2` for
an integer `k` (otherwise `Int` throws an `InexactError`). They are laid out as a
`(k, k, 4, N)` array, `N` being the number of rows, so that `X[a, b, c, n]` is
feature number `a + k*(b-1) + k^2*(c-1)` of row `n`. Column `targetIdx` is
returned as `Y`. The size of the 2-D feature matrix is printed as a side effect.
"""
function toMatrix(data,featureIdx,targetIdx)
    raw = values(data)
    features = raw[:, featureIdx:end]
    print(size(features))
    nrows, ncols = size(features)
    per_channel = Int(ncols/4)
    side = Int(sqrt(per_channel))
    # Transposing first makes each sample's features contiguous, so the reshape
    # places one sample in each slice along the last dimension.
    X = reshape(transpose(features), (side, side, 4, nrows))
    Y = raw[:, targetIdx]
    return (X,Y)
end

In [None]:
"""
    preprocessing(price_ts::TimeArray, day0, day1, delta)

Build the feature table with `catFeatures` and convert it with `toMatrix`, using
column 1 as the target and columns 2 onward as features.

Returns `(X, Y, t_idx)` where `t_idx` holds the timestamps of the feature table.
"""
function preprocessing(price_ts::TimeArray,day0,day1,delta)
    features = catFeatures(price_ts, day0, day1, delta)
    stamps = timestamp(features)
    X, Y = toMatrix(features, 2, 1)
    return (X, Y, stamps)
end

In [None]:
"""
    timeseriesDataset(X, Y, seqlen, t_idx)

Cut sliding windows of `seqlen` consecutive samples out of `X` (samples run along
the fourth dimension) and pair each window with the target that follows it.

# Returns
A tuple `(xtrain, ytrain, xcurrent, time_idx)`:
- `xtrain[i]` is `X[:, :, :, i:i+seqlen-1]`;
- `ytrain[i]` is `Y[i+seqlen]`, stored in a vector with the element type of `Y`;
- `xcurrent` is a one-element vector holding the last `seqlen` samples of `X`,
  the window for which no target exists yet;
- `time_idx[i]` is `t_idx[i+seqlen]`, converted to `DateTime`.

When `length(Y) <= seqlen` the three per-window vectors are empty.
"""
function timeseriesDataset(X,Y,seqlen,t_idx)
    starts = 1:length(Y)-seqlen
    # Comprehensions build each vector in one pass and give it a concrete
    # element type, which growing an array with repeated `vcat` does not.
    xtrain = [X[:,:,:,i:i+seqlen-1] for i in starts]
    ytrain = [Y[i+seqlen] for i in starts]
    time_idx = DateTime[t_idx[i+seqlen] for i in starts]
    xcurrent = [X[:,:,:,end-seqlen+1:end]]
    return (xtrain,ytrain,xcurrent,time_idx)
end

In [None]:
"""
    build_reg_model(Nh, seqlen)

Assemble the regression network: three convolution stages (4 input channels,
`Nh` channels afterwards), flatten, dropout, two GRU layers and a dense layer
with a single output. The chain ends by returning the first element of that
output, so calling the model yields a scalar.

The first GRU is built with input size `seqlen`, applied to the transposed
flattened convolution output.
"""
function build_reg_model(Nh,seqlen)
    # One stage: 2x2 convolution with padding 1 and relu, then 2x2 max pooling.
    conv_stage(channels) = (Conv((2, 2), channels, pad=(1,1), relu), MaxPool((2,2)))
    return Chain(
        conv_stage(4=>Nh)...,
        conv_stage(Nh=>Nh)...,
        conv_stage(Nh=>Nh)...,
        Flux.flatten,
        Dropout(0.1),
        (x->transpose(x)),
        GRU(seqlen,Nh),
        GRU(Nh,Nh),
        # Keep only the last column of the recurrent output.
        (x -> x[:,end]),
        Dense(Nh, 1),
        (x -> x[1])
    )
end

In [None]:
"""
    reg_accuracy(m, xtest, ytest)

Apply `m` to every element of `xtest` and compare the sign of each prediction
with the sign of the matching entry of `ytest`.

Returns a 2x2 matrix of fractions that sum to 1. The row is the predicted class
and the column the actual class, where index 1 means "not positive" and index 2
means "positive".
"""
function reg_accuracy(m,xtest,ytest)
    predicted = m.(xtest)
    counts = zeros(2,2)
    for i in 1:length(ytest)
        row = (predicted[i] > 0 ? 1 : 0) + 1
        col = (ytest[i] > 0 ? 1 : 0) + 1
        counts[row, col] += 1
    end
    return counts ./ sum(counts)
end

"""
    plot_eval(m, xtest, ytest)

Apply `m` to every element of `xtest`, decode predictions and targets with
`Flux.onecold` against the labels `[0, 1]`, and plot both series over the sample
index: predictions in red, targets in blue.
"""
function plot_eval(m,xtest,ytest)
    prediction = m.(xtest)
    len = length(ytest)
    pred = Float32[]
    actual = Float32[]
    for i in 1:len
        append!(pred, Flux.onecold(prediction[i],[0,1]))
        append!(actual, Flux.onecold(ytest[i],[0,1]))
    end
    # Font settings shared by both curves.
    fonts = (tickfontsize=28, guidefontsize=28, legendfontsize=28)
    plot(1:len, pred; size = (1000, 700), color="red", fonts...)
    return plot!(1:len, actual; size = (1000, 700), color="blue", fonts...)
end

In [None]:
"""
    loadData(sym, period)

Fetch the last `period` years of quotes for `sym` with `yahoo`, write the full
table to `<sym>-price.csv`, and return the `Close` column converted to `Float32`.
"""
function loadData(sym,period)
    stop = Dates.now()
    start = stop - Year(period)
    quotes = yahoo(sym, YahooOpt(period1 = start, period2 = stop) )
    # The CSV is read back later by the evaluation and backtesting functions.
    writetimearray(quotes, sym * "-price.csv")
    return Float32.(quotes[:,:Close])
end

In [None]:
# Compare a prediction-driven intraday trade against simply holding from open to
# close, using the quotes stored in "<sym>-price.csv".
# `ta` is a single-column series of predictions.
# Returns (sum of strategy P/L, sum of close-open), both rounded to 2 digits.
# NOTE(review): not called anywhere in this file.
function evaluateTradeBak(sym,ta)
    t_idx = timestamp(ta)
    # t1 and t2 are computed but never used below.
    t1 = t_idx[1]
    t2 = t_idx[end]
    ts = Float32.(readtimearray(sym * "-price.csv"))
    ts3 = ts[:Open,:Close,:High]
    # Previous row's High, aligned with the current row.
    prev_high = TimeSeries.rename(lag(ts[:High]), [:LagHigh])
    ts4 = merge(ts3, prev_high, method=:inner)
    # Re-key the quotes with DateTime timestamps before merging with `ta`.
    ts4 = TimeArray(DateTime.(timestamp(ts4)),values(ts4),colnames(ts4))
    # Non-positive predictions are replaced by 0; positive ones are kept as-is.
    pos = TimeSeries.rename(map( (timestamp, values) -> ( timestamp, if values[1] <= 0 ; 0 ; else ;values ;end), ta), [:pos])
    trade = merge(ts4,pos,method=:inner)
    # Per-row P/L. Columns are read by position; presumably the merged order is
    # Open, Close, High, LagHigh, pos -- TODO confirm against TimeSeries.merge.
    # NOTE(review): `PL` assigned in here is the same variable as the outer `PL`
    # below, because nested functions share the enclosing function's locals.
    function calcPL(values)
        o = values[1]
        c = values[2]
        h = values[3]
        # Target price: values[4] scaled up by values[5] percent.
        pred = values[4]*(1+values[5]/100)
        if pred > 0 && pred > o
            if pred <= h
                # Target is within the day's high: P/L is target minus open.
                PL = pred - o
                
            else
                # Target above the high: P/L falls back to close minus open.
                PL = c - o
                
            end
        else
            PL = 0
        end
        # Padded with zeros to the five columns of `trade`.
        return [PL,c-o,0,0,0]
    end
        
    PL = map( (timestamp, values) -> (timestamp, calcPL(values)), trade)
    PL = TimeSeries.rename(PL,[:PL,:base,:C3,:C4,:C5])
    return (round(sum(values(PL[:PL])),digits=2) , round(sum(values(PL[:base])),digits=2))
end

In [None]:
"""
    evaluateTradeBak2(sym, ta)

Evaluate the predictions `ta` against the quotes in `<sym>-price.csv`, restricted
to the date span covered by `ta`.

Returns a 3-tuple: the rounded sum of close-minus-open on rows where the
prediction is positive, the difference between the last and first `Close` of the
span, and the rounded sum of close-minus-open over all rows.
"""
function evaluateTradeBak2(sym,ta)
    stamps = timestamp(ta)
    first_day = Date(stamps[1])
    last_day = Date(stamps[end])
    quotes = readtimearray(sym * "-price.csv")

    quotes = Float32.(quotes[first_day:Day(1):last_day])
    # Close minus open per row; the zero pads the row to the two input columns.
    intraday = map((t, v) -> (t, [v[2]-v[1],0]), quotes[:,[:Open,:Close]])
    PL = TimeSeries.rename(intraday, [:PL,:Null])[:,:PL]
    # 1 where the prediction is positive, 0 elsewhere.
    pos = map((t, v) -> (t, v[1] <= 0 ? 0 : 1), ta)
    trade = pos .* PL
    strategy_total = round(sum(values(trade)), digits=2)
    span_change = values(quotes[end,:Close])-values(quotes[1,:Close])
    baseline_total = round(sum(values(PL)), digits=2)
    return (strategy_total, span_change, baseline_total)
end

In [None]:
"""
    evaluateTradeBak3(sym, ta)

Score the predictions `ta` against the percentage change of `Close` read from
`<sym>-price.csv`, restricted to the date span covered by `ta`.

Prints the 2x2 sign-agreement matrix and the accuracy. Returns a 3-tuple: the
rounded sum of returns on rows with a positive prediction, the rounded sum of
all returns, and the accuracy. Rows are paired with predictions by position.
"""
function evaluateTradeBak3(sym,ta)
    stamps = timestamp(ta)
    first_day = Date(stamps[1])
    last_day = Date(stamps[end])
    quotes = readtimearray(sym * "-price.csv")
    pct = TimeSeries.rename(percentchange(quotes[:Close]), [:return])
    pct = Float32.(pct[first_day:Day(1):last_day])
    # Row = predicted sign, column = realised sign (1 = not positive, 2 = positive).
    m = zeros(2,2)
    predicted = values(ta)
    realised = values(pct)
    for i in 1:length(predicted)
        row = (predicted[i] > 0 ? 1 : 0) + 1
        col = (realised[i] > 0 ? 1 : 0) + 1
        m[row, col] += 1
    end
    m = m ./ sum(m)
    @show m
    accuracy = m[1,1]+m[2,2]
    @show accuracy
    pos = map((t, v) -> (t, v[1] <= 0 ? 0 : 1), ta)
    trade = pos .* pct
    return (round(sum(values(trade)),digits=2) ,round(sum(values(pct)),digits=2),accuracy )
end

In [None]:
"""
    backtesting(sym, ta)

Backtest the predictions `ta` against the percentage change of `Close` read from
`<sym>-price.csv`.

A position of 1 is taken on every row whose prediction is not below the previous
row's prediction, and 0 otherwise. Prints the 2x2 matrix of position versus
realised sign and the accuracy. Returns a 3-tuple: the rounded sum of returns
while in position, the rounded sum of all returns over the span, and the
accuracy. Positions and returns are paired by position for the matrix.
"""
function backtesting(sym,ta)
    # Pair each prediction with the previous one; both output entries carry the
    # same flag because the mapped row has two columns.
    paired = merge(ta,lag(ta))
    flagged = map((t, v) -> (t, v[1] < v[2] ? [0,0] : [1,1]), paired)
    pos = flagged[:,colnames(flagged)[1]]
    stamps = timestamp(pos)
    t1 = stamps[1]
    t2 = stamps[end]
    quotes = readtimearray(sym * "-price.csv")
    # Re-key the quotes with DateTime timestamps so they can be sliced with t1/t2.
    quotes = TimeArray(DateTime.(timestamp(quotes)),values(quotes),colnames(quotes))
    pct = TimeSeries.rename(percentchange(quotes[:Close]),[:return])
    pct = Float32.(pct[t1:Day(1):t2])
    # Row = position taken, column = realised sign (1 = not positive, 2 = positive).
    m = zeros(2,2)
    held = values(pos)
    realised = values(pct)
    for i in 1:length(held)
        row = Int(held[i]) + 1
        col = (realised[i] > 0 ? 1 : 0) + 1
        m[row, col] += 1
    end
    m = m ./ sum(m)
    @show m
    accuracy = m[1,1]+m[2,2]
    @show accuracy
    trade = pos .* pct
    return (round(sum(values(trade)),digits=2) ,round(sum(values(pct)),digits=2),accuracy )
end

In [None]:
"""
    myTrain(price_ts::TimeArray, sym, seqlen, Nh, lr, mm, day0, delta, i)

Train the regression network on features derived from `price_ts`, evaluate it on
the most recent 150 samples, and predict the next value.

# Arguments
- `price_ts`: price series the features are computed from.
- `sym`: ticker symbol; `backtesting` reads `<sym>-price.csv`.
- `seqlen`: number of consecutive samples per input window.
- `Nh`: number of channels / hidden units of the network.
- `lr`, `mm`: the two arguments passed to `RMSProp`.
- `day0`, `delta`: feature windows; features are built for `day0:day0+delta-1`
  against the same range shifted by `delta`.
- `i`: tag used in the name of the saved plot, `plot-<i>.png`.

# Returns
`(m, objcost, xtest, ytest, confmx, future, ta)`: the trained model, the
objective (test mean absolute error minus the difference between the correct and
incorrect fractions of `confmx`), the test inputs and targets, the 2x2 sign
confusion matrix, the prediction for the latest window, and the test predictions
as a `TimeArray`.

Side effects: seeds the global RNG, prints progress, writes the plot file.
"""
function myTrain(price_ts::TimeArray,sym,seqlen,Nh,lr,mm,day0,delta,i)
    # Fixed seed so repeated runs draw the same random numbers.
    Random.seed!(1234);
    day1 = day0 + delta - 1
    (X,Y,t_idx) = preprocessing(price_ts,day0,day1,delta)
    (xtrain,ytrain,xcurrent,time_idx) = timeseriesDataset(X,Y,seqlen,t_idx)
    # Hold out the most recent `thd` samples for testing; train on the rest.
    thd = 150
    xtest,ytest = xtrain[end-thd+1:end], ytrain[end-thd+1:end]
    time_test = time_idx[end-thd+1:end]
    xtrain,ytrain = xtrain[1:end-thd], ytrain[1:end-thd]
    batchsize = 20
    # shuffle=false keeps the batches in chronological order.
    train_loader = Flux.Data.DataLoader(xtrain,ytrain, batchsize=batchsize,shuffle=false)
    m = build_reg_model(Nh,seqlen)
    # Training loss: mean absolute error between per-sample predictions and targets.
    function mae_loss(x,y)
        yh = m.(x)
        return Flux.mae(yh,y)
    end
    num_epoch = 25
    @epochs num_epoch Flux.train!(mae_loss,Flux.params(m),train_loader,RMSProp(lr,mm))
    confmx = reg_accuracy(m,xtest,ytest)
    prediction = m.(xtest)
    println(size(prediction))
    maeloss = mae_loss(xtest,ytest)
    println("mae=",maeloss)
    # Lower is better: small error, and more correct than incorrect signs.
    objcost = maeloss-1*(confmx[1,1]+confmx[2,2]-confmx[1,2]-confmx[2,1])
    plot(1:length(prediction),prediction,size = (1000, 700),color="red",tickfontsize=28,guidefontsize=28,legendfontsize=28)
    plot!(1:length(ytest),ytest,size = (1000, 700),color="blue",tickfontsize=28,guidefontsize=28,legendfontsize=28)
    png("plot-"*string(i)*".png")
    ta = TimeArray(time_test, prediction)
    @show backtesting(sym,ta)
    println("current size:", size(xcurrent[1]))
    # Prediction for the latest window, for which no target exists yet.
    future = m.(xcurrent)
    future = future[1]
    println("future:",future)
    println("accuracy:",(confmx[1,1]+confmx[2,2]))
    println(confmx[1,:])
    println(confmx[2,:])

    return (m,objcost,xtest,ytest,confmx,future,ta)
end
"""
    myObjective(price_ts, sym, seqlen, Nh, lr, mm, day0, delta, i)

Run `myTrain` with the given settings, reset the trained model's recurrent state
with `Flux.reset!`, and return only the objective value (the second element of
the `myTrain` result).
"""
function myObjective(price_ts,sym,seqlen,Nh,lr,mm,day0,delta,i)
    trained = myTrain(price_ts,sym,seqlen,Nh,lr,mm,day0,delta,i)
    model, objcost = trained[1], trained[2]
    Flux.reset!(model)
    return objcost
end

In [None]:
"""
    hyperTune(sym::String)

Search hyperparameters for `sym` with 20 random samples, retrain with the best
combination found, and backtest the resulting predictions.

Three years of quotes are downloaded with `loadData`. Candidate values are:
`seqlen` in `3:5:20`, `Nh` in `5:3:20`, `delta` in `10:3:20`, `lr` in ten values
evenly spaced in the exponent between `1e-4` and `1e-3`, `mm` in five values
between 0.75 and 0.95, and `day0` in `5:3:10`. The retraining run saves its plot
as `plot-21.png`.

Returns the result of `backtesting(sym, ta)` for the retrained model.
"""
function hyperTune(sym::String)
    price_ts = loadData(sym,3)

    ho = @hyperopt for i=20,
                sampler = RandomSampler(), 
                seqlen = StepRange(3, 5, 20),
                Nh = StepRange(5,3, 20),
                delta = StepRange(10,3, 20),
                lr =  exp10.(LinRange(-4,-3,10)),
                mm =  LinRange(0.75,0.95,5),
                day0 = StepRange(5,3, 10)
        println(i,"-",seqlen,"-",Nh,"-",lr,"-",mm,"-",day0,"-",delta)
        # The value of the loop body is the objective being minimised.
        @show myObjective(price_ts,sym,seqlen,Nh,lr,mm,day0,delta,i)
    end

    best_params, min_f = ho.minimizer, ho.minimum

    @show min_f
    # The minimiser lists the parameters in the order they are declared above.
    seqlen=best_params[1]
    Nh=best_params[2]
    delta=best_params[3]
    lr=best_params[4]
    mm=best_params[5]
    day0=best_params[6]
    println(seqlen,"-",Nh,"-",lr,"-",mm,"-",day0,"-",delta)
    (m,acc,xtest,ytest,confmx,future,ta) = myTrain(price_ts,sym,seqlen,Nh,lr,mm,day0,delta,21)
    # `backtesting` is the evaluation routine defined in this file and the one
    # `trainPredict` uses.
    return @show backtesting(sym,ta)
end

In [None]:
"""
    trainPredict(sym::String)

Download three years of quotes for `sym`, train with a fixed set of
hyperparameters, backtest, and return a one-row `DataFrame` with the symbol, the
test accuracy (sum of the confusion-matrix diagonal) and the predicted next
value. The plot written by `myTrain` is named `plot-<sym>.png`.
"""
function trainPredict(sym::String)
    # Settings taken from the tuning log line
    # "20-13-20-0.000774263682681127-0.8-5-15" (i-seqlen-Nh-lr-mm-day0-delta).
    seqlen, Nh, delta = 13, 20, 15
    lr, mm, day0 = 0.000774, 0.8, 5
    println(seqlen,"-",Nh,"-",lr,"-",mm,"-",day0,"-",delta)
    df = DataFrame(symbol = String[], accuracy = Float32[], future = Float32[])
    price_ts = loadData(sym,3)
    # Only the confusion matrix, the forecast and the test predictions are needed.
    (_, _, _, _, confmx, future, ta) = myTrain(price_ts,sym,seqlen,Nh,lr,mm,day0,delta,sym)
    @show backtesting(sym,ta)
    push!(df, [sym, confmx[1,1]+confmx[2,2], future])
    println(df)
    return df
end

In [None]:

# Train, backtest and forecast for the GLD ticker with the fixed hyperparameters.
trainPredict("GLD")

In [None]:
# Run the hyperparameter search for the AAPL ticker.
hyperTune("AAPL")

In [None]:
# Manual single-symbol run that leaves the trained model, its evaluation data and
# its predictions in global variables for interactive inspection.
sym="GLD"
seqlen=13
Nh=20
delta=15
lr=0.000774
mm=0.8
day0=5
println(seqlen,"-",Nh,"-",lr,"-",mm,"-",day0,"-",delta)
df = DataFrame(symbol = String[], accuracy = Float32[], future = Float32[])
price_ts = loadData(sym,3)
# myTrain takes the symbol as its second argument; the plot is saved as plot-21.png.
(m,cost,xtest,ytest,confmx,future,ta) = myTrain(price_ts,sym,seqlen,Nh,lr,mm,day0,delta,21)
    

In [None]:
# Backtest the global predictions `ta` for the global symbol `sym` and print the result.
@show backtesting(sym,ta)

In [None]:
# Run trainPredict for every ticker symbol listed in list.txt, one symbol per line.
list = readlines("list.txt")
for line in list
    # Surrounding whitespace is dropped and blank lines are skipped, so they are
    # never passed on as ticker symbols.
    ticker = String(strip(line))
    isempty(ticker) && continue
    trainPredict(ticker)
end