In [1]:
using Statistics, LinearAlgebra, Random, DataFrames, Plots, StatsPlots, CSV

In [72]:
using Plots, Random, LinearAlgebra, Statistics, SparseArrays

In [69]:
using Pkg
Pkg.add(Pkg.PackageSpec(name="LowRankModels", rev="master"))
include("proxgrad.jl")

[?25l[2K

[32m[1m   Updating[22m[39m git-repo `https://github.com/madeleineudell/LowRankModels.jl.git`


[?25h

[32m[1m  Resolving[22m[39m package versions...
[32m[1mNo Changes[22m[39m to `~/.julia/environments/v1.5/Project.toml`
[32m[1mNo Changes[22m[39m to `~/.julia/environments/v1.5/Manifest.toml`


proxgrad_const

In [3]:
dt = CSV.read("FinalDataSet.csv");

In [196]:
fb = filter(row -> row[:Company] in ["FB"], dt)
amzn = filter(row -> row[:Company] in ["AMZN"], dt)
aapl = filter(row -> row[:Company] in ["AAPL"], dt)
nflx = filter(row -> row[:Company] in ["NFLX"], dt)
goog = filter(row -> row[:Company] in ["GOOG"], dt);

# Facebook

In [109]:
y = convert(Array, fb[23:64,:Open] .> fb[22:63,:Open])
fb = convert(Matrix, fb[:, :])
fb = [fb[:,3:4] fb[:, 6:10]]
fb = convert(Array{Float64,2}, fb)
X = fb[23:64,:];

In [151]:
# Hinge Loss + QuadReg (λ = .1) on the whole dataset
n = 42
loss = 1/n * HingeLoss()
λ = .1
reg = λ*QuadReg()
w = proxgrad(loss, reg, X, y, maxiters=100) 
yhat = impute(loss, X*w)
fb_miss_rate = (n - sum(yhat .== y)) / n
print("Facebook classification rate = " * string(fb_miss_rate))

Facebook classification rate = 0.42857142857142855

In [155]:
# Creating Timeseries matrix
X_ts = X
for i in (1:22)
    X_ts = [X_ts fb[23-i:64-i,:]]
end

In [158]:
# Hinge Loss + QuadReg (λ = .1) for time series lags on 1, 5, 10, 15, 22 days on the whole dataset
ts = [1 5 10 15 22]
for i in (1:length(ts))
    n = 42
    loss = 1/n * HingeLoss()
    λ = .1
    reg = λ*QuadReg()
    w = proxgrad(loss, reg, X_ts[:,1:(1+ts[i])*7], y, maxiters=100) 
    yhat = impute(loss, X_ts[:,1:(1+ts[i])*7]*w)
    fb_miss_rate = (n - sum(yhat .== y)) / n
    println("Facebook classification rate of time series lag " * string(ts[i]) * "days = " * string(fb_miss_rate))
end

Facebook classification rate of time series lag 1days = 0.42857142857142855
Facebook classification rate of time series lag 5days = 0.40476190476190477
Facebook classification rate of time series lag 10days = 0.2857142857142857
Facebook classification rate of time series lag 15days = 0.23809523809523808
Facebook classification rate of time series lag 22days = 0.23809523809523808


In [178]:
# Splitting dataset into train and test set
X_train = X[1:21,:]
y_train = y[1:21]
X_test = X[21:42,:]
y_test = y[21:42];
n = 21
loss = 1/n * HingeLoss()
λ = .1
reg = λ*QuadReg()
w = proxgrad(loss, reg, X_train, y_train, maxiters=100) 
yhat_train = impute(loss, X_train*w)
fb_miss_rate_train = (n - sum(yhat_train .== y_train)) / n
yhat_test = impute(loss, X_test*w)
fb_miss_rate_test = (n + 1 - sum(yhat_test .== y_test)) / (n + 1)
println("Facebook classification rate on train set = " * string(fb_miss_rate_train))
print("Facebook classification rate on test set = " * string(fb_miss_rate_test))

Facebook classification rate on train set = 0.5714285714285714
Facebook classification rate on test set = 0.5909090909090909

In [186]:
# Hinge Loss + QuadReg (λ = .1) for time series lags on 1, 5, 10, 15, 22 days on the whole dataset
ts = [1 5 10 15 22]
X_train = X_ts[1:21,:]
y_train = y[1:21]
X_test = X_ts[21:42,:]
y_test = y[21:42];
for i in (1:length(ts))
    n = 21
    loss = 1/n * HingeLoss()
    λ = .1
    reg = λ*QuadReg()
    w = proxgrad(loss, reg, X_train[:,1:(1+ts[i])*7], y_train, maxiters = 1000) 
    yhat_train = impute(loss, X_train[:,1:(1+ts[i])*7]*w)
    fb_miss_rate_train = (n - sum(yhat_train .== y_train)) / n
    yhat_test = impute(loss, X_test[:,1:(1+ts[i])*7]*w)
    fb_miss_rate_test = (n + 1 - sum(yhat_test .== y_test)) / (n + 1)
    println("Facebook classification rate of time series lag on train set " * string(ts[i]) * "days = " * string(fb_miss_rate_train))
    println("Facebook classification rate of time series lag on test set " * string(ts[i]) * "days = " * string(fb_miss_rate_test))
    println()
end

Facebook classification rate of time series lag on train set 1days = 0.5714285714285714
Facebook classification rate of time series lag on test set 1days = 0.5909090909090909

Facebook classification rate of time series lag on train set 5days = 0.23809523809523808
Facebook classification rate of time series lag on test set 5days = 0.5909090909090909

Facebook classification rate of time series lag on train set 10days = 0.09523809523809523
Facebook classification rate of time series lag on test set 10days = 0.5454545454545454

Facebook classification rate of time series lag on train set 15days = 0.14285714285714285
Facebook classification rate of time series lag on test set 15days = 0.5454545454545454

Facebook classification rate of time series lag on train set 22days = 0.09523809523809523
Facebook classification rate of time series lag on test set 22days = 0.5909090909090909



# Amazon

In [197]:
y = convert(Array, amzn[23:64,:Open] .> amzn[22:63,:Open])
amzn = convert(Matrix, amzn[:, :])
amzn = [amzn[:,3:4] amzn[:, 6:10]]
amzn = convert(Array{Float64,2}, amzn)
X = amzn[23:64,:];

In [207]:
# Hinge Loss + QuadReg (λ = .1) on the whole dataset
n = 42
loss = 1/n * HingeLoss()
λ = .1
reg = λ*QuadReg()
w = proxgrad(loss, reg, X, y, maxiters=100) 
yhat = impute(loss, X*w)
amzn_miss_rate = (n - sum(yhat .== y)) / n
print("Amazon classification rate = " * string(amzn_miss_rate))

Amazon classification rate = 0.5

In [208]:
# Creating Timeseries matrix
X_ts = X
for i in (1:22)
    X_ts = [X_ts amzn[23-i:64-i,:]]
end

In [209]:
# Hinge Loss + QuadReg (λ = .1) for time series lags on 1, 5, 10, 15, 22 days on the whole dataset
ts = [1 5 10 15 22]
for i in (1:length(ts))
    n = 42
    loss = 1/n * HingeLoss()
    λ = .1
    reg = λ*QuadReg()
    w = proxgrad(loss, reg, X_ts[:,1:(1+ts[i])*7], y, maxiters=100) 
    yhat = impute(loss, X_ts[:,1:(1+ts[i])*7]*w)
    amzn_miss_rate = (n - sum(yhat .== y)) / n
    println("Amazon classification rate of time series lag " * string(ts[i]) * "days = " * string(amzn_miss_rate))
end

Amazon classification rate of time series lag 1days = 0.5
Amazon classification rate of time series lag 5days = 0.42857142857142855
Amazon classification rate of time series lag 10days = 0.30952380952380953
Amazon classification rate of time series lag 15days = 0.3333333333333333
Amazon classification rate of time series lag 22days = 0.30952380952380953


In [210]:
# Splitting dataset into train and test set
X_train = X[1:21,:]
y_train = y[1:21]
X_test = X[21:42,:]
y_test = y[21:42];
n = 21
loss = 1/n * HingeLoss()
λ = .1
reg = λ*QuadReg()
w = proxgrad(loss, reg, X_train, y_train, maxiters=100) 
yhat_train = impute(loss, X_train*w)
amzn_miss_rate_train = (n - sum(yhat_train .== y_train)) / n
yhat_test = impute(loss, X_test*w)
amzn_miss_rate_test = (n + 1 - sum(yhat_test .== y_test)) / (n + 1)
println("Amazon classification rate on train set = " * string(amzn_miss_rate_train))
print("Amazon classification rate on test set = " * string(amzn_miss_rate_test))

Amazon classification rate on train set = 0.5238095238095238
Amazon classification rate on test set = 0.5

In [211]:
# Hinge Loss + QuadReg (λ = .1) for time series lags on 1, 5, 10, 15, 22 days on the whole dataset
ts = [1 5 10 15 22]
X_train = X_ts[1:21,:]
y_train = y[1:21]
X_test = X_ts[21:42,:]
y_test = y[21:42];
for i in (1:length(ts))
    n = 21
    loss = 1/n * HingeLoss()
    λ = .1
    reg = λ*QuadReg()
    w = proxgrad(loss, reg, X_train[:,1:(1+ts[i])*7], y_train, maxiters = 1000) 
    yhat_train = impute(loss, X_train[:,1:(1+ts[i])*7]*w)
    fb_miss_rate_train = (n - sum(yhat_train .== y_train)) / n
    yhat_test = impute(loss, X_test[:,1:(1+ts[i])*7]*w)
    fb_miss_rate_test = (n + 1 - sum(yhat_test .== y_test)) / (n + 1)
    println("Amazon classification rate of time series lag on train set " * string(ts[i]) * "days = " * string(fb_miss_rate_train))
    println("Amazon classification rate of time series lag on test set " * string(ts[i]) * "days = " * string(fb_miss_rate_test))
    println()
end

Amazon classification rate of time series lag on train set 1days = 0.38095238095238093
Amazon classification rate of time series lag on test set 1days = 0.5454545454545454

Amazon classification rate of time series lag on train set 5days = 0.2857142857142857
Amazon classification rate of time series lag on test set 5days = 0.5

Amazon classification rate of time series lag on train set 10days = 0.14285714285714285
Amazon classification rate of time series lag on test set 10days = 0.45454545454545453

Amazon classification rate of time series lag on train set 15days = 0.23809523809523808
Amazon classification rate of time series lag on test set 15days = 0.5454545454545454

Amazon classification rate of time series lag on train set 22days = 0.47619047619047616
Amazon classification rate of time series lag on test set 22days = 0.5



# Apple

In [212]:
y = convert(Array, aapl[23:64,:Open] .> aapl[22:63,:Open])
aapl = convert(Matrix, aapl[:, :])
aapl = [aapl[:,3:4] aapl[:, 6:10]]
aapl = convert(Array{Float64,2}, aapl)
X = aapl[23:64,:];

In [226]:
# Hinge Loss + QuadReg (λ = .1) on the whole dataset
n = 42
loss = 1/n * HingeLoss()
λ = 0.1
reg = λ*QuadReg()
w = proxgrad(loss, reg, X, y, maxiters=100) 
yhat = impute(loss, X*w)
aapl_miss_rate = (n - sum(yhat .== y)) / n
print("Apple classification rate = " * string(aapl_miss_rate))

Apple classification rate = 0.5

In [231]:
# Creating Timeseries matrix
X_ts = X
for i in (1:22)
    X_ts = [X_ts aapl[23-i:64-i,:]]
end

In [232]:
# Hinge Loss + QuadReg (λ = .1) for time series lags on 1, 5, 10, 15, 22 days on the whole dataset
ts = [1 5 10 15 22]
for i in (1:length(ts))
    n = 42
    loss = 1/n * HingeLoss()
    λ = .1
    reg = λ*QuadReg()
    w = proxgrad(loss, reg, X_ts[:,1:(1+ts[i])*7], y, maxiters=100) 
    yhat = impute(loss, X_ts[:,1:(1+ts[i])*7]*w)
    amzn_miss_rate = (n - sum(yhat .== y)) / n
    println("Apple classification rate of time series lag " * string(ts[i]) * "days = " * string(amzn_miss_rate))
end

Apple classification rate of time series lag 1days = 0.5
Apple classification rate of time series lag 5days = 0.5
Apple classification rate of time series lag 10days = 0.38095238095238093
Apple classification rate of time series lag 15days = 0.30952380952380953
Apple classification rate of time series lag 22days = 0.23809523809523808


In [233]:
# Splitting dataset into train and test set
X_train = X[1:21,:]
y_train = y[1:21]
X_test = X[21:42,:]
y_test = y[21:42];
n = 21
loss = 1/n * HingeLoss()
λ = .1
reg = λ*QuadReg()
w = proxgrad(loss, reg, X_train, y_train, maxiters=100) 
yhat_train = impute(loss, X_train*w)
amzn_miss_rate_train = (n - sum(yhat_train .== y_train)) / n
yhat_test = impute(loss, X_test*w)
amzn_miss_rate_test = (n + 1 - sum(yhat_test .== y_test)) / (n + 1)
println("Apple classification rate on train set = " * string(amzn_miss_rate_train))
print("Apple classification rate on test set = " * string(amzn_miss_rate_test))

Apple classification rate on train set = 0.38095238095238093
Apple classification rate on test set = 0.5909090909090909

In [234]:
# Hinge Loss + QuadReg (λ = .1) for time series lags on 1, 5, 10, 15, 22 days on the whole dataset
ts = [1 5 10 15 22]
X_train = X_ts[1:21,:]
y_train = y[1:21]
X_test = X_ts[21:42,:]
y_test = y[21:42];
for i in (1:length(ts))
    n = 21
    loss = 1/n * HingeLoss()
    λ = .1
    reg = λ*QuadReg()
    w = proxgrad(loss, reg, X_train[:,1:(1+ts[i])*7], y_train, maxiters = 1000) 
    yhat_train = impute(loss, X_train[:,1:(1+ts[i])*7]*w)
    fb_miss_rate_train = (n - sum(yhat_train .== y_train)) / n
    yhat_test = impute(loss, X_test[:,1:(1+ts[i])*7]*w)
    fb_miss_rate_test = (n + 1 - sum(yhat_test .== y_test)) / (n + 1)
    println("Apple classification rate of time series lag on train set " * string(ts[i]) * "days = " * string(fb_miss_rate_train))
    println("Apple classification rate of time series lag on test set " * string(ts[i]) * "days = " * string(fb_miss_rate_test))
    println()
end

Apple classification rate of time series lag on train set 1days = 0.38095238095238093
Apple classification rate of time series lag on test set 1days = 0.5909090909090909

Apple classification rate of time series lag on train set 5days = 0.38095238095238093
Apple classification rate of time series lag on test set 5days = 0.5909090909090909

Apple classification rate of time series lag on train set 10days = 0.38095238095238093
Apple classification rate of time series lag on test set 10days = 0.5909090909090909

Apple classification rate of time series lag on train set 15days = 0.38095238095238093
Apple classification rate of time series lag on test set 15days = 0.5909090909090909

Apple classification rate of time series lag on train set 22days = 0.38095238095238093
Apple classification rate of time series lag on test set 22days = 0.5909090909090909



# Netflix

In [235]:
y = convert(Array, nflx[23:64,:Open] .> nflx[22:63,:Open])
nflx = convert(Matrix, nflx[:, :])
nflx = [nflx[:,3:4] nflx[:, 6:10]]
nflx = convert(Array{Float64,2}, nflx)
X = nflx[23:64,:];

In [237]:
# Hinge Loss + QuadReg (λ = .1) on the whole dataset
n = 42
loss = 1/n * HingeLoss()
λ = 0.1
reg = λ*QuadReg()
w = proxgrad(loss, reg, X, y, maxiters=100) 
yhat = impute(loss, X*w)
aapl_miss_rate = (n - sum(yhat .== y)) / n
print("Netflix classification rate = " * string(aapl_miss_rate))

Netflix classification rate = 0.5

In [238]:
# Creating Timeseries matrix
X_ts = X
for i in (1:22)
    X_ts = [X_ts nflx[23-i:64-i,:]]
end

In [239]:
# Hinge Loss + QuadReg (λ = .1) for time series lags on 1, 5, 10, 15, 22 days on the whole dataset
ts = [1 5 10 15 22]
for i in (1:length(ts))
    n = 42
    loss = 1/n * HingeLoss()
    λ = .1
    reg = λ*QuadReg()
    w = proxgrad(loss, reg, X_ts[:,1:(1+ts[i])*7], y, maxiters=100) 
    yhat = impute(loss, X_ts[:,1:(1+ts[i])*7]*w)
    amzn_miss_rate = (n - sum(yhat .== y)) / n
    println("Netflix classification rate of time series lag " * string(ts[i]) * "days = " * string(amzn_miss_rate))
end

Netflix classification rate of time series lag 1days = 0.5
Netflix classification rate of time series lag 5days = 0.42857142857142855
Netflix classification rate of time series lag 10days = 0.40476190476190477
Netflix classification rate of time series lag 15days = 0.30952380952380953
Netflix classification rate of time series lag 22days = 0.2619047619047619


In [240]:
# Splitting dataset into train and test set
X_train = X[1:21,:]
y_train = y[1:21]
X_test = X[21:42,:]
y_test = y[21:42];
n = 21
loss = 1/n * HingeLoss()
λ = .1
reg = λ*QuadReg()
w = proxgrad(loss, reg, X_train, y_train, maxiters=100) 
yhat_train = impute(loss, X_train*w)
amzn_miss_rate_train = (n - sum(yhat_train .== y_train)) / n
yhat_test = impute(loss, X_test*w)
amzn_miss_rate_test = (n + 1 - sum(yhat_test .== y_test)) / (n + 1)
println("Netflix classification rate on train set = " * string(amzn_miss_rate_train))
print("Netflix classification rate on test set = " * string(amzn_miss_rate_test))

Netflix classification rate on train set = 0.47619047619047616
Netflix classification rate on test set = 0.5454545454545454

In [241]:
# Hinge Loss + QuadReg (λ = .1) for time series lags on 1, 5, 10, 15, 22 days on the whole dataset
ts = [1 5 10 15 22]
X_train = X_ts[1:21,:]
y_train = y[1:21]
X_test = X_ts[21:42,:]
y_test = y[21:42];
for i in (1:length(ts))
    n = 21
    loss = 1/n * HingeLoss()
    λ = .1
    reg = λ*QuadReg()
    w = proxgrad(loss, reg, X_train[:,1:(1+ts[i])*7], y_train, maxiters = 1000) 
    yhat_train = impute(loss, X_train[:,1:(1+ts[i])*7]*w)
    fb_miss_rate_train = (n - sum(yhat_train .== y_train)) / n
    yhat_test = impute(loss, X_test[:,1:(1+ts[i])*7]*w)
    fb_miss_rate_test = (n + 1 - sum(yhat_test .== y_test)) / (n + 1)
    println("Netflix classification rate of time series lag on train set " * string(ts[i]) * "days = " * string(fb_miss_rate_train))
    println("Netflix classification rate of time series lag on test set " * string(ts[i]) * "days = " * string(fb_miss_rate_test))
    println()
end

Netflix classification rate of time series lag on train set 1days = 0.47619047619047616
Netflix classification rate of time series lag on test set 1days = 0.5454545454545454

Netflix classification rate of time series lag on train set 5days = 0.38095238095238093
Netflix classification rate of time series lag on test set 5days = 0.5

Netflix classification rate of time series lag on train set 10days = 0.38095238095238093
Netflix classification rate of time series lag on test set 10days = 0.5

Netflix classification rate of time series lag on train set 15days = 0.38095238095238093
Netflix classification rate of time series lag on test set 15days = 0.5

Netflix classification rate of time series lag on train set 22days = 0.23809523809523808
Netflix classification rate of time series lag on test set 22days = 0.5



# Google

In [242]:
y = convert(Array, goog[23:64,:Open] .> goog[22:63,:Open])
goog = convert(Matrix, goog[:, :])
goog = [goog[:,3:4] goog[:, 6:10]]
goog = convert(Array{Float64,2}, goog)
X = goog[23:64,:];

In [243]:
# Hinge Loss + QuadReg (λ = .1) on the whole dataset
n = 42
loss = 1/n * HingeLoss()
λ = 0.1
reg = λ*QuadReg()
w = proxgrad(loss, reg, X, y, maxiters=100) 
yhat = impute(loss, X*w)
aapl_miss_rate = (n - sum(yhat .== y)) / n
print("Google classification rate = " * string(aapl_miss_rate))

Google classification rate = 0.4523809523809524

In [244]:
# Creating Timeseries matrix
X_ts = X
for i in (1:22)
    X_ts = [X_ts goog[23-i:64-i,:]]
end

In [245]:
# Hinge Loss + QuadReg (λ = .1) for time series lags on 1, 5, 10, 15, 22 days on the whole dataset
ts = [1 5 10 15 22]
for i in (1:length(ts))
    n = 42
    loss = 1/n * HingeLoss()
    λ = .1
    reg = λ*QuadReg()
    w = proxgrad(loss, reg, X_ts[:,1:(1+ts[i])*7], y, maxiters=100) 
    yhat = impute(loss, X_ts[:,1:(1+ts[i])*7]*w)
    amzn_miss_rate = (n - sum(yhat .== y)) / n
    println("Google classification rate of time series lag " * string(ts[i]) * "days = " * string(amzn_miss_rate))
end

Google classification rate of time series lag 1days = 0.4523809523809524
Google classification rate of time series lag 5days = 0.4523809523809524
Google classification rate of time series lag 10days = 0.42857142857142855
Google classification rate of time series lag 15days = 0.4523809523809524
Google classification rate of time series lag 22days = 0.4523809523809524


In [247]:
# Splitting dataset into train and test set
X_train = X[1:21,:]
y_train = y[1:21]
X_test = X[21:42,:]
y_test = y[21:42];
n = 21
loss = 1/n * HingeLoss()
λ = .1
reg = λ*QuadReg()
w = proxgrad(loss, reg, X_train, y_train, maxiters=100) 
yhat_train = impute(loss, X_train*w)
amzn_miss_rate_train = (n - sum(yhat_train .== y_train)) / n
yhat_test = impute(loss, X_test*w)
amzn_miss_rate_test = (n + 1 - sum(yhat_test .== y_test)) / (n + 1)
println("Google classification rate on train set = " * string(amzn_miss_rate_train))
print("Google classification rate on test set = " * string(amzn_miss_rate_test))

Google classification rate on train set = 0.42857142857142855
Google classification rate on test set = 0.45454545454545453

In [248]:
# Hinge Loss + QuadReg (λ = .1) for time series lags on 1, 5, 10, 15, 22 days on the whole dataset
ts = [1 5 10 15 22]
X_train = X_ts[1:21,:]
y_train = y[1:21]
X_test = X_ts[21:42,:]
y_test = y[21:42];
for i in (1:length(ts))
    n = 21
    loss = 1/n * HingeLoss()
    λ = .1
    reg = λ*QuadReg()
    w = proxgrad(loss, reg, X_train[:,1:(1+ts[i])*7], y_train, maxiters = 1000) 
    yhat_train = impute(loss, X_train[:,1:(1+ts[i])*7]*w)
    fb_miss_rate_train = (n - sum(yhat_train .== y_train)) / n
    yhat_test = impute(loss, X_test[:,1:(1+ts[i])*7]*w)
    fb_miss_rate_test = (n + 1 - sum(yhat_test .== y_test)) / (n + 1)
    println("Google classification rate of time series lag on train set " * string(ts[i]) * "days = " * string(fb_miss_rate_train))
    println("Google classification rate of time series lag on test set " * string(ts[i]) * "days = " * string(fb_miss_rate_test))
    println()
end

Google classification rate of time series lag on train set 1days = 0.42857142857142855
Google classification rate of time series lag on test set 1days = 0.5

Google classification rate of time series lag on train set 5days = 0.19047619047619047
Google classification rate of time series lag on test set 5days = 0.5

Google classification rate of time series lag on train set 10days = 0.3333333333333333
Google classification rate of time series lag on test set 10days = 0.5909090909090909

Google classification rate of time series lag on train set 15days = 0.42857142857142855
Google classification rate of time series lag on test set 15days = 0.45454545454545453

Google classification rate of time series lag on train set 22days = 0.0
Google classification rate of time series lag on test set 22days = 0.45454545454545453

