In [60]:
using CSV, Plots, Random, LinearAlgebra, Statistics, DataFrames

In [61]:
sentiments = CSV.read("sentiment.csv")[1]
zoom = CSV.read("zoom_adjclose.csv")[1]
facebook = CSV.read("facebook_adjclose.csv")[1]
amazon = CSV.read("amazon_adjclose.csv")[1]
cisco = CSV.read("cisco_adjclose.csv")[1]
netflix = CSV.read("netflix_adjclose.csv")[1]
intel = CSV.read("intel_adjclose.csv")[1]
tesla = CSV.read("tesla_adjclose.csv")[1]
alibaba = CSV.read("alibaba_adjclose.csv")[1]
microsoft = CSV.read("microsoft_adjclose.csv")[1]
nvidia = CSV.read("nvidia_adjclose.csv")[1];

In [62]:
function abs_err(y,pred)   # absolute error
    return mean(abs.(y-pred))
end

abs_err (generic function with 1 method)

In [63]:
num_of_model = 3
split = [43, 43+44, 43+44*2, 43+44*3, 43+44*4, 43+44*5]  # index for 5-fold

function select_best_model(stock)  # using k-fold cross validation
    err = zeros(num_of_model,5)
    w = zeros(9)
    
    for i in 1:5
        w_set = []
        # setting train and validation set
        split_cur = split[i]
        split2 = Int(round(split_cur*4/5))
        stock_train = stock[1:split2]
        stock_val = stock[split2 + 1:split_cur]
        sent_train = sentiments[1:split2]
        sent_val = sentiments[split2 + 1:split_cur]
    
        # Naive Model, w[1:2]
        Xtrain = [sent_train ones(length(sent_train))]
        Xval = [sent_val ones(length(sent_val))]
        Ytrain = stock_train
        Yval = stock_val
    
        w_cur = Xtrain\Ytrain
        w_set = vcat(w_set, w_cur)
        pred = Xval*w_cur
        err[1,i] = abs_err(Yval,pred)
    
        # AR1 Model, w[3:5]
        Xtrain = [sent_train[2:end] stock_train[1:end-1] ones(length(sent_train)-1)]
        Xval = [sent_val[2:end] stock_val[1:end-1] ones(length(sent_val)-1)]
        Ytrain = stock_train[2:end]
        Yval = stock_val[2:end]
    
        w_cur = Xtrain\Ytrain
        w_set = vcat(w_set, w_cur)
        pred = Xval*w_cur
        err[2,i] = abs_err(Yval,pred)
    
        # AR2 Model, w[6:9]
        Xtrain = [sent_train[3:end] stock_train[2:end-1] stock_train[1:end-2] ones(length(sent_train)-2)]
        Xval = [sent_val[3:end] stock_val[2:end-1] stock_val[1:end-2] ones(length(sent_val)-2)]
        Ytrain = stock_train[3:end]
        Yval = stock_val[3:end]
    
        w_cur = Xtrain\Ytrain
        w_set = vcat(w_set, w_cur)
        pred = Xval*w_cur
        err[3,i] = abs_err(Yval,pred)
        w = hcat(w,w_set)
    end
    
    avg_err = mean(err, dims=2)   # avg_abs_err
    final_model = findmin(avg_err)[2][1]   # select model with min abs error
    test_err = zeros(5)
    model = ["Naive Model", "AR1 Model", "AR2 Model"]
    
    for i in 1:5
        stock_test = stock[split[2]-44:split[2]]
        sent_test = sentiments[split[2]-44:split[2]]
    
        if final_model == 1   # Naive Model
            Xtest = [sent_test ones(length(sent_test))]
            Ytest = stock_test
    
            w_cur = w[1:2, i+1]
            pred = Xtest*w_cur
            test_err[i] = abs_err(Ytest,pred)
        
        elseif final_model == 2   # AR1 Model
            Xtest = [sent_test[2:end] stock_test[1:end-1] ones(length(stock_test)-1)]
            Ytest = stock_test[2:end]
        
            w_cur = w[3:5, i+1]
            pred = Xtest*w_cur
            test_err[i] = abs_err(Ytest,pred)
        
        elseif final_model == 3   # AR2 Model
            Xtest = [sent_test[3:end] stock_test[2:end-1] stock_test[1:end-2] ones(length(stock_test)-2)]
            Ytest = stock_test[3:end]
        
            w_cur = w[6:9, i+1]
            pred = Xtest*w_cur
            test_err[i] = abs_err(Ytest,pred)
        
        else
            print("error")
        end
    end
    string("selected model: ", model[final_model], ", estimated error is: ", mean(test_err))
end

select_best_model (generic function with 1 method)

In [64]:
print(string("Zoom's ", select_best_model(zoom)))

Zoom's selected model: AR1 Model, estimated error is: 5.262542472978059

In [65]:
print(string("Facebook's ", select_best_model(facebook)))

Facebook's selected model: AR2 Model, estimated error is: 3.60482317762198

In [66]:
print(string("Amazon's ", select_best_model(amazon)))

Amazon's selected model: AR1 Model, estimated error is: 38.69454988436996

In [67]:
print(string("Cisco's ", select_best_model(cisco)))

Cisco's selected model: AR1 Model, estimated error is: 0.7080081210430337

In [68]:
print(string("Netflix's ", select_best_model(netflix)))

Netflix's selected model: AR2 Model, estimated error is: 7.438791900378485

In [69]:
print(string("Intel's ", select_best_model(intel)))

Intel's selected model: AR2 Model, estimated error is: 1.1471729741465313

In [70]:
print(string("Tesla's ", select_best_model(tesla)))

Tesla's selected model: AR1 Model, estimated error is: 4.469760184403543

In [71]:
print(string("Alibaba's ", select_best_model(alibaba)))

Alibaba's selected model: AR2 Model, estimated error is: 2.5954142978046635

In [72]:
print(string("Microsoft's ", select_best_model(microsoft)))

Microsoft's selected model: AR2 Model, estimated error is: 2.9370369678142176

In [73]:
print(string("Nvidia's ", select_best_model(nvidia)))

Nvidia's selected model: AR2 Model, estimated error is: 5.976192919151061