# Introduction to Packages

In [None]:
# Install the packages this notebook uses: data handling (DataFrames),
# plotting (Plots, PyPlot, StatPlots) and optimization (JuMP, Cbc).
for pkg in ("DataFrames", "Plots", "PyPlot", "StatPlots", "JuMP", "Cbc")
    Pkg.add(pkg)
end

# Creating an index fund

The goal of this project is to define an index fund that tracks the Dow Jones. That is, we want to select a few stocks of the index, together with weights, that show a behavior similar to that of the overall index.

We start with price data of all the Dow Jones stocks from 2016. From the average prices, we define the weights with which the stocks enter the index.

## Loading the price data

The data is provided in a file using comma-separated values and three columns:

In [None]:
;head dowjones2016.csv

Julia provides a function to read csv files into arrays:

In [None]:
?readcsv

In [None]:
# Read the CSV file into an array.
data = readcsv("dowjones2016.csv");
# Show the first five rows, all columns.
data[1:5,:]

But we will use the DataFrames package for easier processing.

In [None]:
using DataFrames

In [None]:
?readtable

In [None]:
# Load the same file as a DataFrame.
df = readtable("dowjones2016.csv")
# Show the first four rows, all columns.
df[1:4, :]

We can now access the columns by name:

In [None]:
# Select a single column by its name, given as a symbol.
df[:price]

Let's compute mean prices for the stocks, using a groupby-and-aggregate approach.

In [None]:
?by

In [None]:
# Group the rows by stock symbol and reduce every group to its mean price.
avg = by(d -> DataFrame(avgprice = mean(d[:price])), df, :symbol)

avg[1:4, :]

We can now use these averages to compute weights.

In [None]:
# Each stock's weight is its average price divided by the sum of all average prices.
weights = let avgprice = avg[:avgprice]
    DataFrame(symbol = avg[:symbol], weight = avgprice / sum(avgprice))
end

We can also _pivot_ the table into a two-way format.

In [None]:
?unstack

In [None]:
# Pivot the long table: one row per date, one column per symbol, prices as cell values.
#                    rows   columns  data
prices = unstack(df, :date, :symbol, :price)

# Show the top-left corner of the pivoted table.
prices[1:4, 1:4]

In [None]:
# Attach each stock's weight to its price rows by matching on the `symbol` column.
joined = join(df, weights, on=:symbol)

# Show the first four rows, all columns.
joined[1:4, :]

In [None]:
# New column: the price of each row scaled by the weight of its stock.
joined[:contribution] = joined[:weight] .* joined[:price]

In [None]:
# Show the first four rows again, now including the `contribution` column.
joined[1:4, :]

In [None]:
# The index value of a date is the sum of the weighted prices of all stocks on that date.
index = by(d -> DataFrame(value = sum(d[:contribution])), joined, :date)

index[1:4, :]

## Visualizing the time series

In [None]:
using Plots      # general plotting
using StatPlots  # for DataFrames integration

pyplot()         # backend, based on Python's matplotlib

In [None]:
# Plot settings handed to `with` for the two plotting calls in its block.
with(grid=false, legend=false, xticks=false, ylim=(0,300)) do
    # one faint grey line per stock symbol
    plot(df, :date, :price, group=:symbol, c=:grey, alpha=0.4)
    # add the computed index on top, drawn with a thicker line
    plot!(index, :date, :value, linewidth=2)
end

In [None]:
# Bar chart of the weight per symbol; bars are colored by weight, x labels are rotated.
bar(weights, :symbol, :weight, xrotation=40, color=:weight, grid=false)

## Picking stocks

In [None]:
using JuMP # modeling
using Cbc  # solver backend

In [None]:
# Stock names as symbols; they are used to index the columns of `prices`.
syms = [Symbol(name) for name in weights[:symbol]]
# One entry per row of the pivoted price table.
days = 1:size(prices, 1)

@show size(syms)
@show size(days);

In [None]:
"""
    find_fund(maxstocks; timelimit=10.0, gaplimit=0.01, lastday=200)

Select at most `maxstocks` stocks, together with portfolio fractions, whose combined
price follows the index as closely as possible on the days `1:lastday`.

The model is built from the notebook globals `syms` (stock symbols), `prices` (one row
per day, one column per symbol) and `index` (column `:value`, one row per day). It
minimizes the summed absolute deviation between the fund and the index.

# Arguments
- `maxstocks`: upper bound on the number of stocks in the fund; must be at least 1,
  because the fractions have to total 1.

# Keywords
- `timelimit`: passed to `CbcSolver` as `seconds`.
- `gaplimit`: passed to `CbcSolver` as `ratioGap`.
- `lastday`: the fit uses rows `1:lastday` of `prices` and `index`.

# Returns
The values of the `fraction` variables, indexed by the entries of `syms`. The solver
status is printed with `@show`.

# Throws
- `ArgumentError` if `maxstocks < 1` or if `lastday` is not a valid row of both
  `prices` and `index`.
"""
function find_fund(maxstocks; timelimit=10.0, gaplimit=0.01, lastday=200)
    # With fewer than one stock the fractions cannot total 1: the model is infeasible.
    maxstocks ≥ 1 ||
        throw(ArgumentError("maxstocks must be at least 1, got $maxstocks"))

    # Fail early instead of with an indexing error inside the constraint loop.
    available = min(size(prices, 1), size(index, 1))
    1 ≤ lastday ≤ available ||
        throw(ArgumentError("lastday must be in 1:$available, got $lastday"))

    # local range; deliberately shadows the global `days`
    days = 1:lastday

    fund = Model(solver=CbcSolver(seconds=timelimit, ratioGap=gaplimit))

    # decisions
    @variable(fund, pick[syms], Bin)       # is stock included?
    @variable(fund, fraction[syms] ≥ 0)    # what part of the portfolio

    # auxiliary variables
    @variable(fund, Δ⁺[days] ≥ 0) # positive slack: fund below the index
    @variable(fund, Δ⁻[days] ≥ 0) # negative slack: fund above the index

    # fit to Dow Jones index
    for d in days
        @constraint(fund, sum(prices[d,s] * fraction[s] for s in syms) == index[d, :value] + Δ⁺[d] - Δ⁻[d])
    end

    # can only use stock if picked
    for s in syms
        @constraint(fund, fraction[s] ≤ pick[s])
    end

    # few stocks allowed
    @constraint(fund, sum(pick[s] for s in syms) ≤ maxstocks)

    # (why do we total to 1?)
    # hint: the fractions are shares of one portfolio; this also keeps every
    # fraction at most 1, so `fraction[s] ≤ pick[s]` does not restrict picked stocks
    @constraint(fund, sum(fraction[s] for s in syms) == 1.0)

    # minimize the absolute violation (L1 norm)
    @objective(fund, :Min, sum(Δ⁺[d] + Δ⁻[d] for d in days))

    status = solve(fund)
    @show status

    return getvalue(fraction)
end

In [None]:
# Fit a fund of at most 3 stocks on the first 100 days, with a solver limit of 5 seconds.
sol = find_fund(3, timelimit=5, lastday=100)

In [None]:
# Price series of the fund: every stock's price column scaled by its fraction, summed up.
solfund = sum(sol[s] * prices[:, s] for s in syms);

In [None]:
# Compare the index with the fund found above; this time the legend is kept.
with(grid=false, xticks=false, ylim=(0,300)) do
    # the index computed from all stocks
    plot(index, :date, :value, label="Dow Jones")
    # the fund built from the selected stocks, added to the same plot
    plot!(solfund, label="Index Fund")
end