# Equities Pair Trading with Kalman Filter

## Data Preprocessing

In [1]:
using Feather, DataFrames, CSV, DelimitedFiles

#### The Financials Sector

In [2]:
# Load the financial-sector universe and pull out its ticker column.
fin = CSV.read("financial_stocks.csv")
fin_ticker_raw = fin.Ticker
# Number of candidate tickers before any filtering.
length(fin_ticker_raw)

1215

In [3]:
# Reference bar file: its row count is used below as the number of trading days.
df = Feather.read("bar_data_1D/allDaily-A.feather")
# (rows, columns) of the reference table.
(nrow(df), ncol(df))

(359, 6)

In [4]:
# First and last date present in the reference bar file.
first(df.Date), last(df.Date)

("2017-06-26", "2018-11-26")

In [5]:
# One row per candidate ticker, one column per row of the reference bar file;
# rows that are never filled in stay all-zero.
fin_data_raw = zeros(length(fin_ticker_raw), size(df, 1))

1215×359 Array{Float64,2}:
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0

In [6]:
# Fill row `i` of `fin_data_raw` with the daily closes of the i-th financial ticker.
# A ticker keeps its all-zero row (and is dropped by the later non-zero filter) when
# its bar file is absent, unreadable, or does not hold one close per matrix column.
for (i, ticker) in enumerate(fin_ticker_raw)
    local path = string("bar_data_1D/allDaily-", ticker, ".feather")
    if !isfile(path)
        # Expected for tickers without bar data; too common to warn about each one.
        @debug "No bar file; row left as zeros" ticker path
        continue
    end
    try
        # Bind to a loop-local name so the reference table `df`, which other cells
        # use for sizing, is not replaced by whichever file happens to be read last.
        local bars = Feather.read(path)
        # Materialise as Float64 first: a value that cannot be converted then fails
        # before any element of the matrix row is written, so no row is half-filled.
        local closes = Vector{Float64}(bars.Close)
        if length(closes) == size(fin_data_raw, 2)
            fin_data_raw[i, :] = closes
        else
            @debug "Close series length differs from matrix width; row left as zeros" ticker length(closes)
        end
    catch err
        # Still best-effort (the ticker is skipped), but no longer silent.
        @warn "Could not load bar data; row left as zeros" ticker exception = err
    end
end

In [7]:
# Display the matrix after loading; rows that are still all-zero were not filled in.
fin_data_raw

1215×359 Array{Float64,2}:
   0.0     0.0     0.0     0.0     0.0   …    0.0     0.0     0.0     0.0 
   0.0     0.0     0.0     0.0     0.0        0.0     0.0     0.0     0.0 
   0.0     0.0     0.0     0.0     0.0        0.0     0.0     0.0     0.0 
  23.6    23.25   23.35   23.3    23.65      29.35   29.88   29.99   29.92
  46.35   47.05   48.15   48.75   48.2       42.46   42.46   42.49   43.16
  13.67   13.71   13.37   13.5    13.59  …    6.32    6.43    6.46    6.37
  33.05   32.85   34.4    34.05   34.0       42.11   42.81   43.22   42.51
   0.0     0.0     0.0     0.0     0.0        0.0     0.0     0.0     0.0 
   0.0     0.0     0.0     0.0     0.0        0.0     0.0     0.0     0.0 
   0.0     0.0     0.0     0.0     0.0        0.0     0.0     0.0     0.0 
   0.0     0.0     0.0     0.0     0.0   …    0.0     0.0     0.0     0.0 
  29.85   29.7    30.0    30.55   30.5       37.56   36.52   35.9    36.38
   0.0     0.0     0.0     0.0     0.0        0.0     0.0     0.0     0.0

In [8]:
# Number of tickers whose first close is non-zero, i.e. rows that were actually loaded.
# `count` replaces the manual accumulator loop, so the result no longer depends on
# notebook soft-scope rules for updating a global inside a top-level `for`.
cnt = count(!iszero, @view fin_data_raw[:, 1])
cnt

465

In [9]:
# Keep only the tickers whose close series was loaded (non-zero first close).
# Row order follows `fin_ticker_raw`, so `fin_data[k, :]` belongs to `fin_ticker[k]`.
# The width comes from `fin_data_raw` itself rather than from the global `df`,
# which the loading loop may have rebound to a table with a different row count.
kept_rows = findall(!iszero, @view fin_data_raw[:, 1])
fin_data = fin_data_raw[kept_rows, :]
fin_ticker = String[fin_ticker_raw[i] for i in kept_rows]
# `j` used to be the loop's write cursor; it is kept as "rows kept + 1" so the
# existing `j == cnt + 1` sanity check still validates the count.
j = length(kept_rows) + 1

In [10]:
# Sanity check: the number of rows kept must equal the count computed earlier.
@assert j == cnt + 1

Filter the valid data and write the result to CSV files.

In [11]:
# Persist the filtered financial tickers and their close-price matrix as
# comma-delimited files.
writedlm("finticker.csv", fin_ticker, ',')
writedlm("findata.csv", fin_data, ',')

#### The Technology Sector

In [12]:
# Load the technology-sector universe and pull out its ticker column.
tech = CSV.read("technology_stocks.csv")
tech_ticker_raw = tech.Ticker
# Number of candidate tickers before any filtering.
length(tech_ticker_raw)

327

In [13]:
# One row per candidate technology ticker, one column per row of `df`;
# rows that are never filled in stay all-zero.
tech_data_raw = zeros(length(tech_ticker_raw), size(df, 1))

327×359 Array{Float64,2}:
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.

In [14]:
# Fill row `i` of `tech_data_raw` with the daily closes of the i-th technology ticker.
# A ticker keeps its all-zero row (and is dropped by the later non-zero filter) when
# its bar file is absent, unreadable, or does not hold one close per matrix column.
for (i, ticker) in enumerate(tech_ticker_raw)
    local path = string("bar_data_1D/allDaily-", ticker, ".feather")
    if !isfile(path)
        # Expected for tickers without bar data; too common to warn about each one.
        @debug "No bar file; row left as zeros" ticker path
        continue
    end
    try
        # Bind to a loop-local name so the reference table `df`, which other cells
        # use for sizing, is not replaced by whichever file happens to be read last.
        local bars = Feather.read(path)
        # Materialise as Float64 first: a value that cannot be converted then fails
        # before any element of the matrix row is written, so no row is half-filled.
        local closes = Vector{Float64}(bars.Close)
        if length(closes) == size(tech_data_raw, 2)
            tech_data_raw[i, :] = closes
        else
            @debug "Close series length differs from matrix width; row left as zeros" ticker length(closes)
        end
    catch err
        # Still best-effort (the ticker is skipped), but no longer silent.
        @warn "Could not load bar data; row left as zeros" ticker exception = err
    end
end

In [15]:
# Number of technology tickers whose first close is non-zero, i.e. rows that were
# actually loaded. `count` replaces the manual accumulator loop, so the result no
# longer depends on notebook soft-scope rules for updating a global inside a `for`.
cnt = count(!iszero, @view tech_data_raw[:, 1])
cnt

132

In [16]:
# Keep only the tickers whose close series was loaded (non-zero first close).
# Row order follows `tech_ticker_raw`, so `tech_data[k, :]` belongs to `tech_ticker[k]`.
# The width comes from `tech_data_raw` itself rather than from the global `df`,
# which the loading loop may have rebound to a table with a different row count.
kept_rows = findall(!iszero, @view tech_data_raw[:, 1])
tech_data = tech_data_raw[kept_rows, :]
tech_ticker = String[tech_ticker_raw[i] for i in kept_rows]
# `j` used to be the loop's write cursor; it is kept as "rows kept + 1" so the
# sanity check below still validates the count computed earlier.
j = length(kept_rows) + 1
@assert(j==cnt+1)

In [17]:
# Persist the filtered technology tickers and their close-price matrix as
# comma-delimited files.
writedlm("techticker.csv", tech_ticker, ',')
writedlm("techdata.csv", tech_data, ',')

In [18]:
# Read the comma-delimited price file back in to check what was written.
readdlm("techdata.csv", ',')

132×359 Array{Float64,2}:
 145.82  143.73  145.83  143.68  144.02  …  176.98  176.78  172.29  174.62
 122.34  122.19  123.74  122.99  123.68     158.9   156.98  156.93  158.4 
  80.57   78.14   79.2    77.74   77.8       89.02   89.68   88.64   90.07
  20.0    20.05   20.5    20.4    20.65      12.55   12.94   12.87   12.73
  43.25   41.89   42.69   41.44   41.31      35.77   35.19   35.05   36.01
  26.69   26.54   26.99   26.64   27.22  …   44.33   46.25   46.7    46.94
  10.81   10.54   10.67   10.21   10.29      10.39   10.84   11.01   10.93
  74.76   74.23   75.11   73.76   73.82      84.17   84.87   84.98   84.95
 133.78  132.37  134.99  129.6   130.31     161.33  163.91  163.66  167.43
  60.22   58.29   58.81   57.58   57.57      49.26   50.85   50.05   50.91
 242.75  235.33  241.05  234.04  233.05  …  227.71  230.0   229.9   235.31
  38.94   38.63   39.2    38.98   38.88      42.04   42.76   42.77   43.36
  16.56   16.16   16.55   16.35   16.34      15.77   16.02   16.17   16.06