In [1]:
using MAT
using PyPlot
using LinearAlgebra
using FFTW

## Load data

In [2]:
# Each entry is (rhythm class, number of recordings available for that class).
folders = [
    ("AFIB", 6), ("AFL", 3), ("APB", 9), ("Bigeminy", 7),
    ("LBBB", 3), ("NSR", 23), ("RBBB", 3), ("Trigeminy", 4)
];

# Maps (class name, recording index) to the contents of the matching .mat file.
data = Dict()

for (label, count) in folders
    println("Loading ", label)
    # Files are stored as <root>/<Class>/<class>_<index>.mat
    prefix = string("/home/asd/data/heart/", label, "/", lowercase(label), "_")
    foreach(1:count) do idx
        data[(label, idx)] = matread(string(prefix, idx, ".mat"))
    end
end

Loading AFIB
Loading AFL
Loading APB
Loading Bigeminy
Loading LBBB
Loading NSR
Loading RBBB
Loading Trigeminy


## Join data

* (1) Select individuals to leave out entirely (test users)
* (2) Select part of each recording to leave out (test segments)
* (3) Concatenate the remaining data into a single training signal

In [3]:
# Keep every `downsample_rate`-th column (time bin) of each recording.
downsample_rate = 2
# Width, before downsampling, of the zero block inserted between consecutive
# recordings in the concatenated training signal.
padsize = 1000

# For each selected field `dt` of the loaded recordings, build and save:
#   * train_<dtname>.mat     - all training recordings joined column-wise,
#                              separated by zero padding
#   * testusers_<dtname>.mat - recordings of individuals held out entirely
#   * testsegs_<dtname>.mat  - a contiguous ~10% segment held out from every
#                              training recording
for (dt, dtname) in [
        #("dat", "1"),
        #("spec_w_64", "32"),
        #("spec_w_128", "64"),
        ("spec_w_256", "128"),
]
    # Collected pieces (signals and pads) are concatenated once at the end,
    # instead of re-copying the growing training matrix on every iteration.
    pieces = AbstractMatrix[]
    testusers = Dict{String,Any}()
    testsegs = Dict{String,Any}()

    for (group, num) in keys(data)
        key = string(group, "_", num)
        raw = data[(group, num)][dt]

        # Hold out the first individual of every group entirely: it must not
        # contribute to the training signal or to the test segments.
        if num == 1
            # NOTE(review): stored at the original rate (not downsampled),
            # unlike the training signal and test segments - confirm intended.
            testusers[key] = raw
            continue
        end

        # Downsample along the column axis
        signal = raw[:, 1:downsample_rate:end]
        pad = zeros(size(raw, 1), padsize)[:, 1:downsample_rate:end]

        T = size(signal, 2)
        if T == 0
            @warn "Skipping empty recording" key
            continue
        end

        # Leave out a random contiguous ~10% of the time bins for the test
        # set. The start is drawn so the whole segment fits in the recording.
        seglen = max(1, floor(Int, T / 10))
        tstart = rand(1:(T - seglen + 1))
        tend = tstart + seglen - 1

        testsegs[key] = signal[:, tstart:tend]
        # Exclude both boundary columns so train and test do not overlap.
        signal = [signal[:, 1:(tstart - 1)] signal[:, (tend + 1):end]]

        # Separate consecutive recordings with zero padding
        isempty(pieces) || push!(pieces, pad)
        push!(pieces, signal)
    end

    train = isempty(pieces) ? zeros(0, 0) : reduce(hcat, pieces)

    # Save files
    matwrite(
        string("/home/asd/data/heart/train_", dtname, ".mat"),
        Dict("signal" => train)
    )
    matwrite(
        string("/home/asd/data/heart/testusers_", dtname, ".mat"),
        Dict("testusers" => testusers)
    )
    matwrite(
        string("/home/asd/data/heart/testsegs_", dtname, ".mat"),
        Dict("testsegs" => testsegs)
    )
end

In [None]:
# TODO: preprocess spectrogram
#   (maybe do this remotely)
#   1   - log transform
#   1.2 - demean
#   2   - remove negative decibels
#   3   - drop DC-ish rows
#   4   - normalize so that max value is 1

# List the field names stored in one loaded recording (the entry for NSR, index 1).
keys(data[("NSR", 1)])