In [3]:
include("compute_ph2.jl")
using LIBSVM
using RDatasets
using Printf
using Statistics
using Random
using .ext
using Plots
using GLM
using StatsBase
using ScikitLearn
using JLD2
@sk_import model_selection: train_test_split
# directory the notebook was started from; the data paths below are built on it
workdir = pwd()

# fractions of the data used for the training, validation and test sets
train_ratio, validation_ratio = 0.6, 0.2
test_ratio = 0.2




└ @ PlotlyBase C:\Users\jaydh\.julia\packages\PlotlyBase\NxSlF\src\kaleido.jl:58
│ has been implemented directly in PlotlyBase itself.
│ 
│ By implementing in PlotlyBase.jl, the savefig routines are automatically
│ available to PlotlyJS.jl also.
└ @ ORCA C:\Users\jaydh\.julia\packages\ORCA\U5XaN\src\ORCA.jl:8
└ @ ScikitLearn.Skcore C:\Users\jaydh\.julia\packages\ScikitLearn\ssekP\src\Skcore.jl:179


0.2

In [33]:
"""
    random_undersample(df)

Return a shuffled copy of `df` in which rows of the majority class have been
randomly dropped so that classes `0` and `1` are approximately balanced.

Each majority-class row is dropped independently with probability
`(n_majority - n_minority) / n_majority`, so the resulting class counts are equal
only in expectation, not exactly.

# Arguments
- `df`: a data frame with a `labels` column holding the values `0` and `1`.

# Returns
A new data frame; the frame passed in by the caller is not modified.

# Throws
- `KeyError` if either label `0` or label `1` does not occur in `df.labels`.

Note that the global RNG is reseeded with `Random.seed!(1)` on every call, which
makes the result reproducible but also affects random draws made afterwards.
"""
function random_undersample(df)
    counts = countmap(df.labels)
    class_0 = counts[0]
    class_1 = counts[1]

    # fixed seed: the shuffle and the deletions below are the same on every run
    Random.seed!(1)
    # row indexing copies, so everything from here on works on a private frame
    df = df[shuffle(1:nrow(df)), :]

    # already balanced: nothing to drop
    class_0 == class_1 && return df

    majority = class_0 > class_1 ? 0 : 1
    prob_to_delete = abs(class_0 - class_1) / max(class_0, class_1)

    # one random draw per majority-class row, taken in row order
    keep = trues(nrow(df))
    for row in 1:nrow(df)
        if df[row, :labels] == majority && rand() < prob_to_delete
            keep[row] = false
        end
    end
    return df[keep, :]
end

"""
    parameter_optimisation(Xtrain, ytrain, Xval, yval, costs, gammas)

Grid-search the SVM `cost` and `gamma` parameters: train one model per
`(gamma, cost)` pair on the training set and score it on the validation set.

# Arguments
- `Xtrain`, `Xval`: feature matrices with one sample per row (they are transposed
  before being handed to `svmtrain` / `svmpredict`).
- `ytrain`, `yval`: labels matching the rows of `Xtrain` and `Xval`.
- `costs`, `gammas`: non-empty collections of candidate values; any length is accepted.

# Returns
A tuple `(C, G, table)` where `C` and `G` are the cost and gamma of the first pair
(gammas in the outer loop, costs in the inner loop) that reaches the highest
validation accuracy, and `table` is a `Matrix{Any}` of accuracies in percent with
one row per gamma and one column per cost, prefixed by a header row of costs and a
label column of gammas (top-left cell is `"Costs->"`).

# Throws
- `ArgumentError` if `costs` or `gammas` is empty.
"""
function parameter_optimisation(Xtrain,ytrain,Xval,yval, costs, gammas)
    if isempty(costs) || isempty(gammas)
        throw(ArgumentError("costs and gammas must each contain at least one value"))
    end

    accs = zeros(length(gammas), length(costs))
    # start below any reachable accuracy so a winner exists even if every score is 0
    best_acc = -Inf
    best_C = first(costs)
    best_G = first(gammas)
    for (i, G) in enumerate(gammas)
        for (j, C) in enumerate(costs)
            model = svmtrain(Xtrain', ytrain ; gamma=G , cost=C)
            ypredict, decision_values = svmpredict(model, Xval')
            acc = mean(ypredict .== yval) * 100
            accs[i,j] = acc
            # strict comparison: on ties the earliest pair in the sweep is kept
            if acc > best_acc
                best_acc = acc
                best_C = C
                best_G = G
            end
        end
    end

    # label the table with the grids actually searched
    header = permutedims(collect(costs))
    row_labels = vcat(Any["Costs->"], collect(gammas))
    table = hcat(row_labels, vcat(header, accs))
    return (best_C, best_G, table)
end

parameter_optimisation (generic function with 1 method)

In [5]:
# hyperparameter ranges: seven powers of ten from 1e-4 up to 1e2
gammas = [1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2];
# the cost grid is the very same array object as the gamma grid
costs = gammas;

VR H0 Tumour Cells

In [18]:
# VR H0 tumour cells: load the persistence-image features, rescale them, balance the
# classes, split the data, tune an SVM on the validation set and report test accuracy.

# load features into dataframe: one row per file with id, time, label and 20 features
df_vr_tumour_h0 = DataFrame(ids=Int64[],times=Int64[],labels=[])
for i in 1:20
    df_vr_tumour_h0[!,string("feature_",i)] = Float64[]
end
location_tumour_h0 = "persistence_images\\vr\\tumour_h0\\"
# resolve the directory once so listing and loading use the same absolute path
dir_tumour_h0 = joinpath(workdir,location_tumour_h0)
files_vr_tumour_h0 = readdir(dir_tumour_h0)
for f in files_vr_tumour_h0
    # parse_file and getBinaryLabel are defined outside this cell; presumably
    # parse_file pulls the id and time out of the file name — TODO confirm
    id_str,time_str = parse_file(f)
    id = parse(Int64,id_str)
    time = parse(Int64,time_str)
    image = load_object(joinpath(dir_tumour_h0,f))
    row = Any[id,time]
    append!(row,getBinaryLabel(id,time))
    for x in image
        append!(row,x)
    end
    push!(df_vr_tumour_h0,row)
end

# rescale features to [0,1]
# NOTE(review): the transform is fitted on all rows before the train/validation/test
# split, so held-out rows influence the scaling — confirm this is intended
X = Matrix(df_vr_tumour_h0[:,4:end])
dt = fit(UnitRangeTransform, X; dims=1, unit=true)
Xn = StatsBase.transform(dt,X)
# write every scaled column back in one in-place assignment
df_vr_tumour_h0[:,4:end] = Xn

# randomly undersample imbalanced data
df_vr_tumour_h0 = random_undersample(df_vr_tumour_h0)

# randomly sample training, test and validation sets
df_vr_tumour_h0 = df_vr_tumour_h0[shuffle(1:nrow(df_vr_tumour_h0)),:]
X = Matrix(df_vr_tumour_h0[:,4:end])
y = df_vr_tumour_h0.labels
# the first split holds out 1-train_ratio of the rows; the second divides that
# held-out part between validation and test
Xtrain, Xtest, ytrain, ytest = train_test_split(X,y,test_size=1-train_ratio)
Xval, Xtest, yval, ytest = train_test_split(Xtest,ytest,test_size=(test_ratio)/(test_ratio+validation_ratio))

# optimise hyperparameters
C,G,accs = parameter_optimisation(Xtrain,ytrain,Xval,yval,costs,gammas)

# report final accuracy on test set
model = svmtrain(Xtrain', ytrain ; gamma=G , cost=C)
ypredict, decision_values = svmpredict(model, Xtest')
acc = mean(ypredict .== ytest) * 100
println("accuracy: ", acc)
println("Cost: ", C, " Gamma: ", G)
display(accs)

accuracy: 79.43925233644859
Cost: 100.0 Gamma: 1.0


8×8 Matrix{Any}:
    "Costs->"   0.0001   0.001    0.01     0.1      1.0     10.0     100.0
   0.0001      47.6636  47.6636  47.6636  47.6636  47.6636  47.6636   47.6636
   0.001       47.6636  47.6636  47.6636  47.6636  47.6636  47.6636   65.4206
   0.01        47.6636  47.6636  47.6636  47.6636  47.6636  65.4206   68.2243
   0.1         47.6636  47.6636  47.6636  47.6636  64.486   69.1589   79.4393
   1.0         47.6636  47.6636  47.6636  63.5514  65.4206  79.4393   83.1776
  10.0         47.6636  47.6636  47.6636  64.486   70.0935  79.4393   82.243
 100.0         47.6636  47.6636  47.6636  64.486   78.5047  79.4393   76.6355

VR H1 Tumour Cells

In [19]:
# VR H1 tumour cells: load the persistence-image features, rescale them, balance the
# classes, split the data, tune an SVM on the validation set and report test accuracy.

# load features into dataframe: one row per file with id, time, label and 400 features
df_vr_tumour_h1 = DataFrame(ids=Int64[],times=Int64[],labels=[])
for i in 1:400
    df_vr_tumour_h1[!,string("feature_",i)] = Float64[]
end
location_tumour_h1 = "persistence_images\\vr\\tumour_h1\\"
# resolve the directory once so listing and loading use the same absolute path
dir_tumour_h1 = joinpath(workdir,location_tumour_h1)
files_vr_tumour_h1 = readdir(dir_tumour_h1)
for f in files_vr_tumour_h1
    # parse_file and getBinaryLabel are defined outside this cell; presumably
    # parse_file pulls the id and time out of the file name — TODO confirm
    id_str,time_str = parse_file(f)
    id = parse(Int64,id_str)
    time = parse(Int64,time_str)
    image = load_object(joinpath(dir_tumour_h1,f))
    row = Any[id,time]
    append!(row,getBinaryLabel(id,time))
    for x in image
        append!(row,x)
    end
    push!(df_vr_tumour_h1,row)
end

# rescale features to [0,1]
# NOTE(review): the transform is fitted on all rows before the train/validation/test
# split, so held-out rows influence the scaling — confirm this is intended
X = Matrix(df_vr_tumour_h1[:,4:end])
dt = fit(UnitRangeTransform, X; dims=1, unit=true)
Xn = StatsBase.transform(dt,X)
# write every scaled column back in one in-place assignment
df_vr_tumour_h1[:,4:end] = Xn

# randomly undersample imbalanced data
df_vr_tumour_h1 = random_undersample(df_vr_tumour_h1)

# randomly sample training, test and validation sets
df_vr_tumour_h1 = df_vr_tumour_h1[shuffle(1:nrow(df_vr_tumour_h1)),:]
X = Matrix(df_vr_tumour_h1[:,4:end])
y = df_vr_tumour_h1.labels
# the first split holds out 1-train_ratio of the rows; the second divides that
# held-out part between validation and test
Xtrain, Xtest, ytrain, ytest = train_test_split(X,y,test_size=1-train_ratio)
Xval, Xtest, yval, ytest = train_test_split(Xtest,ytest,test_size=(test_ratio)/(test_ratio+validation_ratio))

# optimise hyperparameters
C,G,accs = parameter_optimisation(Xtrain,ytrain,Xval,yval,costs,gammas)

# report final accuracy on test set
model = svmtrain(Xtrain', ytrain ; gamma=G , cost=C)
ypredict, decision_values = svmpredict(model, Xtest')
acc = mean(ypredict .== ytest) * 100
println("accuracy: ", acc)
println("Cost: ", C, " Gamma: ", G)
display(accs)

accuracy: 84.11214953271028
Cost: 0.1 Gamma: 1.0


8×8 Matrix{Any}:
    "Costs->"   0.0001   0.001    0.01     0.1      1.0     10.0     100.0
   0.0001      50.4673  50.4673  50.4673  50.4673  50.4673  50.4673   78.5047
   0.001       50.4673  50.4673  50.4673  50.4673  50.4673  78.5047   79.4393
   0.01        50.4673  50.4673  50.4673  50.4673  79.4393  80.3738   81.3084
   0.1         50.4673  50.4673  50.4673  78.5047  77.5701  77.5701   79.4393
   1.0         50.4673  50.4673  50.4673  82.243   79.4393  79.4393   78.5047
  10.0         50.4673  50.4673  50.4673  79.4393  81.3084  81.3084   78.5047
 100.0         50.4673  50.4673  50.4673  50.4673  81.3084  82.243    82.243

VR H0 Macrophages

In [20]:
# VR H0 macrophages: load the persistence-image features, rescale them, balance the
# classes, split the data, tune an SVM on the validation set and report test accuracy.

# load features into dataframe: one row per file with id, time, label and 20 features
df_vr_macrophage_h0 = DataFrame(ids=Int64[],times=Int64[],labels=[])
for i in 1:20
    df_vr_macrophage_h0[!,string("feature_",i)] = Float64[]
end
location_macrophage_h0 = "persistence_images\\vr\\macrophages_h0\\"
# resolve the directory once so listing and loading use the same absolute path
dir_macrophage_h0 = joinpath(workdir,location_macrophage_h0)
files_vr_macrophage_h0 = readdir(dir_macrophage_h0)
for f in files_vr_macrophage_h0
    # parse_file and getBinaryLabel are defined outside this cell; presumably
    # parse_file pulls the id and time out of the file name — TODO confirm
    id_str,time_str = parse_file(f)
    id = parse(Int64,id_str)
    time = parse(Int64,time_str)
    image = load_object(joinpath(dir_macrophage_h0,f))
    row = Any[id,time]
    append!(row,getBinaryLabel(id,time))
    for x in image
        append!(row,x)
    end
    push!(df_vr_macrophage_h0,row)
end

# rescale features to [0,1]
# NOTE(review): the transform is fitted on all rows before the train/validation/test
# split, so held-out rows influence the scaling — confirm this is intended
X = Matrix(df_vr_macrophage_h0[:,4:end])
dt = fit(UnitRangeTransform, X; dims=1, unit=true)
Xn = StatsBase.transform(dt,X)
# write every scaled column back in one in-place assignment
df_vr_macrophage_h0[:,4:end] = Xn

# randomly undersample imbalanced data
df_vr_macrophage_h0 = random_undersample(df_vr_macrophage_h0)

# randomly sample training, test and validation sets
df_vr_macrophage_h0 = df_vr_macrophage_h0[shuffle(1:nrow(df_vr_macrophage_h0)),:]
X = Matrix(df_vr_macrophage_h0[:,4:end])
y = df_vr_macrophage_h0.labels
# the first split holds out 1-train_ratio of the rows; the second divides that
# held-out part between validation and test
Xtrain, Xtest, ytrain, ytest = train_test_split(X,y,test_size=1-train_ratio)
Xval, Xtest, yval, ytest = train_test_split(Xtest,ytest,test_size=(test_ratio)/(test_ratio+validation_ratio))

# optimise hyperparameters
C,G,accs = parameter_optimisation(Xtrain,ytrain,Xval,yval,costs,gammas)

# report final accuracy on test set
model = svmtrain(Xtrain', ytrain ; gamma=G , cost=C)
ypredict, decision_values = svmpredict(model, Xtest')
acc = mean(ypredict .== ytest) * 100
println("accuracy: ", acc)
println("Cost: ", C, " Gamma: ", G)
display(accs)

accuracy: 73.83177570093457
Cost: 10.0 Gamma: 1.0


8×8 Matrix{Any}:
    "Costs->"   0.0001   0.001    0.01     0.1      1.0     10.0     100.0
   0.0001      51.4019  51.4019  51.4019  51.4019  51.4019  51.4019   51.4019
   0.001       51.4019  51.4019  51.4019  51.4019  51.4019  51.4019   57.9439
   0.01        51.4019  51.4019  51.4019  51.4019  51.4019  58.8785   57.9439
   0.1         51.4019  51.4019  51.4019  51.4019  61.6822  61.6822   72.8972
   1.0         51.4019  51.4019  51.4019  63.5514  63.5514  76.6355   74.7664
  10.0         51.4019  51.4019  51.4019  64.486   73.8318  73.8318   68.2243
 100.0         51.4019  51.4019  51.4019  51.4019  62.6168  63.5514   63.5514

VR H1 Macrophages

In [21]:
# VR H1 macrophages: load the persistence-image features, rescale them, balance the
# classes, split the data, tune an SVM on the validation set and report test accuracy.

# load features into dataframe: one row per file with id, time, label and 400 features
df_vr_macrophage_h1 = DataFrame(ids=Int64[],times=Int64[],labels=[])
for i in 1:400
    df_vr_macrophage_h1[!,string("feature_",i)] = Float64[]
end
location_macrophage_h1 = "persistence_images\\vr\\macrophages_h1\\"
# resolve the directory once so listing and loading use the same absolute path
dir_macrophage_h1 = joinpath(workdir,location_macrophage_h1)
files_vr_macrophage_h1 = readdir(dir_macrophage_h1)
for f in files_vr_macrophage_h1
    # parse_file and getBinaryLabel are defined outside this cell; presumably
    # parse_file pulls the id and time out of the file name — TODO confirm
    id_str,time_str = parse_file(f)
    id = parse(Int64,id_str)
    time = parse(Int64,time_str)
    image = load_object(joinpath(dir_macrophage_h1,f))
    row = Any[id,time]
    append!(row,getBinaryLabel(id,time))
    for x in image
        append!(row,x)
    end
    push!(df_vr_macrophage_h1,row)
end

# rescale features to [0,1]
# NOTE(review): the transform is fitted on all rows before the train/validation/test
# split, so held-out rows influence the scaling — confirm this is intended
X = Matrix(df_vr_macrophage_h1[:,4:end])
dt = fit(UnitRangeTransform, X; dims=1, unit=true)
Xn = StatsBase.transform(dt,X)
# write every scaled column back in one in-place assignment
df_vr_macrophage_h1[:,4:end] = Xn

# randomly undersample imbalanced data
df_vr_macrophage_h1 = random_undersample(df_vr_macrophage_h1)

# randomly sample training, test and validation sets
df_vr_macrophage_h1 = df_vr_macrophage_h1[shuffle(1:nrow(df_vr_macrophage_h1)),:]
X = Matrix(df_vr_macrophage_h1[:,4:end])
y = df_vr_macrophage_h1.labels
# the first split holds out 1-train_ratio of the rows; the second divides that
# held-out part between validation and test
Xtrain, Xtest, ytrain, ytest = train_test_split(X,y,test_size=1-train_ratio)
Xval, Xtest, yval, ytest = train_test_split(Xtest,ytest,test_size=(test_ratio)/(test_ratio+validation_ratio))

# optimise hyperparameters
C,G,accs = parameter_optimisation(Xtrain,ytrain,Xval,yval,costs,gammas)

# report final accuracy on test set
model = svmtrain(Xtrain', ytrain ; gamma=G , cost=C)
ypredict, decision_values = svmpredict(model, Xtest')
acc = mean(ypredict .== ytest) * 100
println("accuracy: ", acc)
println("Cost: ", C, " Gamma: ", G)
display(accs)

accuracy: 75.70093457943925
Cost: 10.0 Gamma: 1.0


8×8 Matrix{Any}:
    "Costs->"   0.0001   0.001    0.01     0.1      1.0     10.0     100.0
   0.0001      49.5327  49.5327  49.5327  49.5327  49.5327  49.5327   49.5327
   0.001       49.5327  49.5327  49.5327  49.5327  49.5327  49.5327   59.8131
   0.01        49.5327  49.5327  49.5327  49.5327  49.5327  62.6168   63.5514
   0.1         49.5327  49.5327  49.5327  50.4673  63.5514  62.6168   71.9626
   1.0         49.5327  49.5327  49.5327  59.8131  69.1589  72.8972   69.1589
  10.0         49.5327  49.5327  49.5327  57.9439  64.486   66.3551   66.3551
 100.0         49.5327  49.5327  49.5327  59.8131  63.5514  61.6822   59.8131

Dowker H0 Macrophage-tumour

In [23]:
# Dowker H0 macrophage-tumour: load the persistence-image features, rescale them,
# balance the classes, split the data, tune an SVM on the validation set and report
# test accuracy.

# load features into dataframe: one row per file with id, time, label and 400 features
df_dowker_mt_h0 = DataFrame(ids=Int64[],times=Int64[],labels=[])
for i in 1:400
    df_dowker_mt_h0[!,string("feature_",i)] = Float64[]
end
location_mt_h0 = "persistence_images\\dowker\\macrophage_tumour_h0\\"
# resolve the directory once so listing and loading use the same absolute path
dir_mt_h0 = joinpath(workdir,location_mt_h0)
files_dowker_mt_h0 = readdir(dir_mt_h0)
for f in files_dowker_mt_h0
    # parse_file and getBinaryLabel are defined outside this cell; presumably
    # parse_file pulls the id and time out of the file name — TODO confirm
    id_str,time_str = parse_file(f)
    id = parse(Int64,id_str)
    time = parse(Int64,time_str)
    image = load_object(joinpath(dir_mt_h0,f))
    row = Any[id,time]
    append!(row,getBinaryLabel(id,time))
    for x in image
        append!(row,x)
    end
    push!(df_dowker_mt_h0,row)
end

# rescale features to [0,1]
# NOTE(review): the transform is fitted on all rows before the train/validation/test
# split, so held-out rows influence the scaling — confirm this is intended
X = Matrix(df_dowker_mt_h0[:,4:end])
dt = fit(UnitRangeTransform, X; dims=1, unit=true)
Xn = StatsBase.transform(dt,X)
# write every scaled column back in one in-place assignment
df_dowker_mt_h0[:,4:end] = Xn

# randomly undersample imbalanced data
df_dowker_mt_h0 = random_undersample(df_dowker_mt_h0)

# randomly sample training, test and validation sets
df_dowker_mt_h0 = df_dowker_mt_h0[shuffle(1:nrow(df_dowker_mt_h0)),:]
X = Matrix(df_dowker_mt_h0[:,4:end])
y = df_dowker_mt_h0.labels
# the first split holds out 1-train_ratio of the rows; the second divides that
# held-out part between validation and test
Xtrain, Xtest, ytrain, ytest = train_test_split(X,y,test_size=1-train_ratio)
Xval, Xtest, yval, ytest = train_test_split(Xtest,ytest,test_size=(test_ratio)/(test_ratio+validation_ratio))

# optimise hyperparameters
C,G,accs = parameter_optimisation(Xtrain,ytrain,Xval,yval,costs,gammas)

# report final accuracy on test set
model = svmtrain(Xtrain', ytrain ; gamma=G , cost=C)
ypredict, decision_values = svmpredict(model, Xtest')
acc = mean(ypredict .== ytest) * 100
println("accuracy: ", acc)
println("Cost: ", C, " Gamma: ", G)
display(accs)


accuracy: 84.11214953271028
Cost: 100.0 Gamma: 0.1


8×8 Matrix{Any}:
    "Costs->"   0.0001   0.001   0.01    0.1      1.0     10.0     100.0
   0.0001      53.271   53.271  53.271  53.271   53.271   53.271    87.8505
   0.001       53.271   53.271  53.271  53.271   53.271   87.8505   88.785
   0.01        53.271   53.271  53.271  53.271   87.8505  88.785    90.6542
   0.1         53.271   53.271  53.271  86.9159  89.7196  89.7196   91.5888
   1.0         53.271   53.271  53.271  85.0467  87.8505  87.8505   82.243
  10.0         53.271   53.271  53.271  83.1776  80.3738  78.5047   69.1589
 100.0         53.271   53.271  53.271  53.271   58.8785  61.6822   60.7477

Dowker H1 Macrophage-tumour

In [24]:
# Dowker H1 macrophage-tumour: load the persistence-image features, rescale them,
# balance the classes, split the data, tune an SVM on the validation set and report
# test accuracy.

# load features into dataframe: one row per file with id, time, label and 400 features
df_dowker_mt_h1 = DataFrame(ids=Int64[],times=Int64[],labels=[])
for i in 1:400
    df_dowker_mt_h1[!,string("feature_",i)] = Float64[]
end
location_mt_h1 = "persistence_images\\dowker\\macrophage_tumour_h1\\"
# resolve the directory once so listing and loading use the same absolute path
dir_mt_h1 = joinpath(workdir,location_mt_h1)
files_dowker_mt_h1 = readdir(dir_mt_h1)
for f in files_dowker_mt_h1
    # parse_file and getBinaryLabel are defined outside this cell; presumably
    # parse_file pulls the id and time out of the file name — TODO confirm
    id_str,time_str = parse_file(f)
    id = parse(Int64,id_str)
    time = parse(Int64,time_str)
    image = load_object(joinpath(dir_mt_h1,f))
    row = Any[id,time]
    append!(row,getBinaryLabel(id,time))
    for x in image
        append!(row,x)
    end
    push!(df_dowker_mt_h1,row)
end

# rescale features to [0,1]
# NOTE(review): the transform is fitted on all rows before the train/validation/test
# split, so held-out rows influence the scaling — confirm this is intended
X = Matrix(df_dowker_mt_h1[:,4:end])
dt = fit(UnitRangeTransform, X; dims=1, unit=true)
Xn = StatsBase.transform(dt,X)
# write every scaled column back in one in-place assignment
df_dowker_mt_h1[:,4:end] = Xn

# randomly undersample imbalanced data
df_dowker_mt_h1 = random_undersample(df_dowker_mt_h1)

# randomly sample training, test and validation sets
df_dowker_mt_h1 = df_dowker_mt_h1[shuffle(1:nrow(df_dowker_mt_h1)),:]
X = Matrix(df_dowker_mt_h1[:,4:end])
y = df_dowker_mt_h1.labels
# the first split holds out 1-train_ratio of the rows; the second divides that
# held-out part between validation and test
Xtrain, Xtest, ytrain, ytest = train_test_split(X,y,test_size=1-train_ratio)
Xval, Xtest, yval, ytest = train_test_split(Xtest,ytest,test_size=(test_ratio)/(test_ratio+validation_ratio))

# optimise hyperparameters
C,G,accs = parameter_optimisation(Xtrain,ytrain,Xval,yval,costs,gammas)

# report final accuracy on test set
model = svmtrain(Xtrain', ytrain ; gamma=G , cost=C)
ypredict, decision_values = svmpredict(model, Xtest')
acc = mean(ypredict .== ytest) * 100
println("accuracy: ", acc)
println("Cost: ", C, " Gamma: ", G)
display(accs)

accuracy: 83.17757009345794
Cost: 10.0 Gamma: 0.01


8×8 Matrix{Any}:
    "Costs->"   0.0001   0.001    0.01     0.1      1.0     10.0     100.0
   0.0001      48.5981  48.5981  48.5981  48.5981  48.5981  66.3551   71.9626
   0.001       48.5981  48.5981  48.5981  48.5981  67.2897  71.9626   73.8318
   0.01        48.5981  48.5981  48.5981  73.8318  72.8972  74.7664   73.8318
   0.1         48.5981  48.5981  48.5981  71.9626  73.8318  73.8318   73.8318
   1.0         48.5981  48.5981  48.5981  71.028   71.028   66.3551   67.2897
  10.0         48.5981  48.5981  48.5981  57.9439  64.486   61.6822   59.8131
 100.0         48.5981  48.5981  48.5981  55.1402  57.0093  52.3364   50.4673

Dowker H0 Tumour-vessel

In [25]:
# Dowker H0 tumour-vessel: load the persistence-image features, rescale them,
# balance the classes, split the data, tune an SVM on the validation set and report
# test accuracy.

# load features into dataframe: one row per file with id, time, label and 400 features
df_dowker_tv_h0 = DataFrame(ids=Int64[],times=Int64[],labels=[])
for i in 1:400
    df_dowker_tv_h0[!,string("feature_",i)] = Float64[]
end
location_tv_h0 = "persistence_images\\dowker\\tumour_vessel_h0\\"
# resolve the directory once so listing and loading use the same absolute path
dir_tv_h0 = joinpath(workdir,location_tv_h0)
files_dowker_tv_h0 = readdir(dir_tv_h0)
for f in files_dowker_tv_h0
    # parse_file and getBinaryLabel are defined outside this cell; presumably
    # parse_file pulls the id and time out of the file name — TODO confirm
    id_str,time_str = parse_file(f)
    id = parse(Int64,id_str)
    time = parse(Int64,time_str)
    image = load_object(joinpath(dir_tv_h0,f))
    row = Any[id,time]
    append!(row,getBinaryLabel(id,time))
    for x in image
        append!(row,x)
    end
    push!(df_dowker_tv_h0,row)
end

# rescale features to [0,1]
# NOTE(review): the transform is fitted on all rows before the train/validation/test
# split, so held-out rows influence the scaling — confirm this is intended
X = Matrix(df_dowker_tv_h0[:,4:end])
dt = fit(UnitRangeTransform, X; dims=1, unit=true)
Xn = StatsBase.transform(dt,X)
# write every scaled column back in one in-place assignment
df_dowker_tv_h0[:,4:end] = Xn

# randomly undersample imbalanced data
df_dowker_tv_h0 = random_undersample(df_dowker_tv_h0)

# randomly sample training, test and validation sets
df_dowker_tv_h0 = df_dowker_tv_h0[shuffle(1:nrow(df_dowker_tv_h0)),:]
X = Matrix(df_dowker_tv_h0[:,4:end])
y = df_dowker_tv_h0.labels
# the first split holds out 1-train_ratio of the rows; the second divides that
# held-out part between validation and test
Xtrain, Xtest, ytrain, ytest = train_test_split(X,y,test_size=1-train_ratio)
Xval, Xtest, yval, ytest = train_test_split(Xtest,ytest,test_size=(test_ratio)/(test_ratio+validation_ratio))

# optimise hyperparameters
C,G,accs = parameter_optimisation(Xtrain,ytrain,Xval,yval,costs,gammas)

# report final accuracy on test set
model = svmtrain(Xtrain', ytrain ; gamma=G , cost=C)
ypredict, decision_values = svmpredict(model, Xtest')
acc = mean(ypredict .== ytest) * 100
println("accuracy: ", acc)
println("Cost: ", C, " Gamma: ", G)
display(accs)

accuracy: 79.43925233644859
Cost: 100.0 Gamma: 0.01


8×8 Matrix{Any}:
    "Costs->"   0.0001   0.001    0.01     0.1      1.0     10.0     100.0
   0.0001      48.5981  48.5981  48.5981  48.5981  48.5981  48.5981   76.6355
   0.001       48.5981  48.5981  48.5981  48.5981  48.5981  76.6355   81.3084
   0.01        48.5981  48.5981  48.5981  48.5981  76.6355  80.3738   83.1776
   0.1         48.5981  48.5981  48.5981  76.6355  81.3084  81.3084   79.4393
   1.0         48.5981  48.5981  48.5981  76.6355  81.3084  79.4393   73.8318
  10.0         48.5981  48.5981  48.5981  48.5981  71.028   69.1589   69.1589
 100.0         48.5981  48.5981  48.5981  48.5981  49.5327  49.5327   49.5327

Dowker H1 Tumour-vessel

In [26]:
# Dowker H1 tumour-vessel: load the persistence-image features, rescale them,
# balance the classes, split the data, tune an SVM on the validation set and report
# test accuracy.

# load features into dataframe: one row per file with id, time, label and 400 features
df_dowker_tv_h1 = DataFrame(ids=Int64[],times=Int64[],labels=[])
for i in 1:400
    df_dowker_tv_h1[!,string("feature_",i)] = Float64[]
end
location_tv_h1 = "persistence_images\\dowker\\tumour_vessel_h1\\"
# resolve the directory once so listing and loading use the same absolute path
dir_tv_h1 = joinpath(workdir,location_tv_h1)
files_dowker_tv_h1 = readdir(dir_tv_h1)
for f in files_dowker_tv_h1
    # parse_file and getBinaryLabel are defined outside this cell; presumably
    # parse_file pulls the id and time out of the file name — TODO confirm
    id_str,time_str = parse_file(f)
    id = parse(Int64,id_str)
    time = parse(Int64,time_str)
    image = load_object(joinpath(dir_tv_h1,f))
    row = Any[id,time]
    append!(row,getBinaryLabel(id,time))
    for x in image
        append!(row,x)
    end
    push!(df_dowker_tv_h1,row)
end

# rescale features to [0,1]
# NOTE(review): the transform is fitted on all rows before the train/validation/test
# split, so held-out rows influence the scaling — confirm this is intended
X = Matrix(df_dowker_tv_h1[:,4:end])
dt = fit(UnitRangeTransform, X; dims=1, unit=true)
Xn = StatsBase.transform(dt,X)
# write every scaled column back in one in-place assignment
df_dowker_tv_h1[:,4:end] = Xn

# randomly undersample imbalanced data
df_dowker_tv_h1 = random_undersample(df_dowker_tv_h1)

# randomly sample training, test and validation sets
df_dowker_tv_h1 = df_dowker_tv_h1[shuffle(1:nrow(df_dowker_tv_h1)),:]
X = Matrix(df_dowker_tv_h1[:,4:end])
y = df_dowker_tv_h1.labels
# the first split holds out 1-train_ratio of the rows; the second divides that
# held-out part between validation and test
Xtrain, Xtest, ytrain, ytest = train_test_split(X,y,test_size=1-train_ratio)
Xval, Xtest, yval, ytest = train_test_split(Xtest,ytest,test_size=(test_ratio)/(test_ratio+validation_ratio))

# optimise hyperparameters
C,G,accs = parameter_optimisation(Xtrain,ytrain,Xval,yval,costs,gammas)

# report final accuracy on test set
model = svmtrain(Xtrain', ytrain ; gamma=G , cost=C)
ypredict, decision_values = svmpredict(model, Xtest')
acc = mean(ypredict .== ytest) * 100
println("accuracy: ", acc)
println("Cost: ", C, " Gamma: ", G)
display(accs)

accuracy: 55.140186915887845
Cost: 100.0 Gamma: 1.0


8×8 Matrix{Any}:
    "Costs->"   0.0001   0.001    0.01     0.1      1.0     10.0     100.0
   0.0001      42.9907  42.9907  42.9907  42.9907  42.9907  42.9907   54.2056
   0.001       42.9907  42.9907  42.9907  42.9907  42.9907  54.2056   57.0093
   0.01        42.9907  42.9907  42.9907  42.9907  56.0748  54.2056   57.9439
   0.1         42.9907  42.9907  42.9907  56.0748  56.0748  57.0093   59.8131
   1.0         42.9907  42.9907  42.9907  51.4019  57.9439  62.6168   63.5514
  10.0         42.9907  42.9907  42.9907  42.9907  57.0093  53.271    53.271
 100.0         42.9907  42.9907  42.9907  42.9907  50.4673  50.4673   50.4673

Dowker H0 Macrophage-vessel

In [27]:
# Dowker H0 macrophage-vessel: load the persistence-image features, rescale them,
# balance the classes, split the data, tune an SVM on the validation set and report
# test accuracy.

# load features into dataframe: one row per file with id, time, label and 400 features
df_dowker_mv_h0 = DataFrame(ids=Int64[],times=Int64[],labels=[])
for i in 1:400
    df_dowker_mv_h0[!,string("feature_",i)] = Float64[]
end
location_mv_h0 = "persistence_images\\dowker\\macrophage_vessel_h0\\"
# resolve the directory once so listing and loading use the same absolute path
dir_mv_h0 = joinpath(workdir,location_mv_h0)
files_dowker_mv_h0 = readdir(dir_mv_h0)
for f in files_dowker_mv_h0
    # parse_file and getBinaryLabel are defined outside this cell; presumably
    # parse_file pulls the id and time out of the file name — TODO confirm
    id_str,time_str = parse_file(f)
    id = parse(Int64,id_str)
    time = parse(Int64,time_str)
    image = load_object(joinpath(dir_mv_h0,f))
    row = Any[id,time]
    append!(row,getBinaryLabel(id,time))
    for x in image
        append!(row,x)
    end
    push!(df_dowker_mv_h0,row)
end

# rescale features to [0,1]
# NOTE(review): the transform is fitted on all rows before the train/validation/test
# split, so held-out rows influence the scaling — confirm this is intended
X = Matrix(df_dowker_mv_h0[:,4:end])
dt = fit(UnitRangeTransform, X; dims=1, unit=true)
Xn = StatsBase.transform(dt,X)
# write every scaled column back in one in-place assignment
df_dowker_mv_h0[:,4:end] = Xn

# randomly undersample imbalanced data
df_dowker_mv_h0 = random_undersample(df_dowker_mv_h0)

# randomly sample training, test and validation sets
df_dowker_mv_h0 = df_dowker_mv_h0[shuffle(1:nrow(df_dowker_mv_h0)),:]
X = Matrix(df_dowker_mv_h0[:,4:end])
y = df_dowker_mv_h0.labels
# the first split holds out 1-train_ratio of the rows; the second divides that
# held-out part between validation and test
Xtrain, Xtest, ytrain, ytest = train_test_split(X,y,test_size=1-train_ratio)
Xval, Xtest, yval, ytest = train_test_split(Xtest,ytest,test_size=(test_ratio)/(test_ratio+validation_ratio))

# optimise hyperparameters
C,G,accs = parameter_optimisation(Xtrain,ytrain,Xval,yval,costs,gammas)

# report final accuracy on test set
model = svmtrain(Xtrain', ytrain ; gamma=G , cost=C)
ypredict, decision_values = svmpredict(model, Xtest')
acc = mean(ypredict .== ytest) * 100
println("accuracy: ", acc)
println("Cost: ", C, " Gamma: ", G)
display(accs)

accuracy: 71.02803738317756
Cost: 1.0 Gamma: 0.1


8×8 Matrix{Any}:
    "Costs->"   0.0001   0.001    0.01     0.1      1.0     10.0     100.0
   0.0001      55.1402  55.1402  55.1402  55.1402  55.1402  55.1402   62.6168
   0.001       55.1402  55.1402  55.1402  55.1402  55.1402  63.5514   65.4206
   0.01        55.1402  55.1402  55.1402  55.1402  62.6168  66.3551   73.8318
   0.1         55.1402  55.1402  55.1402  62.6168  77.5701  76.6355   74.7664
   1.0         55.1402  55.1402  55.1402  55.1402  72.8972  74.7664   75.7009
  10.0         55.1402  55.1402  55.1402  55.1402  55.1402  50.4673   50.4673
 100.0         55.1402  55.1402  55.1402  55.1402  55.1402  55.1402   55.1402

Dowker H1 Macrophage-vessel

In [28]:
# Dowker H1 macrophage-vessel: load the persistence-image features, rescale them,
# balance the classes, split the data, tune an SVM on the validation set and report
# test accuracy.

# load features into dataframe: one row per file with id, time, label and 400 features
df_dowker_mv_h1 = DataFrame(ids=Int64[],times=Int64[],labels=[])
for i in 1:400
    df_dowker_mv_h1[!,string("feature_",i)] = Float64[]
end
location_mv_h1 = "persistence_images\\dowker\\macrophage_vessel_h1\\"
# resolve the directory once so listing and loading use the same absolute path
dir_mv_h1 = joinpath(workdir,location_mv_h1)
files_dowker_mv_h1 = readdir(dir_mv_h1)
for f in files_dowker_mv_h1
    # parse_file and getBinaryLabel are defined outside this cell; presumably
    # parse_file pulls the id and time out of the file name — TODO confirm
    id_str,time_str = parse_file(f)
    id = parse(Int64,id_str)
    time = parse(Int64,time_str)
    image = load_object(joinpath(dir_mv_h1,f))
    row = Any[id,time]
    append!(row,getBinaryLabel(id,time))
    for x in image
        append!(row,x)
    end
    push!(df_dowker_mv_h1,row)
end

# rescale features to [0,1]
# NOTE(review): the transform is fitted on all rows before the train/validation/test
# split, so held-out rows influence the scaling — confirm this is intended
X = Matrix(df_dowker_mv_h1[:,4:end])
dt = fit(UnitRangeTransform, X; dims=1, unit=true)
Xn = StatsBase.transform(dt,X)
# write every scaled column back in one in-place assignment
df_dowker_mv_h1[:,4:end] = Xn

# randomly undersample imbalanced data
df_dowker_mv_h1 = random_undersample(df_dowker_mv_h1)

# randomly sample training, test and validation sets
df_dowker_mv_h1 = df_dowker_mv_h1[shuffle(1:nrow(df_dowker_mv_h1)),:]
X = Matrix(df_dowker_mv_h1[:,4:end])
y = df_dowker_mv_h1.labels
# the first split holds out 1-train_ratio of the rows; the second divides that
# held-out part between validation and test
Xtrain, Xtest, ytrain, ytest = train_test_split(X,y,test_size=1-train_ratio)
Xval, Xtest, yval, ytest = train_test_split(Xtest,ytest,test_size=(test_ratio)/(test_ratio+validation_ratio))

# optimise hyperparameters
C,G,accs = parameter_optimisation(Xtrain,ytrain,Xval,yval,costs,gammas)

# report final accuracy on test set
model = svmtrain(Xtrain', ytrain ; gamma=G , cost=C)
ypredict, decision_values = svmpredict(model, Xtest')
acc = mean(ypredict .== ytest) * 100
println("accuracy: ", acc)
println("Cost: ", C, " Gamma: ", G)
display(accs)

accuracy: 58.87850467289719
Cost: 100.0 Gamma: 0.0001


8×8 Matrix{Any}:
    "Costs->"   0.0001   0.001    0.01     0.1      1.0     10.0     100.0
   0.0001      55.1402  55.1402  55.1402  55.1402  55.1402  55.1402   65.4206
   0.001       55.1402  55.1402  55.1402  55.1402  55.1402  65.4206   62.6168
   0.01        55.1402  55.1402  55.1402  55.1402  64.486   63.5514   63.5514
   0.1         55.1402  55.1402  55.1402  58.8785  64.486   64.486    56.0748
   1.0         55.1402  55.1402  55.1402  61.6822  65.4206  62.6168   56.0748
  10.0         55.1402  55.1402  55.1402  58.8785  52.3364  59.8131   53.271
 100.0         55.1402  55.1402  55.1402  51.4019  40.1869  38.3178   41.1215

Dowker-time Macrophages H0

In [29]:
# Dowker-time macrophages H0: load the persistence-image features, rescale them,
# balance the classes, split the data, tune an SVM on the validation set and report
# test accuracy.

# load features into dataframe: one row per file with id, time, label and 400 features
df_dowkertime_m_h0 = DataFrame(ids=Int64[],times=Int64[],labels=[])
for i in 1:400
    df_dowkertime_m_h0[!,string("feature_",i)] = Float64[]
end
location_m_time_h0 = "persistence_images\\dowkertime\\macrophage_time_h0\\"
# resolve the directory once so listing and loading use the same absolute path
dir_m_time_h0 = joinpath(workdir,location_m_time_h0)
files_dowker_m_time_h0 = readdir(dir_m_time_h0)
for f in files_dowker_m_time_h0
    # parse_file and getBinaryLabel are defined outside this cell; presumably
    # parse_file pulls the id and time out of the file name — TODO confirm
    id_str,time_str = parse_file(f)
    id = parse(Int64,id_str)
    time = parse(Int64,time_str)
    image = load_object(joinpath(dir_m_time_h0,f))
    row = Any[id,time]
    append!(row,getBinaryLabel(id,time))
    for x in image
        append!(row,x)
    end
    push!(df_dowkertime_m_h0,row)
end

# rescale features to [0,1]
# NOTE(review): the transform is fitted on all rows before the train/validation/test
# split, so held-out rows influence the scaling — confirm this is intended
X = Matrix(df_dowkertime_m_h0[:,4:end])
dt = fit(UnitRangeTransform, X; dims=1, unit=true)
Xn = StatsBase.transform(dt,X)
# write every scaled column back in one in-place assignment
df_dowkertime_m_h0[:,4:end] = Xn

# randomly undersample imbalanced data
df_dowkertime_m_h0 = random_undersample(df_dowkertime_m_h0)

# randomly sample training, test and validation sets
df_dowkertime_m_h0 = df_dowkertime_m_h0[shuffle(1:nrow(df_dowkertime_m_h0)),:]
X = Matrix(df_dowkertime_m_h0[:,4:end])
y = df_dowkertime_m_h0.labels
# the first split holds out 1-train_ratio of the rows; the second divides that
# held-out part between validation and test
Xtrain, Xtest, ytrain, ytest = train_test_split(X,y,test_size=1-train_ratio)
Xval, Xtest, yval, ytest = train_test_split(Xtest,ytest,test_size=(test_ratio)/(test_ratio+validation_ratio))

# optimise hyperparameters
C,G,accs = parameter_optimisation(Xtrain,ytrain,Xval,yval,costs,gammas)

# report final accuracy on test set
model = svmtrain(Xtrain', ytrain ; gamma=G , cost=C)
ypredict, decision_values = svmpredict(model, Xtest')
acc = mean(ypredict .== ytest) * 100
println("accuracy: ", acc)
println("Cost: ", C, " Gamma: ", G)
display(accs)

accuracy: 77.57009345794393
Cost: 100.0 Gamma: 0.1


8×8 Matrix{Any}:
    "Costs->"   0.0001   0.001   0.01    0.1      1.0     10.0     100.0
   0.0001      46.729   46.729  46.729  46.729   46.729   46.729    71.9626
   0.001       46.729   46.729  46.729  46.729   46.729   71.9626   74.7664
   0.01        46.729   46.729  46.729  46.729   72.8972  78.5047   78.5047
   0.1         46.729   46.729  46.729  73.8318  75.7009  78.5047   84.1121
   1.0         46.729   46.729  46.729  74.7664  77.5701  83.1776   79.4393
  10.0         46.729   46.729  46.729  57.0093  74.7664  74.7664   71.9626
 100.0         46.729   46.729  46.729  46.729   54.2056  57.0093   56.0748

Dowker-time Macrophages H1

In [30]:
# Dowker-time macrophages H1: load the persistence-image features, rescale them,
# balance the classes, split the data, tune an SVM on the validation set and report
# test accuracy.

# load features into dataframe: one row per file with id, time, label and 400 features
df_dowkertime_m_h1 = DataFrame(ids=Int64[],times=Int64[],labels=[])
for i in 1:400
    df_dowkertime_m_h1[!,string("feature_",i)] = Float64[]
end
location_m_time_h1 = "persistence_images\\dowkertime\\macrophage_time_h1\\"
# resolve the directory once so listing and loading use the same absolute path
dir_m_time_h1 = joinpath(workdir,location_m_time_h1)
files_dowker_m_time_h1 = readdir(dir_m_time_h1)
for f in files_dowker_m_time_h1
    # parse_file and getBinaryLabel are defined outside this cell; presumably
    # parse_file pulls the id and time out of the file name — TODO confirm
    id_str,time_str = parse_file(f)
    id = parse(Int64,id_str)
    time = parse(Int64,time_str)
    image = load_object(joinpath(dir_m_time_h1,f))
    row = Any[id,time]
    append!(row,getBinaryLabel(id,time))
    for x in image
        append!(row,x)
    end
    push!(df_dowkertime_m_h1,row)
end

# rescale features to [0,1]
# NOTE(review): the transform is fitted on all rows before the train/validation/test
# split, so held-out rows influence the scaling — confirm this is intended
X = Matrix(df_dowkertime_m_h1[:,4:end])
dt = fit(UnitRangeTransform, X; dims=1, unit=true)
Xn = StatsBase.transform(dt,X)
# write every scaled column back in one in-place assignment
df_dowkertime_m_h1[:,4:end] = Xn

# randomly undersample imbalanced data
df_dowkertime_m_h1 = random_undersample(df_dowkertime_m_h1)

# randomly sample training, test and validation sets
df_dowkertime_m_h1 = df_dowkertime_m_h1[shuffle(1:nrow(df_dowkertime_m_h1)),:]
X = Matrix(df_dowkertime_m_h1[:,4:end])
y = df_dowkertime_m_h1.labels
# the first split holds out 1-train_ratio of the rows; the second divides that
# held-out part between validation and test
Xtrain, Xtest, ytrain, ytest = train_test_split(X,y,test_size=1-train_ratio)
Xval, Xtest, yval, ytest = train_test_split(Xtest,ytest,test_size=(test_ratio)/(test_ratio+validation_ratio))

# optimise hyperparameters
C,G,accs = parameter_optimisation(Xtrain,ytrain,Xval,yval,costs,gammas)

# report final accuracy on test set
model = svmtrain(Xtrain', ytrain ; gamma=G , cost=C)
ypredict, decision_values = svmpredict(model, Xtest')
acc = mean(ypredict .== ytest) * 100
println("accuracy: ", acc)
println("Cost: ", C, " Gamma: ", G)
display(accs)

accuracy: 79.43925233644859
Cost: 10.0 Gamma: 1.0


8×8 Matrix{Any}:
    "Costs->"   0.0001   0.001    0.01     0.1      1.0     10.0     100.0
   0.0001      54.2056  54.2056  54.2056  54.2056  54.2056  54.2056   54.2056
   0.001       54.2056  54.2056  54.2056  54.2056  54.2056  54.2056   55.1402
   0.01        54.2056  54.2056  54.2056  54.2056  54.2056  57.0093   56.0748
   0.1         54.2056  54.2056  54.2056  54.2056  58.8785  74.7664   72.8972
   1.0         54.2056  54.2056  54.2056  56.0748  71.9626  75.7009   74.7664
  10.0         54.2056  54.2056  54.2056  69.1589  71.028   70.0935   71.9626
 100.0         54.2056  54.2056  54.2056  63.5514  71.028   70.0935   70.0935

Dowker-time Tumour H0

In [31]:
# Dowker-time / tumour / H0 pipeline: load the persistence images into a
# dataframe, rescale the features, balance the classes, split into
# train/validation/test, grid-search the SVM hyperparameters and report the
# accuracy on the held-out test set.

# load features into dataframe: three metadata columns (ids, times, labels)
# followed by 400 Float64 columns named feature_1 ... feature_400
df_dowkertime_t_h0 = DataFrame(ids=Int64[], times=Int64[], labels=[])
for i in 1:400
    df_dowkertime_t_h0[!, string("feature_", i)] = Float64[]
end

# Build the directory with joinpath instead of hard-coded "\\" separators, and
# use the same workdir-anchored path for listing AND loading, so the cell no
# longer depends on the current directory still being `workdir`.
# The trailing "" keeps the trailing separator the original string had.
location_t_time_h0 = joinpath(
    "persistence_images", "dowkertime", "tumour_time_h0", ""
)
dir_t_time_h0 = joinpath(workdir, location_t_time_h0)
files_dowker_t_time_h0 = readdir(dir_t_time_h0)
for f in files_dowker_t_time_h0
    # parse_file yields the id and time as strings taken from the file name
    id_str, time_str = parse_file(f)
    id = parse(Int64, id_str)
    timepoint = parse(Int64, time_str)
    image = load_object(joinpath(dir_t_time_h0, f))
    label = getBinaryLabel(id, timepoint)

    # one dataframe row: id, time, label, then the image values in iteration order
    row = Any[]
    push!(row, id)
    push!(row, timepoint)
    append!(row, label)  # presumably a scalar 0/1 label; append! kept as in the original
    for x in image
        append!(row, x)
    end
    push!(df_dowkertime_t_h0, row)
end

# rescale features to [0,1]
# NOTE(review): the transform is fitted on every row before the
# train/validation/test split, so held-out rows influence the scaling.
# Left unchanged so the reported numbers stay comparable.
X = Matrix(df_dowkertime_t_h0[:, 4:end])
dt = fit(UnitRangeTransform, X; dims=1, unit=true)
Xn = StatsBase.transform(dt, X)
for i in axes(Xn, 1)
    df_dowkertime_t_h0[i, :][4:end] = Xn[i, :]
end

# randomly undersample imbalanced data
# (random_undersample also calls Random.seed!(1) and shuffles the rows)
df_dowkertime_t_h0 = random_undersample(df_dowkertime_t_h0)

# randomly sample training, test and validation sets
df_dowkertime_t_h0 = df_dowkertime_t_h0[shuffle(1:nrow(df_dowkertime_t_h0)), :]
X = Matrix(df_dowkertime_t_h0[:, 4:end])
y = df_dowkertime_t_h0.labels
# NOTE(review): no random_state is passed to train_test_split, so the split
# presumably differs between runs -- confirm if reproducibility matters.
# train-test split: hold out 1 - train_ratio of the rows
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=1 - train_ratio)
# test-validation split of the held-out part
Xval, Xtest, yval, ytest = train_test_split(
    Xtest, ytest, test_size=test_ratio / (test_ratio + validation_ratio)
)

# optimise hyperparameters on the validation set
C, G, accs = parameter_optimisation(Xtrain, ytrain, Xval, yval, costs, gammas)

# report final accuracy on test set
# (matrices are transposed before being handed to svmtrain / svmpredict)
model = svmtrain(Xtrain', ytrain; gamma=G, cost=C)
ypredict, decision_values = svmpredict(model, Xtest')
acc = mean(ypredict .== ytest) * 100
println("accuracy: ", acc)
println("Cost: ", C, " Gamma: ", G)
display(accs)

accuracy: 77.57009345794393
Cost: 100.0 Gamma: 1.0


8×8 Matrix{Any}:
    "Costs->"   0.0001   0.001   0.01    0.1      1.0     10.0     100.0
   0.0001      46.729   46.729  46.729  46.729   46.729   46.729    57.9439
   0.001       46.729   46.729  46.729  46.729   46.729   57.9439   58.8785
   0.01        46.729   46.729  46.729  46.729   57.9439  58.8785   67.2897
   0.1         46.729   46.729  46.729  57.9439  57.9439  69.1589   75.7009
   1.0         46.729   46.729  46.729  68.2243  70.0935  75.7009   77.5701
  10.0         46.729   46.729  46.729  63.5514  71.028   77.5701   74.7664
 100.0         46.729   46.729  46.729  66.3551  73.8318  73.8318   73.8318

Dowker-time Tumour H1

In [32]:
# Dowker-time / tumour / H1 pipeline: load the persistence images into a
# dataframe, rescale the features, balance the classes, split into
# train/validation/test, grid-search the SVM hyperparameters and report the
# accuracy on the held-out test set.

# load features into dataframe: three metadata columns (ids, times, labels)
# followed by 400 Float64 columns named feature_1 ... feature_400
df_dowkertime_t_h1 = DataFrame(ids=Int64[], times=Int64[], labels=[])
for i in 1:400
    df_dowkertime_t_h1[!, string("feature_", i)] = Float64[]
end

# Build the directory with joinpath instead of hard-coded "\\" separators, and
# use the same workdir-anchored path for listing AND loading, so the cell no
# longer depends on the current directory still being `workdir`.
# The trailing "" keeps the trailing separator the original string had.
location_t_time_h1 = joinpath(
    "persistence_images", "dowkertime", "tumour_time_h1", ""
)
dir_t_time_h1 = joinpath(workdir, location_t_time_h1)
files_dowker_t_time_h1 = readdir(dir_t_time_h1)
for f in files_dowker_t_time_h1
    # parse_file yields the id and time as strings taken from the file name
    id_str, time_str = parse_file(f)
    id = parse(Int64, id_str)
    timepoint = parse(Int64, time_str)
    image = load_object(joinpath(dir_t_time_h1, f))
    label = getBinaryLabel(id, timepoint)

    # one dataframe row: id, time, label, then the image values in iteration order
    row = Any[]
    push!(row, id)
    push!(row, timepoint)
    append!(row, label)  # presumably a scalar 0/1 label; append! kept as in the original
    for x in image
        append!(row, x)
    end
    push!(df_dowkertime_t_h1, row)
end

# rescale features to [0,1]
# NOTE(review): the transform is fitted on every row before the
# train/validation/test split, so held-out rows influence the scaling.
# Left unchanged so the reported numbers stay comparable.
X = Matrix(df_dowkertime_t_h1[:, 4:end])
dt = fit(UnitRangeTransform, X; dims=1, unit=true)
Xn = StatsBase.transform(dt, X)
for i in axes(Xn, 1)
    df_dowkertime_t_h1[i, :][4:end] = Xn[i, :]
end

# randomly undersample imbalanced data
# (random_undersample also calls Random.seed!(1) and shuffles the rows)
df_dowkertime_t_h1 = random_undersample(df_dowkertime_t_h1)

# randomly sample training, test and validation sets
df_dowkertime_t_h1 = df_dowkertime_t_h1[shuffle(1:nrow(df_dowkertime_t_h1)), :]
X = Matrix(df_dowkertime_t_h1[:, 4:end])
y = df_dowkertime_t_h1.labels
# NOTE(review): no random_state is passed to train_test_split, so the split
# presumably differs between runs -- confirm if reproducibility matters.
# train-test split: hold out 1 - train_ratio of the rows
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=1 - train_ratio)
# test-validation split of the held-out part
Xval, Xtest, yval, ytest = train_test_split(
    Xtest, ytest, test_size=test_ratio / (test_ratio + validation_ratio)
)

# optimise hyperparameters on the validation set
C, G, accs = parameter_optimisation(Xtrain, ytrain, Xval, yval, costs, gammas)

# report final accuracy on test set
# (matrices are transposed before being handed to svmtrain / svmpredict)
model = svmtrain(Xtrain', ytrain; gamma=G, cost=C)
ypredict, decision_values = svmpredict(model, Xtest')
acc = mean(ypredict .== ytest) * 100
println("accuracy: ", acc)
println("Cost: ", C, " Gamma: ", G)
display(accs)

accuracy: 82.2429906542056
Cost: 10.0 Gamma: 1.0


8×8 Matrix{Any}:
    "Costs->"   0.0001   0.001    0.01     0.1      1.0     10.0     100.0
   0.0001      41.1215  41.1215  41.1215  41.1215  41.1215  41.1215   73.8318
   0.001       41.1215  41.1215  41.1215  41.1215  41.1215  73.8318   79.4393
   0.01        41.1215  41.1215  41.1215  41.1215  72.8972  81.3084   82.243
   0.1         41.1215  41.1215  41.1215  70.0935  80.3738  82.243    83.1776
   1.0         41.1215  41.1215  41.1215  78.5047  81.3084  84.1121   76.6355
  10.0         41.1215  41.1215  41.1215  71.9626  77.5701  78.5047   71.028
 100.0         41.1215  41.1215  41.1215  41.1215  68.2243  68.2243   68.2243