# CDalgs - correlation explorations on the fsv dataset

This notebook shows examples of correlations between the agents' features.

Load libraries

In [1]:
using BSON, DataFrames
using LinRegOutliers, StatsBase
using Plots

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mPrecompiling DataFrames [a93c6f00-e57d-5684-b7b6-d8193f3e46c0]
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mPrecompiling LinRegOutliers [6d4de0fb-32d9-4c65-aac1-cc9ed8b94b1a]
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mPrecompiling Plots [91a5bcdd-55d7-5caf-9e0b-520d859cae80]


Load dataset

In [2]:
# Read the serialized dataset from disk; the entries used below are stored
# under the `:data_tensor` and `:parameters` keys.
dataset_a = BSON.load("data/dataset_a.bson")
# Flip the sign of every entry of the data tensor.
data = -1 .* dataset_a[:data_tensor];
# Materialise the parameter table as a plain `Matrix`.
params = Matrix(dataset_a[:parameters]);

Normalise data

In [3]:
"""
    normalise(d)

Z-score every column of the matrix `d` in place and return `d`.

Each entry `s` of a column with mean `μ` and standard deviation `σ` is replaced
by `(s - μ) / (σ + ϵ)`, where `ϵ = 10e-10` (i.e. `1e-9`) keeps the division
finite for constant columns.

Note that the argument itself is overwritten; pass a copy to keep the original.
"""
function normalise(d)
    _, ncols = size(d)
    ϵ = 10e-10
    for j in 1:ncols
        # Work from a snapshot of the column so the statistics and the
        # transformed values are all derived from the untouched data.
        column = d[:, j]
        μ, σ = mean(column), std(column)
        d[:, j] = @. (column - μ) / (σ + ϵ)
    end
    return d
end

# Z-score the two slices of `data` along its third axis. Indexing with
# `data[:, :, k]` makes a copy, so `normalise` (which writes into its argument)
# leaves `data` itself untouched.
deflection = normalise(data[:, :, 1]);
loads = normalise(data[:, :, 2]);

# Min-max scale the first four parameter columns in place: shift each column
# so its minimum becomes 0, then divide by the maximum of the shifted column.
for j in 1:4
    column = view(params, :, j)
    column .-= minimum(column)
    column ./= maximum(column)
end


Plot with outliers filtering

In [7]:
"""
    corroutliers(df::DataFrame, xlabel, ylabel; show_outliers=false)

Scatter-plot column `:y` of `df` against column `:x`, leaving out the rows that
the `ccf` regression diagnostic reports as outliers.

A regression setting for `y ~ x` is built from `df`, `ccf` is run on it, and the
row indices stored under the `"outliers"` key of its result are excluded from
the scatter.

# Arguments
- `df`: table holding the columns `:x` and `:y`.
- `xlabel`, `ylabel`: axis labels of the returned plot.

# Keywords
- `show_outliers`: when `true`, the excluded rows are drawn as well, in red and
  labelled `"outliers"`. Defaults to `false`, which plots the inliers only.

# Returns
The plot object containing the scatter.
"""
function corroutliers(df::DataFrame, xlabel::AbstractString, ylabel::AbstractString;
                      show_outliers::Bool=false)
    reg = createRegressionSetting(@formula(y ~ x), df)
    outliers = ccf(reg)["outliers"]
    rows = 1:size(df, 1)
    # Row indices that were not flagged; computed once and reused for both axes.
    inliers = setdiff(rows, outliers)
    p = plot(xlabel=xlabel, ylabel=ylabel)
    # Draw into `p` explicitly instead of relying on the implicit current plot.
    scatter!(p, df[inliers, :x], df[inliers, :y], label=false)
    if show_outliers
        flagged = intersect(rows, outliers)
        scatter!(p, df[flagged, :x], df[flagged, :y], color=:red, label="outliers")
    end
    return p
end

corroutliers (generic function with 1 method)

Show correlations

In [12]:
# Peak load of every sample: the maximum of each row of slice 2 of `data`
# (the slice used for `loads`). `map` yields a vector with a concrete element
# type, whereas growing an empty `[]` produces a `Vector{Any}` column.
peak_load = map(maximum, eachrow(data[:, :, 2]))

# One panel per parameter column, each plotted against the peak load.
p1 = corroutliers(DataFrame(y=params[:, 1], x=peak_load), "peak load", "Ec")
p2 = corroutliers(DataFrame(y=params[:, 2], x=peak_load), "peak load", "Fc")
p3 = corroutliers(DataFrame(y=params[:, 3], x=peak_load), "peak load", "Ft")
p4 = corroutliers(DataFrame(y=params[:, 4], x=peak_load), "peak load", "Gf")

# Arrange the four panels in a 2x2 grid and display the figure.
p = plot(p1, p2, p3, p4, layout=(2, 2))
plot(p)

"/Users/drvojtex/Library/Mobile Documents/com~apple~CloudDocs/Codes/Maen/examples/fsv/peak_loads.pdf"

In [11]:
# Deflection at peak load for every sample: rows of slice 2 of `data` (loads)
# are paired with rows of slice 1 (deflections), and the deflection is read at
# the position where the load row attains its maximum. `map` yields a vector
# with a concrete element type, whereas growing an empty `[]` produces a
# `Vector{Any}` column.
peak_deflection = map(eachrow(data[:, :, 2]), eachrow(data[:, :, 1])) do l, d
    return d[argmax(l)]
end

# One panel per parameter column, each plotted against the peak deflection.
p1 = corroutliers(DataFrame(y=params[:, 1], x=peak_deflection), "peak deflection", "Ec")
p2 = corroutliers(DataFrame(y=params[:, 2], x=peak_deflection), "peak deflection", "Fc")
p3 = corroutliers(DataFrame(y=params[:, 3], x=peak_deflection), "peak deflection", "Ft")
p4 = corroutliers(DataFrame(y=params[:, 4], x=peak_deflection), "peak deflection", "Gf")

# Arrange the four panels in a 2x2 grid and display the figure.
p = plot(p1, p2, p3, p4, layout=(2, 2))
plot(p)

"/Users/drvojtex/Library/Mobile Documents/com~apple~CloudDocs/Codes/Maen/examples/fsv/peak_deflection.pdf"