# Prerequisites

Precompile packages and include project source files

# Dimensionality reduction

### Init

Sample encoded database for better performance

In [None]:
dataSize = 3000
indexes = sample(1:size(encodedDB, 2), dataSize, replace=false)
data = getCol(encodedDB, indexes)
outDim = 2

Use the whole database

In [None]:
indexes = 1:size(encodedDB, 2)
data = encodedDB
outdim = 2

### PCA

In [None]:
DimRedMethod = "PCA"
model = fit(PCA, data; maxoutdim=outDim)
pred = MultivariateStats.transform(model, data)

### T-sne

In [None]:
DimRedMethod = "T-sne"
reduce_dims = 0
max_iter = 3000
perplexity = 100.0
pred = permutedims(tsne(distances[indexes, indexes], distance=true, outDim, reduce_dims, max_iter, perplexity))

# Clustering

The closer the silhouette value is to 1.0 the better the clustering is

### Init

In [None]:
cluster_count=8

### Based on first first in preference

In [None]:
ClustMethod = "Party"
labels = database[1, indexes]
clusters = clusterize(labels, candidates, parties)
mean(silhouettes(labels, distances[indexes, indexes]))

### K-means

In [None]:
ClustMethod = "K-means"
KmeansRes = kmeans(data, cluster_count; maxiter=200)
labels = KmeansRes.assignments
clusters = clusterize(labels)
mean(silhouettes(labels, distances[indexes, indexes]))

### Gaussian mixtures

In [None]:
ClustMethod = "GM"
data_T = permutedims(data)
gm = GaussianMixture(n_components=cluster_count).fit(data_T)
labels = gm.predict(data_T) .+ 1
clusters = clusterize(labels)
mean(silhouettes(labels, distances[indexes, indexes]))

## Clustering Validation

In [None]:
mean(silhouettes(labels, distances[indexes, indexes]))

## Clustering visualization

Save template clusters for later to match cluster colours based on it

In [None]:
template = clusters

In [None]:
unify_labels!(template, clusters)

In [None]:
visualize(pred, clusters, DimRedMethod, ClustMethod, output=true)

### Creating graph out of clustered database

In [None]:
@time G = createClusteredMetaGraph(g, clusters, labels)

## Drawing clustered graph

In [None]:
@time drawClusteredMetaGraph(G)

## Experiment

In [3]:
using Opinion_diffusion

LoadError: ArgumentError: Package Opinion_diffusion not found in current path:
- Run `import Pkg; Pkg.add("Opinion_diffusion")` to install the Opinion_diffusion package.


In [4]:
import("src/OpinionDiffusion.jl")

LoadError: syntax: invalid "import" statement: expected identifier

In [None]:
@sk_import mixture : GaussianMixture
Base.show(io::IO, f::Float64) = @printf(io, "%1.4f", f)

Parse input data

In [None]:
input_filename = "ED-00001-00000002.toc"
@time parties, candidates, election = parse_data2(input_filename)

In [None]:
model_config = Dict(
    "weight_func" => Dict(
        "type" => "power",
        "power" => 2
    ),
    "dist_metric" => "L1",
    "edge_init_func" => Dict(
        "type" => "exp",
        "base" => 1/2,
        "offset" => -6.28
    )
)

In [None]:
model = OpinionDiffusion.Spearman_model(election, length(candidates), model_config)

In [None]:
model = load("logs/2021-06-15_01-03-54/model.jld2")

In [None]:
exp_config = Dict(
    "diffusions" => 5,
    "checkpoint" => 1,
    "sample_size" => 3000,
    "reduce_dim_config" => Dict(
        "used" => true,
        "method" => "PCA",
        "PCA" => Dict(
            "out_dim" => 2
        ),
        "tsne" => Dict(
            "out_dim" => 2,
            "reduce_dims" => 0,
            "max_iter" => 3000,
            "perplexity" => 100.0
        )
    ),
    "clustering_config" => Dict(
        "used" => true,
        "method" => "Party",
        "K-means" => Dict(
            "cluster_count" => 8
        ),
        "GM" => Dict(
            "cluster_count" => 8
        )
    )
)

In [None]:
abstract type B end
struct A <: B
    x::Int64
end
function suc(b::Vector{A})
    println(b[1].x + 2)
end
suc([A(42), A(69)])

In [None]:
experiment = OpinionDiffusion.Experiment(model, parties, candidates, exp_config)

In [None]:
diffusion_config = Dict(
        "vertex_diff_config" => Dict(
            "evolve_vertices" => 1000,
            "method" => "averageOne"
        ),
        "edge_diff_config" => Dict(
            "evolve_edges" => 1000,
            "dist_metric" => "L1",
            "edge_diff_func" => Dict(
                "type" => "exp",
                "base" => 1/2
            )
        )
    )

In [None]:
run_experiment!()

In [None]:
visualizeStatistics(experiment)

In [None]:
visualizeElections(experiment)

### Evaluate clustering

In [None]:
dist_metric = Euclidean()
@time distances = pairwise(distMetric, [voter.opinion for voter in experiment.sampled_voters], dims=2)

In [None]:
labels = [voter.label for voter in experiment.sampled_voters]
mean(silhouettes(labels, distances))

### Create a new log

In [None]:
logdir = "logs/" * Dates.format(now(), "yyyy-mm-dd_HH-MM-SS")
expCounter = 1
mkpath(logdir)

#### Configure init variables

In [None]:
initConfig = Dict(
    "inputFileName" => "ED-00001-00000002.soi",
    "weightFunc" => Dict(
        "type" => "power",
        "power" => 2
    ),
    "distMetric" => "L1",
    "edgeInitFunc" => Dict(
        "type" => "exp",
        "base" => 1/2,
        "offset" => -6.28
    )
)
YAML.write_file("$(logdir)/initConfig.yml", initConfig)

In [None]:
initConfig = YAML.load_file("$(logdir)/initConfig.yml")

### Init

init database

In [None]:
@time parties, candidates, election = initDB(initConfig["inputFileName"])

encode database

In [None]:
weightFunc = parseFunction(initConfig["weightFunc"])
weights = map(weightFunc, 1.0:length(candidates))
weights = translateRange(minimum(weights), maximum(weights), 0.0, 1.0, weights)

In [None]:
weightFunc = parseFunction(initConfig["weightFunc"])
weights = map(weightFunc, 1:length(candidates))
weights = weights / sum(weights)

In [None]:
@time opinions = infer_opinions(election, weights)

calculate distance matrix

In [None]:
distMetric = parseMetric(initConfig["distMetric"])
@time distances = pairwise(distMetric, opinions, dims=2)

init graph

In [None]:
#edgeInitFunc = parseFunction(initConfig["edgeInitFunc"]) 10x slower
edgeInitFunc = x->(1/2)^(x + 5.14)
distMetric = parseMetric(initConfig["distMetric"])
edges = generate_edges(opinions, distMetric, edgeInitFunc)
@time g = initGraph(size(opinions, 2), edges)

Log initial state

In [None]:
logger(g, database, logdir, 0)

Alternatively you can load logged state

In [None]:
x, y, z = jldopen("example.jld2", "r") do file
    file["x"], file["y"], file["z"], file["a"]
end

In [None]:
print(x,y,z)

In [None]:
#WIP Load state
g, election, opinions, stats = loadLog("$(logdir)")
push!(stats, stat)

## Create a new experiment

In [None]:
expDir = "$(logdir)/experiment_$(expCounter)"
mkpath(expDir)
expCounter += 1

stats = Vector{Statistics}()
stat = Statistics(g, getElectionResult(election))
push!(stats, stat)

counter = 1

#### Configure experiment variables

In [None]:
expConfig = Dict(
    "diffusionConfig" => Dict(
        "diffusions" => 5,
        "vertexDiffConfig" => Dict(
            "evolveVertices" => 1000,
            "method" => "averageOne"
        ),
        "edgeDiffConfig" => Dict(
            "evolveEdges" => 1000,
            "distMetric" => "L1",
            "edgeDiffFunc" => Dict(
                "type" => "exp",
                "base" => 1/2
            )
        )
    ),
    "reduceDimConfig" => Dict(
        "used" => true,
        "dataSize" => 3000,
        "method" => "PCA",
        "PCA" => Dict(
            "outDim" => 2
        ),
        "tsne" => Dict(
            "outDim" => 2,
            "reduce_dims" => 0,
            "max_iter" => 3000,
            "perplexity" => 100.0
        )
    ),
    "clusteringConfig" => Dict(
        "used" => true,
        "method" => "Party",
        "K-means" => Dict(
            "clusterCount" => 8
        ),
        "GM" => Dict(
            "clusterCount" => 8
        )
    )
)
YAML.write_file("$(expDir)/expConfig.yml", initConfig)

sampling for visualizations

In [None]:
indexes = Nothing
sampled_voters = Nothing
sampled_opinions = Nothing
if expConfig["reduceDimConfig"]["used"]
    mkpath(expDir * "/images/voters")
    indexes = sample(1:size(opinions, 2), expConfig["reduceDimConfig"]["dataSize"], replace=false)
    sampled_voters = getCol(election, indexes)
    sampled_opinions = getCol(opinions, indexes)
end

#### Run the experiment

In [None]:
diffusionConfig = expConfig["diffusionConfig"]
if expConfig["reduceDimConfig"]["used"] && counter == 1
    visualizeVoters(sampled_opinions, sampled_voters, candidates, parties, expConfig, expDir * "/images", 0)
end

for i in 1:diffusionConfig["diffusions"]
    if initConfig["encoding"] == "spearmann"
        diffusion!(g, encodedDB, diffusionConfig)
    else
        diffusion!(g, database, encodedDB, diffusionConfig)
    end
    
    stat = Statistics(g, getElectionResult(database))
    push!(stats, stat)
    
    logger(g, database, expDir, counter)
    
    if expConfig["reduceDimConfig"]["used"]
        visualizeVoters(sampled_opinions, sampled_voters, candidates, parties, expConfig, expDir * "/images", counter)      
    end
    counter += 1
end

In [None]:
visualizeStatistics(database, stats::Vector{Statistics}, expDir * "/images")

In [None]:
visualizeElections(candidates, parties, stats::Vector{Statistics}, expDir * "/images")

## Aditional analysis

In [None]:
stats