### Init

Sample the encoded database for better performance

In [None]:
# Draw `dataSize` distinct column indices from `encodedDB`, pull those columns
# out with `getCol`, and set the output dimension used by the reduction cells.
dataSize = 3000
column_count = size(encodedDB, 2)
indexes = sample(1:column_count, dataSize; replace=false)
data = getCol(encodedDB, indexes)
outDim = 2

Use the whole database

In [None]:
# Use every column of the encoded database instead of a random sample.
indexes = 1:size(encodedDB, 2)
data = encodedDB
# Must be spelled `outDim` (capital D): that is the name the PCA and T-sne cells
# read. Assigning `outdim` here would leave `outDim` undefined on this path.
outDim = 2

### PCA

In [None]:
# Fit a PCA with at most `outDim` output dimensions on `data` and transform
# `data` with the fitted model.
# NOTE(review): `model` is also the name assigned by the Spearman_model and
# OpinionDiffusion.load cells; running this cell rebinds it.
DimRedMethod = "PCA"
model = fit(PCA, data; maxoutdim=outDim)
pred = MultivariateStats.transform(model, data)

### T-sne

In [None]:
# Run t-SNE on the `indexes` submatrix of `distances` (passed as a distance
# matrix via `distance=true`) and store the permuted result in `pred`.
DimRedMethod = "T-sne"
reduce_dims = 0
max_iter = 3000
perplexity = 100.0
selected_distances = distances[indexes, indexes]
embedding = tsne(
    selected_distances, outDim, reduce_dims, max_iter, perplexity; distance=true
)
pred = permutedims(embedding)

# Clustering

The closer the silhouette value is to 1.0, the better the clustering.

### Init

In [None]:
# Number of clusters requested by the K-means and Gaussian-mixture cells.
cluster_count=8

### Based on the first preference

In [None]:
# Label each selected column by the first row of `database`, build clusters
# from those labels, and display the mean silhouette of the labelling.
ClustMethod = "Party"
labels = database[1, indexes]
clusters = clusterize(labels, candidates, parties)
party_silhouettes = silhouettes(labels, distances[indexes, indexes])
mean(party_silhouettes)

### K-means

In [None]:
# Run k-means with `cluster_count` clusters (at most 200 iterations), take the
# assignments as labels, and display the mean silhouette.
ClustMethod = "K-means"
KmeansRes = kmeans(data, cluster_count; maxiter=200)
labels = KmeansRes.assignments
clusters = clusterize(labels)
kmeans_silhouettes = silhouettes(labels, distances[indexes, indexes])
mean(kmeans_silhouettes)

### Gaussian mixtures

In [None]:
# Fit a Gaussian mixture with `cluster_count` components on the permuted data,
# shift the predicted labels up by one, and display the mean silhouette.
ClustMethod = "GM"
data_T = permutedims(data)
mixture_model = GaussianMixture(n_components=cluster_count)
gm = mixture_model.fit(data_T)
labels = gm.predict(data_T) .+ 1
clusters = clusterize(labels)
gm_silhouettes = silhouettes(labels, distances[indexes, indexes])
mean(gm_silhouettes)

## Clustering Validation

In [None]:
# Mean silhouette of the current `labels` over the `indexes` submatrix of `distances`.
mean(silhouettes(labels, distances[indexes, indexes]))

## Clustering visualization

Save the current clusters as a template so that later clusterings can be matched to the same cluster colours

In [None]:
# Keep a reference to the current clustering. This binds `template` to the same
# object as `clusters`; no copy is made.
template = clusters

In [None]:
# Unify the labels of `clusters` with the saved `template`.
# NOTE(review): the trailing `!` signals mutation, but which argument is
# modified is not visible here — confirm against the function definition.
unify_labels!(template, clusters)

In [None]:
# Visualize `pred` with `clusters`, passing along the method-name strings set
# by the dimensionality-reduction and clustering cells.
visualize(pred, clusters, DimRedMethod, ClustMethod, output=true)

### Creating graph out of clustered database

In [None]:
# Build the clustered meta-graph `G` from `g`, `clusters` and `labels`;
# `@time` reports how long the call took.
@time G = createClusteredMetaGraph(g, clusters, labels)

## Drawing clustered graph

In [None]:
# Draw the clustered meta-graph `G`; `@time` reports how long the call took.
@time drawClusteredMetaGraph(G)

# Main ________________________________

In [None]:
using Revise

In [None]:
using OpinionDiffusion

In [None]:
# Import `GaussianMixture` from the `mixture` module via `@sk_import`.
@sk_import mixture : GaussianMixture
# Print every Float64 with four digits after the decimal point.
# NOTE(review): this adds a method to `Base.show` for a Base type (type piracy),
# so it changes how Float64 values are shown by all code in the session.
Base.show(io::IO, f::Float64) = @printf(io, "%1.4f", f)

In [None]:
# Instantiate the Plotly backend type reached through `OpinionDiffusion.Plots`.
# NOTE(review): the instance is neither assigned nor passed anywhere in this
# cell — confirm whether this is meant to select the plotting backend.
OpinionDiffusion.Plots.PlotlyBackend()

Parse input data

In [None]:
# Parse the input file; the result is destructured into `parties`,
# `candidates` and `election`. `@time` reports how long parsing took.
input_filename = "ED-00001-00000002.toc"
@time parties, candidates, election = parse_data2(input_filename)

In [None]:
# Configuration handed to the model constructor: weight function, distance
# metric name, and edge-initialisation function. The `let` block keeps the
# helper names local to this cell.
model_config = let exp_base = 1/2
    weight_func = Dict("type" => "exp", "base" => exp_base)
    edge_init_func = Dict("type" => "exp", "base" => exp_base, "offset" => -6.28)
    Dict(
        "weight_func" => weight_func,
        "dist_metric" => "L1",
        "edge_init_func" => edge_init_func,
    )
end

In [None]:
# Build the model from the election, the number of candidates, and `model_config`.
model = Spearman_model(election, length(candidates), model_config)

In [None]:
# Display the model's `log_dir` field.
model.log_dir

In [None]:
# Rebind `model` to an object loaded from a saved .jld2 file.
# NOTE(review): the timestamped path is hard-coded, and the second argument is
# presumably the name the object was stored under — confirm both.
model = OpinionDiffusion.load("logs/2021-06-24_15-40-59/model.jld2", "model")

In [None]:
# Experiment configuration handed to `Experiment`: the sample size plus the
# voter-visualization settings (dimensionality reduction and clustering).
# The `let` block keeps the helper names local to this cell.
exp_config = let
    reduce_dim_config = Dict(
        "used" => true,
        "method" => "PCA",
        "PCA" => Dict("out_dim" => 2),
        "tsne" => Dict(
            "out_dim" => 2,
            "reduce_dims" => 0,
            "max_iter" => 3000,
            "perplexity" => 100.0,
        ),
    )
    clustering_config = Dict(
        "used" => true,
        "method" => "Party",
        "K-means" => Dict("cluster_count" => 8),
        "GM" => Dict("cluster_count" => 8),
    )
    Dict(
        "sample_size" => 3000,
        "voter_visualization_config" => Dict(
            "used" => true,
            "reduce_dim_config" => reduce_dim_config,
            "clustering_config" => clustering_config,
        ),
    )
end

In [1]:
using JLD2

In [2]:
# List the methods of `jldsave`.
# NOTE(review): the recorded output of this cell is an UndefVarError, i.e.
# `jldsave` was not defined after `using JLD2` in that session — presumably the
# installed JLD2 version does not provide it; confirm.
methods(jldsave)

LoadError: UndefVarError: jldsave not defined

In [None]:
# Create the experiment from the model, candidates, parties, the Plotly backend
# type, and `exp_config`.
experiment = Experiment(model, candidates, parties, OpinionDiffusion.Plots.PlotlyBackend, exp_config)

In [None]:
# Diffusion settings handed to `run_experiment!`: top-level counters plus the
# voter- and edge-diffusion sub-configurations. The `let` block keeps the
# helper names local to this cell.
diffusion_config = let
    voter_diff_config = Dict(
        "evolve_vertices" => 100000,
        "method" => "averageAll",
    )
    edge_diff_config = Dict(
        "evolve_edges" => 100000,
        "dist_metric" => "L1",
        "edge_diff_func" => Dict("type" => "exp", "base" => 1/2),
    )
    Dict(
        "diffusions" => 5,
        "checkpoint" => 1,
        "voter_diff_config" => voter_diff_config,
        "edge_diff_config" => edge_diff_config,
    )
end

In [None]:
# Run the experiment with `diffusion_config` and keep the returned value as
# `diffusion_metrics`; the trailing `!` signals that the call mutates an argument.
diffusion_metrics = run_experiment!(experiment, candidates, parties, diffusion_config)

In [None]:
# Visualize the metrics of `experiment` for the given candidates and parties.
OpinionDiffusion.visualize_metrics(experiment, candidates, parties)

In [None]:
using Reexport
using Interact

In [None]:

# Create a widget labelled "px" over the range 0:0.01:0.3 and place it in an hbox.
px=widget(0:0.01:.3, label="px")
hbox(px)

In [None]:
# Combine the voter visualization and the degree distribution stored at index
# `step` into a 2×1 layout of size 980×1200.
# NOTE(review): `step` is not assigned in any visible cell; unless it is defined
# elsewhere it resolves to the function `Base.step`, which is not a valid
# index — confirm.
OpinionDiffusion.plot(experiment.visualizations.voter_visualizations[step], experiment.visualizations.degree_distributions[step], layout = (2, 1), size = (980,1200))

In [None]:
# Pairwise Euclidean distances between the opinions of the sampled voters.
dist_metric = Euclidean()
# Pass the metric defined on the line above. `distMetric` is a different
# variable that is only assigned in a later cell, so using it here either fails
# or silently applies a stale metric.
# NOTE(review): `pairwise` receives a vector of opinions together with `dims=2`;
# confirm the installed distance package accepts that combination.
@time distances = pairwise(dist_metric, [voter.opinion for voter in experiment.sampled_voters], dims=2)

In [None]:
# Collect the label of every sampled voter and display the mean silhouette of
# that labelling over `distances`.
labels = [voter.label for voter in experiment.sampled_voters]
voter_silhouettes = silhouettes(labels, distances)
mean(voter_silhouettes)

In [None]:
# Create a log directory under "logs/" named after the current timestamp and
# start the experiment counter at 1.
timestamp = Dates.format(now(), "yyyy-mm-dd_HH-MM-SS")
logdir = string("logs/", timestamp)
expCounter = 1
mkpath(logdir)

In [None]:
# Build the weight function from `initConfig`, evaluate it at 1.0, 2.0, … up to
# the number of candidates, and translate the results into the range 0.0–1.0.
# NOTE(review): `initConfig` is not assigned in any visible cell — confirm it
# is defined before this cell runs.
weightFunc = parseFunction(initConfig["weightFunc"])
raw_weights = weightFunc.(1.0:length(candidates))
lowest, highest = extrema(raw_weights)
weights = translateRange(lowest, highest, 0.0, 1.0, raw_weights)

In [None]:
# Resolve the distance metric named in `initConfig` and compute the pairwise
# distances of `opinions` along dimension 2; `@time` reports the elapsed time.
# NOTE(review): `initConfig` and `opinions` are not assigned in any visible
# cell — confirm they are defined before this cell runs.
distMetric = parseMetric(initConfig["distMetric"])
@time distances = pairwise(distMetric, opinions, dims=2)