# Prerequisites

Precompile packages and include project source files

# Dimensionality reduction

### Init

Sample encoded database for better performance

In [None]:
# Draw `dataSize` distinct column indexes of `encodedDB` at random and keep only
# those columns; `outDim` is the target dimensionality used by the reduction cells.
outDim = 2
dataSize = 3000
indexes = sample(1:size(encodedDB, 2), dataSize; replace=false)
data = getCol(encodedDB, indexes)

Use the whole database

In [None]:
# Use every column of `encodedDB` instead of a random sample.
indexes = 1:size(encodedDB, 2)
data = encodedDB
# Must be spelled `outDim` (not `outdim`): the PCA and t-SNE cells read `outDim`,
# so the lowercase spelling left them without the intended value.
outDim = 2

### PCA

In [None]:
# Fit a PCA model on `data` with at most `outDim` output dimensions, then project
# `data` through it.
model = fit(PCA, data; maxoutdim=outDim)
pred = MultivariateStats.transform(model, data)
# Method label that is later handed to `visualize`.
DimRedMethod = "PCA"

### T-sne

In [None]:
# Method label that is later handed to `visualize`.
DimRedMethod = "T-sne"

# Positional settings for `tsne`, listed in the order they are passed below.
reduce_dims = 0
max_iter = 3000
perplexity = 100.0

# The input is the `indexes`-by-`indexes` slice of `distances`, hence `distance=true`.
# The result is transposed with `permutedims` before being stored in `pred`.
pred = permutedims(
    tsne(
        distances[indexes, indexes],
        outDim,
        reduce_dims,
        max_iter,
        perplexity;
        distance=true,
    ),
)

# Clustering

The closer the silhouette value is to 1.0, the better the clustering.

### Init

In [None]:
# Number of clusters requested from the K-means and Gaussian-mixture cells.
cluster_count = 8

### Based on the first preference

In [None]:
# Cluster label of each selected column = its entry in the first row of `database`.
labels = database[1, indexes]
clusters = clusterize(labels, candidates, parties)
# Method label that is later handed to `visualize`.
ClustMethod = "Party"
# Mean silhouette of this labelling over the matching slice of `distances`.
silhouettes(labels, distances[indexes, indexes]) |> mean

### K-means

In [None]:
# Method label that is later handed to `visualize`.
ClustMethod = "K-means"
# Run k-means on `data` with `cluster_count` clusters and at most 200 iterations.
KmeansRes = kmeans(data, cluster_count; maxiter=200)
labels = KmeansRes.assignments
clusters = clusterize(labels)
# Mean silhouette of this labelling over the matching slice of `distances`.
silhouettes(labels, distances[indexes, indexes]) |> mean

### Gaussian mixtures

In [None]:
# Method label that is later handed to `visualize`.
ClustMethod = "GM"
# The mixture model is fitted on the transposed data.
data_T = permutedims(data)
gm = GaussianMixture(; n_components=cluster_count).fit(data_T)
# Shift the predicted labels up by one.
# NOTE(review): presumably because the predictor returns 0-based labels — confirm.
labels = gm.predict(data_T) .+ 1
clusters = clusterize(labels)
# Mean silhouette of this labelling over the matching slice of `distances`.
silhouettes(labels, distances[indexes, indexes]) |> mean

## Clustering Validation

In [None]:
mean(silhouettes(labels, distances[indexes, indexes]))

## Clustering visualization

Save the current clusters as a template so that the cluster colours of later clusterings can be matched to it

In [None]:
template = clusters

In [None]:
unify_labels!(template, clusters)

In [None]:
visualize(pred, clusters, DimRedMethod, ClustMethod, output=true)

### Creating graph out of clustered database

In [None]:
@time G = createClusteredMetaGraph(g, clusters, labels)

## Drawing clustered graph

In [None]:
@time drawClusteredMetaGraph(G)

## Experiment

In [1]:
using Revise

In [2]:
using OpinionDiffusion

┌ Info: Precompiling OpinionDiffusion [8a824c69-2f4a-47fa-94c7-8095e16cc636]
└ @ Base loading.jl:1317
  ** incremental compilation may be fatally broken for this module **



In [None]:
# Import `GaussianMixture` from the `mixture` module via `@sk_import`.
@sk_import mixture : GaussianMixture
# Print every Float64 with four digits after the decimal point.
# NOTE(review): this adds a method to `Base.show` for a Base type (type piracy), so
# it changes how Float64 values are shown by all code in the session, not only in
# this notebook's cell output — confirm that this is intended.
Base.show(io::IO, f::Float64) = @printf(io, "%1.4f", f)

Parse input data

In [3]:
input_filename = "ED-00001-00000002.toc"
@time parties, candidates, election = parse_data2(input_filename)

  0.298200 seconds (568.53 k allocations: 55.977 MiB, 60.74% gc time, 4.45% compilation time)


(["G.P.", "Lab", "F.F.", "S.P.", "S.F.", "P.D.", "Csp", "F.G."], OpinionDiffusion.Candidate[OpinionDiffusion.Candidate("Robert Bonnie", 1), OpinionDiffusion.Candidate("Joan Burton", 2), OpinionDiffusion.Candidate("Deirdre Doherty Ryan", 3), OpinionDiffusion.Candidate("Joe Higgins", 4), OpinionDiffusion.Candidate("Brian Lenihan", 3), OpinionDiffusion.Candidate("Mary Lou Mc Donald", 5), OpinionDiffusion.Candidate("Tom Morrissey", 6), OpinionDiffusion.Candidate("John Thomas Smyth C.C.", 7), OpinionDiffusion.Candidate("Sheila Terry", 8)], [[[5], [3], [7], [1, 2, 4, 6, 8, 9]], [[5], [3], [7], [1, 2, 4, 6, 8, 9]], [[5], [3], [7], [1, 2, 4, 6, 8, 9]], [[5], [3], [7], [1, 2, 4, 6, 8, 9]], [[5], [3], [7], [1, 2, 4, 6, 8, 9]], [[5], [3], [7], [1, 2, 4, 6, 8, 9]], [[5], [3], [7], [1, 2, 4, 6, 8, 9]], [[5], [3], [7], [1, 2, 4, 6, 8, 9]], [[5], [3], [7], [1, 2, 4, 6, 8, 9]], [[5], [3], [7], [1, 2, 4, 6, 8, 9]]  …  [[7], [5], [4], [3], [2], [1], [9], [8], [6]], [[4], [2], [7], [9], [3], [5], [1, 6, 

In [36]:
# Settings handed to `Spearman_model` in the next cell.
model_config = Dict(
    "weight_func" => Dict("type" => "power", "power" => 2),
    "dist_metric" => "L1",
    "edge_init_func" => Dict("type" => "exp", "base" => 0.5, "offset" => -6.28),
)

Dict{String, Any} with 3 entries:
  "weight_func"    => Dict{String, Any}("power"=>2, "type"=>"power")
  "dist_metric"    => "L1"
  "edge_init_func" => Dict{String, Any}("base"=>0.5, "offset"=>-6.28, "type"=>"…

In [37]:
model = Spearman_model(election, length(candidates), model_config)

Initializing voters:
  0.169026 seconds (68.93 k allocations: 7.620 MiB, 91.81% compilation time)
Initializing edges:
 43.188127 seconds (15 allocations: 1.001 MiB)
Initializing graph:
23350
29988
  0.052825 seconds (64.51 k allocations: 5.057 MiB, 86.38% compilation time)
Initializing logging
  3.738584 seconds (5.63 M allocations: 308.532 MiB, 1.00% gc time, 6.19% compilation time)


Spearman_model(OpinionDiffusion.Spearman_voter[OpinionDiffusion.Spearman_voter(1, [45.0, 45.0, 4.0, 45.0, 1.0, 45.0, 9.0, 45.0, 45.0], 0.37447141106860493, 0.5079226162019765), OpinionDiffusion.Spearman_voter(2, [45.0, 45.0, 4.0, 45.0, 1.0, 45.0, 9.0, 45.0, 45.0], 0.556027797060845, 0.43389700471396647), OpinionDiffusion.Spearman_voter(3, [45.0, 45.0, 4.0, 45.0, 1.0, 45.0, 9.0, 45.0, 45.0], 0.3342854251199012, 0.5075700203042157), OpinionDiffusion.Spearman_voter(4, [45.0, 45.0, 4.0, 45.0, 1.0, 45.0, 9.0, 45.0, 45.0], 0.49357803060544597, 0.3955672413304974), OpinionDiffusion.Spearman_voter(5, [45.0, 45.0, 4.0, 45.0, 1.0, 45.0, 9.0, 45.0, 45.0], 0.5282198436306692, 0.5301129227689705), OpinionDiffusion.Spearman_voter(6, [45.0, 45.0, 4.0, 45.0, 1.0, 45.0, 9.0, 45.0, 45.0], 0.5206773887244525, 0.4182733644879518), OpinionDiffusion.Spearman_voter(7, [45.0, 45.0, 4.0, 45.0, 1.0, 45.0, 9.0, 45.0, 45.0], 0.5793319172059563, 0.4510450292813902), OpinionDiffusion.Spearman_voter(8, [45.0, 45.0, 

In [6]:
model = OpinionDiffusion.load("logs/2021-06-15_10-46-57/model.jld2", "model")

Spearman_model(OpinionDiffusion.Spearman_voter[OpinionDiffusion.Spearman_voter(1, [45.0, 45.0, 4.0, 45.0, 1.0, 45.0, 9.0, 45.0, 45.0], 0.39557550249521234, 0.49983517683269973), OpinionDiffusion.Spearman_voter(2, [45.0, 45.0, 4.0, 45.0, 1.0, 45.0, 9.0, 45.0, 45.0], 0.514964532814092, 0.4239904796224007), OpinionDiffusion.Spearman_voter(3, [45.0, 45.0, 4.0, 45.0, 1.0, 45.0, 9.0, 45.0, 45.0], 0.6720151959600401, 0.44162497206844753), OpinionDiffusion.Spearman_voter(4, [45.0, 45.0, 4.0, 45.0, 1.0, 45.0, 9.0, 45.0, 45.0], 0.5257990006411783, 0.549032723721798), OpinionDiffusion.Spearman_voter(5, [45.0, 45.0, 4.0, 45.0, 1.0, 45.0, 9.0, 45.0, 45.0], 0.5218320634747935, 0.5414160993101209), OpinionDiffusion.Spearman_voter(6, [45.0, 45.0, 4.0, 45.0, 1.0, 45.0, 9.0, 45.0, 45.0], 0.6055845598713583, 0.45168130778516713), OpinionDiffusion.Spearman_voter(7, [45.0, 45.0, 4.0, 45.0, 1.0, 45.0, 9.0, 45.0, 45.0], 0.33415930329498794, 0.5989808093499354), OpinionDiffusion.Spearman_voter(8, [45.0, 45.0,

In [39]:
# Settings handed to `Experiment`: the sample size plus the dimensionality-reduction
# and clustering options.
exp_config = Dict(
    "sample_size" => 3000,
    "reduce_dim_config" => Dict(
        "used" => true,
        "method" => "PCA",
        "PCA" => Dict("out_dim" => 2),
        "tsne" => Dict(
            "out_dim" => 2,
            "reduce_dims" => 0,
            "max_iter" => 3000,
            "perplexity" => 100.0,
        ),
    ),
    "clustering_config" => Dict(
        "used" => true,
        "method" => "Party",
        "K-means" => Dict("cluster_count" => 8),
        "GM" => Dict("cluster_count" => 8),
    ),
)

Dict{String, Any} with 3 entries:
  "reduce_dim_config" => Dict{String, Any}("method"=>"PCA", "used"=>true, "tsne…
  "sample_size"       => 3000
  "clustering_config" => Dict{String, Any}("method"=>"Party", "used"=>true, "GM…

In [41]:
experiment = Experiment(model, parties, candidates, exp_config)

Experiment(Spearman_model(OpinionDiffusion.Spearman_voter[OpinionDiffusion.Spearman_voter(1, [45.0, 45.0, 4.0, 45.0, 1.0, 45.0, 9.0, 45.0, 45.0], 0.37447141106860493, 0.5079226162019765), OpinionDiffusion.Spearman_voter(2, [45.0, 45.0, 4.0, 45.0, 1.0, 45.0, 9.0, 45.0, 45.0], 0.556027797060845, 0.43389700471396647), OpinionDiffusion.Spearman_voter(3, [45.0, 45.0, 4.0, 45.0, 1.0, 45.0, 9.0, 45.0, 45.0], 0.3342854251199012, 0.5075700203042157), OpinionDiffusion.Spearman_voter(4, [45.0, 45.0, 4.0, 45.0, 1.0, 45.0, 9.0, 45.0, 45.0], 0.49357803060544597, 0.3955672413304974), OpinionDiffusion.Spearman_voter(5, [45.0, 45.0, 4.0, 45.0, 1.0, 45.0, 9.0, 45.0, 45.0], 0.5282198436306692, 0.5301129227689705), OpinionDiffusion.Spearman_voter(6, [45.0, 45.0, 4.0, 45.0, 1.0, 45.0, 9.0, 45.0, 45.0], 0.5206773887244525, 0.4182733644879518), OpinionDiffusion.Spearman_voter(7, [45.0, 45.0, 4.0, 45.0, 1.0, 45.0, 9.0, 45.0, 45.0], 0.5793319172059563, 0.4510450292813902), OpinionDiffusion.Spearman_voter(8, [4

In [14]:
show(experiment.diffusion_metrics)

OpinionDiffusion.Spearman_metrics([0], [2.21656369415378], [31], [[2.494331065759637, 12.705082032813126, 7.669734560490863, 21.481926103774843, 26.964118980925704, 8.016539949313058, 7.903161264505802, 0.44684540482859814, 12.318260637588368]], [[9.291123856950186, 13.08759614957094, 11.391176841106814, 13.325700650630623, 14.079150178589956, 9.43483875031494, 11.373669838305693, 6.282929838602108, 11.733813895928742]], [[1.0, 6.0, 4.0, 7.0, 8.0, 2.0, 3.0, 0.0, 5.0]])

In [42]:
# Settings handed to `run_experiment!`.
diffusion_config = Dict(
    "diffusions" => 5,
    "checkpoint" => 1,
    "voter_diff_config" => Dict("evolve_vertices" => 1000, "method" => "averageAll"),
    "edge_diff_config" => Dict(
        "evolve_edges" => 1000,
        "dist_metric" => "L1",
        "edge_diff_func" => Dict("type" => "exp", "base" => 0.5),
    ),
)

Dict{String, Any} with 4 entries:
  "edge_diff_config"  => Dict{String, Any}("dist_metric"=>"L1", "evolve_edges"=…
  "voter_diff_config" => Dict{String, Any}("method"=>"averageAll", "evolve_vert…
  "diffusions"        => 5
  "checkpoint"        => 1

In [34]:
OpinionDiffusion.nv(model.social_network)
length(model.voters)

29988

In [54]:
diffusion_metrics = run_experiment!(experiment, diffusion_config)

OpinionDiffusion.Spearman_metrics([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [2.070828331332533, 2.0711617980525543, 2.0712284913965586, 2.0712284913965586, 2.071295184740563, 2.0714285714285716, 2.071628651460584, 2.071762038148593, 2.0718954248366015, 2.072095504868614, 2.0721621982126184, 2.072295584900627, 2.0723622782446314, 2.0724956649326396, 2.0726290516206483, 2.072762438308657, 2.0728291316526612, 2.072895824996665, 2.073295985060691], [33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33], [[2.494331065759637, 12.705082032813126, 7.669734560490863, 21.481926103774843, 26.964118980925704, 8.016539949313058, 7.903161264505802, 0.44684540482859814, 12.318260637588368], [2.494331065759637, 12.705082032813126, 7.669734560490863, 21.481926103774843, 26.964118980925704, 8.016539949313058, 7.903161264505802, 0.44684540482859814, 12.318260637588368], [2.494331065759637, 12.705082032813126, 7.669734560490863, 21.481926103774843, 26.964118980925704

In [107]:
reshape(names, 1, length(names))

1×9 Matrix{String}:
 "Robert Bonnie-G.P."  "Joan Burton-Lab"  …  "Sheila Terry-F.G."

In [106]:
names = [candidate.name * "-" * parties[candidate.party] for candidate in candidates]

9-element Vector{String}:
 "Robert Bonnie-G.P."
 "Joan Burton-Lab"
 "Deirdre Doherty Ryan-F.F."
 "Joe Higgins-S.P."
 "Brian Lenihan-F.F."
 "Mary Lou Mc Donald-S.F."
 "Tom Morrissey-P.D."
 "John Thomas Smyth C.C.-Csp"
 "Sheila Terry-F.G."

In [108]:
OpinionDiffusion.visualize_metrics(experiment)

### Evaluate clustering

In [None]:
# Pairwise Euclidean distances between the opinions of the sampled voters.
dist_metric = Euclidean()
# The metric is passed under the name defined on the line above; this call used to
# reference `distMetric`, which this cell never assigns.
# The per-voter opinion vectors are stacked as the columns of one matrix because
# `pairwise(...; dims=2)` compares the columns of a matrix.
# NOTE(review): assumes every `voter.opinion` is a vector of equal length — confirm.
@time distances = pairwise(
    dist_metric,
    reduce(hcat, [voter.opinion for voter in experiment.sampled_voters]);
    dims=2,
)

In [None]:
labels = [voter.label for voter in experiment.sampled_voters]
mean(silhouettes(labels, distances))

### Create a new log

In [None]:
# Experiment numbering restarts for every new log directory.
expCounter = 1
# Log directory named after the current timestamp, created together with any
# missing parent directories.
logdir = string("logs/", Dates.format(now(), "yyyy-mm-dd_HH-MM-SS"))
mkpath(logdir)

#### Configure init variables

In [None]:
# Initialisation settings; they are also written to `initConfig.yml` inside `logdir`.
initConfig = Dict(
    "inputFileName" => "ED-00001-00000002.soi",
    "weightFunc" => Dict("type" => "power", "power" => 2),
    "distMetric" => "L1",
    "edgeInitFunc" => Dict("type" => "exp", "base" => 0.5, "offset" => -6.28),
)
YAML.write_file(string(logdir, "/initConfig.yml"), initConfig)

In [None]:
initConfig = YAML.load_file("$(logdir)/initConfig.yml")

### Init

init database

In [None]:
@time parties, candidates, election = initDB(initConfig["inputFileName"])

encode database

In [None]:
# Variant 1: evaluate the configured weight function at 1.0, 2.0, ...,
# length(candidates), then pass the values with their minimum and maximum and the
# target bounds 0.0 and 1.0 to `translateRange`.
weightFunc = parseFunction(initConfig["weightFunc"])
weights = let raw = map(weightFunc, 1.0:length(candidates))
    lo, hi = extrema(raw)
    translateRange(lo, hi, 0.0, 1.0, raw)
end

In [None]:
# Variant 2: evaluate the configured weight function at 1, 2, ..., length(candidates)
# and divide by the total so the weights sum to one.
weightFunc = parseFunction(initConfig["weightFunc"])
weights = let raw = map(weightFunc, 1:length(candidates))
    raw ./ sum(raw)
end

In [None]:
@time opinions = infer_opinions(election, weights)

calculate distance matrix

In [None]:
distMetric = parseMetric(initConfig["distMetric"])
@time distances = pairwise(distMetric, opinions, dims=2)

init graph

In [None]:
# The edge-initialisation function is hard-coded here; per the original author's
# note, building it with parseFunction(initConfig["edgeInitFunc"]) was about 10x slower.
# NOTE(review): the literal 5.14 differs from the "offset" => -6.28 stored in
# initConfig — confirm which value is intended.
edgeInitFunc = x -> 0.5^(x + 5.14)
distMetric = parseMetric(initConfig["distMetric"])
edges = generate_edges(opinions, distMetric, edgeInitFunc)
# Only the graph construction is timed; there is one vertex per column of `opinions`.
@time g = initGraph(size(opinions, 2), edges)

Log initial state

In [None]:
logger(g, database, logdir, 0)

Alternatively you can load logged state

In [None]:
# Read the three stored values "x", "y" and "z" from the example file. The do-block
# form closes the file when the block finishes.
# The block used to return file["a"] as a fourth element as well; it was never bound
# to a variable, so that read was dropped.
x, y, z = jldopen("example.jld2", "r") do file
    file["x"], file["y"], file["z"]
end

In [None]:
print(x,y,z)

In [None]:
#WIP Load state
# Restore the graph, election, opinions and collected statistics from `logdir`.
g, election, opinions, stats = loadLog("$(logdir)")
# NOTE(review): `stat` is not assigned anywhere in this cell, so this line relies on
# a value left over from another cell — confirm what should be appended here.
push!(stats, stat)

## Create a new experiment

In [None]:
# Each experiment gets its own numbered directory inside the current log directory.
expDir = string(logdir, "/experiment_", expCounter)
mkpath(expDir)
expCounter += 1

# Start the statistics history with the state before any diffusion has run.
stat = Statistics(g, getElectionResult(election))
stats = Statistics[stat]

# Step counter used by the run cell.
counter = 1

#### Configure experiment variables

In [None]:
# Experiment settings: diffusion parameters plus the dimensionality-reduction and
# clustering options read by the cells below.
expConfig = Dict(
    "diffusionConfig" => Dict(
        "diffusions" => 5,
        "vertexDiffConfig" => Dict(
            "evolveVertices" => 1000,
            "method" => "averageOne"
        ),
        "edgeDiffConfig" => Dict(
            "evolveEdges" => 1000,
            "distMetric" => "L1",
            "edgeDiffFunc" => Dict(
                "type" => "exp",
                "base" => 1/2
            )
        )
    ),
    "reduceDimConfig" => Dict(
        "used" => true,
        "dataSize" => 3000,
        "method" => "PCA",
        "PCA" => Dict(
            "outDim" => 2
        ),
        "tsne" => Dict(
            "outDim" => 2,
            "reduce_dims" => 0,
            "max_iter" => 3000,
            "perplexity" => 100.0
        )
    ),
    "clusteringConfig" => Dict(
        "used" => true,
        "method" => "Party",
        "K-means" => Dict(
            "clusterCount" => 8
        ),
        "GM" => Dict(
            "clusterCount" => 8
        )
    )
)
# Persist the experiment settings next to the experiment's results. This call used
# to write `initConfig`, so expConfig.yml never contained the experiment settings.
YAML.write_file("$(expDir)/expConfig.yml", expConfig)

sampling for visualizations

In [None]:
# Placeholders for the visualisation sample. They hold the value `nothing` (not the
# type `Nothing`) so that `isnothing(...)` / `=== nothing` checks behave as expected
# when dimensionality reduction is switched off.
indexes = nothing
sampled_voters = nothing
sampled_opinions = nothing
if expConfig["reduceDimConfig"]["used"]
    mkpath(expDir * "/images/voters")
    # Draw `dataSize` distinct column indexes and take the same columns from both
    # `election` and `opinions`.
    indexes = sample(
        1:size(opinions, 2), expConfig["reduceDimConfig"]["dataSize"]; replace=false
    )
    sampled_voters = getCol(election, indexes)
    sampled_opinions = getCol(opinions, indexes)
end

#### Run the experiment

In [None]:
diffusionConfig = expConfig["diffusionConfig"]
# Draw the initial state (step 0) only when no diffusion step has been run yet.
if expConfig["reduceDimConfig"]["used"] && counter == 1
    visualizeVoters(
        sampled_opinions, sampled_voters, candidates, parties, expConfig,
        expDir * "/images", 0,
    )
end

for _ in 1:diffusionConfig["diffusions"]
    # `initConfig` as built in this notebook has no "encoding" key, so indexing it
    # directly raised a KeyError. With `get`, a missing key falls through to the
    # general branch, and an existing key behaves exactly as before.
    if get(initConfig, "encoding", nothing) == "spearmann"
        diffusion!(g, encodedDB, diffusionConfig)
    else
        diffusion!(g, database, encodedDB, diffusionConfig)
    end

    # Record the statistics after this step and log the current state.
    stat = Statistics(g, getElectionResult(database))
    push!(stats, stat)

    logger(g, database, expDir, counter)

    if expConfig["reduceDimConfig"]["used"]
        visualizeVoters(
            sampled_opinions, sampled_voters, candidates, parties, expConfig,
            expDir * "/images", counter,
        )
    end
    counter += 1
end

In [None]:
visualizeStatistics(database, stats::Vector{Statistics}, expDir * "/images")

In [None]:
visualizeElections(candidates, parties, stats::Vector{Statistics}, expDir * "/images")

## Additional analysis

In [None]:
stats