In [None]:
from const import globs

# Unpack the five values returned by globs('l'); they are forwarded to Simulator(...) below.
# NOTE(review): 'l' presumably selects a preset defined in const.py — confirm there what each value means.
D, V, M, k, gamma = globs('l')

In [None]:
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd

From now on let's use the following simulated data for further testing

In [None]:
from simulator import Simulator
from samplers import MC_sample_Z, MC_sample_B, MC_sample_H, MC_sample_GK
from transformation_functions import update_Theta, update_Sigma, update_E, update_C
from generator_functions import sample_Z_from_W
from graph_loss import permutation_topics, graph_loss

# 3. MAIN CYCLE TESTS

### Generating Target Data

These data will be used as input (matrices Z and W) and target (matrix G) for our simulation.

In [None]:
# Build the simulator that provides the target ("true") data for every test below.
simulated_data = Simulator(
    D, V, M, k, gamma,
    seed=1888,  # explicit seed passed to the simulator
)
# Populate the simulator's attributes (W, Z, E, C, H, B, G are read from it later in the notebook).
simulated_data.generate_all_data()

In [None]:
# Input data: display the simulated W (the cell's last expression is rendered as its output).
simulated_data.W

In [None]:
# Target graph: draw simulated_data.G as a matrix image using the 'Blues' colour map.
plt.matshow(simulated_data.G, cmap='Blues')

### Generating Initial Data

These are used as our initial guesses for Sigma, K, B, Theta and G.

In [None]:
# Initial guesses: a second Simulator instance whose attributes (Sigma, B, Theta, G)
# are copied as starting values by the main-cycle cell further down.
# NOTE(review): 2020 is passed positionally, whereas the target simulator uses seed=1888;
# presumably this is the seed too — confirm against Simulator's signature.
initial = Simulator(D, V, M, k, gamma, 2020)
# Only these three sampling steps are run here (not generate_all_data()).
initial.sample_GK()
initial.sample_B()
initial.sample_H()

In [None]:
# Initial graph: draw initial.G as a matrix image using the 'Blues' colour map,
# for visual comparison with the target graph plotted earlier.
plt.matshow(initial.G, cmap='Blues')

## 3.1 Test graph loss

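We use the true (simulated) values for every quantity that is not related to B and G: Z, E, C and H are copied from the simulated data and kept fixed, so only B and G (with K) are sampled in the loop.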

In [None]:
%%time
max_iterations = 4000
np.random.seed(25041945)

# Initialization
Sigma = initial.Sigma.copy()
K = np.linalg.inv(Sigma)
B = initial.B.copy()
Theta = initial.Theta.copy()
G = initial.G.copy()

alpha = np.ones(V)  # Uninformative prior
b = k - 1

# Skipping part
Z = simulated_data.Z.copy()
E = simulated_data.E.copy()
C = simulated_data.C.copy()

H = simulated_data.H.copy()
Theta = update_Theta(Theta, H)  # get Theta from H

sampled_Bs = []
permutations = []
waiting_times = []
sampled_Gs = []
graph_losses = []

for iteration in range(max_iterations):
    
    # Step 1
    #Z, E, C = MC_sample_Z(Z, simulated_data.W, Theta, B, E, C, debug=True) 
    
    # Step 2
    B = MC_sample_B(alpha, C)
    sampled_Bs.append(B)
    
    permute = permutation_topics(B_true=simulated_data.B, B_sampled=B)
    permutations.append(permute)
    
    # Step 3
    #H = MC_sample_H(E, Sigma, H_current=H, burn_in=10)
    
    # Step 4    
    G_old = G.copy()
    wt_sample, G_new, K = MC_sample_GK(G, H, b, debug=False)
    
    G = G_new
    Sigma = np.linalg.inv(K)
    
    waiting_times.append(wt_sample)
    sampled_Gs.append(G.copy())
    
    graph_loss = graph_loss(G_true=simulated_data.G, G_sampled=G, permutation=permute)
    graph_losses.append(graph_loss)
    
    if iteration % 100 == 0:  # Plot every 20 iterations
        print('Finished iteration '+str(iteration))
        #plt.matshow(G, cmap='Blues')
        #plt.suptitle("Graph of iteration " + str(iteration), x=0.5, y=1, ha='center')
        #plt.show()

In [None]:
# Wrap the per-iteration traces in pandas Series for summary statistics and plotting.
# NOTE: this rebinds `graph_loss`, which until now referred to the function imported from
# the graph_loss module; re-run the import cell before re-running the sampling loop.
graph_loss = pd.Series(graph_losses)
wait_times = pd.Series(waiting_times)

# Summary statistics of the graph loss
print("Graph loss metrics")
loss_summary = graph_loss.describe()
print(loss_summary)

# Distribution of the graph loss
graph_loss.hist(bins=30)
plt.suptitle('G graph loss histogram', x=0.5, y=1, ha='center')
plt.show()

# Graph loss along the iterations
graph_loss.plot(title="Graph loss series")
plt.show()

# Waiting times along the iterations
wait_times.plot(title="Waiting times series")
plt.show()

In [None]:
import seaborn as sn

# Heatmap of the recorded topic permutations: one row per iteration of the sampling loop.
# plt.subplots(1, 1) returns a single Axes object (not an array), so it is passed
# directly; indexing it with ax[0] raises a TypeError.
fig, ax = plt.subplots(1, 1)
sn.heatmap(permutations, cmap='Blues', ax=ax)
ax.set_title('Topic permutations per iteration')
plt.show()

In [None]:
# Scatter of waiting time (x) against graph loss (y), one point per iteration.
plt.scatter(x=wait_times, y=graph_loss)
# Label both axes of the current Axes in a single call.
plt.gca().set(xlabel='Waiting times', ylabel='Graph losses')
plt.show()