In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
%matplotlib inline


import torch
from torch import nn
from torch import distributions
from torch.nn.parameter import Parameter

import models
from models import Renorm_Dynamic
from EI_calculation import approx_ei
# Pick the first CUDA device when one is available, otherwise stay on the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device('cuda:0' if use_cuda else 'cpu')

## Boolean Network

In [None]:
class BnSim(object):
    """Simulator for a small stochastic Boolean network.

    The adjacency matrix is built once in ``__init__`` from the built-in
    ``'4_vertice_bn'`` template. That template forces ``number_of_vertice``
    to 4 regardless of the value passed to the constructor.
    """

    def __init__(self, number_of_vertice):
        # Requested node count (may be overwritten by the template in gen_edge).
        self.number_of_vertice = number_of_vertice
        self.edges = self.gen_edge(False, [], 0, '4_vertice_bn')

    def gen_edge(self, use_group_list, group_list, random_del_edges_ratio, adj_template_name):
        """Build and return the adjacency matrix as a float numpy array.

        Args:
            use_group_list: when truthy, edges are generated from ``group_list``;
                otherwise a named template is used.
            group_list: ascending boundary indices. Nodes in
                ``[g[i], g[i+1])`` are linked to nodes in ``[g[i+1], g[i+2])``;
                nodes of the last group are linked to nodes with index
                ``< g[1]``.
            random_del_edges_ratio: each candidate edge is kept only if a
                uniform random draw exceeds this ratio.
            adj_template_name: template name; only ``"4_vertice_bn"`` is
                recognised (it also resets ``self.number_of_vertice`` to 4).

        Returns:
            The adjacency matrix. An unrecognised template name yields an
            all-zero matrix of the current size.
        """
        # The matrix starts with no edges at all.
        edges = np.zeros((self.number_of_vertice, self.number_of_vertice))

        if use_group_list:
            n_groups = len(group_list)
            for i in range(n_groups - 1):
                for j in range(self.number_of_vertice):
                    for k in range(self.number_of_vertice):
                        if not (group_list[i] <= j < group_list[i + 1]):
                            continue
                        if i < n_groups - 2:
                            linked = group_list[i + 1] <= k < group_list[i + 2]
                        else:
                            # Last group links to nodes below the second boundary.
                            linked = k < group_list[1]
                        # The random draw happens only for candidate edges.
                        if linked and np.random.rand() > random_del_edges_ratio:
                            edges[j][k] = 1.
            print(edges)
        elif adj_template_name == "4_vertice_bn":
            # Two pairs of nodes, {0, 1} and {2, 3}, each fully linked to the other pair.
            self.number_of_vertice = 4
            edges = np.array([[0, 0, 1, 1],
                              [0, 0, 1, 1],
                              [1, 1, 0, 0],
                              [1, 1, 0, 0]], dtype=float)

        return edges

    def sample_one_step(self, state_now, edges):
        """Return the next network state given ``state_now``.

        A node whose linked neighbours (``edges[i][j] == 1``) are all in state
        1 becomes 1 with certainty (this includes a node with no neighbours).
        Otherwise it is drawn as 0 with probability 0.7 and 1 with
        probability 0.3.
        """
        state_next = torch.zeros(self.number_of_vertice)
        # Constant draw weights, hoisted out of the node loop.
        off_on_weights = torch.Tensor([0.7, 0.3])
        for i in range(self.number_of_vertice):
            degree = 0
            active_neighbours = 0
            for j in range(self.number_of_vertice):
                if edges[i][j] == 1.:
                    degree += 1
                    if state_now[j] == 1.:
                        active_neighbours += 1
            if active_neighbours < degree:
                state_next[i] = torch.multinomial(off_on_weights, 1)
            else:
                state_next[i] = 1.
        return state_next

    def _draw_transition(self):
        """Draw a uniform random state and its one-step successor."""
        rands = torch.multinomial(torch.Tensor([0.5, 0.5]), self.number_of_vertice, replacement=True)
        return rands, self.sample_one_step(rands, edges=self.edges)

    def sample1(self, number_of_data):
        """Sample transitions with both states one-hot encoded.

        Returns ``(state, state_next, edges)`` where the two tensors have
        shape ``(number_of_data, 2**n)``.
        """
        n = self.number_of_vertice
        state = torch.zeros((number_of_data, 2**n))
        state_next = torch.zeros([number_of_data, 2**n])
        for num in range(number_of_data):
            rands, sn = self._draw_transition()
            state[num, :] = self.encoding(rands)*1.0
            state_next[num, :] = self.encoding(sn)*1.0
        return state, state_next, self.edges

    def sample(self, number_of_data):
        """Sample transitions as raw node states.

        Returns ``(state, state_next, edges)`` where the two tensors have
        shape ``(number_of_data, n)``.
        """
        n = self.number_of_vertice
        state = torch.zeros((number_of_data, n))
        state_next = torch.zeros([number_of_data, n])
        for num in range(number_of_data):
            rands, sn = self._draw_transition()
            state[num, :] = rands
            state_next[num, :] = sn
        return state, state_next, self.edges

    def encoding(self, a):
        """One-hot encode a bit vector (most significant bit first).

        Returns a tensor of length ``2**number_of_vertice`` with a single 1 at
        the integer value of ``a``, or ``-1`` when ``a`` is empty.
        """
        if a.size()[0] == 0:
            return -1

        out = torch.zeros(2**self.number_of_vertice)
        v = 0
        for i in range(a.size()[0]):
            v = v * 2 + int(a[i].item())
        out[v] = 1
        return out

def decimalToBinary(num, lst=None):
    """Return the binary digits of ``num``, most significant bit first.

    Args:
        num: integer to convert.
        lst: optional list to which the digits are appended (and which is
            returned). When omitted, a fresh list is created for every call,
            so digits no longer accumulate in a shared default list.

    Returns:
        The list of 0/1 digits, e.g. ``decimalToBinary(5) == [1, 0, 1]``.
    """
    if lst is None:
        lst = []
    if num > 1:
        lst = decimalToBinary(num // 2, lst)
    lst.append(num % 2)
    return lst
def discrete_ei(input_sz, n_of_nds, netfunc):
    """Evaluate ``netfunc`` on every discrete input and summarise its transition matrix.

    All ``input_sz**n_of_nds`` inputs are enumerated as zero-padded binary
    rows and passed to ``netfunc``, whose first output is turned into a
    per-node two-way distribution. A joint transition matrix ``pyx`` is then
    built as the product of per-node probabilities.

    Returns a 5-tuple ``(final, normalised, pyx, first_term, second_term)``:
    ``first_term`` is ``sum(pyx * log pyx)``, ``second_term`` is
    ``sum(pyx * log(column sums of pyx))``, ``final`` is
    ``(first_term - second_term) / N + log N`` expressed in bits, and
    ``normalised`` is ``final`` rescaled by ``log 2 / log N`` -- presumably
    the effective information and its normalised form (TODO confirm).
    """
    n_states = input_sz**n_of_nds

    # One row per input configuration, left-padded with zeros to n_of_nds digits.
    input_x = torch.zeros([n_states, n_of_nds])
    for row in range(n_states):
        digits = decimalToBinary(row, [])
        padded = [0] * (n_of_nds - len(digits)) + digits
        input_x[row, :] = torch.Tensor(np.array(padded)*1.0).unsqueeze(0)

    # Only the first of the three network outputs is used.
    raw_out, _, _ = netfunc(input_x)
    stacked = torch.cat((torch.exp(raw_out.unsqueeze(2)), 1-torch.exp(raw_out.unsqueeze(2))), 2)
    node_probs = torch.softmax(stacked, dim=2)

    pyx = torch.ones([n_states, n_states])
    for i in range(input_x.size()[0]):
        for j in range(input_x.size()[0]):
            digits = decimalToBinary(j, [])
            leading = n_of_nds - len(digits)
            probability = 1.0
            for k in range(n_of_nds):
                # Implicit leading zeros of j select channel 0.
                channel = 0 if k < leading else digits[k - leading]
                probability *= node_probs[i, k, channel]
            pyx[i, j] = probability

    # log(0) = -inf entries are replaced by 0 so they contribute nothing.
    logpyx = torch.log(pyx)
    logpyx = torch.where(torch.isinf(logpyx), torch.zeros(logpyx.size()), logpyx)
    first_term = torch.sum(pyx * logpyx)

    column_sums = torch.sum(pyx, 0).unsqueeze(0)
    log_columns = torch.log(column_sums)
    log_columns = torch.where(torch.isinf(log_columns), torch.zeros(log_columns.size()), log_columns)
    log_columns = log_columns.repeat(n_states, 1)
    second_term = torch.sum(pyx * log_columns)

    final = (first_term - second_term)/(n_states) + np.log(n_states)
    final = final / np.log(2.0)
    return final, final*np.log(2.0)/np.log(n_states), pyx, first_term, second_term
def test_model(batch_size,n_of_nds, net,scale,nll,sim):
    """Evaluate ``net`` on a fresh batch drawn from ``sim``.

    Args:
        batch_size: number of transitions to sample.
        n_of_nds: unused; kept for call compatibility.
        net: model returning ``(predict, latent, latent_p)``.
        scale: latent dimension forwarded to the EI estimate.
        nll: loss callable applied to ``(log_softmax(predict), state_next)``.
        sim: simulator exposing ``sample(batch_size)``.

    Returns:
        ``(ei, sigmas, loss)`` where ``ei`` and ``sigmas`` come from
        ``calc_ei_loss`` and ``loss`` is a Python float.

    NOTE(review): the prediction goes through ``log_softmax`` before ``nll``
    here, whereas the training loops in this notebook pass raw outputs to
    ``nll``; the reported loss may not be comparable to the training loss --
    confirm which is intended.
    """
    state, state_next, _ = sim.sample(batch_size)
    if use_cuda:
        state = state.cuda()
        state_next = state_next.cuda()
    predict, _, latent_p = net(state)
    predict = nn.functional.log_softmax(predict, dim=1)
    loss = nll(predict, state_next)
    # Same sigma / EI computation as calc_ei_loss, reused instead of duplicated.
    ei, sigmas = calc_ei_loss(latent_p, state_next, net, scale)
    return ei, sigmas, loss.item()
def calc_ei_loss(latent_p, state_next, net, scale):
    """Estimate EI of the latent dynamics from the latent prediction error.

    ``state_next`` is encoded with ``net.encoding`` and compared with the
    predicted latent ``latent_p``; the per-dimension root-mean-square
    difference (over the batch axis) forms the diagonal matrix handed to
    ``approx_ei``. Nothing is detached before computing ``sigmas``; only the
    diagonal matrix is passed as ``.data``.

    Returns:
        ``(ei, sigmas)`` -- the value returned by ``approx_ei`` and the
        per-dimension RMS error tensor.
    """
    encoded_next = net.encoding(state_next)
    sigmas = torch.sqrt(torch.mean((encoded_next - latent_p)**2, 0))

    def latent_step(x):
        # One residual step of the latent dynamics on a single, batched sample.
        return net.dynamics(x.unsqueeze(0)) + x.unsqueeze(0)

    ei = approx_ei(scale, scale, torch.diag(sigmas).data, latent_step,
                   num_samples = 1000, L=100, easy=True, device=device)
    return ei, sigmas

In [None]:
def RGB_to_Hex(rgb):
    """Convert an iterable of colour channels to an upper-case ``#RRGGBB`` string.

    Each channel is truncated with ``int`` and clamped to 0..255 before being
    written as two hex digits, so out-of-range values no longer produce
    silently wrong digits.
    """
    digits = []
    for channel in rgb:
        value = min(max(int(channel), 0), 255)
        digits.append('{:02X}'.format(value))
    return '#' + ''.join(digits)
    
def generate_colors(N=12,colormap='hsv'):
    """Sample ``N`` colours from a matplotlib colormap.

    The colormap is indexed with integers ``step * i`` (capped at 255), where
    ``step = max(int(255 / N), 1)``.

    Args:
        N: number of colours; ``N <= 0`` returns two empty lists.
        colormap: name passed to ``plt.get_cmap``.

    Returns:
        ``(rgb_list, hex_list)`` -- 0..255 integer RGB tuples and the matching
        ``#RRGGBB`` strings.
    """
    cmap = plt.get_cmap(colormap)
    if N <= 0:
        # Avoids the division by zero for N == 0; negative N already gave empty lists.
        return [], []

    step = max(int(255/N),1)
    rgb_list = []
    hex_list = []
    for i in range(N):
        # An integer argument indexes the colormap's lookup table; the call yields (r, g, b, a) in 0..1.
        lut_index = min(step*i, 255)
        rgba_color = cmap(lut_index)
        rgb = [int(d*255) for d in rgba_color[:3]]
        rgb_list.append(tuple(rgb))
        hex_list.append(RGB_to_Hex(rgb))
    return rgb_list,hex_list
    
# Preview a six-colour palette taken from the 'cool' colormap.
rgb_list, hex_list = generate_colors(N=6, colormap='cool')
for palette in (rgb_list, hex_list):
    print(palette)

### Learned Mapping

In [None]:
# Train a macro model (latent_size = scale) and a micro model (latent_size = n_of_nods)
# on the same simulated transitions, periodically reporting dEI for both and
# plotting how micro states map onto the learned latent.
hidden_units = 64


torch.manual_seed(1024)
scale = 1
batch_size =100
nll = nn.functional.binary_cross_entropy_with_logits
n_of_nods = 4
sim = BnSim(number_of_vertice = n_of_nods)
net = Renorm_Dynamic(sym_size = n_of_nods, latent_size = scale, effect_size = n_of_nods, 
                     hidden_units = hidden_units, normalized_state = False, device=device)
net_m = Renorm_Dynamic(sym_size = n_of_nods, latent_size = n_of_nods, effect_size = n_of_nods, 
                     hidden_units = hidden_units, normalized_state = False, device=device)
net = net.cuda() if use_cuda else net
net_m = net_m.cuda() if use_cuda else net_m
optimizer = torch.optim.Adam([p for p in net.parameters() if p.requires_grad==True], lr=1e-4)
optimizer_m = torch.optim.Adam([p for p in net_m.parameters() if p.requires_grad==True], lr=1e-4)

random_samples = []

for t in range(500001):    
    state,state_next,_ = sim.sample(batch_size)
    if use_cuda:
        state=state.cuda()
        state_next = state_next.cuda()
    predict, latent, latent_p = net(state)
    predict_m, latent_m, latent_pm = net_m(state)
    # Raw model outputs go straight into the with-logits BCE loss.
    loss = nll(predict, state_next)
    loss_m = nll(predict_m, state_next)
    optimizer.zero_grad()
    optimizer_m.zero_grad()
    # NOTE(review): retain_graph=True looks unnecessary if the two nets share
    # no graph -- confirm against models.Renorm_Dynamic before removing.
    loss.backward(retain_graph=True)
    loss_m.backward(retain_graph=True)
    optimizer.step()
    optimizer_m.step()
           
    if t % 500 ==0:
        ei, sigmas,_ = test_model(batch_size, n_of_nods, net, scale, nll,sim)
        ei_m, sigmas_m, _ = test_model(batch_size, n_of_nods, net_m, n_of_nods, nll, sim)
        print('iter %s:' % t)
        print('Macro: loss = %.3f' % loss.item(), ', dEI= %.3f' % ei[0],
             ', eff= %.3f' % ei[1], ', std = %.3f' % sigmas.mean().item())
        print('Micro: loss = %.3f' % loss_m.item(), ', dEI= %.3f' % ei_m[0],
             ', eff= %.3f' % ei_m[1], ', std = %.3f' % sigmas_m.mean().item())
        print('Causal Emergence = %.3f' % (ei[0]-ei_m[0]))
        
        if t % 5000 == 0:
            # Encode each micro state (a row of bits) as one integer, MSB first.
            xx = torch.zeros(state.size()[0],1)
            for i in range(state.size()[0]):
                v = 0
                for j in range(state.size()[1]):
                    v = v * 2 + int(state[i,j].item())
                xx[i] = v
            if latent.size()[1]==1:
                yy = latent
                if use_cuda:
                    xx=xx.cpu()
                    yy=yy.cpu()
                for i in range(yy.size()[1]):
                    plt.plot(xx.data, yy[:,i].data,'.')
                # x is the encoded micro state and y the learned latent value.
                plt.xlabel('Encoded Micro States')
                plt.ylabel('Latent Macro State')
            else:
                # For more than two latent dimensions, project to 2-D before plotting.
                lowrank=latent
                if latent.size()[1]>2:
                    lowrank = torch.pca_lowrank(latent, q=2)[0]
                plotx = xx.cpu() if use_cuda else xx
                ploty = lowrank.cpu() if use_cuda else lowrank
                plt.scatter(ploty.data[:, 0], ploty.data[:, 1], c = plotx.view(-1).data,cmap=plt.cm.Spectral)
            plt.show()

In [None]:
# Plot the learned latent value against the integer-encoded micro state,
# colouring points by the hard-coded latent ranges below.
n_rows, n_bits = state.size()[0], state.size()[1]
xx = torch.zeros(n_rows,1)
for i in range(n_rows):
    v = 0
    for j in range(n_bits):
        # Bits are read most-significant first.
        v = 2 * v + int(state[i,j].item())
    xx[i] = v

yy = latent
if use_cuda:
    xx, yy = xx.cpu(), yy.cpu()

# (selector, marker, marker size, legend label, colour) -- drawn in this order.
class_styles = [
    (lambda col: col < 0, 'o', 3, 'Class 1', 'orange'),
    (lambda col: (col < 4) & (col > 2), 's', 4, 'Class 2', 'g'),
    (lambda col: (col < 2) & (col > 0), '<', 4, 'Class 3', 'b'),
    (lambda col: col > 10, '*', 5, 'Class 4', 'firebrick'),
]
for i in range(yy.size()[1]):
    for select, marker, size, label, colour in class_styles:
        bools = select(yy[:,i])
        plt.plot(xx[bools].data, yy[bools,i].data, marker, markersize=size, label=label, color=colour)

plt.xlabel('Encoded Micro States')
plt.ylabel('Latent Macro State')
plt.legend()
plt.savefig('Boolean_class.svg', dpi=600, format='svg')
plt.show()

### Scale Search

In [None]:
# Scale search: for each latent size q in [4, 3, 2, 1], train a macro model
# (latent_size = q) and a micro model (latent_size = n_of_nods) `experiments`
# times and record the final training loss and dEI(macro) - dEI(micro).
L = 10
hidden_units = 64
sigma=1
#torch.manual_seed(2050)
experiments = 4
batch_size =100
epochs=100001
nll = nn.functional.binary_cross_entropy_with_logits

err_scale = []
ei_scale=[]
for scale in [4,3,2,1]:
    print('*********************',scale,'**************************')
    err_experiment=[]
    multi_err_experiment=[]
    ei_experiment=[]
    for experiment in range(experiments):
        n_of_nods = 4
        sim = BnSim(number_of_vertice = n_of_nods)
        net = Renorm_Dynamic(sym_size = n_of_nods, latent_size = scale, effect_size = n_of_nods, 
                             hidden_units = hidden_units, normalized_state = False, device=device)
        net_m = Renorm_Dynamic(sym_size = n_of_nods, latent_size = n_of_nods, effect_size = n_of_nods, 
                             hidden_units = hidden_units, normalized_state = False, device=device)
        net = net.cuda() if use_cuda else net
        net_m = net_m.cuda() if use_cuda else net_m
        optimizer = torch.optim.Adam([p for p in net.parameters() if p.requires_grad==True], lr=1e-4)
        optimizer_m = torch.optim.Adam([p for p in net_m.parameters() if p.requires_grad==True], lr=1e-4)

        random_samples = []

        for t in range(epochs):    
            state,state_next,_ = sim.sample(batch_size)
            if use_cuda:
                state=state.cuda()
                state_next = state_next.cuda()
            predict, latent, latent_p = net(state)
            predict_m, latent_m, latent_pm = net_m(state)
            # Raw model outputs go straight into the with-logits BCE loss.
            loss = nll(predict, state_next)
            loss_m = nll(predict_m, state_next)
            optimizer.zero_grad()
            optimizer_m.zero_grad()
            loss.backward(retain_graph=True)
            loss_m.backward(retain_graph=True)
            optimizer.step()
            optimizer_m.step()
            if t % 500 ==0:
                ei, sigmas,_ = test_model(batch_size, n_of_nods, net, scale, nll,sim)
                ei_m, sigmas_m, _ = test_model(batch_size, n_of_nods, net_m, n_of_nods, nll, sim)
                print('iter %s:' % t)
                print('Macro: loss = %.3f' % loss.item(), ', dEI= %.3f' % ei[0],
                     ', eff= %.3f' % ei[1], ', std = %.3f' % sigmas.mean().item())
                print('Micro: loss = %.3f' % loss_m.item(), ', dEI= %.3f' % ei_m[0],
                     ', eff= %.3f' % ei_m[1], ', std = %.3f' % sigmas_m.mean().item())
                print('Causal Emergence = %.3f' % (ei[0]-ei_m[0]))
        # Final evaluation on a batch ten times larger than the training batch.
        ei, sigmas,_ = test_model(batch_size*10, n_of_nods, net, scale, nll, sim) 
        ei_m, sigmas_m, _ = test_model(batch_size*10, n_of_nods, net_m, n_of_nods, nll, sim)
        # Store a plain float: appending the tensor would keep its autograd
        # graph alive and make the list unusable with numpy statistics.
        err_experiment.append(loss.item())
        
        # Difference between macro and micro dEI for this run.
        ei_experiment.append(ei[0]-ei_m[0])
        
    err_scale.append(err_experiment)
    ei_scale.append(ei_experiment)

In [None]:
# Plot mean and standard deviation of the recorded dEI differences for each scale.
rgb_list,hex_list = generate_colors(2,'cool')
print(rgb_list)
print(hex_list)
# Boolean network results: one list of per-experiment values per scale.
ces=ei_scale

# Must match the order of scales used when ei_scale was filled.
scales=torch.Tensor([4,3,2,1])
means=[]
stds=[]

for ce in ces:
    m=np.mean(ce)
    std=np.std(ce)
    means.append(m)
    stds.append(std)

plt.plot(scales, means,'o')
plt.errorbar(scales, means, stds,color=hex_list[0])
plt.bar(scales, means, width=0.3, facecolor=hex_list[1], edgecolor='white')
plt.title('Relationship between dCE and Scale(q)',fontsize=14) 
plt.xlabel('Scale ($q$)',fontsize=14)
# Axis label typo fixed ("Casual" -> "Causal"); the output filename is left
# untouched so existing references to the saved figure keep working.
plt.ylabel('Causal Emergence (dCE)',fontsize=14)
plt.axhline(0, color='black', linestyle='--')
plt.savefig('Boolean_casual_emergence.svg', dpi=600, format='svg')
plt.show()

### Mutual Information

In [None]:
import entropy_estimators as ee

In [None]:
def moving_average(interval, windowsize):
    """Smooth ``interval`` with a flat window of ``windowsize`` samples.

    The window has ``int(windowsize)`` taps, each equal to
    ``1 / float(windowsize)``, and the convolution uses ``'same'`` mode, so
    the result has the same length as the input (edge values are computed
    from a partially overlapping window).
    """
    kernel = np.ones(int(windowsize))
    kernel /= float(windowsize)
    return np.convolve(interval, kernel, mode='same')

#### Verification for Theorem 2 when Scale(q)=2

In [None]:
# Theorem 2 check at scale q = 2: train a macro model (latent_size = 2) and a
# micro model (latent_size = n_of_nods) on the same batches and, every 500
# iterations, estimate three mutual-information terms on the current batch.
hidden_units =32


# Seed set before the models are built; statement order below is kept as-is.
torch.manual_seed(1024)
scale = 2
batch_size =100
nll = nn.functional.binary_cross_entropy_with_logits
n_of_nods = 4
sim = BnSim(number_of_vertice = n_of_nods)
net = Renorm_Dynamic(sym_size = n_of_nods, latent_size = scale, effect_size = n_of_nods, 
                     hidden_units = hidden_units, normalized_state = False, device=device)
net_m = Renorm_Dynamic(sym_size = n_of_nods, latent_size = n_of_nods, effect_size = n_of_nods, 
                     hidden_units = hidden_units, normalized_state = False, device=device)
net = net.cuda() if use_cuda else net
net_m = net_m.cuda() if use_cuda else net_m
optimizer = torch.optim.Adam([p for p in net.parameters() if p.requires_grad==True], lr=1e-4)
optimizer_m = torch.optim.Adam([p for p in net_m.parameters() if p.requires_grad==True], lr=1e-4)

random_samples = []

# Per-checkpoint histories. Isst, Irrt, Isshat and Ts are read by the
# rolling-mean plot in the next cell.
Isst=[]
Illt=[]
Isshat=[]
Irrt=[]
I_yt_yt1_ms=[]
I_xt_xt1hat_ms=[]
errs=[]
Ts=[]

for t in range(50001):    
    state,state_next,_ = sim.sample(batch_size)
    if use_cuda:
        state=state.cuda()
        state_next = state_next.cuda()
    predict, latent, latent_p = net(state)
    predict_m, latent_m, latent_pm = net_m(state)
    # Raw outputs are passed to the with-logits BCE loss (log_softmax left disabled):
    #predict = nn.functional.log_softmax(predict, dim=1)
    loss = nll(predict, state_next)
    loss_m = nll(predict_m, state_next)
    optimizer.zero_grad()
    optimizer_m.zero_grad()
    loss.backward(retain_graph=True)
    loss_m.backward(retain_graph=True)
    optimizer.step()
    optimizer_m.step()
           
    if t % 500 ==0:
       
        # ee.mi comes from entropy_estimators; k is presumably the
        # nearest-neighbour count of the estimator -- TODO confirm. Each term
        # uses a different k (3, 17, 7).
        I_xt_xt1=ee.mi(state.cpu(),state_next.cpu(),k=3)
        I_yt_yt1=ee.mi(latent.cpu().detach(),latent_p.cpu().detach(),k=17)
        I_xt_xt1hat=ee.mi(state.cpu(),predict.cpu().detach(),k=7)

        # Disabled per-sample estimates for the micro model:
        #I_yt_yt1_m=np.mean([ee.mi(torch.cat([latent_m.cpu().detach()[j] for i in range(6)],0),torch.cat([latent_pm.cpu().detach()[j] for i in range(6)],0)) for j in range(batch_size)])
        #I_xt_xt1hat_m=np.mean([ee.mi(torch.cat([state.cpu()[j] for i in range(6)],0),torch.cat([predict_m.cpu().detach()[j] for i in range(6)],0)) for j in range(batch_size)])

        Isst.append(I_xt_xt1)
        Isshat.append(I_xt_xt1hat)
        Irrt.append(I_yt_yt1)

        #I_yt_yt1_ms.append(I_yt_yt1_m)
        #I_xt_xt1hat_ms.append(I_xt_xt1hat_m)

        # "error" is the standard deviation across the three estimates.
        errs.append(np.std([I_yt_yt1,I_xt_xt1hat,I_xt_xt1]))
        Ts.append(t)
        
        print(I_xt_xt1,I_yt_yt1,I_xt_xt1hat)

# Raw (unsmoothed) curves of the three estimates and their spread.
plt.plot(Ts,Isst,label='I(xt,xt+1)')
plt.plot(Ts,Irrt,label='I(yt,yt+1)')
plt.plot(Ts,Isshat,label='I(xt,xthat)')
#plt.plot(Ts,I_yt_yt1_ms,label='I(xt,xthat)m')
#plt.plot(Ts,I_xt_xt1hat_ms,label='I(xt,xthat)m')
plt.plot(Ts,errs,label='error')
plt.ylim(0,3.5)
plt.legend(loc='best')
plt.xlabel('Iter')
plt.ylabel('Value')
plt.show()

In [None]:
# Smoothed mutual-information curves for the Theorem 2 check (window of 10 checkpoints).
Isst_rol_mean = moving_average(Isst,10)
Irrt_rol_mean = moving_average(Irrt,10)
Isshat_rol_mean = moving_average(Isshat,10)

# Only checkpoints 4..95 are drawn, which leaves out the ends of the
# 'same'-mode rolling mean where the window only partially overlaps the data.
# Labels are raw strings so that '\hat' is not parsed as an (invalid) escape sequence.
plt.plot(Ts[4:96],Isst_rol_mean[4:96],label=r'$I(x_t,x_{t+1})$',color='blueviolet')
plt.plot(Ts[4:96],Irrt_rol_mean[4:96],label=r'$I(y_t,y_{t+1})$',color='teal')
plt.plot(Ts[4:96],Isshat_rol_mean[4:96],label=r'$I(x_t,\hat{x}_{t+1})$',color='b')
plt.ylim(0,3)
plt.legend(loc='best')
plt.title('Verification for Theorem 2 when Scale(q)=2',fontsize=14) 
plt.xlabel('Iter',fontsize=13)
plt.ylabel('Mutual Information Scale(q)=2',fontsize=13)
plt.savefig('Boolean_Mutual_Information_Scale2.svg', dpi=600, format='svg')
plt.show()

#### Verification for Theorem 6 with Scale and Mutual Information 

In [None]:
# Theorem 6 check: train four models with latent sizes 1..4 on the same
# batches and, every 500 iterations, estimate I(x_t, prediction) and
# I(x_t, latent) for each of them on the current batch.
hidden_units = 32


# Seed set before the models are built; construction order (net_1..net_4) is kept as-is.
torch.manual_seed(1024)
scale = [1,2,3,4]
batch_size =100
nll = nn.functional.binary_cross_entropy_with_logits
n_of_nods = 4
sim = BnSim(number_of_vertice = n_of_nods)
net_1 = Renorm_Dynamic(sym_size = n_of_nods, latent_size = scale[0], effect_size = n_of_nods, 
                     hidden_units = hidden_units, normalized_state = False, device=device)
net_2 = Renorm_Dynamic(sym_size = n_of_nods, latent_size = scale[1], effect_size = n_of_nods, 
                     hidden_units = hidden_units, normalized_state = False, device=device)
net_3 = Renorm_Dynamic(sym_size = n_of_nods, latent_size = scale[2], effect_size = n_of_nods, 
                     hidden_units = hidden_units, normalized_state = False, device=device)
net_4 = Renorm_Dynamic(sym_size = n_of_nods, latent_size = scale[3], effect_size = n_of_nods, 
                     hidden_units = hidden_units, normalized_state = False, device=device)
net_1 = net_1.cuda() if use_cuda else net_1
net_2 = net_2.cuda() if use_cuda else net_2
net_3 = net_3.cuda() if use_cuda else net_3
net_4 = net_4.cuda() if use_cuda else net_4
# One Adam optimizer per model, all with the same learning rate.
optimizer_1 = torch.optim.Adam([p for p in net_1.parameters() if p.requires_grad==True], lr=1e-4)
optimizer_2 = torch.optim.Adam([p for p in net_2.parameters() if p.requires_grad==True], lr=1e-4)
optimizer_3 = torch.optim.Adam([p for p in net_3.parameters() if p.requires_grad==True], lr=1e-4)
optimizer_4 = torch.optim.Adam([p for p in net_4.parameters() if p.requires_grad==True], lr=1e-4)

random_samples = []

# Per-checkpoint histories, one list per latent size; the plotting cells
# that follow read these lists and Ts.
Isshat_1=[]
Isshat_2=[]
Isshat_3=[]
Isshat_4=[]
#Isshat_m=[]
I_xt_yt_1s=[]
I_xt_yt_2s=[]
I_xt_yt_3s=[]
I_xt_yt_4s=[]
#errs=[]
Ts=[]

for t in range(50001):    
    state,state_next,_ = sim.sample(batch_size)
    if use_cuda:
        state=state.cuda()
        state_next = state_next.cuda()
    # All four forward passes run before any backward pass.
    predict_1, latent_1, latent_p1 = net_1(state)
    predict_2, latent_2, latent_p2 = net_2(state)
    predict_3, latent_3, latent_p3 = net_3(state)
    predict_4, latent_4, latent_p4 = net_4(state)
    # Raw outputs are passed to the with-logits BCE loss (log_softmax left disabled):
    #predict = nn.functional.log_softmax(predict, dim=1)
    loss_1 = nll(predict_1, state_next)
    loss_2 = nll(predict_2, state_next)
    loss_3 = nll(predict_3, state_next)
    loss_4 = nll(predict_4, state_next)
    optimizer_1.zero_grad()
    optimizer_2.zero_grad()
    optimizer_3.zero_grad()
    optimizer_4.zero_grad()
    loss_1.backward(retain_graph=True)
    loss_2.backward(retain_graph=True)
    loss_3.backward(retain_graph=True)
    loss_4.backward(retain_graph=True)
    optimizer_1.step()
    optimizer_2.step()
    optimizer_3.step()
    optimizer_4.step()
           
    if t % 500 ==0:
       
        # ee.mi comes from entropy_estimators; k is presumably the
        # nearest-neighbour count of the estimator -- TODO confirm.
        I_xt_xt1hat_1=ee.mi(state.cpu(),predict_1.cpu().detach(),k=9)
        I_xt_yt_1=ee.mi(state.cpu(),latent_1.cpu().detach(),k=9)

        I_xt_yt_2=ee.mi(state.cpu(),latent_2.cpu().detach(),k=9)
        I_xt_xt1hat_2=ee.mi(state.cpu(),predict_2.cpu().detach(),k=9)

        I_xt_xt1hat_3=ee.mi(state.cpu(),predict_3.cpu().detach(),k=9)
        I_xt_yt_3=ee.mi(state.cpu(),latent_3.cpu().detach(),k=9)

        I_xt_xt1hat_4=ee.mi(state.cpu(),predict_4.cpu().detach(),k=9)
        I_xt_yt_4=ee.mi(state.cpu(),latent_4.cpu().detach(),k=9)

        Isshat_1.append(I_xt_xt1hat_1)
        Isshat_2.append(I_xt_xt1hat_2)
        Isshat_3.append(I_xt_xt1hat_3)
        Isshat_4.append(I_xt_xt1hat_4)
        I_xt_yt_1s.append(I_xt_yt_1)
        I_xt_yt_2s.append(I_xt_yt_2)
        I_xt_yt_3s.append(I_xt_yt_3)
        I_xt_yt_4s.append(I_xt_yt_4)

        #errs.append(np.std([np.mean(I_yt_yt1),np.mean(I_xt_xt1hat),np.mean(I_xt_xt1)]))
        Ts.append(t)
        
        print(I_xt_xt1hat_4,I_xt_yt_1,I_xt_yt_2,I_xt_yt_3,I_xt_yt_4)

# Raw curves. NOTE(review): this plot uses Isshat_4 for 'I(xt,xt_hat)' while
# the later smoothed plots use Isshat_3 -- confirm which one is intended.
plt.plot(Ts,Isshat_4,label='I(xt,xt_hat)')
#plt.plot(Ts,Isshat_m,label='I(xt,xt_hat_m)')
plt.plot(Ts,I_xt_yt_1s,label='I(xt,yt_1)')
plt.plot(Ts,I_xt_yt_2s,label='I(xt,yt_2)')
plt.plot(Ts,I_xt_yt_3s,label='I(xt,yt_3)')
plt.plot(Ts,I_xt_yt_4s,label='I(xt,yt_4)')
#plt.plot(Ts,I_xt_xt1hat_ms,label='I(xt,xthat)m')
#plt.plot(Ts,errs,label='error')
plt.legend(loc='best')
plt.xlabel('Iter')
plt.ylabel('Value')
plt.show()

In [None]:
# Rolling means (window of 10 checkpoints) of every recorded series; the
# *_rol_mean names are also read by the final Theorem 6 plot.
Isshat_4_rol_mean = moving_average(Isshat_4, 10)
Isshat_3_rol_mean = moving_average(Isshat_3, 10)
Isshat_2_rol_mean = moving_average(Isshat_2, 10)
Isshat_1_rol_mean = moving_average(Isshat_1, 10)
I_xt_yt_1s_rol_mean = moving_average(I_xt_yt_1s, 10)
I_xt_yt_2s_rol_mean = moving_average(I_xt_yt_2s, 10)
I_xt_yt_3s_rol_mean = moving_average(I_xt_yt_3s, 10)
I_xt_yt_4s_rol_mean = moving_average(I_xt_yt_4s, 10)

# (raw series, smoothed series, legend label, raw colour, smoothed colour)
curve_specs = [
    (Isshat_3, Isshat_3_rol_mean, 'I(xt,xt_hat)', 'deepskyblue', 'b'),
    (I_xt_yt_1s, I_xt_yt_1s_rol_mean, 'I(xt,yt_1)', 'gold', 'darkorange'),
    (I_xt_yt_2s, I_xt_yt_2s_rol_mean, 'I(xt,yt_2)', 'lime', 'g'),
    (I_xt_yt_3s, I_xt_yt_3s_rol_mean, 'I(xt,yt_3)', 'lightcoral', 'r'),
    (I_xt_yt_4s, I_xt_yt_4s_rol_mean, 'I(xt,yt_4)', 'violet', 'purple'),
]

# All raw curves first, then all smoothed curves, so the legend order is unchanged.
for raw_series, _, label, raw_colour, _ in curve_specs:
    plt.plot(Ts, raw_series, label=label, color=raw_colour)
for _, smooth_series, label, _, smooth_colour in curve_specs:
    # Checkpoints 6..94 only: the ends of the 'same'-mode rolling mean are left out.
    plt.plot(Ts[6:95], smooth_series[6:95], label=label + ' Rolling Mean', color=smooth_colour)

plt.legend(loc='best', fontsize=7)
plt.ylim(0, 3.5)
plt.xlabel('Iter', fontsize=14)
plt.ylabel('Mutual Information', fontsize=14)
#plt.savefig('Boolean Mutual Information rm.svg', dpi=600, format='svg')
plt.show()

In [None]:
# Final Theorem 6 figure: smoothed mutual-information curves only.
rgb_list,hex_list = generate_colors(12,'cubehelix')
print(rgb_list)
print(hex_list)

# Checkpoints 6..94 only: the ends of the 'same'-mode rolling mean are left out.
# Labels are raw strings so that '\hat' is not parsed as an (invalid) escape sequence.
plt.plot(Ts[6:95],Isshat_3_rol_mean[6:95],label=r'$I(x_t,\hat{x}_{t+1})$',color='b')
plt.plot(Ts[6:95],I_xt_yt_1s_rol_mean[6:95],label=r'$I(x_t,y_{t}^{q=1})$',color='darkorange')
plt.plot(Ts[6:95],I_xt_yt_2s_rol_mean[6:95],label=r'$I(x_t,y_{t}^{q=2})$',color='g')
plt.plot(Ts[6:95],I_xt_yt_3s_rol_mean[6:95],label=r'$I(x_t,y_{t}^{q=3})$',color='r')
plt.plot(Ts[6:95],I_xt_yt_4s_rol_mean[6:95],label=r'$I(x_t,y_{t}^{q=4})$',color='purple')
plt.legend(loc='best')
plt.title('Verification for Theorem 6',fontsize=14) 
plt.ylim(0,3)
plt.xlabel('Iter',fontsize=14)
plt.ylabel('Mutual Information',fontsize=14)
plt.savefig('Boolean_Mutual_Information_rm.svg', dpi=600, format='svg')
plt.show()