# RG Flow of Preactivations

In this notebook we will test the Effective Theory from the fourth chapter of the book. More specifically we will look at the distribution of preactivations in the first layer, second layer and some last layer.

It is highly recommended to read the book or at least the third chapter.

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
import math

import torch
import torch.nn as nn
import torchvision
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader

import pingouin as pg

import numpy as np
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import random
random.seed(69)
random.seed(69)
torch.manual_seed(69)

## First layer - Gaussian

The distribution of preactivations in the first layer should be Gaussian. Lets check it out.

In [31]:
a = 256
n = a
L = 3
inp_shape = a
out_shape = a

In [32]:
L/n

0.01171875

In [33]:
def try_gpu(i=0):
    if torch.cuda.device_count() >= i + 1:
        return torch.device(f'cuda:{i}')
    return torch.device('cpu')
device = try_gpu()

In [34]:
device

device(type='cuda', index=0)

In [35]:
def count_para(nets):
    c = 0
    for net in nets:
        c += sum([p.numel() for p in net.parameters() if p.requires_grad])
    return c

In [36]:
def initialization(t):
    with torch.no_grad():
        if isinstance(t, nn.Linear):
            t.weight = nn.init.normal_(t.weight, mean=0.0, std=2.00**0.5)
            if t.bias is not None:
                nn.init.constant_(t.bias.data, 0)

In [37]:
def create_first_layer(inp_shape, n):
    net = nn.Sequential(nn.Linear(inp_shape, n))
    net.apply(initialization)
    net = net.eval()
    return net

def create_mid_layer(n):
    net = nn.Sequential(nn.ReLU(), nn.Linear(n, n))
    net.apply(initialization)
    net = net.eval()
    return net

def create_mid_layers(n, l):
    blks = []
    for _ in range(l):
        blks.append(nn.Sequential(nn.ReLU(), nn.Linear(n, n)))
    net = nn.Sequential(*blks)
    net.apply(initialization)
    net = net.eval()
    return net

def create_top_layer(out_shape, l):
    net = nn.Sequential(nn.Linear(n, out_shape), nn.ReLU())
    net.apply(initialization)
    net = net.eval()
    return net

In [38]:
def create_layers(n, l, inp_shape=n, out_shape=n):
    f_l = create_first_layer(inp_shape,n)
    s_l = create_mid_layer(n)
    mid = create_mid_layers(n, (l-3)//2)
    top = create_mid_layers(n, (l-3) - (l-3)//2)
    top = nn.Sequential(top, nn.ReLU(), nn.Linear(n, out_shape))
    return f_l, s_l, mid, top

In [39]:
net = create_layers(n,L) #(f_l, s_l, mid, m_l, top)
count_para(net)

197376

In [40]:
x = torch.rand((1, inp_shape), dtype=torch.float)
x = (x - x.mean(dim=1))/x.std(dim=1)
x = x.to(device)
x.shape

torch.Size([1, 256])

In [41]:
f_l_zs = torch.tensor([]).to(device)
s_l_zs = torch.tensor([]).to(device)
mid_zs = torch.tensor([]).to(device)
top_zs = torch.tensor([]).to(device)
zs = [f_l_zs,s_l_zs,mid_zs,top_zs]

In [42]:
num_nets = 2000
for _ in range(num_nets):
    with torch.no_grad():
        x_temp = x
        layers = create_layers(n,L)
        for i in range(4):
            layers[i].to(device)
            z = layers[i](x_temp)
            zs[i] = torch.cat((z, zs[i]), dim=0)
            x_temp = z
        torch.cuda.empty_cache()

In [43]:
zs[0].shape

torch.Size([2000, 256])

In [44]:
pg.multivariate_ttest(zs[3].cpu())

Unnamed: 0,T2,F,df1,df2,pval
hotelling,298.497144,1.017265,256,1744,0.418929


In [45]:
zs[3][:4]

tensor([[-102.6563, -313.7347,   54.2371,  ...,  132.3438,   33.7585,
          -59.9196],
        [  78.9085,  174.9452, -243.0746,  ...,  207.6752,  134.7668,
          -85.4098],
        [ 413.4193,  -21.8748,  101.5424,  ..., -219.3279, -193.8071,
         -162.1559],
        [ -41.3945,  167.6516,    1.6018,  ..., -134.7312, -241.7362,
         -147.2643]], device='cuda:0')

In [46]:
z_Ls = zs[3]

In [48]:
def sample_con_cor(zs, a_1, a_2, a_3, a_4):
    exp_all = (zs[:,a_1] * zs[:,a_2] * zs[:,a_3] * zs[:,a_4]).mean()
    exp_12 = (zs[:,a_1] * zs[:,a_2]).mean()
    exp_34 = (zs[:,a_3] * zs[:,a_4]).mean()
    exp_13 = (zs[:,a_1] * zs[:,a_3]).mean()
    exp_24 = (zs[:,a_2] * zs[:,a_4]).mean()
    exp_14 = (zs[:,a_1] * zs[:,a_4]).mean()
    exp_23 = (zs[:,a_2] * zs[:,a_3]).mean()
    #print(exp_all,exp_12, exp_34,exp_13,exp_24,exp_14,exp_23)
    con_cor = exp_all - exp_12*exp_34 - exp_13*exp_24 - exp_14*exp_23
    #print(exp_12*exp_34,exp_13*exp_24,exp_14*exp_23)
    return con_cor

count = 0
samples = 1000
for _ in range(samples):
    i=random.randint(0,out_shape-1)
    idxs = random.choices(list(range(out_shape)),k=4)
    con_c = sample_con_cor(z_Ls, *idxs)
    K = (z_Ls[:,i] * z_Ls[:,i]).mean() - z_Ls[:,i].mean()*z_Ls[:,i].mean()
    count+= abs(con_c)/(K*K)
count/samples

tensor(0.0212, device='cuda:0')

In [None]:
#0.0187, 0.0186, 0.0230