In [1]:
import torch
import torch.nn as nn
import numpy as np
from matplotlib import pyplot as plt

In [2]:
# Build the CIFAR-100 VGG16-BN network from torch.hub, then overwrite its
# weights with the state dict stored in the local checkpoint file.
model = torch.hub.load("chenyaofo/pytorch-cifar-models", "cifar100_vgg16_bn", pretrained=True)
# map_location="cpu" lets a checkpoint that was saved from a CUDA device load
# on a CPU-only machine; load_state_dict copies the values into the model's
# existing parameters either way.
model.load_state_dict(torch.load("VGG16_cifar100_sigmoid_origin_1.pth", map_location="cpu"))
# NOTE(security): allow_pickle=True unpickles arbitrary Python objects, so this
# file must come from a trusted source.
hout = np.load('Hout_VGG16_train.npy', allow_pickle=True)
# One float32 array per entry; later cells use the entry index as the class label.
hout = [np.array(h, dtype=np.float32) for h in hout]

Using cache found in C:\Users\huawei/.cache\torch\hub\chenyaofo_pytorch-cifar-models_master


In [3]:
from sklearn.mixture import GaussianMixture

def gH(cov, covv=None, mu=np.nan, muu=np.nan):
    """Entropy or cross-entropy of univariate Gaussians, in nats.

    With only ``cov`` given, returns the differential entropy of a Gaussian
    with variance ``cov``::

        0.5 * (1 + log(2*pi)) + 0.5 * log(cov)

    With ``covv`` given, returns the cross-entropy between N(mu, cov) and
    N(muu, covv)::

        0.5 * log(2*pi) + 0.5 * log(covv) + 0.5 * ((mu - muu)**2 + cov) / covv

    Parameters
    ----------
    cov : scalar or 0-d array
        Variance of the first Gaussian.
    covv : scalar or 0-d array, optional
        Variance of the second Gaussian; ``None`` selects the entropy form.
    mu, muu : scalar
        Means of the first and second Gaussian. They default to NaN, so the
        cross-entropy form returns NaN unless both are supplied.

    Returns
    -------
    numpy floating scalar

    Raises
    ------
    ValueError
        If ``cov`` or ``covv`` is not 0-dimensional.
    """
    # np.asarray lets plain Python numbers through as well as numpy scalars.
    det = np.asarray(cov)
    if det.ndim != 0:
        raise ValueError("cov should be 0D array")
    det = det[()]  # unwrap to a numpy scalar, keeping its dtype
    if det == 0:
        det = 1e-10  # avoid log(0)

    # Identity check: `== None` would compare element-wise on array input.
    if covv is None:
        return 0.5 * (1 + np.log(2*np.pi)) + 0.5 * np.log(det)

    dett = np.asarray(covv)
    if dett.ndim != 0:
        raise ValueError("covv should be 0D array")
    dett = dett[()]
    if dett == 0:
        dett = 1e-10  # avoid log(0) and division by zero
    return 0.5 * (np.log(2*np.pi)) + 0.5 * np.log(dett) + 0.5 * ((mu-muu)**2 + det) / dett

def Hy(X, n=30):
    """Entropy estimate of X from a diagonal-covariance Gaussian mixture.

    Fits a ``GaussianMixture`` with ``n`` components to ``X`` and returns

        sum_i sum_j w[i] * w[j] * CE(component_i, component_j)

    where ``w`` are the mixture weights and ``CE`` is the Gaussian
    cross-entropy computed by ``gH`` (nats).

    Parameters
    ----------
    X : array of shape (n_samples, n_features)
        Samples passed to ``GaussianMixture.fit``.
    n : int
        Number of mixture components.

    Returns
    -------
    float
    """
    clf = GaussianMixture(n_components=n, covariance_type='diag', random_state=1)
    clf.fit(X)
    # For covariance_type='diag', means_ and covariances_ have shape
    # (n_components, n_features). Index them per component and per feature
    # rather than through ravel(), which only lines up with the component
    # index when there is a single feature column.
    cov = clf.covariances_
    mu = clf.means_
    w = clf.weights_.ravel()
    n_features = cov.shape[1]
    h = 0
    for i in range(n):
        for j in range(n):
            # Diagonal Gaussians factorise over features, so their
            # cross-entropy is the sum of the per-feature terms. With one
            # feature this is exactly the single gH value used before.
            pair = 0
            for d in range(n_features):
                pair += gH(cov[i, d], cov[j, d], mu[i, d], mu[j, d])
            h += w[i] * w[j] * pair
    return h

def Hx(p):
    """Shannon entropy of a discrete distribution, in bits.

    Parameters
    ----------
    p : array-like
        Probabilities. Entries equal to zero contribute nothing
        (the 0 * log 0 = 0 convention) instead of producing NaN.

    Returns
    -------
    float
        ``sum(-p * log2(p))`` over the non-zero entries.
    """
    p = np.asarray(p)
    # Drop exact zeros only: negative or NaN entries still surface as NaN.
    nonzero = p[p != 0]
    return np.sum(-nonzero * np.log(nonzero)/np.log(2))

def MI(px, y, xy):
    """Mutual-information estimate H(x) + H(y) - H(x, y), in nats.

    Parameters
    ----------
    px : array
        Discrete distribution passed to ``Hx``.
    y : array
        Samples passed to ``Hy``.
    xy : array
        Joint samples passed to ``Hy``.

    Returns
    -------
    float
    """
    # Hx divides by log(2) and so reports bits, while Hy is built from natural
    # logs (nats). Convert Hx to nats so the three terms share one unit.
    hx_nats = Hx(px) * np.log(2)
    return hx_nats + Hy(y) - Hy(xy)

In [4]:
# Number of samples in each entry of hout, and the resulting empirical prior.
x = np.array(list(map(len, hout)))
px = x / x.sum()
# All samples stacked row-wise.
y = np.concatenate(hout)
# Same rows as y, with the index of the originating hout entry appended as a
# last column (the label).
xy = np.concatenate(
    [np.column_stack((samples, np.full(len(samples), label)))
     for label, samples in enumerate(hout)]
)

In [5]:
def Hy_x(xy, n=3):
    """Prior-weighted sum of per-class entropy estimates.

    The last column of ``xy`` is the class label and the remaining columns are
    the features. For every label in ``range(len(hout))`` the matching rows
    are passed to ``Hy`` with ``n`` components, and the result is weighted by
    ``px[label]``. Reads the module-level ``hout`` and ``px``.
    """
    per_class = [xy[xy[:, -1] == label][:, :-1] for label in range(len(hout))]
    total = 0
    for label, feats in enumerate(per_class):
        total += Hy(feats, n) * px[label]
    return total

In [6]:
def Hyy(xy, n=3):
    """Entropy estimate from per-class mixtures pooled into one mixture.

    The last column of ``xy`` is the class label. For every label in
    ``range(len(hout))`` a diagonal ``GaussianMixture`` with ``n`` components
    is fitted to that class's rows; each component weight is scaled by
    ``px[label]`` and all components are pooled. The return value is

        sum_i sum_j w[i] * w[j] * CE(component_i, component_j)

    with ``CE`` the Gaussian cross-entropy from ``gH`` (nats). Reads the
    module-level ``hout`` and ``px``.
    """
    per_class = [xy[xy[:, -1] == label][:, :-1] for label in range(len(hout))]
    cov = []
    mu = []
    w = []
    for label, feats in enumerate(per_class):
        clf = GaussianMixture(n_components=n, covariance_type='diag', random_state=1)
        clf.fit(feats)
        for k in range(n):
            # Keep the full per-feature vectors of component k. Indexing the
            # ravelled arrays with k only matches component k when there is a
            # single feature column.
            cov.append(clf.covariances_[k])
            mu.append(clf.means_[k])
            w.append(clf.weights_[k] * px[label])
    total = 0
    for i in range(len(w)):
        for j in range(len(w)):
            # Diagonal Gaussians factorise over features, so sum the
            # per-feature cross-entropies; identical to the single-feature case.
            pair = 0
            for d in range(len(cov[i])):
                pair += gH(cov[i][d], cov[j][d], mu[i][d], mu[j][d])
            total += w[i] * w[j] * pair
    return total

In [7]:
def MIxy(sli):
    """Mutual-information estimate between the selected columns and the label.

    ``sli`` is a list of column indices. Returns ``Hy`` of those columns of the
    module-level ``y`` minus ``Hy_x`` of the same columns (plus the label
    column, index -1) of the module-level ``xy``. Both helpers run with their
    default component counts (30 for Hy, 3 for Hy_x).

    Recorded results (presumably the correlations of the resulting MIs with
    hvar and b_ -- TODO confirm):
      * this estimator, with 30 / 3 components: 0.67405948, 0.05763917
      * alternative Hyy(xy[:, sli+[-1]]) - Hy_x(xy[:, sli+[-1]]):
        0.55641402, 0.04674744
    """
    marginal = Hy(y[:, sli])
    labelled_cols = sli + [-1]
    conditional = Hy_x(xy[:, labelled_cols])
    return marginal - conditional

In [8]:
# Mutual information between each individual neuron and the final output.
from tqdm import tqdm
# One MIxy call per column of y; tqdm shows progress for this slow loop.
MIs = np.array([MIxy([neuron]) for neuron in tqdm(range(y.shape[1]))])

100%|██████████| 512/512 [19:24<00:00,  2.27s/it]


In [9]:
# Re-stack all samples, then take the variance of every column of y.
y = np.concatenate(hout)
hvar = np.array([column.var() for column in y.T])  # Task Variance

In [10]:
# Bias and weight matrix of model.classifier[6] as numpy arrays.
b, W = [
    param.data.cpu().numpy()
    for param in (model.classifier[6].bias, model.classifier[6].weight)
]
# Assume every connection contributes equally to the bias: each entry of B is
# the bias of its row divided by the number of columns of W.
B = np.tile(b / W.shape[1], (W.shape[1], 1)).T
# Per-column sum of the element-wise ratio B / W.
b_ = np.sum(B / W, axis=0)

In [11]:
# Correlation-coefficient matrix; rows and columns are ordered MIs, hvar, b_.
np.corrcoef(np.stack((MIs, hvar, b_)))

array([[1.        , 0.67405948, 0.05763917],
       [0.67405948, 1.        , 0.09920422],
       [0.05763917, 0.09920422, 1.        ]])

In [12]:
from scipy import stats
# Spearman and Pearson correlation coefficients for every pair of measures,
# in the order (MIs, hvar), (MIs, b_), (hvar, b_); Spearman first in each pair.
tuple(
    corr_test(first, second)
    for first, second in ((MIs, hvar), (MIs, b_), (hvar, b_))
    for corr_test in (stats.spearmanr, stats.pearsonr)
)

(SignificanceResult(statistic=0.6839239758929286, pvalue=7.431099647394277e-72),
 PearsonRResult(statistic=0.674059478107472, pvalue=4.26665438743269e-69),
 SignificanceResult(statistic=0.017767556734301505, pvalue=0.6883590572119154),
 PearsonRResult(statistic=0.057639171445923384, pvalue=0.19287517331417617),
 SignificanceResult(statistic=0.04720983036930225, pvalue=0.28632413217271147),
 PearsonRResult(statistic=0.09920421798866629, pvalue=0.024781708081683142))