Goal of worksheet: show that the capacity of the generator G is sufficient for estimating increasingly complicated functions.

Generator to estimate function


Goal $G(z)=f(z)$

where $f$ is a known function and $G$ is the generator function that estimates $f$.

$z\sim \mathcal{N}_{n_z}(0,I)$



In [None]:
from utils import *
import sklearn
import sklearn.linear_model  # `import sklearn` alone does not load this submodule


In [None]:
class basefunction():
    """Fixed reference function f(z) that the generator has to reproduce.

    The parameters are derived once from the label-0 rows of
    ``data/exp1_standard5000.npz`` and then kept for the whole experiment:

    * ``A``    -- ``U_l @ sqrt(S_l)`` from the SVD of the covariance of the
      first ``opt.specsize`` feature columns, with ``l = opt.latent_dim``.
    * ``bias`` -- per-column mean of those same feature columns.

    Despite the linear construction of ``A``, ``forward`` also adds a
    quadratic term: ``f(z) = A z + bias + A z**2`` (element-wise square).
    """
    def __init__(self,opt):
        features, targets, _, _ = load_labeled_data("data/exp1_standard5000.npz")
        # keep only the samples labelled 0, restricted to the leading columns
        spectra = features[targets == 0][:, :opt.specsize]

        covariance = np.cov(spectra, rowvar=False)
        U, s, _ = np.linalg.svd(covariance)
        rank = opt.latent_dim

        # scale the leading singular vectors by the root of their singular values
        self.A = U[:, :rank] @ np.diag(np.sqrt(s[:rank]))
        self.bias = np.mean(spectra, axis=0)
        # self.A = np.random.rand(opt.specsize,opt.latent_dim)
        # self.A = np.ones((opt.specsize,opt.latent_dim))*0.1
        # self.bias = np.ones(opt.specsize)*0.2

    def forward(self,z):
        """Evaluate f on a batch ``z`` whose rows are latent vectors.

        Returns one output row per input row: ``A z + bias + A z**2``.
        """
        out = (self.A @ z.T).T + self.bias  # linear part plus offset
        out += (self.A @ np.square(z).T).T  # quadratic part reuses the same A
        return out

$y=f(z)\in \mathbb{R}^n$

$z=f^{-1}(y)$

$y=f(f^{-1}(y))=G^*(z)$

Where $G^*$ denotes the optimal G

In [None]:
# Bare namespace class: the experiment settings live on it as class attributes
# and are read (and later overwritten) through `opt.<name>`.
class opt():
    batch_size = 1024
    numclasses = 1
    latent_dim = 100
    specsize = 573
    limitclasses = [0,2,6,7,8]


# Reference function f; its parameters are fixed from here on.
fun = basefunction(opt)

# Draw one batch of latent vectors and push it through f.
z = np.random.normal(0,1,(opt.batch_size,opt.latent_dim,))
y = fun.forward(z)
# From now on the output width of f defines the spectrum size.
opt.specsize = y.shape[1]

#example function output
# print(y[0,:])
# plt.plot(y[0,:])
# plt.title("Single realization")



$X\approx f(z)=Az+\mu$

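The best rank-$l$ matrix $A$ is obtained from a low-rank approximation of the covariance matrix: with the SVD $\mathrm{Cov}(X)=USU^T$ we take $A=U_l S_l^{1/2}$, so that $AA^T\approx \mathrm{Cov}(X)$.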

To get a baseline we make a linear estimation of A using least squares.

In [None]:
# Baseline on the original data: fit linear maps from random latent vectors
# to the real label-0 samples and report MSE and Frechet distance (FD).
X_train,y_train,X_test,y_test = load_labeled_data("data/exp1_standard5000.npz")
train_mask = y_train == 0
test_mask = y_test == 0
X_train, y_train = X_train[train_mask], y_train[train_mask]
X_test, y_test = X_test[test_mask], y_test[test_mask]

# one latent vector per training sample
opt.batch_size = len(X_train)
z = np.random.normal(0,1,(opt.batch_size,opt.latent_dim,))

# least squares without intercept via the normal equations
A_hat = (np.linalg.inv(z.T @ z) @ z.T @ X_train).T
# apply the estimator to reconstruct the original data
y_hat = A_hat @ z.T

# least squares with intercept
reg = sklearn.linear_model.LinearRegression().fit(z,X_train)
reg_prediction = reg.predict(z)

lossfun = torch.nn.MSELoss()
reports = (
    ("MSEloss unbiased lsq original ", "FD lsq original", y_hat.T),
    ("MSEloss linear regression original ", "FD linear regression original", reg_prediction),
)
for mse_label, fd_label, prediction in reports:
    print(mse_label, lossfun(torch.tensor(X_train), torch.tensor(prediction)).item())
    error = error_val(X_train, prediction)
    print(fd_label, error.FID())

# Reference level: real train vs. real test, truncated to the shorter of the two.
n_common = min(len(X_train), len(X_test))
print("MSEloss train/test ", lossfun(torch.tensor(X_train[:n_common]), torch.tensor(X_test[:n_common])).item())
error = error_val(X_train, X_test)
print("FD train/test", error.FID())

In [None]:
# Generate data from the reference function f and fit the two linear baselines.
# The original assigned `opt.batchsize` (typo), so the batch size silently stayed
# at len(X_train) from the previous cell instead of being reset to 1024.
opt.batch_size = 1024
opt.batchsize = opt.batch_size  # misspelled attribute kept only in case code outside this file reads it
z = np.random.normal(0,1,(opt.batch_size,opt.latent_dim,))
y = fun.forward(z)

# Linear estimator: least squares without intercept via the normal equations
A_hat = np.linalg.inv(z.T.dot(z)).dot(z.T).dot(y).T
print(np.shape(A_hat))
print(np.shape(fun.A))
# apply the estimator
y_hat = A_hat.dot(z.T)

# Linear regression with intercept; predictions are computed once and reused
reg = sklearn.linear_model.LinearRegression().fit(z,y)
reg_prediction = reg.predict(z)
lossfun = torch.nn.MSELoss()
# print(reg.coef_)
# Count the entries of the true A that each estimate recovers to within 1e-4.
print("Number of parameters estimated correctly by regression",np.sum(np.abs(fun.A-reg.coef_)<0.0001))
print("Number of parameters estimated correctly by lsq",np.sum(np.abs(fun.A-A_hat)<0.0001))

print("MSEloss unbiased lsq",lossfun(torch.tensor(y),torch.tensor(y_hat.T)).item())
print("MSEloss linear regression",lossfun(torch.tensor(y),torch.tensor(reg_prediction)).item())
error_reg = error_val(y,reg_prediction)
error_lsq = error_val(y,y_hat.T)
print("Regression frechet distance: ",error_reg.FID())
print("Linear estimate frechet distance: ",error_lsq.FID())

plot_spec_stats(y,y_hat.T,range=[0,len(y.T)],compare_var=True)
plot_spec_stats(y,reg_prediction,range=[0,len(y.T)],compare_var=True)

Goal: $G(z)\rightarrow f(z)$.

To do this we minimize the error:

$\min_{\theta_G}\mathbf{E}||f(z)-G(z)||^2$

G(z) is a neural network

In [None]:
# Train the generator G to reproduce f(z) by minimising the MSE between G(z)
# and f(z) on freshly sampled latent batches. Training stops early once the
# loss has failed to reach the best value for more than 100 consecutive steps.
import os

# G = cGANgenerator_fc_skipconnect(opt) # initialize fully connected generator
# G = simplest_generator(opt) #single layer
# G = cGANgenerator_fc_minuslayer(opt)
# G= cGANgenerator_fc_addlayer(opt)
# G = cGANgenerator_fc(opt)
G = cDC_Generator_fc(opt)

labels = torch.LongTensor(np.zeros((opt.batch_size))) #labels not taken in to account -> all labels 0
# print(G)
lossfun = torch.nn.MSELoss()
# optim = torch.optim.Adam(G.parameters(),lr=0.0001)
optim = torch.optim.SGD(G.parameters(),lr=0.0005)
losslist = []
fidlist = []

last_loss = 1000
best_loss = 1000
notincreasing_count = 0
BestG=copy.deepcopy(G)

# savefig does not create directories; make sure both output folders exist.
os.makedirs("test/simplestgen", exist_ok=True)


def _save_comparison_plot(generator, z, y, out_dir, iteration):
    """Plot generator(z) against the real batch y and save the figure under out_dir.

    With fewer than 9 output features a histogram of feature 0 is drawn,
    otherwise the spectrum statistics are compared via plot_spec_stats.
    """
    with torch.no_grad():  # plotting only, no gradients needed
        generated = generator(torch.FloatTensor(z),labels).detach().numpy()
    real = y.detach().numpy()
    if opt.specsize<9:
        plt.figure()
        plt.hist(generated[:,0],bins=20,alpha=0.5)
        plt.hist(real[:,0],bins=20,alpha=0.5)
        plt.legend(["G(z)","f(z)"])
        plt.xlim(-1,1)
        plt.savefig(str(out_dir+"feature_1_iter_"+str(iteration)))
    else:
        # Keep the return value in its own name instead of rebinding `plt`.
        stats_plot = plot_spec_stats(real,generated,compare_var=True,range=[0,opt.specsize])
        # stats_plot.ylim(-5,5)
        stats_plot.savefig(str(out_dir+"spec_stats_iter_"+str(iteration)))


for i in tqdm(range(1000)):
    # sample input noise
    z = np.random.normal(0,1,(opt.batch_size,opt.latent_dim,))
    # create real samples
    y = torch.FloatTensor(fun.forward(z))
    # create fake samples
    yhat = G(torch.FloatTensor(z),labels)

    # calculate loss; gradients must be cleared first or they accumulate
    # over iterations (the original never called zero_grad)
    optim.zero_grad()
    loss = lossfun(yhat,y.squeeze(1))
    loss.backward()
    last_loss = loss.item()

    # Early-stopping bookkeeping. The snapshot is taken BEFORE the optimiser
    # step so that BestG holds exactly the weights that produced best_loss.
    if last_loss>best_loss:
        notincreasing_count+=1
    else:
        BestG=copy.deepcopy(G)
        best_loss=last_loss  # remember best loss value
        notincreasing_count=0

    optim.step()

    # Frechet distance is only monitored, never optimised, so a plain number suffices.
    error = error_val(yhat.detach().numpy(),y.squeeze(1).detach().numpy())
    FD = torch.tensor(error.FID()).item()

    losslist.append(last_loss)
    fidlist.append(FD)

    if i%100==0:
        _save_comparison_plot(BestG,z,y,"test/",i)

    if notincreasing_count>100:
        _save_comparison_plot(BestG,z,y,"test/simplestgen/",i)
        break

# NOTE(review): the final plot (and the later cells) use the last G, not BestG — confirm this is intended.
with torch.no_grad():
    yhat = G(torch.FloatTensor(z),labels)
final_plot = plot_spec_stats(y.detach().numpy(),yhat.detach().numpy(),compare_var=True,range=[0,opt.specsize])
final_plot.savefig("test/simplestgen/spec_stats_iter_final")

print("Lowest achieved loss",best_loss)
print("Lowest FD distance: ",np.min(fidlist))

In [None]:
# Show the generator, followed by its raw attribute dictionary.
print(G)
print(vars(G))
# print(G.model[0].weight)
# print(fun.A)

In [None]:
# Training diagnostics: loss curve, two generated samples, FD curve.
plt.figure()
plt.plot(losslist)
plt.title('loss over time')
plt.xlabel('iteration')
plt.ylabel('MSE loss')

# print(yhat.detach().numpy()[0])
generated = yhat.detach().numpy()
plt.figure()
plt.plot(generated[0])
plt.plot(generated[1])
plt.title("2 generated realizations")

plt.figure()
plt.plot(fidlist)
plt.ylim(bottom=0)  # anchor the y-axis at zero
plt.title("FD over time")
# plt.ylim([0,100])
print(fidlist)


In [None]:
# # validation
# Evaluate the trained generator on a large fresh sample of latent vectors.
G.eval()
print(G)
# print(G.model[0].weight)

n_val = 100000
z = np.random.normal(0,1,(n_val,opt.latent_dim,))
# One all-zero label per validation sample. The training `labels` tensor has only
# opt.batch_size entries, which does not match the n_val rows of z passed to G.
val_labels = torch.LongTensor(np.zeros(n_val))
# # create real samples
y = torch.FloatTensor(fun.forward(z)).squeeze(1).detach().numpy()
# inference only: skip autograd bookkeeping for the large batch
with torch.no_grad():
    yhat = G(torch.FloatTensor(z),val_labels).detach().numpy()

# xhat = scaler.inverse_transform(yhat) # for FD calculation
# yfz = scaler.inverse_transform(y) # for FD calculation



In [None]:
# plot distributions of the first output feature: generated vs. real
# `y` is 1-D only when f has a single output (squeeze(1) dropped the axis);
# otherwise it is 2-D and passing it whole to hist would histogram every column
# as a separate dataset instead of comparing feature 0 with feature 0.
y_first = y if y.ndim == 1 else y[:,0]
plt.figure()
plt.hist(yhat[:,0],bins=20,alpha=0.5)
plt.hist(y_first,bins=20,alpha=0.5)
plt.legend(["G(z)","f(z)"])
plt.show()

print(np.shape(y))
print(np.shape(yhat))
# NOTE(review): np.std(y) is already a scalar over all entries, so np.mean is a no-op;
# confirm whether a per-feature std (axis=0) was intended.
print(np.mean(np.std(y)))
# plot_spec_stats(y,yhat,compare_var=True,range=[0,len(yhat[0])])

In [None]:
# Feature-by-feature correlation matrices of the real and the generated samples,
# each drawn in its own figure with a colour bar.
for samples, heading in ((y, 'Original'), (yhat, 'Fake')):
    plt.figure()
    plt.imshow(np.corrcoef(samples,rowvar=False))
    plt.title(heading)
    plt.colorbar()

In [None]:
# COV = np.random.uniform(0,1,size=(2,2))
# COV = (COV + COV.T)/2
# # print(COV)
# U, S, _ = np.linalg.svd(COV)

# l=2
# print(S)
# print(np.sqrt(np.diag(S[:l])*np.sqrt(np.diag(S[:l]))
# A = U[:,:l] @ np.sqrt(np.diag(S[:l]))
# print(COV)
# print(A@A.T)
# print(np.linalg.norm(COV-A@A.T))