In [1]:
import math

import matplotlib.pyplot as plt
import numpy as np
import torch
from tqdm.auto import tqdm
from scipy.stats import norm
import matplotlib as mpl
mpl.use("TkAgg")
import ddpm
import datasets

In [13]:
sizes = range(100, 2900, 400)
sizes = range(3300, 5000, 400)
names = [f"square_{s}" for s in sizes]

In [None]:
for i in range(len(sizes)):
    print(sizes[i])
    !python ddpm.py --dataset square --experiment_name {names[i]} --num_epochs 50 --dataset_size {sizes[i]} --beta_schedule ours --num_timesteps 1000

3300
sMIN GAMMA tensor(4.3165e-06, dtype=torch.float64) LEN 50
Training model 0...
END LOSS WAS 0.7883929517014866
Saving model...
Training model 1...
END LOSS WAS 0.8827568950630185
Saving model...
Training model 2...
END LOSS WAS 0.8948706064644449
Saving model...
Training model 3...
END LOSS WAS 0.9953284821443878
Saving model...
Training model 4...
END LOSS WAS 0.8387160900933767
Saving model...
Training model 5...
END LOSS WAS 0.792220150172525
Saving model...
Training model 6...
END LOSS WAS 0.6525430478538816
Saving model...
Training model 7...
END LOSS WAS 0.846561387179412
Saving model...
Training model 8...
END LOSS WAS 0.944341589561114
Saving model...
Training model 9...
END LOSS WAS 0.89935436081184
Saving model...
Training model 10...
END LOSS WAS 0.8581838666633628
Saving model...
Training model 11...
END LOSS WAS 0.9227606092040953
Saving model...
Training model 12...
END LOSS WAS 0.8545517535584487
Saving model...
Training model 13...
END LOSS WAS 0.7768750818378969
Sa

In [11]:
def calculate_stats(model_dir, dataset='point', score='model'):
    num_scheduler_timesteps = 1000
    noise_scheduler = ddpm.NoiseScheduler(num_timesteps=num_scheduler_timesteps, beta_schedule='ours')
    num_timesteps = len(noise_scheduler)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    models = [ddpm.MLP(input_dim=2) for _ in range(num_timesteps)]
    for t in range(num_timesteps):
        path = model_dir + f"/model{t}.pth"
        models[t].load_state_dict(torch.load(path))
        models[t].to(device)
        models[t].eval()
    eval_batch_size = 100000
    plot_step = 1
    num_timesteps = len(noise_scheduler.betas)
    curr_vars = torch.sqrt(1 - torch.exp(-2 * noise_scheduler.times))
    sample = torch.randn(eval_batch_size, 2).to(device).to(device)
    timesteps = list(range(num_timesteps))[::-1][:-5]
    print("last std", curr_vars[timesteps[-1]])
    samples = []
    steps = []
    for i, t in enumerate(tqdm(timesteps)):
        t_int = t
        t = torch.from_numpy(np.repeat(t, eval_batch_size)).long().to(device)
        with torch.no_grad():
            variance = torch.sqrt(1 - noise_scheduler.alphas_cumprod[t])
            v = curr_vars[t].cpu().numpy()
            if score == 'model':
                residual = models[t_int](sample, t)
            else:
                residual = sample / variance
        sample = noise_scheduler.step(residual, t[0], sample)
        if (i + 1) % plot_step == 0:
            sample_cpu = sample.cpu()
            samples.append(sample_cpu.numpy())
            steps.append(i + 1)
    print(samples[-1])
    return process_square(samples[-1], mode='median')
def process_square(samples, r=3, mode='median'):
    t = 0
    d = []
    for s in samples:
        dists = [abs(s[0] - r), abs(s[0] + r), abs(s[1] - r), abs(s[1] + r)]
        m = min(dists)
        d.append(m)
    # d = sorted(d)[:-round(len(d)*0.02)]
    if mode == 'median':
        return sorted(d)[len(d)//2]
    for m in d:
        t += m**2
    t /= len(d)
    return np.sqrt(t)

In [12]:
s = []
for i in range(len(sizes)):
    print(sizes[i], names[i])
    s.append(np.abs(calculate_stats(f"exps/{names[i]}", dataset='square', score='model')))
print(s)
plt.clf()
plt.scatter(sizes, np.array(s))
plt.yscale('log')
plt.title("median error vs dataset size")
plt.ylabel("median error")
plt.xlabel("dataset size")
plt.savefig(f'static/devs_square.png', bbox_inches='tight')
plt.show()

100 square_100
sMIN GAMMA tensor(4.3165e-06, dtype=torch.float64) LEN 50
last std tensor(4.2208e-05, device='cuda:0', dtype=torch.float64)


  0%|          | 0/45 [00:00<?, ?it/s]

[[-1.64826575  2.99872772]
 [-2.99006477 -0.50314895]
 [ 1.77686995  1.84478933]
 ...
 [ 1.77888957  2.8973803 ]
 [ 2.99471784  2.21419741]
 [-2.99072528 -0.72244898]]
500 square_500
sMIN GAMMA tensor(4.3165e-06, dtype=torch.float64) LEN 50
last std tensor(4.2208e-05, device='cuda:0', dtype=torch.float64)


  0%|          | 0/45 [00:00<?, ?it/s]

[[ 3.00376705 -1.4585616 ]
 [ 0.10081579 -2.99407349]
 [-2.99109324 -1.59602885]
 ...
 [ 2.99425463  2.85610399]
 [-2.99501285  0.95785781]
 [ 0.61249264 -3.00406183]]
900 square_900
sMIN GAMMA tensor(4.3165e-06, dtype=torch.float64) LEN 50
last std tensor(4.2208e-05, device='cuda:0', dtype=torch.float64)


  0%|          | 0/45 [00:00<?, ?it/s]

[[ 1.00473842  2.99920833]
 [ 2.46445873 -3.00317763]
 [-2.99467551 -1.62423302]
 ...
 [-2.99871169  2.12725218]
 [-2.97971666 -3.00111218]
 [ 0.31233414  3.00184258]]
1300 square_1300
sMIN GAMMA tensor(4.3165e-06, dtype=torch.float64) LEN 50
last std tensor(4.2208e-05, device='cuda:0', dtype=torch.float64)


  0%|          | 0/45 [00:00<?, ?it/s]

[[-2.57826139 -2.9931353 ]
 [-3.00098973  1.12282135]
 [-3.00228268 -1.61155334]
 ...
 [-1.93149914  3.00033266]
 [-3.0005271   2.72631702]
 [-2.99934823  1.95536906]]
1700 square_1700
sMIN GAMMA tensor(4.3165e-06, dtype=torch.float64) LEN 50
last std tensor(4.2208e-05, device='cuda:0', dtype=torch.float64)


  0%|          | 0/45 [00:00<?, ?it/s]

[[ 1.57293442 -2.99952751]
 [-0.41646793 -3.00097617]
 [-2.33944994  2.99829267]
 ...
 [-3.00077073  0.02159166]
 [ 0.64416453  2.99766105]
 [ 1.04463829 -2.99766895]]
2100 square_2100
sMIN GAMMA tensor(4.3165e-06, dtype=torch.float64) LEN 50
last std tensor(4.2208e-05, device='cuda:0', dtype=torch.float64)


  0%|          | 0/45 [00:00<?, ?it/s]

[[-3.00068083  0.28152951]
 [ 0.89915467 -3.00166531]
 [ 2.99842278 -2.45978814]
 ...
 [-3.00048949 -0.10187411]
 [-2.86839094  3.00058721]
 [ 2.99763768 -2.33593283]]
2500 square_2500
sMIN GAMMA tensor(4.3165e-06, dtype=torch.float64) LEN 50
last std tensor(4.2208e-05, device='cuda:0', dtype=torch.float64)


  0%|          | 0/45 [00:00<?, ?it/s]

[[-2.99966079  0.7490067 ]
 [-2.99909234  2.02180291]
 [ 3.00062481 -1.55088826]
 ...
 [ 2.99807289  1.38332462]
 [ 3.00103032 -1.03280146]
 [ 0.79312873 -3.00109854]]
[0.010505383737573482, 0.003032553392020798, 0.0019632330676753185, 0.0018738624552425698, 0.0013212902354555744, 0.0013010420442616244, 0.0012681355097159397]


In [8]:
model_dir = f"exps/{names[-1]}"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
noise_scheduler = ddpm.NoiseScheduler(num_timesteps=1000, beta_schedule='ours')
num_timesteps = len(noise_scheduler.times)
models = [ddpm.MLP(input_dim=1) for _ in range(num_timesteps)]
for t in range(num_timesteps):
    path = model_dir + f"/model{t}.pth"
    models[t].load_state_dict(torch.load(path))
    models[t].to(device)
    models[t].eval()

sMIN GAMMA tensor(4.3165e-06, dtype=torch.float64) LEN 50


In [9]:
curr_stds = torch.sqrt(1 - torch.exp(-2 * noise_scheduler.times))
t = 10
v = curr_stds[t].item()
print(v)
x_scale = np.linspace(-v * 5, v * 5, 1000)
# x_scale = np.linspace(-10, 10, 1000)
inputs = torch.tensor(x_scale, device=device).unsqueeze(1)
times = torch.ones(len(inputs)).to(device) * t
model_residuals = models[t](inputs, times)
true_residuals = inputs / v

0.0003080510901544058


In [10]:
plt.plot(x_scale, model_residuals.data.cpu().numpy(), label='model')
plt.plot(x_scale, true_residuals.data.cpu().numpy(), label='true')
for y in (np.arange(11)-5)*v:
    plt.axvline(y, alpha=1 if y == 0 else 0.2)
plt.legend()
plt.savefig("score.png")
plt.show()

In [14]:
errors = []
for t in range(50):
    print(t)
    v = curr_vars[t].cpu().numpy()
    x_range = np.linspace(-v*5, v*5, 1000)
    diff = x_range[1] - x_range[0]
    l2 = 0
    difc = 0
    pc = 0
    tot = 0
    for i in x_range:
        v = curr_vars[t]
        pdf = norm.pdf(i, 0, v.item())
        model_val = model(torch.tensor([[i]], device=device, dtype=torch.float32), torch.ones(1, device=device, dtype=torch.float32)*t)
        true_val = torch.tensor([[i]], device=device) / torch.sqrt(1 - torch.exp(-2 * noise_scheduler.times[t]))
        error = (model_val.data.cpu().numpy() - true_val.data.cpu().numpy())[0]
        l2 += (error**2)*diff*pdf
        difc += diff
        pc += pdf
        tot += diff*pdf
    print(l2, difc, tot, v.item()*2)
    errors.append(l2*v.item()*v.item())
    # break

0


NameError: name 'curr_vars' is not defined

In [96]:
x_range = np.linspace(-10, 10, 1000)
diff = x_range[1] - x_range[0]
tot = 0
for i in x_range:
    tot += diff * norm.pdf(i, 0, 10)
print(tot)

0.6831737563750486


In [29]:
model_path = "exps/point_1d_smallgamma7300/model.pth"
model = ddpm.MLP(input_dim=1)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.load_state_dict(torch.load(model_path))
model.to(device)
model.eval()
scores = []
inputs = torch.linspace(-10, 10, 1000).unsqueeze(1).to(device)
noise_scheduler = ddpm.NoiseScheduler(num_timesteps=1000, beta_schedule="ours")
print(noise_scheduler.betas.shape)
times = torch.ones(len(inputs), device=device)*9
residuals = model(inputs, times)

MIN GAMMA tensor(3.1610e-06, dtype=torch.float64) LEN 84
torch.Size([84])
