In [22]:
import math

import matplotlib.pyplot as plt
import numpy as np
import torch
from tqdm.auto import tqdm
from scipy.stats import norm
import matplotlib as mpl
mpl.use("TkAgg")
import ddpm
import datasets

In [23]:
# Dataset sizes to train and evaluate. Only the active assignment below takes
# effect; earlier sweeps used the alternatives kept here for reference:
#   sizes = list(range(1300, 5000, 400))
#   sizes = [1300, 1700, 2100]
sizes = [2500]
# Experiment directory name for each dataset size.
names = [f"point_1d{size}" for size in sizes]

In [24]:
# Train one experiment per dataset size by shelling out to ddpm.py.
# The `!` line is IPython shell syntax: {names[i]} and {sizes[i]} are
# interpolated from the notebook namespace before the command runs.
for i in range(len(sizes)):
    print(sizes[i])
    !python ddpm.py --dataset point1d --experiment_name {names[i]} --num_epochs 70 --dataset_size {sizes[i]} --dimension 1 --beta_schedule ours --num_timesteps 1000

2500
sMIN GAMMA tensor(4.3165e-06, dtype=torch.float64) LEN 50
Training model 0...
END LOSS WAS 0.7904228160643548
Saving model...
Training model 1...
END LOSS WAS 0.9619184204891211
Saving model...
Training model 2...
END LOSS WAS 0.7841194233993087
Saving model...
Training model 3...
END LOSS WAS 0.7899011020909297
Saving model...
Training model 4...
END LOSS WAS 1.419254871508043
Saving model...
Training model 5...
END LOSS WAS 0.44902069010219103
Saving model...
Training model 6...
END LOSS WAS 0.4085136009128901
Saving model...
Training model 7...
END LOSS WAS 0.17906562695219597
Saving model...
Training model 8...
END LOSS WAS 0.0013621787821117208
Saving model...
Training model 9...
END LOSS WAS 0.008133978391982084
Saving model...
Training model 10...
END LOSS WAS 0.025939876999114556
Saving model...
Training model 11...
END LOSS WAS 0.035207026760656714
Saving model...
Training model 12...
END LOSS WAS 0.015552954958675865
Saving model...
Training model 13...
END LOSS WAS 0.00

In [29]:
def calculate_stats(model_dir, dataset='point', score='model'):
    """Run the reverse diffusion chain and summarise the final samples.

    One ``ddpm.MLP`` is loaded per scheduler timestep from
    ``{model_dir}/model{t}.pth``. Starting from standard-normal noise, the
    chain is stepped from the last timestep down to timestep 1 (timestep 0 is
    excluded by the ``[:-1]`` slice below).

    Args:
        model_dir: Directory containing the ``model{t}.pth`` checkpoints.
        dataset: ``'ret'`` returns the list of per-step sample arrays;
            ``'square'`` and ``'point'`` both return ``process_point`` applied
            to the samples after the final step.
        score: ``'model'`` uses the trained network of the current timestep;
            any other value uses the closed-form residual
            ``sample / sqrt(1 - alphas_cumprod[t])``.

    Returns:
        A list of numpy arrays (``dataset='ret'``) or the value returned by
        ``process_point`` for the final samples.

    Raises:
        ValueError: If ``dataset`` is not one of ``'ret'``, ``'square'``,
            ``'point'``. Raised before any model is loaded.
    """
    # Fail fast instead of running the whole chain before rejecting the argument.
    if dataset not in ('ret', 'square', 'point'):
        raise ValueError("INVALID DATASET")

    num_scheduler_timesteps = 1000
    noise_scheduler = ddpm.NoiseScheduler(num_timesteps=num_scheduler_timesteps, beta_schedule='ours')
    print(len(noise_scheduler.times))
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    num_models = len(noise_scheduler)
    models = [ddpm.MLP(input_dim=1) for _ in range(num_models)]
    for t in range(num_models):
        path = model_dir + f"/model{t}.pth"
        # map_location keeps checkpoints saved on a GPU loadable when this
        # process has fallen back to the CPU device.
        models[t].load_state_dict(torch.load(path, map_location=device))
        models[t].to(device)
        models[t].eval()

    eval_batch_size = 10000
    plot_step = 1
    num_timesteps = len(noise_scheduler.betas)
    # Twice the batch is drawn and only the second half kept, exactly as before,
    # so the consumed random stream is unchanged.
    sample = torch.randn(eval_batch_size*2, 1).to(device)[eval_batch_size:, :]
    timesteps = list(range(num_timesteps))[::-1][:-1]

    samples = []
    for i, t_int in enumerate(tqdm(timesteps)):
        t = torch.from_numpy(np.repeat(t_int, eval_batch_size)).long().to(device)
        with torch.no_grad():
            if score == 'model':
                residual = models[t_int](sample, t)
            else:
                # Use a single scalar std for the step. Indexing with the
                # batched `t` gives a (batch,) vector, and dividing the
                # (batch, 1) sample by it broadcasts to (batch, batch).
                # assumes alphas_cumprod is 1-D, indexed by timestep — TODO confirm
                noise_std = torch.sqrt(1 - noise_scheduler.alphas_cumprod[t_int])
                residual = sample / noise_std
        sample = noise_scheduler.step(residual, t[0], sample)
        if (i + 1) % plot_step == 0:
            samples.append(sample.cpu().numpy())

    if dataset == 'ret':
        return samples
    # 'square' and 'point' share the same summary: the models are 1-D
    # (input_dim=1), and process_point is what the original code called for both.
    return process_point(samples[-1], mode='median')

def process_square(samples, r=3, mode='median'):
    """Summarise how far 2-D samples lie from the lines x = ±r and y = ±r.

    For every sample ``s`` the distance used is the smallest of
    ``|s[0] - r|``, ``|s[0] + r|``, ``|s[1] - r|`` and ``|s[1] + r|``.

    Args:
        samples: Iterable of points, each indexable as ``s[0]`` and ``s[1]``.
        r: Offset of the four reference lines from the origin.
        mode: ``'median'`` returns element ``len // 2`` of the sorted
            distances (the upper median for even counts); any other value
            returns the root-mean-square distance. The function previously
            read an undefined name ``mode`` and raised ``NameError``.

    Returns:
        The median or RMS distance, depending on ``mode``.

    Raises:
        ValueError: If ``samples`` is empty.
    """
    nearest = [
        min(abs(s[0] - r), abs(s[0] + r), abs(s[1] - r), abs(s[1] + r))
        for s in samples
    ]
    if not nearest:
        raise ValueError("process_square needs at least one sample")
    # d = sorted(d)[:-round(len(d)*0.02)]
    if mode == 'median':
        return sorted(nearest)[len(nearest)//2]
    mean_sq = sum(m**2 for m in nearest) / len(nearest)
    return np.sqrt(mean_sq)
def process_point(samples, mode='median'):
    """Return the median of ``samples`` after squeezing out size-1 axes.

    ``mode`` is accepted for call-site compatibility but is not consulted;
    the median is returned whatever its value.
    """
    flattened = samples.squeeze()
    return np.median(flattened)

In [30]:
# Sample with the trained per-timestep models for every dataset size and
# record the absolute value of the statistic returned by calculate_stats.
s = []
for size, name in zip(sizes, names):
    print(size, name)
    stat = calculate_stats(f"exps/{name}", dataset='square', score='model')
    s.append(np.abs(stat))
print(s)

# Scatter the absolute median against dataset size on a log y-axis.
plt.clf()
plt.scatter(sizes, np.array(s))
plt.yscale('log')
plt.xlabel("dataset size")
plt.ylabel("median error")
plt.title("median error vs dataset size")
plt.savefig(f'static/devs_point.png', bbox_inches='tight')

plt.show()

2500 point_1d2500
sMIN GAMMA tensor(4.3165e-06, dtype=torch.float64) LEN 50
50


  0%|          | 0/49 [00:00<?, ?it/s]

[9.869145078009969e-06]


In [12]:
# Baseline run: score='true' makes calculate_stats use its closed-form
# residual instead of the trained networks.
s = []
for size, name in zip(sizes, names):
    print(size, name)
    baseline = calculate_stats(f"exps/{name}", dataset='point', score='true')
    s.append(np.abs(baseline))
print(s)

# Same figure layout and output path as the model-score cell
# (static/devs_point.png), so running this overwrites that image.
plt.clf()
plt.scatter(sizes, np.array(s))
plt.yscale('log')
plt.xlabel("dataset size")
plt.ylabel("median error")
plt.title("median error vs dataset size")
plt.savefig(f'static/devs_point.png', bbox_inches='tight')

plt.show()

100 point_1d_100
sMIN GAMMA tensor(4.3165e-06, dtype=torch.float64) LEN 50
50
torch.Size([100000, 1])
[49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]


  0%|          | 0/50 [00:00<?, ?it/s]

500 point_1d_500
sMIN GAMMA tensor(4.3165e-06, dtype=torch.float64) LEN 50
50
torch.Size([100000, 1])
[49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]


  0%|          | 0/50 [00:00<?, ?it/s]

900 point_1d_900
sMIN GAMMA tensor(4.3165e-06, dtype=torch.float64) LEN 50
50
torch.Size([100000, 1])
[49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]


  0%|          | 0/50 [00:00<?, ?it/s]

1300 point_1d_1300
sMIN GAMMA tensor(4.3165e-06, dtype=torch.float64) LEN 50
50
torch.Size([100000, 1])
[49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]


  0%|          | 0/50 [00:00<?, ?it/s]

1700 point_1d_1700
sMIN GAMMA tensor(4.3165e-06, dtype=torch.float64) LEN 50
50
torch.Size([100000, 1])
[49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]


  0%|          | 0/50 [00:00<?, ?it/s]

2100 point_1d_2100
sMIN GAMMA tensor(4.3165e-06, dtype=torch.float64) LEN 50
50
torch.Size([100000, 1])
[49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]


  0%|          | 0/50 [00:00<?, ?it/s]

2500 point_1d_2500
sMIN GAMMA tensor(4.3165e-06, dtype=torch.float64) LEN 50
50
torch.Size([100000, 1])
[49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]


  0%|          | 0/50 [00:00<?, ?it/s]

2900 point_1d_2900
sMIN GAMMA tensor(4.3165e-06, dtype=torch.float64) LEN 50
50
torch.Size([100000, 1])
[49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]


  0%|          | 0/50 [00:00<?, ?it/s]

3300 point_1d_3300
sMIN GAMMA tensor(4.3165e-06, dtype=torch.float64) LEN 50
50
torch.Size([100000, 1])
[49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]


  0%|          | 0/50 [00:00<?, ?it/s]

3700 point_1d_3700
sMIN GAMMA tensor(4.3165e-06, dtype=torch.float64) LEN 50
50
torch.Size([100000, 1])
[49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]


  0%|          | 0/50 [00:00<?, ?it/s]

4100 point_1d_4100
sMIN GAMMA tensor(4.3165e-06, dtype=torch.float64) LEN 50
50
torch.Size([100000, 1])
[49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]


  0%|          | 0/50 [00:00<?, ?it/s]

4500 point_1d_4500
sMIN GAMMA tensor(4.3165e-06, dtype=torch.float64) LEN 50
50
torch.Size([100000, 1])
[49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]


  0%|          | 0/50 [00:00<?, ?it/s]

4900 point_1d_4900
sMIN GAMMA tensor(4.3165e-06, dtype=torch.float64) LEN 50
50
torch.Size([100000, 1])
[49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]


  0%|          | 0/50 [00:00<?, ?it/s]

[0.22580300421758892, 0.005846942787256269, 0.00625606872608015, 0.0002929405931876405, 0.0023586053415945193, 0.0018379001564902096, 0.0032048560584663727, 0.002909046204849555, 0.0001022496828747902, 0.001387017701437943, 0.0001644522932970577, 0.0006371301944660385, 0.0020390610085395]


In [9]:
# Load the per-timestep models of the last experiment in `names` into the
# notebook namespace (`models`, `noise_scheduler`, `device`) for the cells below.
print(names[-1])
model_dir = f"exps/{names[-1]}"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
noise_scheduler = ddpm.NoiseScheduler(num_timesteps=1000, beta_schedule='ours')
num_timesteps = len(noise_scheduler.times)
models = [ddpm.MLP(input_dim=1) for _ in range(num_timesteps)]
for t in range(num_timesteps):
    path = model_dir + f"/model{t}.pth"
    # map_location keeps checkpoints saved on a GPU loadable when `device`
    # has fallen back to the CPU.
    models[t].load_state_dict(torch.load(path, map_location=device))
    models[t].to(device)
    models[t].eval()

point_1d2100
sMIN GAMMA tensor(4.3165e-06, dtype=torch.float64) LEN 50


In [10]:
# Compare the residual predicted by one timestep's model with the closed-form
# x / std on a grid spanning ±5 standard deviations of that timestep.
curr_stds = torch.sqrt(1 - torch.exp(-2 * noise_scheduler.times))
t = 1
v = curr_stds[t].item()
print(v)
x_scale = np.linspace(-v * 5, v * 5, 1000)
# x_scale = np.linspace(-10, 10, 1000)
inputs = torch.tensor(x_scale, device=device).unsqueeze(1)
times = torch.ones(len(inputs)).to(device) * t
# Inference only: skip autograd bookkeeping, as calculate_stats does.
with torch.no_grad():
    model_residuals = models[t](inputs, times)
true_residuals = inputs / v

7.73468595888991e-06


In [11]:
# Overlay the model's residual curve on the closed-form one.
model_curve = model_residuals.data.cpu().numpy()
true_curve = true_residuals.data.cpu().numpy()
plt.plot(x_scale, model_curve, label='model')
plt.plot(x_scale, true_curve, label='true')
# Vertical guides at integer multiples of v from -5v to 5v; the guide at zero
# is opaque, the others faint.
for y in np.arange(-5, 6) * v:
    plt.axvline(y, alpha=0.2 if y != 0 else 1)
plt.legend()
plt.savefig("score.png")
plt.show()

In [19]:
# Gaussian-weighted squared error between each timestep's model and the
# closed-form residual x / std, integrated on a 1000-point grid over ±5 std
# and scaled by std**4.
#
# The grid is evaluated in one batched forward pass per timestep rather than
# one model call per grid point. The batched call shape matches the (N, 1)
# inputs / (N,) times call used in the residual-comparison cell.
errors = []
for t in range(len(models)):
    print(t)
    v = curr_stds[t].cpu().numpy()
    x_range = np.linspace(-v*5, v*5, 1000)
    diff = x_range[1] - x_range[0]
    pdf = norm.pdf(x_range, 0, v.item())

    grid = torch.tensor(x_range, device=device).unsqueeze(1)
    t_batch = torch.ones(len(x_range), device=device, dtype=torch.float32)*t
    with torch.no_grad():
        # The model is fed float32, as in the per-point version.
        model_val = models[t](grid.to(torch.float32), t_batch)
    true_val = grid / torch.sqrt(1 - torch.exp(-2 * noise_scheduler.times[t]))

    error = model_val.cpu().numpy().reshape(-1, 1) - true_val.cpu().numpy()
    # Summing over the grid axis keeps l2 as a shape-(1,) array, so the printed
    # format matches the earlier output.
    l2 = np.sum((error**2) * diff * pdf[:, None], axis=0)
    scaled = l2*(v.item()**4)
    print(scaled)
    errors.append(scaled)
print(errors)

0
[3.47192889e-22]
1
[9.51978681e-21]
2
[1.17412671e-22]
3
[5.12170362e-21]
4
[1.29290347e-20]
5
[1.62810439e-20]
6
[3.08192374e-19]
7
[9.88568862e-19]
8
[5.68421272e-20]
9
[8.46656761e-22]
10
[4.3766878e-21]
11
[1.70555658e-16]
12
[2.08086225e-15]
13
[1.5645525e-14]
14
[2.36461162e-14]
15
[1.61084977e-15]
16
[2.71470376e-16]
17
[2.75057039e-15]
18
[4.97358914e-14]
19
[5.63298972e-13]
20
[2.02711878e-10]
21
[2.77407705e-11]
22
[1.27654733e-09]
23
[6.19178477e-10]
24
[1.22727453e-09]
25
[1.05412133e-08]
26
[4.83867935e-07]
27
[1.39655499e-06]
28
[1.05489044e-06]
29
[3.81663789e-06]
30
[4.89464249e-06]
31
[0.00011548]
32
[0.00031512]
33
[0.00012894]
34
[0.00023664]
35
[0.0001182]
36
[0.00046972]
37
[0.00042685]
38
[0.00017074]
39
[0.00041509]
40
[0.00162259]
41
[0.00321515]
42
[0.00123136]
43
[0.00029006]
44
[0.00017282]
45


KeyboardInterrupt: 

In [96]:
# Sanity check of the grid integration: sum pdf * spacing for a
# zero-mean, scale-10 normal on a 1000-point grid over [-10, 10].
x_range = np.linspace(-10, 10, num=1000)
diff = x_range[1] - x_range[0]
tot = 0
for x in x_range:
    tot = tot + norm.pdf(x, 0, 10) * diff
print(tot)

0.6831737563750486


In [29]:
# Load a single checkpoint and evaluate it on a fixed grid of inputs at one
# timestep value (9).
model_path = "exps/point_1d_smallgamma7300/model.pth"
model = ddpm.MLP(input_dim=1)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# map_location keeps a checkpoint saved on a GPU loadable when `device` has
# fallen back to the CPU.
model.load_state_dict(torch.load(model_path, map_location=device))
model.to(device)
model.eval()
scores = []
inputs = torch.linspace(-10, 10, 1000).unsqueeze(1).to(device)
noise_scheduler = ddpm.NoiseScheduler(num_timesteps=1000, beta_schedule="ours")
print(noise_scheduler.betas.shape)
times = torch.ones(len(inputs), device=device)*9
# Inference only: no autograd graph is needed for the forward pass.
with torch.no_grad():
    residuals = model(inputs, times)

MIN GAMMA tensor(3.1610e-06, dtype=torch.float64) LEN 84
torch.Size([84])
