In [1]:
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import torch
import os
from denoising_diffusion_pytorch import Unet1D, GaussianDiffusion1D, Trainer1D, Dataset1D

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Dataset root, relative to the notebook's working directory.
proj_dir = Path("..") / "Data/r77-mini-data-fortnight"
print(proj_dir / "input")

# The dataset root is also used as the data directory.
data_dir = proj_dir

# "input" and "target" trees, each with a "fixed" and a "temporal" sub-directory.
input_dir = data_dir / "input"
target_dir = data_dir / "target"
fixed_input_dir, temporal_input_dir = (input_dir / part for part in ("fixed", "temporal"))
fixed_target_dir, temporal_target_dir = (target_dir / part for part in ("fixed", "temporal"))

../Data/r77-mini-data-fortnight/input


In [3]:
# Path.iterdir() yields entries in arbitrary, filesystem-dependent order. Both
# listings below are indexed by position (files[index], files_index[0]), so they
# are sorted by name to make those lookups repeatable across runs and machines.
# NOTE(review): this assumes the saved indices refer to positions in the
# name-sorted listing - confirm against whatever produced the index file.
files = sorted(temporal_target_dir.iterdir())

index_dir = Path("..") / "Index"
files_index = sorted(index_dir.iterdir())
indices = np.load(files_index[0]).squeeze()  # first index file by name

# First entry of the index (the original note calls this the first hour of data).
index = indices[0]
tt = np.load(files[index]).squeeze()
tt.shape


(144, 70, 100, 3)

In [4]:
# Path.iterdir() yields entries in arbitrary order, and this list is indexed by
# position (files_ti[index]), so sort it by name to make the lookup repeatable.
# NOTE(review): using the same position in this listing and in the target listing
# assumes both directories sort into matching order - confirm the file naming.
files_ti = sorted(temporal_input_dir.iterdir())

In [5]:
NUM_FILES = 360   # how many index entries are stacked into the training array
NUM_LEVELS = 64   # how many leading entries of the transposed last axis are kept

data = []

for j in range(NUM_FILES):
    index = indices[j]

    # Target array: move axis 1 to the end, then keep its first NUM_LEVELS entries.
    tt = np.load(files[index]).squeeze()
    tt = np.transpose(tt, (0, 2, 3, 1))[:, :, :, :NUM_LEVELS]  # e.g. (144, 100, 3, 64)

    # Input array: swap its last two axes and truncate the same way.
    ti = np.load(files_ti[index]).squeeze()
    ti = np.transpose(ti, (0, 2, 1))[:, :, :NUM_LEVELS]  # e.g. (144, 3, 64)

    # Difference between target and input. The new length-1 axis on ti broadcasts
    # against axis 1 of tt, which gives the same values as repeating ti along that
    # axis without materialising the repeated copy.
    diff_data = tt - ti[:, None, :, :]  # e.g. (144, 100, 3, 64)

    # Flatten the two leading axes so each row is one (3, NUM_LEVELS) sample.
    data.append(diff_data.reshape(-1, 3, NUM_LEVELS))

# Every piece is already (n, 3, NUM_LEVELS), so concatenating along axis 0 yields
# the final array directly; no further reshape is needed.
data = np.concatenate(data)



In [6]:
# Shape of the stacked difference samples; the trailing axes are (3, 64) because
# every piece was reshaped to (-1, 3, 64) before concatenation.
data.shape

(5184000, 3, 64)

In [7]:
def normalise(vector):
    """Min-max scale ``vector`` into the range [0, 1].

    The minimum and maximum are taken over the whole array (all axes at once),
    so every element is scaled with the same pair of constants.

    Args:
        vector: Non-empty array-like of numbers.

    Returns:
        Tuple ``(normalised_vector, min_val, max_val)``. ``min_val`` and
        ``max_val`` are the constants that were used, so the scaling can be
        undone with ``normalised_vector * (max_val - min_val) + min_val``.

    Raises:
        ValueError: If ``vector`` is empty (raised by ``np.min``).
    """
    min_val = np.min(vector)
    max_val = np.max(vector)
    value_range = max_val - min_val
    if value_range == 0:
        # Constant input: dividing by the zero range would turn every element
        # into NaN (0 / 0). Divide by 1 instead so the result is all zeros.
        value_range = 1
    normalised_vector = (vector - min_val) / value_range
    return normalised_vector, min_val, max_val

In [8]:
# Keep the scaling constants along with the scaled array: anything expressed in
# the scaled [0, 1] range can only be mapped back to the original units with them
# (x = x_scaled * (data_max - data_min) + data_min).
training_data, data_min, data_max = normalise(data)

In [9]:
# Seed torch's global RNG so repeated runs start from the same random state.
# NOTE(review): some GPU kernels are nondeterministic, so results may still
# differ slightly between runs.
SEED = 0
torch.manual_seed(SEED)

training_seq = torch.from_numpy(training_data)

# Take the channel count and sequence length from the data instead of repeating
# the literals, so the model always matches the (samples, channels, length) array.
_, n_channels, seq_length = training_seq.shape

model = Unet1D(
    dim = 64,
    dim_mults = (1, 2, 4, 8),
    channels = n_channels
)

diffusion = GaussianDiffusion1D(
    model,
    seq_length = seq_length,
    timesteps = 100,
    objective = 'pred_v'
)

trainer = Trainer1D(
    diffusion,
    dataset = training_seq,
    train_batch_size = 10,            # samples drawn per batch
    train_lr = 1e-4,
    train_num_steps = 1000,           # total training steps
    gradient_accumulate_every = 2,    # gradient accumulation steps
    ema_decay = 0.995,                # exponential moving average decay
    amp = True,                       # turn on mixed precision
)
trainer.train()

# after a lot of training

sampled_diff_seq = diffusion.sample(batch_size = 10000)
sampled_diff_seq.shape

dataloader_config = DataLoaderConfiguration(split_batches=True)
sampling loop time step: 100%|██████████| 100/100 [00:00<00:00, 133.44it/s]
sampling loop time step: 100%|██████████| 100/100 [00:00<00:00, 146.90it/s]
sampling loop time step: 100%|██████████| 100/100 [00:00<00:00, 139.98it/s]
loss: 0.0064: 100%|██████████| 1000/1000 [00:57<00:00, 17.37it/s]


training complete


sampling loop time step: 100%|██████████| 100/100 [00:23<00:00,  4.26it/s]


torch.Size([10000, 3, 64])

In [11]:
# Draw a fresh batch of samples from the diffusion model; this replaces any
# batch previously stored under the same name.
sampled_diff_seq = diffusion.sample(batch_size=10_000)

sampling loop time step: 100%|██████████| 100/100 [00:23<00:00,  4.26it/s]


In [12]:
# Move the sampled tensor to host memory and convert it to a NumPy array.
sample = sampled_diff_seq.cpu().numpy()

# Create the output folder if needed. exist_ok=True replaces the separate
# existence check, so there is no gap between checking and creating.
folder = 'Sample_diffs_B'
os.makedirs(folder, exist_ok=True)

file_path = os.path.join(folder, 'sample.npy')
np.save(file_path, sample)