In [2]:
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import torch
import os
from denoising_diffusion_pytorch import Unet1D, GaussianDiffusion1D, Trainer1D, Dataset1D

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Dataset root, given relative to the notebook's working directory.
proj_dir = Path("..") / "Data/r77-mini-data-fortnight"
print(proj_dir / "input")

# Build the paths to the "fixed" and "temporal" subdirectories of both the
# input and the target trees.
data_dir = proj_dir
input_dir = data_dir / "input"
target_dir = data_dir / "target"
fixed_input_dir, temporal_input_dir = input_dir / "fixed", input_dir / "temporal"
fixed_target_dir, temporal_target_dir = target_dir / "fixed", target_dir / "temporal"

../Data/r77-mini-data-fortnight/input


In [4]:
# NOTE(review): Path.iterdir() yields entries in arbitrary order, so the
# positions in `files` and `files_index` are not guaranteed to be stable
# across machines or filesystems. Confirm the saved indices were built
# against this same listing order before relying on them.
index_dir = Path("..", "Index")
files = [entry for entry in temporal_target_dir.iterdir()]
files_index = [entry for entry in index_dir.iterdir()]

# Positions into `files`, read from the first listed index file
# (size-1 axes dropped).
indices = np.load(files_index[0]).squeeze()

# First hour of data
index = indices[0]
tt = np.load(files[index]).squeeze()
tt.shape

(144, 70, 100, 3)

In [5]:

def _load_training_rows(path):
    """Load one target file and return it as rows of shape (3, 64).

    The array is squeezed, its axes are reordered to (0, 2, 3, 1) so that the
    original axis 1 becomes the last axis, only the first 64 entries of that
    axis are kept, and all leading axes are flattened.
    """
    arr = np.load(path).squeeze()
    # A file displayed earlier had shape (144, 70, 100, 3); the cut to 64
    # presumably matches the power-of-two length the model config asks for
    # -- TODO confirm.
    return arr.transpose(0, 2, 3, 1)[..., :64].reshape(-1, 3, 64)


# One stacked entry per indexed file, for the first 360 positions in `indices`.
tt_all = np.stack([_load_training_rows(files[indices[i]]) for i in range(360)])

# Collapse the per-file axis so every row is one (3, 64) training example.
data = tt_all.reshape(-1, 3, 64)
data.shape

(5184000, 3, 64)

In [6]:
# Build a 1-D diffusion model, train it on `data`, then draw samples from it.

# change these to try different configurations
seed = 0 # RNG seed, set before model init, training and sampling
n_data = data.shape[0] # number of data points 
batch_size = 10 # training batch size for one iteration
dim_data = 64 # dimension of each vector (this has to be 2^n or the Unet will need changing)
n_channels = 3 # number of channels
n_samples = 10000 # number of sequences to generate after training

# Seed torch's global RNG so weight initialisation, noise draws and sampling
# are repeatable between runs. This is best-effort: some GPU kernels and mixed
# precision can still introduce run-to-run differences.
torch.manual_seed(seed)

# NOTE(review): `dim_data` is used both as Unet1D's `dim` and as the diffusion
# `seq_length`. In the library README these are separate settings (network
# width vs. sample length) -- confirm the coupling is intended.
model = Unet1D(
    dim = dim_data,
    dim_mults = (1, 2, 4, 8),
    channels = n_channels
)


diffusion = GaussianDiffusion1D(
    model,
    seq_length = dim_data,
    timesteps = 100,
    objective = 'pred_v'
)

# NOTE(review): `data` is passed through without any rescaling here; the
# library README states features are normalized from 0 to 1 -- confirm the
# value range of `data` is appropriate.
training_seq =  torch.from_numpy(data)

trainer = Trainer1D(
    diffusion,
    dataset = training_seq,
    train_batch_size = batch_size,
    train_lr = 1e-4,
    train_num_steps = 100000,         # total training steps
    gradient_accumulate_every = 2,    # gradient accumulation steps
    ema_decay = 0.995,                # exponential moving average decay
    amp = True,                       # turn on mixed precision
)
trainer.train()

# after a lot of training

sampled_seq = diffusion.sample(batch_size = n_samples)
sampled_seq.shape 

dataloader_config = DataLoaderConfiguration(split_batches=True)
sampling loop time step: 100%|██████████| 100/100 [00:00<00:00, 123.85it/s]
sampling loop time step: 100%|██████████| 100/100 [00:00<00:00, 145.39it/s]
sampling loop time step: 100%|██████████| 100/100 [00:00<00:00, 134.81it/s]
sampling loop time step: 100%|██████████| 100/100 [00:00<00:00, 144.30it/s]
sampling loop time step: 100%|██████████| 100/100 [00:00<00:00, 143.66it/s]
sampling loop time step: 100%|██████████| 100/100 [00:00<00:00, 146.84it/s]
sampling loop time step: 100%|██████████| 100/100 [00:00<00:00, 143.01it/s]
sampling loop time step: 100%|██████████| 100/100 [00:00<00:00, 142.99it/s]
sampling loop time step: 100%|██████████| 100/100 [00:00<00:00, 142.56it/s]
sampling loop time step: 100%|██████████| 100/100 [00:00<00:00, 143.80it/s]
sampling loop time step: 100%|██████████| 100/100 [00:00<00:00, 143.93it/s]
sampling loop time step: 100%|██████████| 100/100 [00:00<00:00, 143.56it/s]
sampling loop time step:

training complete


sampling loop time step: 100%|██████████| 100/100 [00:23<00:00,  4.25it/s]


torch.Size([10000, 3, 64])

In [7]:
# Copy the generated samples to host memory as a NumPy array so they can be
# written with np.save.
sample = sampled_seq.cpu().numpy()

# exist_ok=True creates the output folder only when it is missing, without a
# separate existence check that could race with another process.
folder = 'Samples_2'
os.makedirs(folder, exist_ok=True)

# Note: np.save overwrites any existing file at this path.
file_path = os.path.join(folder, 'sample.npy')
np.save(file_path, sample)