In [28]:
import os

import torch
import pandas as pd
import numpy as np
import torch.nn as nn
from modules import UNet_conditional
from diffusion import *
from utils import *
# Kept after the star imports so this binding wins over any `autocast` they might export.
from torch.amp import autocast

In [29]:
def predict(model,
            sampler,
            test_dl,
            device,
            n_samples=1):
    """
    Sample predictions for every batch of ``test_dl``.

    Args:
        model: network handed to the sampler; it is switched to eval mode here.
        sampler: object exposing ``ddim_sample_loop(model=, y=, cfg_scale=,
            device=, eta=, n=)`` and returning a tensor.
        test_dl: iterable of dict batches with ``'data'`` and ``'settings'``
            entries; both are moved to ``device`` with ``.to(device)``.
        device: device used for the batch tensors and passed to the sampler.
        n_samples: forwarded to the sampler as ``n``; the ground-truth rows of
            each batch are repeated this many times in the returned list.

    Returns:
        ``(x_real, predictions)`` - two lists of rows (plain Python lists).
        Every sampler output is flattened to rows of its last-axis length, so
        both lists are row-wise and can be turned into 2-D arrays.
    """
    x_real = []
    predictions = []

    model.eval()

    with torch.no_grad():
        for data in tqdm(test_dl, desc="Testing loop"):
            vectors = data['data'].to(device)
            settings = data['settings'].to(device)

            pred = sampler.ddim_sample_loop(model=model,
                                            y=settings,
                                            cfg_scale=1,
                                            device=device,
                                            eta=1,
                                            n=n_samples
                                            )

            # `list * n_samples` tiles the whole batch: [v0, v1, ..., v0, v1, ...].
            # NOTE(review): for batch sizes above 1 confirm that the sampler
            # orders its outputs the same way.
            x_real.extend(vectors.cpu().tolist() * n_samples)

            # Store one row per generated sample. A flat vector becomes a single
            # row; (n, length) or (n, 1, length) become n rows. Appending the
            # raw nested list instead would give `predictions` an extra axis
            # that no longer lines up with `x_real`.
            pred = pred.cpu()
            predictions.extend(pred.reshape(-1, pred.shape[-1]).tolist())

    return x_real, predictions
    

In [30]:
def evaluate(model,
             sampler,
             device,
             test_csv_path,
             n_samples=1):
    """
    Run ``predict`` over the test set read from ``test_csv_path``.

    The CSV is read with ``get_data``, wrapped in a ``CustomDataset`` and
    served one item at a time, in file order, by a ``DataLoader``.

    Args:
        model: network forwarded to ``predict``.
        sampler: sampler forwarded to ``predict``.
        device: device forwarded to ``predict``.
        test_csv_path: path handed to ``get_data``.
        n_samples: forwarded to ``predict``.

    Returns:
        The ``(x_real, predictions)`` pair produced by ``predict``.
    """
    features, targets = get_data(test_csv_path)

    # batch_size=1 and shuffle=False: one test item per step, original order.
    loader = DataLoader(CustomDataset(features, targets),
                        batch_size=1,
                        shuffle=False)

    real_rows, sampled_rows = predict(model,
                                      sampler,
                                      loader,
                                      device=device,
                                      n_samples=n_samples)

    # Intensities are returned in normalized form; the original author left a
    # rescale by 3925 disabled at this point.
    return real_rows, sampled_rows

In [31]:
# Load the checkpoint, sample a prediction for every test item and report the error.
path = "models/test/ema_ckpt.pt"
print("Loading: ", path)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)

model = UNet_conditional(length=1024,
                         feat_num=3,
                         device=device).to(device)
# The checkpoint is used directly as a state dict; map_location places it on `device`.
ckpt = torch.load(path, map_location=device, weights_only=True)
model.load_state_dict(ckpt)

# NOTE(review): section_counts=[40] presumably selects 40 of the 1000 noise
# steps for sampling - confirm against SpacedDiffusion.
sampler = SpacedDiffusion(beta_start=1e-4,
                          beta_end=0.02,
                          noise_steps=1000,
                          section_counts=[40],
                          length=1024,
                          device=device,
                          rescale_timesteps=False)

x_real, predictions = evaluate(model,
                      sampler,
                      device,
                      "../data/test_data.csv")
x_real = np.array(x_real)
predictions = np.array(predictions)

# Guard the subtraction below: arrays of different but broadcast-compatible
# shapes would otherwise be combined silently and produce a meaningless number.
assert x_real.shape == predictions.shape, (
    f"shape mismatch: x_real {x_real.shape} vs predictions {predictions.shape}"
)

# This is the squared error summed along axis 1 of each row and then averaged
# over rows, i.e. it is not divided by the row length.
mse = np.mean(np.sum((x_real - predictions) ** 2, axis=1))
print(f"Mean test mse error: {mse}")

Loading:  models/test/ema_ckpt.pt
Using device: cuda


Testing loop: 100%|███████████████████████████████████████████████| 1567/1567 [16:21<00:00,  1.60it/s]

Mean test mse error: 219.9922186028118





In [32]:
# Display the ground-truth array (NumPy abbreviates large arrays with "...").
x_real

array([[0.00061579, 0.        , 0.        , ..., 0.00181409, 0.00107043,
        0.00106402],
       [0.00087057, 0.        , 0.        , ..., 0.00079498, 0.00183476,
        0.00029969],
       [0.00010624, 0.00054038, 0.        , ..., 0.        , 0.00081565,
        0.00106402],
       ...,
       [0.00112534, 0.        , 0.        , ..., 0.00054021, 0.00056087,
        0.00055447],
       [0.00087057, 0.        , 0.        , ..., 0.00155931, 0.00157998,
        0.00029969],
       [0.        , 0.        , 0.        , ..., 0.00079498, 0.0013252 ,
        0.00029969]])

In [33]:
# Display the sampled array.
# NOTE(review): in the outputs of this notebook the predictions sit near 0.5
# where x_real is near 0 and near 1.0 where x_real is 1.0, which looks like
# predictions ~= 0.5 + 0.5 * x_real. Check whether the sampler rescales its
# output to a range that differs from the data normalization - TODO confirm.
predictions

array([[0.49986938, 0.49974379, 0.49977601, ..., 0.50027573, 0.50066578,
        0.50055599],
       [0.49942249, 0.4983426 , 0.50092518, ..., 0.50267631, 0.50151879,
        0.50277191],
       [0.50021368, 0.4997417 , 0.50045103, ..., 0.50025201, 0.50050372,
        0.50036037],
       ...,
       [0.49993038, 0.49990419, 0.50048947, ..., 0.49979636, 0.49980822,
        0.49937314],
       [0.50249672, 0.49838027, 0.49974307, ..., 0.49700677, 0.50511807,
        0.49888307],
       [0.50058937, 0.50008249, 0.50088292, ..., 0.50117469, 0.50165546,
        0.49977022]])

In [36]:
# Serialise every row as one comma-separated string so that a whole vector
# occupies a single CSV field.
x_real_str = [','.join(map(str, row)) for row in x_real]
preds_str = [','.join(map(str, row)) for row in predictions]

df = pd.DataFrame({'x_real': x_real_str, 'predictions': preds_str})

# to_csv does not create missing parent directories; make sure the target
# folder exists so a long sampling run is not lost at the save step.
os.makedirs('results/predictions', exist_ok=True)
df.to_csv('results/predictions/preds_test.csv', index=False)

In [37]:
# Preview the first rows of the table that was written to disk.
df.head()

Unnamed: 0,x_real,predictions
0,"0.0006157915922813118,0.0,0.0,0.00149204465560...","0.49986937642097473,0.499743789434433,0.499776..."
1,"0.0008705681539140642,0.0,0.0,0.00174682121723...","0.49942249059677124,0.4983426034450531,0.50092..."
2,"0.0001062385126715526,0.0005403818213380873,0....","0.5002136826515198,0.49974170327186584,0.50045..."
3,"0.0008705681539140642,3.082872717641294e-05,0....","0.4998929500579834,0.5008013844490051,0.498885..."
4,"0.0003610150597523898,0.0,0.0,0.00123726809397...","0.49972549080848694,0.5000240802764893,0.50007..."


In [41]:
# Rows that contain at least one ground-truth value above 0.5 and, for each
# such row, those values together with the predictions at the same positions.
above_half = x_real > 0.5
indices = np.where(np.any(above_half, axis=1))[0]
filtered_values = [x_real[idx][above_half[idx]] for idx in indices]
filtered_predictions = [predictions[idx][above_half[idx]] for idx in indices]

# Flatten results and take the first 50. np.concatenate raises ValueError on
# an empty list, so fall back to empty arrays when no row qualifies.
if len(indices) > 0:
    flat_filtered_values = np.concatenate(filtered_values)[:50]
    flat_filtered_predictions = np.concatenate(filtered_predictions)[:50]
else:
    flat_filtered_values = np.empty(0)
    flat_filtered_predictions = np.empty(0)

# Print the results side by side
print("Filtered Values (greater than 0.5) from x_real and corresponding Predictions:")
for value, prediction in zip(flat_filtered_values, flat_filtered_predictions):
    print(f"{value:.5f}\t\t\t{prediction:.5f}")

Filtered Values (greater than 0.5) from x_real and corresponding Predictions:
0.51206			0.74380
0.51917			0.74502
0.53315			0.75106
0.50054			0.73277
0.51451			0.74905
0.57714			0.78376
0.62085			0.80965
0.67306			0.83520
0.75889			0.87753
0.82681			0.91355
0.89323			0.94234
0.94675			0.96625
0.98621			0.98562
1.00000			1.00000
1.00000			0.98785
1.00000			0.99698
0.98376			0.97684
0.91490			0.94376
0.86643			0.91961
0.81041			0.89188
0.76278			0.86764
0.68614			0.83288
0.66184			0.81947
0.57601			0.77922
0.53988			0.76191
0.50519			0.78015
0.51203			0.78525
0.53162			0.79211
0.50025			0.77075
0.54682			0.81168
0.58773			0.83272
0.63280			0.85919
0.71023			0.90690
0.77866			0.94408
0.83973			0.97863
0.89197			1.00000
0.93576			1.00000
0.97427			1.00000
0.95795			1.00000
0.98165			1.00000
0.95217			1.00000
0.89197			1.00000
0.85242			0.98056
0.80047			0.94558
0.75615			0.92096
0.69047			0.88026
0.66184			0.86827
0.57448			0.81987
0.53402			0.79769
0.50635			0.74597


In [48]:
# Positions where the prediction is high (> 0.8) although the ground truth is
# low (< 0.1).
indices = np.where((predictions > 0.8) & (x_real < 0.1))

# Indexing with the tuple returned by np.where yields 1-D arrays, so no
# flattening is needed before iterating.
filtered_x_real = x_real[indices]
filtered_predictions = predictions[indices]

# Print the results side by side
print("Filtered x_real (values < 0.1) and corresponding Predictions (values > 0.8):")
for x_val, pred_val in zip(filtered_x_real, filtered_predictions):
    print(f"{x_val:.5f}\t\t\t{pred_val:.5f}")

Filtered x_real (values < 0.1) and corresponding Predictions (values > 0.8):
0.00000			0.92265
0.03061			0.84337
0.03801			0.81315
0.06477			0.80533
0.07688			0.82607
0.08723			0.82684
0.09592			0.87914
