In [1]:
!pip install scipy==1.11.1

Collecting scipy==1.11.1
  Downloading scipy-1.11.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (36.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m36.3/36.3 MB[0m [31m12.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: scipy
  Attempting uninstall: scipy
    Found existing installation: scipy 1.11.4
    Uninstalling scipy-1.11.4:
      Successfully uninstalled scipy-1.11.4
Successfully installed scipy-1.11.1


In [2]:
from keras.layers import Input, Dense, Conv2D, Flatten, Reshape, Conv2DTranspose
from keras.models import Model
from keras.datasets import mnist
from keras.utils import to_categorical
from keras import backend as K
from keras.callbacks import Callback
import tensorflow as tf

from scipy.stats import entropy
from scipy.linalg import sqrtm
from scipy.stats import pearsonr

from keras.applications.inception_v3 import InceptionV3

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

from sklearn.preprocessing import LabelEncoder

from tensorflow.keras.models import load_model

import cv2

from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from scipy.stats import entropy
from scipy.spatial.distance import cosine

import tensorflow as tf
from tensorflow.keras.applications.inception_v3 import InceptionV3
from scipy.linalg import sqrtm

from tqdm import tqdm


In [3]:
import random

# Single seed shared by every random number generator seeded in this cell
seed = 42

# Python's built-in random library
random.seed(seed)

# Scikit-learn
# NOTE(review): the return value of check_random_state is discarded here, so this
# call does not appear to seed anything global — confirm whether the resulting
# generator was meant to be kept and passed to scikit-learn functions.
from sklearn.utils import check_random_state
check_random_state(seed)

# NumPy's legacy global generator and TensorFlow's global generator
np.random.seed(seed)
tf.random.set_seed(seed)
# NOTE(review): presumably this Keras helper already covers the seeds set above — confirm
tf.keras.utils.set_random_seed(seed)

In [4]:
# Height, width and channel count of a single input image
img_rows, img_cols, channels = 168, 168, 1

# Input image dimensions
img_shape = (img_rows, img_cols, channels)

In [5]:
# Set path for data source
# Mounts Google Drive in the Colab runtime and makes the dataset folder the
# working directory, so the relative paths used in later cells resolve against it.
import os
from google.colab import drive
drive.mount('/content/gdrive')
os.chdir("/content/gdrive/My Drive/Load Diffusion/dataset/")

Mounted at /content/gdrive


In [6]:
# Folder (relative to the current working directory) holding the saved files
load_path = "./preprocessed_data_168x168_2years"

# Saved NumPy arrays, loaded in the order the names are unpacked
X_train, X_test, y_train, y_test = (
    np.load(os.path.join(load_path, npy_name))
    for npy_name in ('X_train.npy', 'X_test.npy', 'y_train.npy', 'y_test.npy')
)

# Saved Pandas DataFrames.
# read_pickle unpickles the file, so only load files from a trusted source.
train_data, test_data, metadata, metadata_original = (
    pd.read_pickle(os.path.join(load_path, pkl_name))
    for pkl_name in ('train_data.pkl', 'test_data.pkl', 'metadata.pkl', 'metadata_original.pkl')
)

In [7]:
def merge_dummified_columns(df, dummified_columns, new_column_name):
    """
    Collapse a group of dummy (one-hot) columns into one label column.

    The DataFrame passed in is modified in place: the new column is added to it
    and the dummy columns are removed from it. The same object is returned.

    Parameters:
        df (pd.DataFrame): The DataFrame containing the dummified columns.
        dummified_columns (list): Names of the dummy columns to merge.
        new_column_name (str): Name of the label column to create.

    Returns:
        pd.DataFrame: `df` itself, with `new_column_name` added and the
        dummy columns dropped.

    Note:
        The label is the name of the column holding the row-wise maximum, so a
        row whose dummy values are all equal (e.g. all 0) gets the first name
        in `dummified_columns`.
    """
    # Row by row, pick the name of the dummy column with the largest value
    winning_labels = df[dummified_columns].idxmax(axis=1)
    df[new_column_name] = winning_labels

    # The dummy columns are redundant now; remove them from the same frame
    df.drop(columns=dummified_columns, inplace=True)

    return df

# Metadata rows for the test meters, in the same order as the columns of test_data
test_metadata = metadata_original.loc[test_data.columns]

# Dummy-column groups that are folded back into a single categorical column each
meter_dummy_columns = ['chilledwater', 'electricity', 'gas', 'hotwater', 'steam']
building_dummy_columns = ['Education', 'Entertainment/public assembly', 'Lodging/residential', 'Office', 'Public services']

test_metadata = merge_dummified_columns(
    test_metadata,
    dummified_columns=meter_dummy_columns,
    new_column_name='meter_type',
)
test_metadata = merge_dummified_columns(
    test_metadata,
    dummified_columns=building_dummy_columns,
    new_column_name='building_type',
)

test_metadata


Unnamed: 0,lat,lng,year,meter_type,building_type
electricity_Lamb_office_Caitlin_2016,51.497838,-3.186246,2016,electricity,Office
electricity_Rat_public_Kelle_2016,38.903504,-77.005349,2016,electricity,Public services
electricity_Rat_assembly_Ezequiel_2016,38.903504,-77.005349,2016,electricity,Entertainment/public assembly
electricity_Bear_assembly_Roxy_2016,37.871903,-122.260729,2016,electricity,Entertainment/public assembly
gas_Shrew_office_Rose_2017,51.499840,-0.124663,2017,gas,Office
...,...,...,...,...,...
steam_Moose_education_Omar_2016,45.421500,-75.697200,2016,steam,Education
electricity_Rat_public_Chrissy_2017,38.903504,-77.005349,2017,electricity,Public services
electricity_Hog_office_Merilyn_2016,44.978782,-93.255398,2016,electricity,Office
steam_Bull_assembly_Amalia_2016,30.267200,-97.743100,2016,steam,Entertainment/public assembly


In [8]:
def calculate_rmse(true_data, predicted_data):
    """Return the root-mean-squared error between true and predicted values."""
    mse = mean_squared_error(true_data, predicted_data)
    return np.sqrt(mse)

def calculate_mae(true_data, predicted_data):
    """Return the mean absolute error between true and predicted values."""
    mae = mean_absolute_error(true_data, predicted_data)
    return mae

def calculate_r2(true_data, predicted_data):
    """Return the R^2 (coefficient of determination) of the predictions."""
    r2 = r2_score(true_data, predicted_data)
    return r2

def calculate_kl_divergence(true_data, predicted_data, epsilon=0.00000001):
    """
    Kullback-Leibler divergence KL(true || predicted) of two non-negative series.

    Both inputs are shifted by `epsilon` before the divergence is computed so
    that zero entries do not produce infinities or NaNs. `scipy.stats.entropy`
    normalises each input to sum to 1, so the series are compared as
    distributions over their positions rather than by absolute level.

    Parameters:
        true_data (array-like): Reference values (non-negative).
        predicted_data (array-like): Values compared against the reference;
            same shape as `true_data`.
        epsilon (float): Additive smoothing term. The default reproduces the
            previously hard-coded constant.

    Returns:
        float: The KL divergence in nats.
    """
    # np.asarray lets plain Python lists through; `list + float` would raise
    p = np.asarray(true_data, dtype=float) + epsilon
    q = np.asarray(predicted_data, dtype=float) + epsilon
    return entropy(p, q)

def calculate_cosine_similarity(true_data, predicted_data):
    """Return the cosine similarity (1 minus SciPy's cosine distance) of two vectors."""
    cosine_distance = cosine(true_data, predicted_data)
    return 1 - cosine_distance

In [9]:
def evaluate_model(test_data, generated_series):
    """
    Compute per-column similarity metrics between real and generated series.

    Column i of `test_data` is compared with column i of `generated_series`.

    Parameters:
        test_data (np.ndarray or pd.DataFrame): Real series, one per column.
        generated_series (np.ndarray or pd.DataFrame): Generated series with the
            same layout as `test_data`. The two arguments need not be of the
            same container type.

    Returns:
        dict: Maps each metric name ('rmse', 'mae', 'r2', 'kl_divergence',
        'cosine_similarity', 'pearson_correlation') to a list holding one value
        per column, in column order.
    """
    # Convert each argument on its own. The previous try/bare-except switched
    # both arguments to `.values` together, which hid unrelated errors and
    # failed when one argument was an ndarray and the other a DataFrame.
    true_matrix = np.asarray(test_data)
    predicted_matrix = np.asarray(generated_series)

    metrics_result = {
        'rmse': [],
        'mae': [],
        'r2': [],
        'kl_divergence': [],
        'cosine_similarity': [],
        'pearson_correlation': []
    }

    for i in range(true_matrix.shape[1]):
        true_data = true_matrix[:, i]
        predicted_data = predicted_matrix[:, i]

        # Calculate Pearson correlation (the p-value is not used)
        pearson_corr, _ = pearsonr(true_data, predicted_data)

        metrics_result['rmse'].append(calculate_rmse(true_data, predicted_data))
        metrics_result['mae'].append(calculate_mae(true_data, predicted_data))
        metrics_result['r2'].append(calculate_r2(true_data, predicted_data))
        metrics_result['kl_divergence'].append(calculate_kl_divergence(true_data, predicted_data))
        metrics_result['cosine_similarity'].append(calculate_cosine_similarity(true_data, predicted_data))
        metrics_result['pearson_correlation'].append(pearson_corr)

    return metrics_result

# Function to calculate FID between two sets of images
def calculate_fid(model, images1, images2, eps=1e-6):
    """
    Frechet distance between the feature statistics of two image sets.

    Both sets are passed through `model.predict`; the resulting activations are
    summarised by their mean vector and covariance matrix, and the distance is
    ||mu1 - mu2||^2 + Tr(S1 + S2 - 2 * sqrt(S1 @ S2)).

    Parameters:
        model: Object exposing `predict(images)` that returns a 2-D array of
            shape (n_images, n_features).
        images1: First image set, in whatever format `model.predict` accepts.
        images2: Second image set, same format.
        eps (float): Diagonal offset added to both covariances, used only when
            the matrix square root of their product comes back non-finite
            (near-singular product).

    Returns:
        float: The FID score (lower means more similar statistics).
    """
    # Calculate activations
    act1 = np.asarray(model.predict(images1))
    act2 = np.asarray(model.predict(images2))

    # Calculate mean and covariance statistics.
    # np.cov returns a 0-d array for a single feature; atleast_2d keeps the
    # matrix operations below valid in that case.
    mu1, sigma1 = act1.mean(axis=0), np.atleast_2d(np.cov(act1, rowvar=False))
    mu2, sigma2 = act2.mean(axis=0), np.atleast_2d(np.cov(act2, rowvar=False))

    # Calculate sum squared difference between means
    ssdiff = np.sum((mu1 - mu2)**2.0)

    # Calculate sqrt of product between covariances
    covmean = sqrtm(sigma1.dot(sigma2))

    # A near-singular product can make sqrtm return inf/NaN entries; retry with
    # a small diagonal regularisation instead of propagating them into the score
    if not np.isfinite(covmean).all():
        offset = np.eye(sigma1.shape[0]) * eps
        covmean = sqrtm((sigma1 + offset).dot(sigma2 + offset))

    # Check and correct imaginary numbers from sqrt
    if np.iscomplexobj(covmean):
        covmean = covmean.real

    # Calculate the FID score
    fid = ssdiff + np.trace(sigma1 + sigma2 - 2.0 * covmean)
    return fid

In [10]:
# Count of non-null values in each remaining column, per distinct latitude of the test meters
test_metadata.pivot_table(index=['lat'],aggfunc='count')

Unnamed: 0_level_0,building_type,lng,meter_type,year
lat,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
28.517689,9,9,9,9
30.2672,143,143,143,143
33.424425,144,144,144,144
37.406733,115,115,115,115
37.871903,27,27,27,27
38.903504,135,135,135,135
40.35,24,24,24,24
44.978782,135,135,135,135
45.4215,18,18,18,18
51.497838,90,90,90,90


In [11]:
# Count of non-null values in each remaining column, per building type of the test meters
test_metadata.pivot_table(index=['building_type'],aggfunc='count')

Unnamed: 0_level_0,lat,lng,meter_type,year
building_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Education,404,404,404,404
Entertainment/public assembly,115,115,115,115
Lodging/residential,94,94,94,94
Office,212,212,212,212
Public services,76,76,76,76


In [12]:
# Count of non-null values in each remaining column, per meter type of the test meters
test_metadata.pivot_table(index=['meter_type'],aggfunc='count')

Unnamed: 0_level_0,building_type,lat,lng,year
meter_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
chilledwater,199,199,199,199
electricity,447,447,447,447
gas,75,75,75,75
hotwater,68,68,68,68
steam,112,112,112,112


In [13]:
# Metadata columns whose distinct values define the groups that metrics are aggregated over
agg_col_list = ['lat', 'building_type', 'meter_type']

# Initialize an empty list; the evaluation loop appends one single-row
# metrics DataFrame per (model, group) to it
df_metrics = []

In [14]:
# Affine rescale x -> (x + 1) / 2, which maps the interval [-1, 1] onto [0, 1].
# NOTE(review): assumes test_data was stored in [-1, 1] — confirm against the preprocessing.
# This rebinds test_data, so running the cell a second time rescales it again.
test_data = (test_data+1)/2

In [15]:
# (rows, columns) of the real test frame; each column is one test meter
test_data.shape

(8736, 901)

In [16]:
# Per-column summary statistics, used to check the value range after the rescale above
test_data.describe()

Unnamed: 0,electricity_Lamb_office_Caitlin_2016,electricity_Rat_public_Kelle_2016,electricity_Rat_assembly_Ezequiel_2016,electricity_Bear_assembly_Roxy_2016,gas_Shrew_office_Rose_2017,electricity_Lamb_assembly_Cherie_2017,electricity_Rat_education_Nellie_2017,chilledwater_Bull_education_Miquel_2017,hotwater_Fox_lodging_Stephen_2016,electricity_Rat_assembly_Damaris_2017,...,chilledwater_Moose_education_Abbie_2017,electricity_Eagle_education_Teresa_2017,chilledwater_Hog_public_Crystal_2016,steam_Hog_office_Garrett_2016,electricity_Lamb_assembly_Elinor_2017,steam_Moose_education_Omar_2016,electricity_Rat_public_Chrissy_2017,electricity_Hog_office_Merilyn_2016,steam_Bull_assembly_Amalia_2016,electricity_Rat_office_Jeannie_2017
count,8736.0,8736.0,8736.0,8736.0,8736.0,8736.0,8736.0,8736.0,8736.0,8736.0,...,8736.0,8736.0,8736.0,8736.0,8736.0,8736.0,8736.0,8736.0,8736.0,8736.0
mean,0.188451,0.348453,0.258079,0.173391,0.091892,0.25077,0.337465,0.39597,0.162933,0.294121,...,0.170633,0.428276,0.243286,0.215425,0.137308,0.291646,0.33038,0.420632,0.373292,0.401658
std,0.237092,0.173016,0.202699,0.193813,0.207332,0.306181,0.210603,0.24746,0.256923,0.231476,...,0.21495,0.171506,0.352709,0.244234,0.209695,0.28092,0.155869,0.170919,0.160209,0.183267
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.043682,0.206243,0.100734,0.057022,0.0,0.0,0.177187,0.180456,0.0,0.118665,...,0.023933,0.306398,0.0,0.005794,0.0,0.023153,0.209051,0.258872,0.265928,0.281406
50%,0.069787,0.337544,0.201469,0.092661,0.0,0.000353,0.247577,0.38627,0.076923,0.2089,...,0.0574,0.418361,0.0,0.132097,0.000377,0.244535,0.301679,0.44764,0.362881,0.374426
75%,0.21957,0.438935,0.355357,0.206706,0.0,0.510895,0.44603,0.607443,0.153846,0.42398,...,0.293706,0.533525,0.597015,0.348783,0.24707,0.565617,0.437118,0.552548,0.462604,0.507801
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [17]:
# Inspect the saved generations of the CVAE model: array shape and value range.
model_name = 'CVAE_mse'
generation_path = "./generation/" + model_name + '_168x168'

# NOTE(review): os.listdir returns entries in arbitrary order, so a positional
# index does not reliably select the same file. This cell takes index 2, while
# the evaluation loop further below takes index 0 for this model — confirm
# which file is the intended one.
file_name = os.listdir(generation_path)[2]

# Load the dataset
generated_images = np.load(os.path.join(generation_path, file_name))

print(generated_images.shape)
print(generated_images.max())
print(generated_images.min())

(901, 168, 168, 1)
0.86604565
0.001584873


In [18]:
# Inspect the saved generations of the CGAN model: array shape and value range.
model_name = 'CGAN'
generation_path = "./generation/" + model_name + '_168x168'

# NOTE(review): os.listdir returns entries in arbitrary order; index 0 is only
# well-defined if the folder holds a single file — confirm.
file_name = os.listdir(generation_path)[0]

# Load the dataset
generated_images = np.load(os.path.join(generation_path, file_name))

print(generated_images.shape)
print(generated_images.max())
print(generated_images.min())

(901, 168, 168, 1)
0.99997425
0.0


In [19]:
# Inspect the saved generations of the diffusion model: array shape and value range.
model_name = 'diffusion_model'
generation_path = "./generation/" + model_name + '_168x168'

# NOTE(review): os.listdir returns entries in arbitrary order; index 0 is only
# well-defined if the folder holds a single file — confirm.
file_name = os.listdir(generation_path)[0]

# Load the dataset
generated_images = np.load(os.path.join(generation_path, file_name))

print(generated_images.shape)
print(generated_images.max())
print(generated_images.min())

(901, 168, 168, 1)
1.4020152
-0.16470277


In [20]:
# Per-column maxima of the real series, for comparison with the generated value ranges printed above
test_data.max()

electricity_Lamb_office_Caitlin_2016      1.0
electricity_Rat_public_Kelle_2016         1.0
electricity_Rat_assembly_Ezequiel_2016    1.0
electricity_Bear_assembly_Roxy_2016       1.0
gas_Shrew_office_Rose_2017                1.0
                                         ... 
steam_Moose_education_Omar_2016           1.0
electricity_Rat_public_Chrissy_2017       1.0
electricity_Hog_office_Merilyn_2016       1.0
steam_Bull_assembly_Amalia_2016           1.0
electricity_Rat_office_Jeannie_2017       1.0
Length: 901, dtype: float64

In [21]:
# ---------------------------------------------------------------------------
# Loop-invariant setup. These steps do not depend on the generative model, so
# they run once here instead of once per model inside the loop.
# ---------------------------------------------------------------------------

# Min-max scale every real series (column) to [0, 1]. Applying this to data
# that is already min-max scaled leaves it unchanged, so doing it once is
# equivalent to repeating it on every model iteration.
test_data = (test_data-test_data.min())/(test_data.max()-test_data.min())

# Mean of each consecutive, non-overlapping block of 24 rows.
# Not used by the metrics computed in this cell.
test_data_resampled = test_data.rolling(24).mean()[23::24]

# Prepare the Inception v3 model (pooled features, no classification head)
model = InceptionV3(include_top=False, pooling='avg', weights="imagenet", input_shape=(99,99,3))

# Convert the real images to RGB and resize them to the Inception input size.
# NOTE(review): X_test is used exactly as loaded, whereas test_data was
# rescaled with (x + 1) / 2 in an earlier cell — confirm that X_test and the
# generated images share the same value range, otherwise the FID partly
# measures that offset.
X_test_rgb = tf.image.grayscale_to_rgb(tf.convert_to_tensor(X_test))
X_test_upscaled = tf.image.resize(X_test_rgb, [99, 99])

for model_name in ['CVAE_mse','CGAN','diffusion_model']:

    generation_path = "./generation/" + model_name + '_168x168'

    # NOTE(review): os.listdir returns entries in arbitrary order, so index 0
    # is only well-defined when the folder holds a single file — confirm.
    file_name = os.listdir(generation_path)[0]

    # Load the dataset
    generated_images = np.load(os.path.join(generation_path, file_name))

    # Set the target shape: 52 x 168 = 8736 values per image, one per row of test_data
    target_shape = (generated_images.shape[0], 52, 168, 1)

    # Initialize an array to hold the resized images
    resized_images = np.zeros(target_shape)

    # Loop through each image to resize it
    for i in range(generated_images.shape[0]):
        resized_image = cv2.resize(generated_images[i], (168, 52))  # Note the dimensions are (width, height)
        resized_images[i] = np.expand_dims(resized_image, axis=-1)  # Add back the last dimension

    # Flatten each image to a series and transpose so that columns are meters
    flattened_images = resized_images.reshape(generated_images.shape[0], -1).T

    generated_series = pd.DataFrame(flattened_images)
    generated_series.columns = test_data.columns

    # Min-max scale every generated series (column) to [0, 1]
    generated_series = (generated_series-generated_series.min())/(generated_series.max()-generated_series.min())

    # Mean of each consecutive, non-overlapping block of 24 rows.
    # Not used by the metrics computed in this cell.
    generated_series_resampled = generated_series.rolling(24).mean()[23::24]

    # Convert the generated images to RGB and resize them to the Inception input size
    generated_images_rgb = tf.image.grayscale_to_rgb(tf.convert_to_tensor(generated_images))
    generated_images_upscaled = tf.image.resize(generated_images_rgb, [99, 99])

    for agg_col in agg_col_list:

        agg_items = test_metadata[agg_col].unique()
        for agg_item in agg_items:
            print(agg_item)

            # Boolean mask over test meters in this group. Rows of test_metadata,
            # columns of the series frames and the image arrays are indexed in
            # the same meter order, so one mask selects all of them.
            group_mask = (test_metadata[agg_col]==agg_item).values
            filtered_meters = test_metadata.index[group_mask]

            test_data_subset = test_data[filtered_meters]
            generated_series_subset = generated_series[filtered_meters]

            X_test_upscaled_subset = X_test_upscaled[group_mask]
            generated_images_upscaled_subset = generated_images_upscaled[group_mask]

            evaluation_results = evaluate_model(test_data_subset, generated_series_subset)
            df_evaluation_results = pd.DataFrame(evaluation_results)

            # Calculate FID.
            # NOTE(review): some groups contain only a handful of meters, which
            # makes the covariance estimates (and thus the FID) unreliable.
            fid_score = calculate_fid(model, X_test_upscaled_subset, generated_images_upscaled_subset)

            # Average the per-meter metrics over the group and attach identifiers
            metrics = df_evaluation_results.mean()
            metrics['fid_score'] = fid_score
            metrics['model'] = model_name
            metrics['agg_col'] = agg_col
            metrics['agg_item'] = agg_item
            metrics = metrics.to_frame().T
            print(metrics.to_dict())

            # Append the metrics to df_metrics
            df_metrics.append(metrics)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
51.49783804108846
{'rmse': {0: 0.28107568269281613}, 'mae': {0: 0.22951228222241957}, 'r2': {0: -1.2914098367938216}, 'kl_divergence': {0: 0.8051318919814264}, 'cosine_similarity': {0: 0.6654347914513583}, 'pearson_correlation': {0: 0.4511816970589398}, 'fid_score': {0: 2239.6571797366596}, 'model': {0: 'CVAE_mse'}, 'agg_col': {0: 'lat'}, 'agg_item': {0: 51.49783804108846}}
38.9035039800032
{'rmse': {0: 0.24360670144451485}, 'mae': {0: 0.19422586761006863}, 'r2': {0: -0.8464013610939257}, 'kl_divergence': {0: 0.2859243179565129}, 'cosine_similarity': {0: 0.8128212306825693}, 'pearson_correlation': {0: 0.32023428442476515}, 'fid_score': {0: 1130.7965045206238}, 'model': {0: 'CVAE_mse'}, 'agg_col': {0: 'lat'}, 'agg_item': {0: 38.9035039800032}}
37.87190340000004
{'rmse': {0: 0.22446623159094295}, 'mae': {0: 0.18143733190147182}, 'r2': {0

In [22]:
# Combine the single-row frames collected by the evaluation loop into one table.
# The name df_metrics is rebound from a list to a DataFrame here; the guard makes
# re-running this cell a no-op instead of an error from concatenating a DataFrame.
if isinstance(df_metrics, list):
    df_metrics = pd.concat(df_metrics, ignore_index=True)
df_metrics

Unnamed: 0,rmse,mae,r2,kl_divergence,cosine_similarity,pearson_correlation,fid_score,model,agg_col,agg_item
0,0.281076,0.229512,-1.29141,0.805132,0.665435,0.451182,2239.65718,CVAE_mse,lat,51.497838
1,0.243607,0.194226,-0.846401,0.285924,0.812821,0.320234,1130.796505,CVAE_mse,lat,38.903504
2,0.224466,0.181437,-0.484201,0.225437,0.855348,0.513794,1423.362447,CVAE_mse,lat,37.871903
3,0.311083,0.263765,-0.780368,1.005591,0.618827,0.271676,4184.53405,CVAE_mse,lat,51.49984
4,0.238333,0.194005,-0.799944,0.290366,0.822118,0.344905,1135.184288,CVAE_mse,lat,30.2672
...,...,...,...,...,...,...,...,...,...,...
64,0.229682,0.18662,-0.794682,0.235104,0.8475,0.386893,612.868489,diffusion_model,meter_type,electricity
65,0.319737,0.267946,-1.601893,0.923381,0.631253,0.379522,1674.711799,diffusion_model,meter_type,gas
66,0.240481,0.197198,-8.304,0.424493,0.820654,0.582405,669.346417,diffusion_model,meter_type,chilledwater
67,0.296477,0.248466,-1.640299,0.709824,0.684645,0.253114,1275.897124,diffusion_model,meter_type,hotwater


In [23]:
# Mean of each listed metric per model (pivot_table's default aggregation), taken over all groups
df_metrics.pivot_table(index='model',values=['fid_score','pearson_correlation','mae','rmse','kl_divergence'])

Unnamed: 0_level_0,fid_score,kl_divergence,mae,pearson_correlation,rmse
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
CGAN,1557.488194,0.978266,0.209703,0.330364,0.270908
CVAE_mse,1537.83436,0.50503,0.215926,0.343358,0.263976
diffusion_model,1232.119437,0.449701,0.208737,0.41292,0.254084


In [24]:
# Output folder for evaluation results (the name generation_path is reused from
# the cells above and now points at the evaluations folder)
generation_path = "./evaluations"

# Create the folder if needed; to_csv does not create missing directories
os.makedirs(generation_path, exist_ok=True)
df_metrics.to_csv(os.path.join(generation_path, 'updated_All_model_metrics_agg_by_meta_168x168.csv'),index=False)