In [1]:
model_name = 'CGAN'

In [2]:
!pip install scipy==1.11.1



In [3]:
from keras.layers import Input, Dense, Conv2D, Flatten, Reshape, Conv2DTranspose
from keras.models import Model
from keras.datasets import mnist
from keras.utils import to_categorical
from keras import backend as K
from keras.callbacks import Callback
import tensorflow as tf

from scipy.stats import entropy
from scipy.linalg import sqrtm
from scipy.stats import pearsonr

from keras.applications.inception_v3 import InceptionV3

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

from sklearn.preprocessing import LabelEncoder

from tensorflow.keras.models import load_model

import cv2

from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from scipy.stats import entropy
from scipy.spatial.distance import cosine

import tensorflow as tf
from tensorflow.keras.applications.inception_v3 import InceptionV3
from scipy.linalg import sqrtm

In [4]:
import random

seed = 42

# Python's built-in random library
random.seed(seed)

# Scikit-learn (only affects some parts of scikit-learn)
from sklearn.utils import check_random_state
check_random_state(seed)

np.random.seed(seed)
tf.random.set_seed(seed)
tf.keras.utils.set_random_seed(seed)

In [5]:
img_rows = 168
img_cols = 168
channels = 1

# Input image dimensions
img_shape = (img_rows, img_cols, channels)

In [6]:
# Set path for data source
import os
from google.colab import drive
drive.mount('/content/gdrive')
os.chdir("/content/gdrive/My Drive/Load Diffusion/dataset/")

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [7]:
# Set the path to the folder containing saved files
load_path = "./preprocessed_data_168x168_2years"

# Load saved NumPy arrays
X_train = np.load(os.path.join(load_path, 'X_train.npy'))
X_test = np.load(os.path.join(load_path, 'X_test.npy'))
y_train = np.load(os.path.join(load_path, 'y_train.npy'))
y_test = np.load(os.path.join(load_path, 'y_test.npy'))

# Load saved Pandas DataFrames
train_data = pd.read_pickle(os.path.join(load_path, 'train_data.pkl'))
test_data = pd.read_pickle(os.path.join(load_path, 'test_data.pkl'))
metadata = pd.read_pickle(os.path.join(load_path, 'metadata.pkl'))
metadata_original = pd.read_pickle(os.path.join(load_path, 'metadata_original.pkl'))

In [8]:
def merge_dummified_columns(df, dummified_columns, new_column_name):
    """
    Merges dummified columns into a single column in a DataFrame.

    Parameters:
        df (pd.DataFrame): The DataFrame containing the dummified columns.
        dummified_columns (list): A list of the names of the dummified columns.
        new_column_name (str): The name of the new column to create.

    Returns:
        pd.DataFrame: The DataFrame with the dummified columns merged into a single new column.
    """
    # Create a new column containing the column name where the value is 1 for each row
    df[new_column_name] = df[dummified_columns].idxmax(axis=1)

    # Drop the dummified columns
    df.drop(dummified_columns, axis=1, inplace=True)

    return df

test_metadata = metadata_original.loc[test_data.columns]

test_metadata = merge_dummified_columns(test_metadata,
                     dummified_columns=['chilledwater', 'electricity', 'gas', 'hotwater', 'steam'],
                     new_column_name='meter_type')
test_metadata = merge_dummified_columns(test_metadata,
                     dummified_columns=['Education', 'Entertainment/public assembly', 'Lodging/residential', 'Office', 'Public services'],
                     new_column_name='building_type')

test_metadata


Unnamed: 0,lat,lng,year,meter_type,building_type
electricity_Lamb_office_Caitlin_2016,51.497838,-3.186246,2016,electricity,Office
electricity_Rat_public_Kelle_2016,38.903504,-77.005349,2016,electricity,Public services
electricity_Rat_assembly_Ezequiel_2016,38.903504,-77.005349,2016,electricity,Entertainment/public assembly
electricity_Bear_assembly_Roxy_2016,37.871903,-122.260729,2016,electricity,Entertainment/public assembly
gas_Shrew_office_Rose_2017,51.499840,-0.124663,2017,gas,Office
...,...,...,...,...,...
steam_Moose_education_Omar_2016,45.421500,-75.697200,2016,steam,Education
electricity_Rat_public_Chrissy_2017,38.903504,-77.005349,2017,electricity,Public services
electricity_Hog_office_Merilyn_2016,44.978782,-93.255398,2016,electricity,Office
steam_Bull_assembly_Amalia_2016,30.267200,-97.743100,2016,steam,Entertainment/public assembly


In [9]:
def calculate_rmse(true_data, predicted_data):
    return np.sqrt(mean_squared_error(true_data, predicted_data))

def calculate_mae(true_data, predicted_data):
    return mean_absolute_error(true_data, predicted_data)

def calculate_r2(true_data, predicted_data):
    return r2_score(true_data, predicted_data)

def calculate_kl_divergence(true_data, predicted_data):
    return entropy(true_data+0.00000001, predicted_data+0.00000001)

def calculate_cosine_similarity(true_data, predicted_data):
    return 1 - cosine(true_data, predicted_data)

In [10]:
def evaluate_model(test_data, generated_series):
    metrics_result = {
        'rmse': [],
        'mae': [],
        'r2': [],
        'kl_divergence': [],
        'cosine_similarity': [],
        'pearson_correlation': []
    }

    for i in range(test_data.shape[1]):
        try:
            true_data = test_data[:, i]
            predicted_data = generated_series[:, i]
        except:
            true_data = test_data.values[:, i]
            predicted_data = generated_series.values[:, i]

        # Calculate Pearson correlation
        pearson_corr, _ = pearsonr(true_data, predicted_data)

        metrics_result['rmse'].append(calculate_rmse(true_data, predicted_data))
        metrics_result['mae'].append(calculate_mae(true_data, predicted_data))
        metrics_result['r2'].append(calculate_r2(true_data, predicted_data))
        metrics_result['kl_divergence'].append(calculate_kl_divergence(true_data, predicted_data))
        metrics_result['cosine_similarity'].append(calculate_cosine_similarity(true_data, predicted_data))
        metrics_result['pearson_correlation'].append(pearson_corr)

    return metrics_result

# Function to calculate FID between two sets of images
def calculate_fid(model, images1, images2):
    # Calculate activations
    act1 = model.predict(images1)
    act2 = model.predict(images2)

    # Calculate mean and covariance statistics
    mu1, sigma1 = act1.mean(axis=0), np.cov(act1, rowvar=False)
    mu2, sigma2 = act2.mean(axis=0), np.cov(act2, rowvar=False)

    # Calculate sum squared difference between means
    ssdiff = np.sum((mu1 - mu2)**2.0)

    # Calculate sqrt of product between covariances
    covmean = sqrtm(sigma1.dot(sigma2))

    # Check and correct imaginary numbers from sqrt
    if np.iscomplexobj(covmean):
        covmean = covmean.real

    # Calculate the FID score
    fid = ssdiff + np.trace(sigma1 + sigma2 - 2.0 * covmean)
    return fid

In [11]:
# Specify the path where the datasets are saved
generation_path = "./generation/" + model_name + '_168x168'

# Initialize an empty DataFrame to store metrics
df_metrics = []

for file_name in os.listdir(generation_path):
  # Load the dataset
  generated_images = np.load(os.path.join(generation_path, file_name))
  images_seed = int(file_name.split('.')[0].split('_')[-1])

  # Set the target shape
  target_shape = (generated_images.shape[0], 52, 168, 1)

  # Initialize an array to hold the resized images
  resized_images = np.zeros(target_shape)

  # Loop through each image to resize it
  for i in range(generated_images.shape[0]):
      resized_image = cv2.resize(generated_images[i], (168, 52))  # Note the dimensions are (width, height)
      resized_images[i] = np.expand_dims(resized_image, axis=-1)  # Add back the last dimension

  # Assume resized_images has a shape of (905, 52, 168, 1)
  flattened_images = resized_images.reshape(generated_images.shape[0], -1).T  # Transpose to get shape (8736, generated_images.shape[0])

  generated_series = pd.DataFrame(flattened_images)
  generated_series.columns = test_data.columns

  generated_series = (generated_series-generated_series.min())/(generated_series.max()-generated_series.min())
  test_data = (test_data-test_data.min())/(test_data.max()-test_data.min())

  test_data_resampled = test_data.rolling(24).mean()[23::24]
  generated_series_resampled = generated_series.rolling(24).mean()[23::24]

  evaluation_results = evaluate_model(test_data, generated_series)
  evaluation_results_resampled = evaluate_model(test_data_resampled, generated_series_resampled)

  df_evaluation_results = pd.DataFrame(evaluation_results)
  df_evaluation_results_resampled = pd.DataFrame(evaluation_results_resampled)
  df_evaluation_results_resampled.columns = df_evaluation_results_resampled.columns+'_daily_avg'

  # Prepare the Inception v3 model
  model = InceptionV3(include_top=False, pooling='avg', weights="imagenet", input_shape=(99,99,3))

  # Convert to RGB and upscale to the necessary input size for the Inception model
  X_test_rgb = tf.image.grayscale_to_rgb(tf.convert_to_tensor(X_test))
  generated_images_rgb = tf.image.grayscale_to_rgb(tf.convert_to_tensor(generated_images))

  X_test_upscaled = tf.image.resize(X_test_rgb, [99, 99])
  generated_images_upscaled = tf.image.resize(generated_images_rgb, [99, 99])

  # Calculate FID
  fid_score = calculate_fid(model, X_test_upscaled, generated_images_upscaled)
  print('FID: '+str(fid_score))

  metrics = pd.concat([df_evaluation_results.mean(), df_evaluation_results_resampled.mean()])
  metrics['fid_score'] = fid_score
  metrics['images_seed'] = images_seed
  metrics['model'] = model_name
  metrics = metrics.to_frame().T

  # Append the metrics to df_metrics
  df_metrics.append(metrics)

FID: 816.5609881443559
FID: 828.7441077955718
FID: 804.033063834877
FID: 794.3444852236125
FID: 795.024499957359
FID: 810.3023341954656
FID: 780.0850411266354
FID: 815.7035633924581
FID: 823.4186599522883
FID: 810.3648228120348
FID: 811.4135611267211
FID: 805.8549966168322
FID: 805.0226114992504
FID: 805.8179148142485
FID: 793.5509156242863
FID: 798.2811971996067
FID: 798.9583629268566
FID: 802.6497862742659
FID: 800.7973314389081
FID: 790.4527080749575
FID: 816.7018654445397
FID: 829.3509716914677
FID: 822.9641108001339
FID: 820.3282303291685
FID: 837.214287461948
FID: 798.2415512500236
FID: 822.6979179024485
FID: 796.9718060469538
FID: 829.7821928233834
FID: 813.8682357148607


In [12]:
df_metrics = pd.concat(df_metrics, ignore_index=True)
df_metrics

Unnamed: 0,rmse,mae,r2,kl_divergence,cosine_similarity,pearson_correlation,rmse_daily_avg,mae_daily_avg,r2_daily_avg,kl_divergence_daily_avg,cosine_similarity_daily_avg,pearson_correlation_daily_avg,fid_score,images_seed,model
0,0.266936,0.208033,-2.523189,0.850772,0.74209,0.330211,0.216724,0.174694,-33.39359,0.5162,0.814148,0.323147,816.560988,1.0,CGAN
1,0.270168,0.211327,-2.847209,0.838104,0.741545,0.326234,0.219242,0.177088,-37.329998,0.509275,0.815061,0.327309,828.744108,3.0,CGAN
2,0.270502,0.211735,-3.419544,0.856902,0.741182,0.325497,0.221313,0.179113,-50.812885,0.522303,0.812586,0.314495,804.033064,2.0,CGAN
3,0.268234,0.208878,-2.655214,0.868167,0.739723,0.322036,0.217699,0.175464,-34.127746,0.527621,0.812875,0.312971,794.344485,4.0,CGAN
4,0.268515,0.208963,-3.958449,0.911734,0.738204,0.32768,0.218281,0.176051,-63.880142,0.554089,0.811172,0.324242,795.0245,6.0,CGAN
5,0.27003,0.210401,-3.784898,0.829131,0.742261,0.322693,0.219046,0.176848,-60.331425,0.495656,0.815656,0.316689,810.302334,5.0,CGAN
6,0.267931,0.208775,-2.846658,0.879963,0.741315,0.332882,0.217981,0.175808,-37.382554,0.534758,0.814475,0.32841,780.085041,7.0,CGAN
7,0.269019,0.210196,-2.829386,0.861085,0.739189,0.323222,0.219239,0.177289,-36.466857,0.524616,0.811824,0.315429,815.703563,8.0,CGAN
8,0.268254,0.208678,-3.0233,0.88826,0.740813,0.326878,0.217642,0.175212,-39.949966,0.536607,0.814722,0.322402,823.41866,9.0,CGAN
9,0.26851,0.209622,-2.611975,0.84178,0.744327,0.332006,0.21794,0.176004,-34.534976,0.514387,0.816939,0.326439,810.364823,10.0,CGAN


In [13]:
generation_path = "./evaluations"
df_metrics.to_csv(os.path.join(generation_path, model_name+'_metrics.csv'),index=False)

In [14]:
os.path.join(generation_path, model_name+'_metrics.csv')

'./evaluations/CGAN_metrics.csv'