# Data preparation for CWGAN

In [None]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pickle
import datetime


def split_timestamp(pth) -> pd.DataFrame:
    """Load a CSV and expand its ``ds`` timestamp column into numeric parts.

    The ``ds`` strings are split on a single space into a date part and a
    time part; the date part is split on ``/`` into year, month and date, and
    the time part on ``:`` into hour and minute.

    Args:
        pth: Path of the CSV file; it must provide ``ds`` and ``value``
            columns.

    Returns:
        A float DataFrame with the columns ``year``, ``month``, ``date``,
        ``hour``, ``minute`` and ``value``, in that order.
    """
    raw = pd.read_csv(pth)

    # NOTE: an optional Chinese-holiday flag column used to be built here;
    # that feature is currently disabled.
    stamp_parts = raw["ds"].str.split(' ', expand=True)

    calendar_part = stamp_parts[0].str.split('/', expand=True)
    calendar_part.columns = ["year", "month", "date"]

    clock_part = stamp_parts[1].str.split(':', expand=True)
    clock_part.columns = ["hour", "minute"]

    # The pieces are still strings at this point; convert everything at once.
    combined = pd.concat([calendar_part, clock_part, raw[["value"]]], axis=1)
    return combined.astype("float")

def scale(train_data, val_data, test_data):
    """Persist the data splits to ``loggings/data_preprocess.pkl``.

    Despite the name, no scaling is applied at the moment: the
    ``StandardScaler`` step is disabled, so the splits are stored exactly as
    they are passed in and the ``"scaler"`` entry is the placeholder ``0``.

    Args:
        train_data: Training split, stored under ``"train_ds"``.
        val_data: Validation split (may be ``None``), stored under
            ``"val_ds"``.
        test_data: Test split, stored under ``"test_ds"``.

    Returns:
        None. The only effect is the pickle file written to disk.
    """
    import os

    results = {
        "train_ds": train_data,
        "val_ds": val_data,
        "test_ds": test_data,
        "scaler": 0,
    }

    out_path = 'loggings/data_preprocess.pkl'
    # Create the output directory on first use instead of failing with
    # FileNotFoundError on a fresh checkout.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    # The context manager guarantees the handle is flushed and closed even if
    # pickling raises.
    with open(out_path, 'wb') as fh:
        pickle.dump(results, fh)


# Build the conditional data set: every row of the reshaped PV matrix gets the
# calendar columns of its first timestamp appended.
pth = "data/PV_jz.csv"
df = split_timestamp(pth)
# Only the "month" and "date" columns survive; they act as the condition.
df.drop(columns=["year", "value", "minute", "hour"], inplace=True)
print(df.head(5))
pv_reshape = np.loadtxt("data/PV_jz_reshape.csv", delimiter=",")

df = df.values

results = np.zeros((pv_reshape.shape[0], pv_reshape.shape[1] + df.shape[1]))
# Row i of pv_reshape is paired with timestamp row i * 96.
# NOTE(review): presumably 96 samples make up one row of pv_reshape (one day
# at 15-minute resolution) — confirm against how PV_jz_reshape.csv was built.
start = 0
for i, curve in enumerate(pv_reshape):
    results[i, :] = np.concatenate([curve, df[start, :]], axis=0)
    start += 96

print(results)

# Chronological split (no shuffling): the last 20 % of the rows form the test set.
pv_train, pv_test = train_test_split(results, test_size=0.2, shuffle=False)
scale(pv_train, None, pv_test)



# Data preparation for WGAN

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pickle
import datetime


def scale(train_data, val_data, test_data):
    """Persist the data splits to ``loggings/data_preprocess.pkl``.

    Despite the name, no scaling is applied at the moment: the
    ``StandardScaler`` step is disabled, so the splits are stored exactly as
    they are passed in and the ``"scaler"`` entry is the placeholder ``0``.

    Args:
        train_data: Training split, stored under ``"train_ds"``.
        val_data: Validation split (may be ``None``), stored under
            ``"val_ds"``.
        test_data: Test split, stored under ``"test_ds"``.

    Returns:
        None. The only effect is the pickle file written to disk.
    """
    import os

    results = {
        "train_ds": train_data,
        "val_ds": val_data,
        "test_ds": test_data,
        "scaler": 0,
    }

    out_path = 'loggings/data_preprocess.pkl'
    # Create the output directory on first use instead of failing with
    # FileNotFoundError on a fresh checkout.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    # The context manager guarantees the handle is flushed and closed even if
    # pickling raises.
    with open(out_path, 'wb') as fh:
        pickle.dump(results, fh)


# Build the unconditional data set: the last CSV column cut into rows of 96
# consecutive samples.
df = pd.read_csv("data/PV_jz.csv")
df["date"] = pd.to_datetime(df["date"])

# Drop the trailing samples that do not fill a complete 96-sample row.
n_full_rows = len(df) // 96
# NOTE(review): df.values on a frame mixing datetime and numeric columns is
# expected to give an object array — confirm the consumer copes with that dtype.
pv = df.values[:n_full_rows * 96, -1]
print(pv.shape)
pv = pv.reshape([-1, 96])
print(pv.shape)

# Chronological split (no shuffling): the last 20 % of the rows form the test set.
pv_train, pv_test = train_test_split(pv, test_size=0.2, shuffle=False)
scale(pv_train, None, pv_test)

# Test Trained GAN

In [None]:
import torch
import pickle
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

# Load the splits written by the data-preparation step.  A context manager is
# used so the file handle is closed instead of being leaked.
# NOTE: unpickling runs arbitrary code — only load files you produced yourself.
with open('loggings/data_preprocess.pkl', 'rb') as fh:
    mydata = pickle.load(fh)
# scaler = mydata["scaler"]
test_ds = mydata["test_ds"]

# Use the GPU when one is present, otherwise fall back to the CPU so the cell
# also runs on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# NOTE(review): this loads a fully pickled module; recent PyTorch releases
# default torch.load to weights_only=True, which rejects such files — confirm
# against the installed version.
generator = torch.load("loggings/generator.pth", map_location=device)
generator.eval()

select = 51
# The last two entries of a row are the condition, everything before them is
# the real curve.
# NOTE(review): this layout matches the pickle written by the CWGAN preparation
# cell; the WGAN cell writes rows without condition columns to the same path.
target = test_ds[select][:-2]
cond = test_ds[select][-2:].reshape(1, -1)
# target = torch.from_numpy(target).float().cuda()
cond = torch.from_numpy(cond).float().to(device)

# Draw `num` noise vectors and keep the generated curve closest (L2 norm) to
# the real one.
num = 100
dist = np.inf  # a true "no candidate yet" sentinel instead of a magic number
idx = 0
record = []
with torch.no_grad():  # inference only: no autograd bookkeeping needed
    for i in range(num):
        z = torch.normal(0, 1, [1, 96]).to(device)
        y_hat = generator(z, cond)
        y_hat = y_hat.detach().cpu().numpy().reshape(target.shape)
        y_hat = y_hat.astype(float)
        # Compute the distance once and reuse it for the comparison and update.
        err = np.linalg.norm(y_hat - target)
        if err < dist:
            dist = err
            idx = i
        record.append(y_hat)
best_fit = record[idx]
print(len(best_fit))
# Derive the x axis from the data rather than hard-coding its length.
plt.plot(range(len(target)), target, label="target")
plt.plot(range(len(best_fit)), best_fit, label="generated")
plt.legend()
# plt.savefig("loggings/pv_gen.png")

# Evaluation

In [None]:
from utils import autocor, cdf, psd, ws_dist, cor_dist, dtw_dist
import numpy as np


# Compare the real curve (`target`) with the best generated one (`best_fit`);
# both names, and `plt`, come from the previous cell.
fn_target = cdf(target)
fn_fit = cdf(best_fit)
x = np.linspace(0, 180, 300)

# Only the second value returned by psd() is used further down.
# NOTE(review): the first value used to be bound to atc_tgt / atc_best and was
# overwritten straight away by autocor() — confirm whether it should be plotted.
psd_tgt, freq_tgt = psd(target)
psd_best, freq_best = psd(best_fit)
atc_tgt = autocor(target, lags=48, plot=False)
atc_best = autocor(best_fit, lags=48, plot=False)

fig, ax = plt.subplots()
# Both pairs of curves share one axes object and reuse the same two labels.
for curve_fn, curve_label in ((fn_target, "target"), (fn_fit, "generated")):
    ax.step(x, curve_fn(x), label=curve_label)
# NOTE(review): the autocor() results are drawn against the x values returned
# by psd(); the disabled lines below use the lag index instead — confirm which
# one is intended.
ax.plot(freq_tgt, atc_tgt, label="target")
ax.plot(freq_best, atc_best, label="generated")
# ax.plot(range(len(atc_tgt)), atc_tgt, label="target")
# ax.plot(range(len(atc_tgt)), atc_best, label="generated")
ax.legend()
