In [6]:
import os
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

### Read in desired EV interval model numbers first

In [7]:
# Interval value; it selects which "Interval_<interval>_model.npy" file is read
# and is reused later in the name of the output directory.
interval = 20
data_dir = os.path.join(os.getcwd(), "dataGeneration")

# Array stored in that file; its entries are used below to build the
# "dataPoint_<entry>.pkl" file names.
interval_model_path = os.path.join(data_dir, f"Interval_{interval}_model.npy")
EV_model_files = np.load(interval_model_path)

### Read in model file names

In [8]:
# Sub-directories, relative to the current working directory, that are listed below.
read_path = "dataGeneration/feature_target"
model_path = "dataGeneration/model"

working_dir = os.getcwd()
feature_target_dir = os.path.join(working_dir, read_path)
model_dir = os.path.join(working_dir, model_path)

# List each directory and report how many entries it contains.
feature_target_files = os.listdir(feature_target_dir)
print("Total feature_target number: ", len(feature_target_files))

model_files = os.listdir(model_dir)
print("Total model number: ", len(model_files))


Total feature_target number:  1755
Total model number:  1755


In [9]:
# Load one pickled sample per entry of EV_model_files.
# To load every sample instead, iterate over feature_target_files and join each
# file name directly onto feature_target_dir.
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# run this on files that come from a trusted source.
sample = []

for file in EV_model_files:
    sample_path = os.path.join(feature_target_dir, f"dataPoint_{file}.pkl")
    # The with-statement closes the file on exit, so no explicit close() is needed.
    with open(sample_path, 'rb') as f:
        loaded_dict = pickle.load(f)
    sample.append(loaded_dict)


### Print out EV numbers to verify

In [10]:
# Gather the "nbEV" entry of every loaded sample into one array.
nbEVMatrix = np.array([entry["nbEV"] for entry in sample])

# Quick sanity check: number of samples and the sum of their "nbEV" values.
print("Sample number: ", nbEVMatrix.shape[0])
print(nbEVMatrix.sum())

Sample number:  800
48000


### Normalizing data

In [6]:
# Stack the per-sample "Load" and "Solar" arrays along a new leading sample axis.
loadMatrix = np.array([x["Load"] for x in sample])
solarMatrix = np.array([x["Solar"] for x in sample])

# Dimension sizes reused by the following cells. Axis 0 indexes samples; the
# names suggest axis 1 is the bus / solar unit and axis 2 the time step
# (presumably; confirm against the data generator).
nbSample = loadMatrix.shape[0]
nbBus = loadMatrix.shape[1]
nbSolar = solarMatrix.shape[1]
nbTime = loadMatrix.shape[2]

print(loadMatrix.shape, solarMatrix.shape)
print(nbBus, nbSolar, nbTime, nbSample)

(800, 66, 48) (800, 3, 48)
66 3 48 800


In [7]:
# Flatten every sample to a single row so that MinMaxScaler, which scales each
# column independently, treats every (axis-1, axis-2) position as its own feature.
loadFlat = loadMatrix.reshape(nbSample, -1)
solarFlat = solarMatrix.reshape(nbSample, -1)

loadScaler = MinMaxScaler()
solarScaler = MinMaxScaler()

# NOTE(review): both scalers are fitted on all samples, i.e. before the
# train/validation split performed later in this notebook.
loadNormalized = loadScaler.fit_transform(loadFlat)
solarNormalized = solarScaler.fit_transform(solarFlat)

# Restore the original 3-D layout.
loadNormalized = loadNormalized.reshape(-1, nbBus, nbTime)
solarNormalized = solarNormalized.reshape(nbSample, -1, nbTime)

# Optional visual check:
# plt.plot(solarNormalized[0, 1,:])
print(loadNormalized.shape, solarNormalized.shape)

(800, 66, 48) (800, 3, 48)


### Perform Padding

In [8]:
# Both CSV files below are read from <cwd>/systemData.
data_dir = os.path.join(os.getcwd(), 'systemData')

cs_params = pd.read_csv(os.path.join(data_dir, 'cs_params_variable.csv'))
charging_station = np.squeeze(cs_params.to_numpy())
nbCS = len(charging_station)

EV_routes = pd.read_csv(os.path.join(data_dir, 'EV_routes.csv')).to_numpy()
nbRoute = EV_routes.shape[0]

# Target sizes: every sample is zero-padded to the size of a max_EV-vehicle sample.
# presumably 4 schedule rows per EV; confirm against the data generator
max_EV = 100
max_pad_schedule = max_EV*4
max_pad_bin = max_EV * (nbRoute*(nbTime-1) + nbCS*nbTime*2)

# Sanity check: length of each "Binary" vector divided by the per-EV length,
# tallied over all samples.
ev_counts = [x["Binary"].shape[0] / (nbRoute*(nbTime-1) + nbCS*nbTime*2) for x in sample]
unique, counts = np.unique(ev_counts, return_counts=True)
print(dict(zip(unique, counts)))

# Append zeros at the end of each array so that all samples share one shape.
binaryPadded, indicesPadded, schedulePadded = [], [], []
for entry in sample:
    binaryPadded.append(
        np.pad(entry["Binary"], (0, max_pad_bin - entry["Binary"].shape[0]),
               mode='constant', constant_values=0))
    indicesPadded.append(
        np.pad(entry["Indices"], (0, max_pad_bin - entry["Indices"].shape[0]),
               mode='constant', constant_values=0))
    # Schedules are 2-D: pad rows only, leave the columns untouched.
    missing_rows = max_pad_schedule - int(entry["Schedule"].shape[0])
    schedulePadded.append(
        np.pad(entry["Schedule"], ((0, missing_rows), (0, 0)),
               mode='constant', constant_values=0))

binaryMatrix = np.array(binaryPadded)
indicesMatrix = np.array(indicesPadded)
scheduleMatrix = np.array(schedulePadded).astype("int16")

print(binaryMatrix.shape, indicesMatrix.shape, scheduleMatrix[4].shape)



{20.0: 160, 40.0: 160, 60.0: 160, 80.0: 160, 100.0: 160}
(800, 767300) (800, 767300) (400, 3)


### Preprocessing schedules

In [9]:
# Transform each sample's schedule rows into a dense 2-D array with nbTime columns.
# Each schedule row is used as (row index, value, column index); presumably
# (EV, destination, time step); confirm against the data generator.
processedScheduleList = []

for i in range(len(scheduleMatrix)):

    paddedSchedule = scheduleMatrix[i]
    scheduleArray = np.zeros((paddedSchedule.shape[0] // 4, nbTime))

    # Only the leading rows come from the sample; the remaining rows are the
    # zero padding appended earlier. A padded row (0, 0, 0) would write 0 into
    # scheduleArray[0, 0] and erase a real entry stored there, so it is skipped.
    nbRealRows = sample[i]["Schedule"].shape[0]

    for s in range(nbRealRows):
        scheduleArray[paddedSchedule[s,0], paddedSchedule[s,2]] = paddedSchedule[s,1]

    processedScheduleList.append(scheduleArray)

print(processedScheduleList[6].shape)



(100, 48)


In [10]:
# Schedule sizes differ between samples, so schedules are scaled separately
# with a single shared constant instead of a fitted scaler.
# Collect columns 1 and 2 of every schedule row, in row order.
# NOTE(review): column 2 is used as the column index of the dense schedule
# array elsewhere, yet it takes part in this maximum too; confirm this is intended.
destinationList = list(scheduleMatrix[:, :, 1:3].ravel())

maxDest = np.max(destinationList)
print(maxDest)

107


In [11]:
# Plain division by the shared maximum (not a MinMaxScaler).
scheduleListNormalized = [scheduleArray/maxDest for scheduleArray in processedScheduleList]

print(len(scheduleListNormalized))

800


In [12]:
# Build one feature array per sample by stacking along axis 0, in this order:
# normalized load rows, normalized solar rows, normalized schedule rows.
featureList = np.array([
    np.concatenate(
        (loadNormalized[k], solarNormalized[k], scheduleListNormalized[k]),
        axis=0,
    )
    for k in range(len(loadNormalized))
])

# Report sample count and per-sample shape of the features and both targets.
print(len(featureList), featureList[7].shape)
print(len(binaryMatrix), binaryMatrix[7].shape)
print(len(indicesMatrix), indicesMatrix[7].shape)

800 (169, 48)
800 (767300,)
800 (767300,)


### Read in Rest of the Parameters

In [13]:
# Pull the remaining per-sample entries out of the loaded dicts, one array per key.
solTimeMatrix, objValMatrix, nbEVMatrix, modelNumMatrix = (
    np.array([entry[key] for entry in sample])
    for key in ("solve_time", "Obj_val", "nbEV", "model")
)


### Split data for training and testing

In [14]:
# Split the sample indices: 10% held out for validation, fixed seed.
all_idx = np.arange(len(loadNormalized))
train_idx, val_idx = train_test_split(all_idx, test_size=0.1, random_state=42)

# Features and both targets are split with the same index sets.
feature_train, feature_val = featureList[train_idx], featureList[val_idx]
print(feature_train.shape, feature_val.shape)

binary_train, binary_val = binaryMatrix[train_idx], binaryMatrix[val_idx]
print(binary_train.shape, binary_val.shape)

indices_train, indices_val = indicesMatrix[train_idx], indicesMatrix[val_idx]
print(indices_train.shape, indices_val.shape)

# The remaining arrays are only kept for the validation samples.
solTime_val = solTimeMatrix[val_idx]
objVal_val = objValMatrix[val_idx]
schedule_val = scheduleMatrix[val_idx]
model_val = modelNumMatrix[val_idx]

for held_out in (solTime_val, objVal_val, schedule_val, model_val):
    print(held_out.shape)

(720, 169, 48) (80, 169, 48)
(720, 767300) (80, 767300)
(720, 767300) (80, 767300)
(80,)
(80,)
(80, 400, 3)
(80,)


### Save preprocessed data

In [16]:
# Write every split array as a .npy file into one interval-specific directory.
# NOTE(review): the directory name says "nopadding" although the target arrays
# saved here are the zero-padded ones; confirm the name is intended.
train_test_dir = os.path.join(os.getcwd(), f"dataGeneration/preprocessed_data_nopadding_{interval}")

# np.save does not create missing directories, so make sure the target exists.
os.makedirs(train_test_dir, exist_ok=True)

# File name -> array, saved in insertion order.
arrays_to_save = {
    "X_train.npy": feature_train,
    "X_val.npy": feature_val,
    "y_train.npy": binary_train,
    "y_val.npy": binary_val,
    "indices_train.npy": indices_train,
    "indices_val.npy": indices_val,
    "solTime_val.npy": solTime_val,
    "objVal_val.npy": objVal_val,
    "schedule_val.npy": schedule_val,
    "model_val.npy": model_val,
}

for file_name, array in arrays_to_save.items():
    np.save(os.path.join(train_test_dir, file_name), array)

