In [9]:
import json
import os
import numpy as np
import pandas as pd
from os import path

In [10]:
def make_npz(datasets_path, output_filename, x_train_filename, y_train_filename):
    """
    Build ``<output_filename>.npz`` from the training CSVs, then delete the CSVs.

    Args:
        datasets_path: directory holding the two CSV files; the archive is
            written there as well. May be a ``str`` or any ``os.PathLike``.
        output_filename: archive name without the ``.npz`` extension.
        x_train_filename: name of the input CSV inside ``datasets_path``.
        y_train_filename: name of the target CSV inside ``datasets_path``.

    Note:
        ``csv2npz`` is called with its default ``labels_path``, so the labels
        JSON is looked up relative to the current working directory.
        The source CSVs are only removed if the conversion did not raise.
    """
    # os.path.join accepts both str and Path, unlike Path.joinpath which
    # forced callers to pass a pathlib.Path.
    x_train_path = os.path.join(datasets_path, x_train_filename)
    y_train_path = os.path.join(datasets_path, y_train_filename)
    # '\r' keeps the cursor on the same line so the "Done" message overwrites it.
    print('Creating %s.npz...' % output_filename, end='\r')
    csv2npz(x_train_path, y_train_path, datasets_path, output_filename)
    clear_line_str = '\033[K'  # ANSI "erase to end of line"
    print(clear_line_str+'Create '+output_filename+'.npz\tDone')
    # there is no more need to keep x_train and y_train files
    # (bare `remove` was never imported; use the already-imported os module)
    os.remove(x_train_path)
    os.remove(y_train_path)

def csv2npz(dataset_x_path, dataset_y_path, output_path, filename, labels_path='labels.json',
            time_series_length=None):
    """Convert the input/target CSVs into one ``.npz`` archive with arrays R, X and Z.

    Args:
        dataset_x_path: CSV holding the ``labels["R"]`` columns and the
            ``<name>_<i>`` columns for every name in ``labels["Z"]``.
        dataset_y_path: CSV holding the ``<name>_<i>`` columns for every name
            in ``labels["X"]``.
        output_path: directory the archive is written to.
        filename: archive name; ``np.savez`` appends ``.npz`` if it is missing.
        labels_path: JSON file with the keys ``"R"``, ``"X"`` and ``"Z"``,
            each a list of column (base) names.
        time_series_length: number of steps ``K`` per series, i.e. column
            suffixes run ``_0 .. _K-1``. When ``None`` the module-level
            ``TIME_SERIES_LENGTH`` is used, as before.

    Saved arrays (all ``float32``), with ``m`` the number of CSV rows:
        R: ``(m, len(labels["R"]))``
        X: ``(m, len(labels["X"]), K)``
        Z: ``(m, len(labels["Z"]), K)``
    """
    # Load dataset as csv
    x = pd.read_csv(dataset_x_path)
    y = pd.read_csv(dataset_y_path)

    # Load labels, file can be found in challenge description
    with open(labels_path, "r") as stream_json:
        labels = json.load(stream_json)

    m = x.shape[0]
    # Explicit argument wins; otherwise fall back to the notebook-level constant.
    K = TIME_SERIES_LENGTH if time_series_length is None else time_series_length

    def series_tensor(frame, var_names):
        # Columns are selected variable-major (all K steps of the first
        # variable, then the next one), so the reshape yields
        # (sample, variable, time). The variable count is given explicitly
        # rather than as -1 so an empty CSV (m == 0) still reshapes.
        columns = [f"{var_name}_{i}" for var_name in var_names for i in range(K)]
        values = frame[columns].values.reshape((m, len(var_names), K))
        return values.astype(np.float32)

    # Create R, X and Z
    R = x[labels["R"]].values.astype(np.float32)
    X = series_tensor(y, labels["X"])
    # Z is kept as (m, n_vars, K); it is not transposed to (m, K, n_vars).
    Z = series_tensor(x, labels["Z"])

    np.savez(path.join(output_path, filename), R=R, X=X, Z=Z)

In [11]:
# Number of time steps per series; csv2npz reads this as K and expects
# per-step columns suffixed _0 .. _671 (e.g. TAMB_671 in the raw CSV preview).
TIME_SERIES_LENGTH = 672

In [12]:
# Convert the raw challenge CSVs into data/data_gen_original.npz.
csv2npz(
    dataset_x_path='data/x_train_LsAZgHU.csv',
    dataset_y_path='data/y_train_EFo1WyE.csv',
    output_path='data',
    filename='data_gen_original',
    labels_path='labels_original.json',
)

In [5]:
# Load the raw input CSV (the same file passed to csv2npz) for inspection.
data = pd.read_csv('data/x_train_LsAZgHU.csv')

In [8]:
# Preview the first rows: scalar building parameters come first, followed by
# per-time-step columns (the output shows TAMB_662 .. TAMB_671).
data.head()

Unnamed: 0,index,airchange_infiltration_vol_per_h,capacitance_kJ_perdegreK_perm3,power_VCV_kW_heat,power_VCV_kW_clim,nb_occupants,nb_PCs,facade_1_thickness_2,facade_1_window_area_percent,facade_2_thickness_2,...,TAMB_662,TAMB_663,TAMB_664,TAMB_665,TAMB_666,TAMB_667,TAMB_668,TAMB_669,TAMB_670,TAMB_671
0,0,0.1,20.0,500.0,500.0,1000.0,1000.0,0.15,45.0,0.06,...,23.2,23.1,23.5,24.1,24.2,23.9,23.3,22.1,21.0,19.7
1,1,0.3,30.0,600.0,500.0,1300.0,1400.0,0.09,40.0,0.11,...,14.5,14.8,15.3,15.0,14.7,14.0,13.2,12.5,11.9,0.0
2,2,0.1,270.0,500.0,600.0,1500.0,1500.0,0.08,45.0,0.07,...,14.5,14.8,14.7,14.6,14.5,14.0,13.8,13.1,12.5,11.6
3,3,0.4,140.0,500.0,600.0,1300.0,1300.0,0.08,45.0,0.08,...,14.5,14.8,15.0,15.2,15.0,14.8,14.9,14.5,14.1,14.2
4,4,0.4,40.0,600.0,600.0,1000.0,1500.0,0.06,50.0,0.07,...,30.3,31.3,31.5,32.0,31.5,30.8,30.2,28.2,26.4,23.8


In [18]:
# (rows, columns) of the raw input table.
data.shape

(7500, 12116)