# In [36]:
######################################################## CONSTANTS ########################################################
import CONSTANTS as c

# Local aliases for the settings kept in the CONSTANTS module
# (names, including their spelling, follow that module).

# folders: the first is walked for input data, results are written below the second
PREPROCESSED_DATA_PATH = c.PREPROCESSED_DATA_PATH
FEATURE_ENGINEERED_DATA_PATH = c.FEATURE_ENGINEERED_DATA_PATH

# input side: number of leading FFT coefficients kept for X
FFT_AC_COEFFICIENT = c.FFT_AC_COEFFICIENT

# output side: clipping range of Y, which is also the range the discretizer is fitted on
LOWER_POINT = c.LOWER_POINT
UPPER_POINT = c.UPPER_POINT
# number of points every Y series is interpolated to (also the divisor for bin frequencies)
INTERPLOATION_POINTS = c.INTERPLOATION_POINTS
# number of bins used by the discretizer and size of the bin axis of Y
NR_OF_BINS = c.NR_OF_BINS
# minimum bin frequency for a bin to be marked with 1
THRESHOLD = c.THRESHOLD

######################################################## LIBRARIES ########################################################
import pandas as pd

import numpy as np

from scipy import interpolate
from numpy.fft import fft,ifft

from sklearn.preprocessing import  KBinsDiscretizer

import os


# For every sub-folder below PREPROCESSED_DATA_PATH: load the *_ORIGINAL arrays,
# build the feature-engineered X / Y / X_TIME arrays and save them in the folder
# of the same name below FEATURE_ENGINEERED_DATA_PATH.
for sFolderPath, _aSubFolders, _aFileNames in os.walk(PREPROCESSED_DATA_PATH):
    # os.walk also yields the root folder itself; only its sub-folders are processed
    if sFolderPath == PREPROCESSED_DATA_PATH:
        continue

    # os.path is used so the script does not depend on the Windows path separator
    sSubFolder = os.path.basename(sFolderPath)
    sFolderToWrite = os.path.join(FEATURE_ENGINEERED_DATA_PATH, sSubFolder)
    os.makedirs(sFolderToWrite, exist_ok=True)

    # nothing to do when all three outputs were already written by an earlier run
    if {'X.npy', 'Y.npy', 'X_TIME.npy'}.issubset(os.listdir(sFolderToWrite)):
        continue

    ######################################################## DATA SOURCE ########################################################
    X_ORIGINAL = np.load(os.path.join(sFolderPath, 'X_ORIGINAL.npy'))
    X_TIME_ORIGINAL = np.load(os.path.join(sFolderPath, 'X_TIME_ORIGINAL.npy'))
    Y_ORIGINAL = np.load(os.path.join(sFolderPath, 'Y_ORIGINAL.npy'))

    ######################################################## ANALYSIS ########################################################
    ######################################################## COMPILE INPUT DATASET(S) ########################################################
    # reduce the size of axis 2 of the input dataset with the fast fourier transform:
    # transform each series, keep the first FFT_AC_COEFFICIENT coefficients and
    # transform the truncated spectrum back
    X = np.zeros((X_ORIGINAL.shape[0], X_ORIGINAL.shape[1], FFT_AC_COEFFICIENT, X_ORIGINAL.shape[3]))
    for i in range(X_ORIGINAL.shape[1]):  # future time step
        for j in range(X_ORIGINAL.shape[3]):  # exchange rate
            aTransformed = fft(X_ORIGINAL[:, i, :, j])[:, :FFT_AC_COEFFICIENT]
            # ifft returns complex values while X is real: store the real part
            # explicitly (same stored values as the implicit cast, without the
            # ComplexWarning it raises)
            X[:, i, :, j] = ifft(aTransformed).real

    X_TIME = X_TIME_ORIGINAL

    ######################################################## COMPILE OUTPUT DATASET ########################################################
    # clip the targets to [LOWER_POINT, UPPER_POINT]
    aClipped = Y_ORIGINAL.copy()
    aClipped[aClipped < LOWER_POINT] = LOWER_POINT
    aClipped[aClipped > UPPER_POINT] = UPPER_POINT

    # linearly interpolate axis 2 of Y to INTERPLOATION_POINTS points
    # (as data augmentation ?); the source length is read from the data
    # instead of being fixed to 60
    iSourcePoints = aClipped.shape[2]
    oInterpolate = interpolate.interp1d(np.arange(iSourcePoints), aClipped, kind='linear', axis=2)
    aInterpolated = oInterpolate(np.linspace(0, iSourcePoints - 1, INTERPLOATION_POINTS))

    # Everything below runs once per folder on the complete interpolated array.
    # It used to sit inside the per-time-step loop, which only worked for
    # datasets with a single future time step.

    # discretize output to histogram bin size
    aHistogramBins = np.linspace(LOWER_POINT, UPPER_POINT, num=INTERPLOATION_POINTS).reshape(-1, 1)
    oOutputDiscritizer = KBinsDiscretizer(n_bins=NR_OF_BINS, encode='ordinal', strategy='uniform')
    oOutputDiscritizer.fit(aHistogramBins)
    aBinned = oOutputDiscritizer.transform(aInterpolated.reshape(-1, 1)).reshape(aInterpolated.shape).astype(int)

    # calculate frequencies: for every series count how many of its interpolated
    # points fall into each bin, then divide by the number of points
    aFrequencies = np.zeros((aBinned.shape[0], aBinned.shape[1], NR_OF_BINS, aBinned.shape[3]))
    for k in range(NR_OF_BINS):  # bin
        aFrequencies[:, :, k, :] = (aBinned == k).sum(axis=2)
    aFrequencies = aFrequencies / INTERPLOATION_POINTS

    # set the active values to 1 and all others to 0
    aActive = (aFrequencies >= THRESHOLD).astype(float)

    # set points around zero to 1: an active bin below iZeroPoint marks the bins
    # from itself up to iZeroPoint - 1, any other active bin marks the bins from
    # iZeroPoint + 1 up to itself
    # NOTE(review): bin iZeroPoint itself is never set to 1 by these slices - confirm this is intended
    Y = np.zeros(aActive.shape)
    iZeroPoint = int(NR_OF_BINS / 2) - 1
    for k in range(aActive.shape[2]):  # bin
        if k < iZeroPoint:
            oBinsToSet = slice(k, iZeroPoint)
        else:
            oBinsToSet = slice(iZeroPoint + 1, k + 1)
        # the 0/1 flag of bin k is broadcast over the bins it marks; maximum keeps
        # bins that were already marked by another active bin
        Y[:, :, oBinsToSet, :] = np.maximum(Y[:, :, oBinsToSet, :], aActive[:, :, k:k + 1, :])

    ######################################################## SAVE ########################################################
    np.save(os.path.join(sFolderToWrite, 'X.npy'), X)
    np.save(os.path.join(sFolderToWrite, 'Y.npy'), Y)
    np.save(os.path.join(sFolderToWrite, 'X_TIME.npy'), X_TIME)


