In [1]:
%reset -f

import os
import pickle
import numpy as np
from tqdm import tqdm

# All locations are resolved relative to the current working directory.
rootDir = os.getcwd()
saveDir = f'{rootDir}/results/'
dataset_dir = os.path.join(rootDir, 'datasets/dataSRP/')
# Derived arrays are written into the same directory the source data is read from.
dataset_savedir = dataset_dir

# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only point this at a file from a trusted source.
dataDict_path = os.path.join(dataset_dir, 'dataDict_originProcValueRange.pkl')
with open(dataDict_path, 'rb') as pkl_file:
    dataDict = pickle.load(pkl_file)
print('Data Loaded!\n')

Data Loaded!



## 1.1 SRPD x10

In [3]:
def DivideDataset(data:np.ndarray, splitType:str, trainRate:int, valRate:int):
    """Split a 2D dataset (len, block_size) row-wise into two or three consecutive sub-sets.

    Rows keep their original order (no shuffling). Rates are expressed in tenths
    of the number of rows, and split points are rounded down.
    ## Args
        data (Numpy): 2D array of shape (len, block_size);  
        splitType (string): 'withoutTest' (train/val) OR 'withTest' (train/val/test);  
        trainRate (int): e.g. 8 represents the training set accounts for 8 in 10;    
        valRate (int): share of the validation set in tenths. Only used by 'withTest';
            with 'withoutTest' the validation set is every row after the training rows.
    ## Returns
        trainData (Numpy);    
        valData (Numpy);    
        (*)testData (Numpy): only returned for 'withTest' (the rows left after train and val).   
    ## Raises
        ValueError: data is not 2D, or (for 'withTest') the rates are negative or sum to more than 10.  
        SyntaxError: splitType is neither 'withTest' nor 'withoutTest'.
    """
    if len(data.shape) != 2:
        raise ValueError('Wrong data dimension, the input data should be 2D array')

    n_rows = data.shape[0]

    if splitType == 'withoutTest':
        idx = n_rows * trainRate // 10
        return data[:idx, :], data[idx:, :]

    if splitType == 'withTest':
        if trainRate < 0 or valRate < 0 or trainRate + valRate > 10:
            raise ValueError('trainRate and valRate must be non-negative and sum to at most 10')
        train_end = n_rows * trainRate // 10
        # Computed from the cumulative rate so rounding never makes the slices overlap.
        val_end = n_rows * (trainRate + valRate) // 10
        return data[:train_end, :], data[train_end:val_end, :], data[val_end:, :]

    # SyntaxError is kept (rather than ValueError) so existing callers that
    # catch it keep working.
    raise SyntaxError('Wrong splitType, please input withTest or withoutTest')
    
def DownsampDivide(data:np.ndarray, ProcType:str):
    """Build (input, label) segment pairs from a 1D signal and split them into train/val sets.

    The signal is cut into fixed-length label segments; the matching input segment
    keeps every `step`-th point of it, starting with the first point of the segment.
    Both sets are then split 8:2 (train:val) in row order via DivideDataset.
    # Args
        data (1D np array): The input data. Its length must be a multiple of the label segment length.
        ProcType (str): Processing type. Currently only '100x10' is supported
            (step 10, label segments of 3000 points, input segments of 300 points).
    # Raises
        ValueError: The input data is not 1D, ProcType is not supported, or the data
            length is not a multiple of the label segment length.
    # Returns
        train_input, val_input, train_label, val_label: Downsampled training and validation sets
    """
    if len(data.shape) != 1:
        raise ValueError('The input data dimension is incorrect and should be 1D array')

    # ProcType -> (downsampling step, label segment length)
    proc_configs = {'100x10': (10, 3000)}
    if ProcType not in proc_configs:
        # Without this guard an unknown ProcType would only fail later with an
        # unrelated UnboundLocalError on inputSet.
        raise ValueError(f'Unsupported ProcType {ProcType!r}, expected one of {sorted(proc_configs)}')
    step, label_len = proc_configs[ProcType]

    if data.shape[0] % label_len != 0:
        raise ValueError(f'The input data length {data.shape[0]} is not a multiple of {label_len}')

    # label_len is a multiple of step, so row i of inputSet is exactly the
    # downsampled version of row i of labelSet.
    inputSet = data[::step].reshape(-1, label_len // step)
    labelSet = data.reshape(-1, label_len)

    train_input, val_input = DivideDataset(inputSet, splitType='withoutTest', trainRate=8, valRate=2)
    train_label, val_label = DivideDataset(labelSet, splitType='withoutTest', trainRate=8, valRate=2)

    return train_input, val_input, train_label, val_label


# Collect the per-record train/val blocks, then stack and persist them.
ProcType = '100x10'
A_train_input, A_val_input, A_train_label, A_val_label = [], [], [], []

for dataIdx in tqdm(dataDict.keys()):
    # squeeze() drops singleton axes; DownsampDivide rejects anything that is not 1D.
    train_input, val_input, train_label, val_label = DownsampDivide(
        dataDict[dataIdx].squeeze(), ProcType
    )
    A_train_input.append(train_input)
    A_val_input.append(val_input)
    A_train_label.append(train_label)
    A_val_label.append(val_label)

# Stack the per-record blocks row-wise into a single array per split.
A_train_input = np.vstack(A_train_input)
A_val_input = np.vstack(A_val_input)
A_train_label = np.vstack(A_train_label)
A_val_label = np.vstack(A_val_label)

# The file names carry no extension; np.save appends '.npy' itself.
np.save(f'{dataset_savedir}train_input_{ProcType}', A_train_input)
np.save(f'{dataset_savedir}val_input_{ProcType}', A_val_input)
np.save(f'{dataset_savedir}train_label_{ProcType}', A_train_label)
np.save(f'{dataset_savedir}val_label_{ProcType}', A_val_label)

print(f'''Successfully divided dataset!
      ProcType: {ProcType}
        train_input: {A_train_input.shape}
        val_input: {A_val_input.shape}
        train_label: {A_train_label.shape}
        val_label: {A_val_label.shape}''')

100%|██████████| 2000/2000 [00:00<00:00, 307365.09it/s]


Successfully divided dataset!
      ProcType: 100x10
        train_input: (128000, 300)
        val_input: (32000, 300)
        train_label: (128000, 3000)
        val_label: (32000, 3000)


## 1.2 SRPD x20

In [4]:
# Reload the x10 arrays from disk; the x20 set below is derived from them.
labelTrain, labelVal = (
    np.load(f'{dataset_dir}train_label_100x10.npy'),
    np.load(f'{dataset_dir}val_label_100x10.npy'),
)
inputTrain, inputVal = (
    np.load(f'{dataset_dir}train_input_100x10.npy'),
    np.load(f'{dataset_dir}val_input_100x10.npy'),
)

In [5]:
# Sanity check: inputs and labels must have the same number of rows per split.
print(inputTrain.shape, labelTrain.shape, inputVal.shape, labelVal.shape)

(128000, 300) (128000, 3000) (32000, 300) (32000, 3000)


In [6]:
# Keep every second column of the x10 inputs (starting with column 0), which
# halves the input length while the labels stay unchanged -> x20.
every_other_column = slice(None, None, 2)
inTr = inputTrain[:, every_other_column]
inVal = inputVal[:, every_other_column]
print(inTr.shape, inVal.shape)

(128000, 150) (32000, 150)


In [7]:
# The x20 set reuses the x10 labels unchanged; only the inputs were thinned out.
outputs_50x20 = (
    (f'{dataset_savedir}train_input_50x20.npy', inTr),
    (f'{dataset_savedir}val_input_50x20.npy', inVal),
    (f'{dataset_savedir}train_label_50x20.npy', labelTrain),
    (f'{dataset_savedir}val_label_50x20.npy', labelVal),
)
for out_path, out_array in outputs_50x20:
    np.save(out_path, out_array)
print('Data saved successfully!')

Data saved successfully!
