# Data Preprocessing

## Import libraries

In [23]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pylab as plt
from IPython.display import Image
from IPython.core.display import HTML

## Load transmission data (MTD_Tx CSV file)

In [24]:
# Read the transmission log and keep it as a plain NumPy array;
# the intermediate DataFrame is not needed afterwards.
MTD_Tx = np.array(pd.read_csv('MTD_Tx.csv'))
# Display (rows, columns) of the loaded array.
MTD_Tx.shape

(12000, 5)

In [25]:
# Preview the first 20 rows (all columns) of the transmission array.
MTD_Tx[:20]

array([[0, 1, 0, 0, 0],
       [0, 1, 0, 0, 0],
       [1, 1, 0, 1, 0],
       [1, 0, 0, 1, 1],
       [1, 1, 0, 1, 1],
       [1, 1, 1, 0, 1],
       [0, 0, 1, 0, 0],
       [0, 1, 0, 1, 0],
       [1, 1, 1, 0, 1],
       [1, 0, 0, 0, 0],
       [0, 1, 0, 1, 0],
       [0, 1, 1, 0, 1],
       [0, 1, 0, 1, 0],
       [0, 1, 0, 0, 1],
       [1, 1, 0, 1, 0],
       [1, 1, 0, 1, 1],
       [1, 0, 0, 1, 1],
       [1, 0, 1, 1, 1],
       [0, 1, 0, 0, 1],
       [0, 1, 0, 0, 0]])

## Transmission Data Splitting (Training and testing)

The transmission data is first split into training and testing datasets: the first 80% of the rows are used to train the LSTM model and the remaining 20% are used to test it.

In [26]:
# Chronological 80/20 split (no shuffling, so the time order is preserved).
# The boundary is derived from the array length with integer arithmetic
# instead of hard-coded row numbers; for the 12000-row file it is 9600.
n_train = (len(MTD_Tx) * 80) // 100
training = MTD_Tx[:n_train]   # first 80% of the rows
testing = MTD_Tx[n_train:]    # remaining 20% of the rows

In [27]:
# (rows, columns) of the held-out test slice taken from the end of MTD_Tx.
testing.shape

(2400, 5)

In [28]:
# (rows, columns) of the training slice taken from the start of MTD_Tx.
training.shape

(9600, 5)

## Data Reshaping and Reconstructing

The following Python code preprocesses the data. NumPy's built-in array-manipulation functions (reshape, append, delete and indexing) are applied to the training dataset of shape (9600, 5) to obtain the independent and dependent training feature matrices, with shapes (9588, 12, 5) and (9588, 5) respectively. Each independent sample is a window of 12 consecutive time steps, and its dependent sample is the time step that immediately follows that window. In the same way, the same functions are applied to the testing dataset of shape (2400, 5) to obtain the independent and dependent testing feature matrices, with shapes (2388, 12, 5) and (2388, 5) respectively. Finally, all four matrices are saved as NPZ files with the np.savez function.

In [36]:
# Display Picture2.png at 1000x1000; the image is linked by relative URL
# rather than embedded in the notebook.
Image(url="Picture2.png", width=1000, height=1000)

In [29]:
def make_windows(series, window=12):
    """Cut a time-ordered 2-D array into overlapping windows and next-step targets.

    Parameters
    ----------
    series : array-like of shape (n_steps, n_features)
        Rows are consecutive time steps, columns are the individual features.
    window : int, default 12
        Number of consecutive time steps in every input sample.

    Returns
    -------
    X : numpy.ndarray, float64, shape (n_steps - window, window, n_features)
        ``X[j, t, f] == series[j + t, f]``.
    Y : numpy.ndarray, float64, shape (n_steps - window, n_features)
        ``Y[j, f] == series[j + window, f]``, i.e. the row that directly
        follows window ``j``.

    Raises
    ------
    ValueError
        If ``series`` is not 2-D, ``window`` is smaller than 1, or ``series``
        does not contain more than ``window`` rows.
    """
    series = np.asarray(series)
    if series.ndim != 2:
        raise ValueError("series must be 2-D (n_steps, n_features)")
    if window < 1:
        raise ValueError("window must be a positive integer")

    n_samples = series.shape[0] - window
    if n_samples < 1:
        raise ValueError("series must contain more rows than the window length")

    # Row j of step_index holds the time indices j, j+1, ..., j+window-1, so a
    # single fancy-indexing call gathers every window for every column at once.
    step_index = np.arange(n_samples)[:, None] + np.arange(window)[None, :]

    # Cast to float64 so the results keep the dtype they had when they were
    # accumulated on top of float64 np.empty placeholders.
    X = series[step_index].astype(np.float64)
    Y = series[window:].astype(np.float64)
    return X, Y


# Inputs are 12-step windows; targets are the step immediately after each window.
X_training, Y_training = make_windows(training)
X_testing, Y_testing = make_windows(testing)

## Training independent feature matrix

In [30]:
# (samples, time steps per window, columns) of the training inputs.
X_training.shape

(9588, 12, 5)

## Training dependent feature matrix

In [31]:
# (samples, columns): one target row per training window.
Y_training.shape

(9588, 5)

## Testing independent feature matrix

In [32]:
# (samples, time steps per window, columns) of the testing inputs.
X_testing.shape

(2388, 12, 5)

## Testing dependent feature matrix

In [33]:
# (samples, columns): one target row per testing window.
Y_testing.shape

(2388, 5)

## Save preprocessed transmission data as NPZ files

In [None]:
# Bundle each split's inputs and targets under the keys used in the archives.
train_arrays = {'dat_tr': X_training, 'lbl_tr': Y_training}
test_arrays = {'dat_ts': X_testing, 'lbl_ts': Y_testing}

# np.savez appends the ".npz" extension to the given file names.
np.savez('5_mtd_train_restruct', **train_arrays)
np.savez('5_mtd_test_restruct', **test_arrays)