## Convert TXT Data into a NumPy Array

> Modify the following parameters to choose the dataset, its label, and the window size

In [1]:
# Number of time steps in each window.
window_size = 1000
# `normality` and `source` are combined with `window_size` to name the output file.
normality = 'abnormal'
source = '0208_anomaly'
# Resulting name: <source>_<normality>_<window_size>_rss.npy
outname = '{}_{}_{}_rss.npy'.format(source, normality, window_size)

In [2]:
import numpy as np
import os
import pickle
import joblib
import sys
import glob

ModuleNotFoundError: No module named 'joblib'

In [3]:
# Directory that is searched for the input .txt files.
input_path = '/net/adv_spectrum/data/rss/downsample_10/{}/{}/'.format(normality, source)
# Destination file of the converted NumPy array.
output_file = '/net/adv_spectrum/array_data_rss/{}'.format(outname)

# Show both paths so they can be checked before the conversion is run.
print(input_path, output_file, sep='\n')

/net/adv_spectrum/data/rss/downsample_10/abnormal/0208_anomaly/
/net/adv_spectrum/array_data_rss/0208_anomaly_abnormal_1000_rss.npy


> Run the following cells in order to convert the TXT files and save the result

In [12]:
def txt_to_series(input_path):
    """
    Read whitespace-separated numbers from a text file into a column vector.

    Inputs:
        input_path (str): path of the text file to read

    Return:
        result (np.array): float64 array of shape (n_values, 1) holding every
            value of the file in reading order (line by line, left to right)
    """
    features = []

    with open(input_path, 'r') as f:
        for line in f:
            # extend (not append) keeps the list flat, so blank lines or lines
            # with a different number of values cannot create a ragged nested
            # list that NumPy is unable to convert to a float array.
            features.extend(line.split())

    return np.array(features).reshape((-1, 1)).astype('float64')


def array_to_window(X, window_size):
    """
    Split a series into consecutive, non-overlapping windows.

    Inputs:
        X (np.array): Its shape should be (n_time_steps, n_features)
        window_size (int): the number of time steps in a window (must be >= 1)

    Return:
        result (np.array): Its shape is (n_windows, window_size, n_features),
            where n_windows = n_time_steps // window_size. Trailing time steps
            that do not fill a whole window are discarded. When X is shorter
            than one window the result is empty but keeps this 3-D shape, so
            it can still be concatenated with other windowed arrays.

    Raises:
        ValueError: if window_size is smaller than 1
    """
    if window_size < 1:
        raise ValueError(
            'window_size must be a positive integer, got {}'.format(window_size))

    n_windows = X.shape[0] // window_size

    # Drop the incomplete tail, then split the time axis into
    # (window index, step within window); row order is preserved.
    usable = X[:n_windows * window_size]
    windows = usable.reshape((n_windows, window_size) + X.shape[1:])

    # Return a copy so the result never aliases the caller's array.
    return windows.copy()

In [13]:
series_list = []

# os.path.join builds the pattern correctly whether or not input_path ends
# with a path separator.
txt_files = sorted(glob.glob(os.path.join(input_path, '*.txt')))
if not txt_files:
    # Fail here with a clear message instead of later with an empty list.
    raise FileNotFoundError('No .txt files found in {}'.format(input_path))

# Load every file (in sorted name order) as a (n_values, 1) float array.
for filename in txt_files:
    print(filename)
    series = txt_to_series(filename)
    print(series.shape)
    series_list.append(series)

/net/adv_spectrum/data/rss/downsample_10/abnormal/ryerson_ab_train_sigOver/1518561613_880M_5m_sigOver_rss.txt
(156214, 1)
/net/adv_spectrum/data/rss/downsample_10/abnormal/ryerson_ab_train_sigOver/1518564397_880M_5m_sigOver_rss.txt
(156214, 1)
/net/adv_spectrum/data/rss/downsample_10/abnormal/ryerson_ab_train_sigOver/1518565194_880M_5m_sigOver_rss.txt
(156214, 1)
/net/adv_spectrum/data/rss/downsample_10/abnormal/ryerson_ab_train_sigOver/1518574357_880M_5m_sigOver_rss.txt
(156214, 1)
/net/adv_spectrum/data/rss/downsample_10/abnormal/ryerson_ab_train_sigOver/1518586722_880M_5m_sigOver_rss.txt
(156214, 1)
/net/adv_spectrum/data/rss/downsample_10/abnormal/ryerson_ab_train_sigOver/1518591120_880M_5m_sigOver_rss.txt
(156214, 1)
/net/adv_spectrum/data/rss/downsample_10/abnormal/ryerson_ab_train_sigOver/1518614667_880M_5m_sigOver_rss.txt
(156214, 1)
/net/adv_spectrum/data/rss/downsample_10/abnormal/ryerson_ab_train_sigOver/1518618240_880M_5m_sigOver_rss.txt
(156214, 1)
/net/adv_spectrum/data/r

In [14]:
# Window each series independently. `series_list` is only read (never popped),
# so re-running this cell produces the same result.
windowed_list = []
for i, X in enumerate(series_list):
    print('Converting the {}th array to window...'.format(i))
    X_windowed = array_to_window(X, window_size)
    if X_windowed.shape[0] == 0:
        # A series shorter than one window contributes nothing; skipping it
        # also avoids a shape mismatch in the concatenation below.
        continue
    windowed_list.append(X_windowed)

if not windowed_list:
    raise ValueError(
        'No series is long enough to fill a window of size {}'.format(window_size))

# Concatenate once at the end instead of re-copying the growing array on
# every iteration.
print('Concatenating...\n')
X_full = np.concatenate(windowed_list, axis=0)

# Flatten to 2-D with one window per row.
# NOTE(review): this assumes one feature per time step; with more features the
# reshape would interleave them across rows - confirm before reusing.
X_full = X_full.reshape(-1, X_full.shape[1])
print('Done converting and concatenating!')

Converting the 0th array to window...
Concatenating...

Converting the 1th array to window...
Concatenating...

Converting the 2th array to window...
Concatenating...

Converting the 3th array to window...
Concatenating...

Converting the 4th array to window...
Concatenating...

Converting the 5th array to window...
Concatenating...

Converting the 6th array to window...
Concatenating...

Converting the 7th array to window...
Concatenating...

Converting the 8th array to window...
Concatenating...

Converting the 9th array to window...
Concatenating...

Converting the 10th array to window...
Concatenating...

Converting the 11th array to window...
Concatenating...

Converting the 12th array to window...
Concatenating...

Converting the 13th array to window...
Concatenating...

Converting the 14th array to window...
Concatenating...

Converting the 15th array to window...
Concatenating...

Converting the 16th array to window...
Concatenating...

Converting the 17th array to window...
Co

In [15]:
# Sanity check: expect (total number of windows, window_size), one window per row.
X_full.shape

(3588, 1000)

In [16]:
# Write the windowed array to disk in NumPy's binary .npy format.
np.save(file=output_file, arr=X_full)