### Data Reading and Extraction
The data used here is channel 1 of the NASA bearing dataset (2nd_test); at the end of this test-to-failure experiment, an outer race failure occurred in bearing 1.  
The following steps are performed in this notebook:
1. Reading the data
2. Extracting the channel 1 data and saving it to a CSV file

In [16]:
import os
import pandas as pd
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from natsort import natsorted
import scipy.stats as stats

# Folder that holds the NASA bearing data files
folder_path = 'data'
# Keep regular, non-hidden files only: sub-directories (e.g. .ipynb_checkpoints)
# and OS artefacts (e.g. .DS_Store) are not measurement files and would
# otherwise inflate the count and be handed to the reader.
# natsorted returns the paths as a list in natural sort order.
file_paths = natsorted(
    os.path.join(folder_path, name)
    for name in os.listdir(folder_path)
    if not name.startswith('.')
    and os.path.isfile(os.path.join(folder_path, name))
)
print(f"Total files: {len(file_paths)}")


Total files: 984


In [17]:
# Single-file reader, used as the worker function of the parallel read
def load_file(file_path):
    """Load the first column of a tab-separated, header-less file.

    Parameters
    ----------
    file_path : str
        Path of the file to read.

    Returns
    -------
    pandas.DataFrame
        One float32 column labelled ``0``. If reading fails for any reason,
        the error is printed and an empty DataFrame is returned instead.
    """
    read_options = dict(delimiter='\t', header=None, usecols=[0], dtype=np.float32)
    try:
        frame = pd.read_csv(file_path, **read_options)
    except Exception as e:
        # Best effort: report the problem and hand back an empty frame so
        # that one unreadable file does not abort the whole run.
        print(f"Error reading {file_path}: {e}")
        frame = pd.DataFrame()
    return frame

def parallel_read(file_paths, n_threads=8):
    """Read many files concurrently and stack them into one DataFrame.

    Every path is passed to ``load_file`` on a thread pool. ``executor.map``
    yields results in the order of ``file_paths``, so the rows of the
    combined frame follow the input order. Empty frames (which is what
    ``load_file`` returns for a file it could not read) are left out.

    Parameters
    ----------
    file_paths : iterable of str
        Paths of the files to read.
    n_threads : int, default 8
        Number of worker threads.

    Returns
    -------
    pandas.DataFrame
        Concatenation of all non-empty per-file frames, re-indexed 0..N-1.

    Raises
    ------
    ValueError
        If ``file_paths`` is empty or no file produced any rows. Without
        this check ``pd.concat`` would fail with the much less helpful
        "No objects to concatenate".
    """
    file_paths = list(file_paths)
    with ThreadPoolExecutor(max_workers=n_threads) as executor:
        # Filter out empty DataFrames while collecting the results
        data_frames = [df for df in executor.map(load_file, file_paths) if not df.empty]
    if not data_frames:
        raise ValueError(
            f"No data could be read from any of the {len(file_paths)} file(s)"
        )
    return pd.concat(data_frames, ignore_index=True)

# Read every listed file with 8 worker threads and stack the results
combined_data = parallel_read(file_paths=file_paths, n_threads=8)

In [18]:
# Report the (rows, columns) shape of the combined frame
print("Data shape after reading: {}".format(combined_data.shape))

Data shape after reading: (20152320, 1)


In [19]:
# Peek at the first five rows
print(combined_data.head(5))

       0
0 -0.049
1 -0.042
2  0.015
3 -0.051
4 -0.107


In [20]:
# Number of missing values in the first column
combined_data.isna().sum().iloc[0]

np.int64(0)

In [21]:
# Summary statistics; the quartiles listed are pandas' defaults, spelled out
print(combined_data.describe(percentiles=[0.25, 0.5, 0.75]))

                  0
count  2.015232e+07
mean  -1.942867e-03
std    1.210052e-01
min   -5.000000e+00
25%   -6.100000e-02
50%   -2.000000e-03
75%    5.900000e-02
max    4.998000e+00


In [22]:
# Write the combined channel 1 data to disk as a bare CSV:
# values only, with no header row and no index column
output_csv_path = "channel_1_data.csv"
combined_data.to_csv(path_or_buf=output_csv_path, header=False, index=False)
print("Data saved as CSV file as: " + output_csv_path)

Data saved as CSV file as: channel_1_data.csv
