In [1]:
import os
from pathlib import Path
import pandas as pd

import boto3
from io import BytesIO

In [2]:
from enum import Enum
class CODE_ENV(Enum):
    """Environments this notebook can be configured to run in.

    The notebook compares a module-level selection against these members
    to decide where the input data is read from.
    """

    # Running in AWS EC2
    EC2 = 0
    # Running in IOT Device
    DEV = 1
    # Running in Win
    WIN = 2
   
# Show every environment the notebook knows about.
print([env for env in CODE_ENV])

# IMPORTANT: set this to the environment the notebook is currently running in.
code_env = CODE_ENV['WIN']

[<CODE_ENV.EC2: 0>, <CODE_ENV.DEV: 1>, <CODE_ENV.WIN: 2>]


In [18]:
# Locate the raw data for the selected environment.
if code_env == CODE_ENV.WIN:
    # Local run: the dataset is expected below the current working directory.
    curr_dir = os.getcwd()
    dataset_root_path = Path(curr_dir) / 'capstone-data' / '01_PHM-Bearing'
    if not dataset_root_path.is_dir():
        # Only reported here; execution continues with the missing path.
        print('Path ERROR!!!', str(dataset_root_path))

elif code_env == CODE_ENV.EC2:
    # Accessing 's3' without an embedded access key requires:
    #   1. Policy for user : Allow-S3-Passrole-to-EC2, AmazonS3FullAccess
    #   2. Role            : S3Admin
    aws_s3 = boto3.resource('s3')
    s3_bucket = aws_s3.Bucket('anomaly-detection-from-bearing-vibration-project-bucket')

    # Materialise the full object listing once so later cells can filter it.
    s3_bucket_objects = list(s3_bucket.objects.all())

elif code_env == CODE_ENV.DEV:
    # Nothing is set up for this environment.
    pass
        

In [29]:
# Split the available files into the three datasets for the selected environment.
if code_env == CODE_ENV.EC2:
    s3_objects_1st_dataset = []
    s3_objects_2nd_dataset = []
    s3_objects_3rd_dataset = []
    paths = []

    for s3_object in s3_bucket_objects:
        path_parts = Path(s3_object.key).parts
        # Keep only keys of the form data_input/IMS/<dataset folder>/<file>.
        if len(path_parts) == 4 and path_parts[0] == 'data_input' and path_parts[1] == 'IMS':
            paths.append(s3_object)
            if path_parts[2] == '1st_test':
                s3_objects_1st_dataset.append(s3_object)
            elif path_parts[2] == '2nd_test':
                s3_objects_2nd_dataset.append(s3_object)
            else:
                # Any other folder name is counted as the 3rd dataset.
                s3_objects_3rd_dataset.append(s3_object)

    # Summarise each dataset. An empty dataset reports None as its first file
    # instead of raising IndexError on [0].
    for dataset_label, dataset_objects in (
        ('1st', s3_objects_1st_dataset),
        ('2nd', s3_objects_2nd_dataset),
        ('3rd', s3_objects_3rd_dataset),
    ):
        first_key = dataset_objects[0].key if dataset_objects else None
        print('Number of files in {} Dataset:'.format(dataset_label), len(dataset_objects), 'first file=', first_key)

elif code_env == CODE_ENV.WIN:
    # Paths are kept as POSIX-style strings; each file list is sorted by file name.
    data_set1_path = dataset_root_path.as_posix() + '/1st_test'
    data_set2_path = dataset_root_path.as_posix() + '/2nd_test'
    data_set3_path = dataset_root_path.as_posix() + '/3rd_test'
    filelist_1st_dataset = [data_set1_path + '/' + src_path for src_path in sorted(os.listdir(data_set1_path))]
    filelist_2nd_dataset = [data_set2_path + '/' + src_path for src_path in sorted(os.listdir(data_set2_path))]
    filelist_3rd_dataset = [data_set3_path + '/' + src_path for src_path in sorted(os.listdir(data_set3_path))]

    # Summarise each dataset. An empty folder reports None as its first file
    # instead of raising IndexError on [0].
    for dataset_label, dataset_files in (
        ('1st', filelist_1st_dataset),
        ('2nd', filelist_2nd_dataset),
        ('3rd', filelist_3rd_dataset),
    ):
        first_file = dataset_files[0] if dataset_files else None
        print('Number of files in {} Dataset:'.format(dataset_label), len(dataset_files), 'first file=', first_file)
    
    

Number of files in 1st Dataset: 2156 first file= g:/My Drive/github/bearing-vibration-anomaly-detection/models/capstone-data/01_PHM-Bearing/1st_test/2003.10.22.12.06.24
Number of files in 2nd Dataset: 984 first file= g:/My Drive/github/bearing-vibration-anomaly-detection/models/capstone-data/01_PHM-Bearing/2nd_test/2004.02.12.10.32.39
Number of files in 3rd Dataset: 6324 first file= g:/My Drive/github/bearing-vibration-anomaly-detection/models/capstone-data/01_PHM-Bearing/3rd_test/2004.03.04.09.27.46


In [31]:
# The 1st set has 8 columns: two channels for each b1..b4 prefix.
col_names_1st = [
    'b1_ch1', 'b1_ch2',
    'b2_ch3', 'b2_ch4',
    'b3_ch5', 'b3_ch6',
    'b4_ch7', 'b4_ch8',
]
# The 2nd and 3rd sets have 4 columns: one channel for each b1..b4 prefix.
col_names_2nd_3rd = ['b1_ch1', 'b2_ch2', 'b3_ch3', 'b4_ch4']

# Column names per data set, indexed by data set number (0-based).
col_names_set = [col_names_1st, col_names_2nd_3rd, col_names_2nd_3rd]

# Column groups per data set: one inner list per b1..b4 prefix.
select_columns = [
    # 1st set: consecutive pairs of columns.
    [col_names_1st[start:start + 2] for start in range(0, len(col_names_1st), 2)],
    # 2nd set: one single-column group per name.
    [[name] for name in col_names_2nd_3rd],
    # 3rd set: same layout as the 2nd set, built as a separate list.
    [[name] for name in col_names_2nd_3rd],
]

# Per-dataset file listings, indexed by data set number (0-based).
# Stays empty when the selected environment is neither EC2 nor WIN.
data_set_paths = []
if code_env == CODE_ENV.EC2:
    data_set_paths = [s3_objects_1st_dataset, s3_objects_2nd_dataset, s3_objects_3rd_dataset]
    # Verify variables: entries are S3 objects, so the first file is shown by key.
    for dataset_ordinal, dataset_entries in zip(('1st', '2nd', '3rd'), data_set_paths):
        print('Number of files in', dataset_ordinal, 'Dataset:', len(dataset_entries), 'first file=', dataset_entries[0].key)

elif code_env == CODE_ENV.WIN:
    data_set_paths = [filelist_1st_dataset, filelist_2nd_dataset, filelist_3rd_dataset]
    # Verify variables: entries are path strings and are shown as-is.
    for dataset_ordinal, dataset_entries in zip(('1st', '2nd', '3rd'), data_set_paths):
        print('Number of files in', dataset_ordinal, 'Dataset:', len(dataset_entries), 'first file=', dataset_entries[0])



Number of files in 1st Dataset: 2156 first file= g:/My Drive/github/bearing-vibration-anomaly-detection/models/capstone-data/01_PHM-Bearing/1st_test/2003.10.22.12.06.24
Number of files in 2nd Dataset: 984 first file= g:/My Drive/github/bearing-vibration-anomaly-detection/models/capstone-data/01_PHM-Bearing/2nd_test/2004.02.12.10.32.39
Number of files in 3rd Dataset: 6324 first file= g:/My Drive/github/bearing-vibration-anomaly-detection/models/capstone-data/01_PHM-Bearing/3rd_test/2004.03.04.09.27.46


In [32]:
select_data_set = 0
select_input_stepsize = 3000

# Trial: read the first file of the first data set to check the parsing settings.
# The file is parsed as tab-separated text without a header row, so the column
# names are supplied explicitly.
# `df` stays an empty DataFrame when the selected environment is neither EC2 nor WIN.
df = pd.DataFrame()
if code_env == CODE_ENV.EC2:
    # Download the whole object and parse it from memory.
    s3_object = data_set_paths[0][0]
    data = s3_object.get()['Body'].read()
    # low_memory must be the boolean False: the string 'False' is truthy and
    # would leave low-memory (chunked) parsing switched on.
    df = pd.read_csv(BytesIO(data), header=None, delimiter='\t', names=col_names_set[0], low_memory=False)
elif code_env == CODE_ENV.WIN:
    # low_memory must be the boolean False here as well, for the same reason.
    df = pd.read_csv(data_set_paths[0][0], header=None, delimiter='\t', names=col_names_set[0], low_memory=False)

df.head()

Unnamed: 0,b1_ch1,b1_ch2,b2_ch3,b2_ch4,b3_ch5,b3_ch6,b4_ch7,b4_ch8
0,-0.022,-0.039,-0.183,-0.054,-0.105,-0.134,-0.129,-0.142
1,-0.105,-0.017,-0.164,-0.183,-0.049,0.029,-0.115,-0.122
2,-0.183,-0.098,-0.195,-0.125,-0.005,-0.007,-0.171,-0.071
3,-0.178,-0.161,-0.159,-0.178,-0.1,-0.115,-0.112,-0.078
4,-0.208,-0.129,-0.261,-0.098,-0.151,-0.205,-0.063,-0.066
