<a href="https://colab.research.google.com/github/fboldt/SignalProcessing/blob/master/cwru_segmentation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# CWRU files.


Associate each Matlab file name with a bearing condition in a Python dictionary. The dictionary keys identify the conditions.

There are only four normal conditions, with loads of 0, 1, 2 and 3 hp. All conditions end with an underscore character followed by a digit representing the load applied during the acquisitions. The remaining conditions follow the pattern:

The first two characters represent the bearing location, i.e. drive end (DE) or fan end (FE). The following one or two characters represent the failure location in the bearing, i.e. ball (B), inner race (IR) or outer race (OR). After a dot, the next three digits indicate the severity of the failure, where 007 stands for 0.007 inches and 021 for 0.021 inches. For outer race failures, the character @ is followed by a number giving the clock position at which the fault is centered (6:00, 3:00 or 12:00), i.e. its location relative to the load zone.



In [0]:
# Number of consecutive signal values in each segment: 512 for regular runs,
# 32768 when the debug switch is turned on.
debug = False
sample_size = 32768 if debug else 512
# Condition name -> Matlab file name. Insertion order matters: class ids are
# handed out in the order in which fault types first appear in this table.
acquisitions = {
  # Normal
  "Normal_0": "97.mat",
  "Normal_1": "98.mat",
  "Normal_2": "99.mat",
  "Normal_3": "100.mat",
  # DE Inner Race 0.007 inches
  "DEIR.007_0": "105.mat",
  "DEIR.007_1": "106.mat",
  "DEIR.007_2": "107.mat",
  "DEIR.007_3": "108.mat",
  # DE Ball 0.007 inches
  "DEB.007_0": "118.mat",
  "DEB.007_1": "119.mat",
  "DEB.007_2": "120.mat",
  "DEB.007_3": "121.mat",
  # DE Outer race 0.007 inches centered @6:00
  "DEOR.007@6_0": "130.mat",
  "DEOR.007@6_1": "131.mat",
  "DEOR.007@6_2": "132.mat",
  "DEOR.007@6_3": "133.mat",
  # DE Outer race 0.007 inches centered @3:00
  "DEOR.007@3_0": "144.mat",
  "DEOR.007@3_1": "145.mat",
  "DEOR.007@3_2": "146.mat",
  "DEOR.007@3_3": "147.mat",
  # DE Outer race 0.007 inches centered @12:00
  "DEOR.007@12_0": "156.mat",
  "DEOR.007@12_1": "158.mat",
  "DEOR.007@12_2": "159.mat",
  "DEOR.007@12_3": "160.mat",
  # DE Inner Race 0.014 inches
  "DEIR.014_0": "169.mat",
  "DEIR.014_1": "170.mat",
  "DEIR.014_2": "171.mat",
  "DEIR.014_3": "172.mat",
  # DE Ball 0.014 inches
  "DEB.014_0": "185.mat",
  "DEB.014_1": "186.mat",
  "DEB.014_2": "187.mat",
  "DEB.014_3": "188.mat",
  # DE Outer race 0.014 inches centered @6:00
  "DEOR.014@6_0": "197.mat",
  "DEOR.014@6_1": "198.mat",
  "DEOR.014@6_2": "199.mat",
  "DEOR.014@6_3": "200.mat",
  # DE Ball 0.021 inches
  "DEB.021_0": "222.mat",
  "DEB.021_1": "223.mat",
  "DEB.021_2": "224.mat",
  "DEB.021_3": "225.mat",
  # FE Inner Race 0.021 inches
  "FEIR.021_0": "270.mat",
  "FEIR.021_1": "271.mat",
  "FEIR.021_2": "272.mat",
  "FEIR.021_3": "273.mat",
  # FE Inner Race 0.014 inches
  "FEIR.014_0": "274.mat",
  "FEIR.014_1": "275.mat",
  "FEIR.014_2": "276.mat",
  "FEIR.014_3": "277.mat",
  # FE Ball 0.007 inches
  "FEB.007_0": "282.mat",
  "FEB.007_1": "283.mat",
  "FEB.007_2": "284.mat",
  "FEB.007_3": "285.mat",
  # DE Inner Race 0.021 inches
  "DEIR.021_0": "209.mat",
  "DEIR.021_1": "210.mat",
  "DEIR.021_2": "211.mat",
  "DEIR.021_3": "212.mat",
  # DE Outer race 0.021 inches centered @6:00
  "DEOR.021@6_0": "234.mat",
  "DEOR.021@6_1": "235.mat",
  "DEOR.021@6_2": "236.mat",
  "DEOR.021@6_3": "237.mat",
  # DE Outer race 0.021 inches centered @3:00
  "DEOR.021@3_0": "246.mat",
  "DEOR.021@3_1": "247.mat",
  "DEOR.021@3_2": "248.mat",
  "DEOR.021@3_3": "249.mat",
  # DE Outer race 0.021 inches centered @12:00
  "DEOR.021@12_0": "258.mat",
  "DEOR.021@12_1": "259.mat",
  "DEOR.021@12_2": "260.mat",
  "DEOR.021@12_3": "261.mat",
  # FE Inner Race 0.007 inches
  "FEIR.007_0": "278.mat",
  "FEIR.007_1": "279.mat",
  "FEIR.007_2": "280.mat",
  "FEIR.007_3": "281.mat",
  # FE Ball 0.014 inches
  "FEB.014_0": "286.mat",
  "FEB.014_1": "287.mat",
  "FEB.014_2": "288.mat",
  "FEB.014_3": "289.mat",
  # FE Ball 0.021 inches
  "FEB.021_0": "290.mat",
  "FEB.021_1": "291.mat",
  "FEB.021_2": "292.mat",
  "FEB.021_3": "293.mat",
  # FE Outer race 0.007 inches centered @6:00
  "FEOR.007@6_0": "294.mat",
  "FEOR.007@6_1": "295.mat",
  "FEOR.007@6_2": "296.mat",
  "FEOR.007@6_3": "297.mat",
  # FE Outer race 0.007 inches centered @3:00
  "FEOR.007@3_0": "298.mat",
  "FEOR.007@3_1": "299.mat",
  "FEOR.007@3_2": "300.mat",
  "FEOR.007@3_3": "301.mat",
  # FE Outer race 0.007 inches centered @12:00
  "FEOR.007@12_0": "302.mat",
  "FEOR.007@12_1": "305.mat",
  "FEOR.007@12_2": "306.mat",
  "FEOR.007@12_3": "307.mat",
  # FE Outer race 0.014 inches centered @3:00
  "FEOR.014@3_0": "310.mat",
  "FEOR.014@3_1": "309.mat",
  "FEOR.014@3_2": "311.mat",
  "FEOR.014@3_3": "312.mat",
  # FE Outer race 0.014 inches centered @6:00
  "FEOR.014@6_0": "313.mat",
  # FE Outer race 0.021 inches centered @6:00
  "FEOR.021@6_0": "315.mat",
  # FE Outer race 0.021 inches centered @3:00
  "FEOR.021@3_1": "316.mat",
  "FEOR.021@3_2": "317.mat",
  "FEOR.021@3_3": "318.mat",
  # DE Inner Race 0.028 inches
  "DEIR.028_0": "3001.mat",
  "DEIR.028_1": "3002.mat",
  "DEIR.028_2": "3003.mat",
  "DEIR.028_3": "3004.mat",
  # DE Ball 0.028 inches
  "DEB.028_0": "3005.mat",
  "DEB.028_1": "3006.mat",
  "DEB.028_2": "3007.mat",
  "DEB.028_3": "3008.mat",
}

# Functions definitions


In [0]:
def get_labels_dict(acquisitions, separator='_', detectPosition=True):
  """
  Build a mapping from condition label to a consecutive integer id.

  acquisitions: dictionary whose keys are condition names such as "DEIR.007_0"
  separator: the label is the part of the name found before this string
             (after the trailing "_<load>" has been removed)
  detectPosition: when false, the first two characters of every label other
                  than "Normal" are dropped, so "DEIR" and "FEIR" share "IR"

  Ids start at 0 and follow the order in which labels are first met.
  """
  labels_dict = {}
  for condition in acquisitions:
    name = condition.split('_')[0].split(separator)[0]
    keep_whole_name = detectPosition or name == "Normal"
    label = name if keep_whole_name else name[2:]
    # Only a label seen for the first time receives the next free id.
    labels_dict.setdefault(label, len(labels_dict))
  return labels_dict

Convert Matlab file into tensors.


In [0]:
import scipy.io
import numpy as np
def _find_channel_key(matlab_file, suffix):
  """Return the first variable name of the loaded Matlab dict that ends with suffix."""
  for key in matlab_file:
    if key.endswith(suffix):
      return key
  raise KeyError("no variable ending with '" + suffix + "' in Matlab file")


def _segment_signal(signal, sample_size, n_segments):
  """Cut the first n_segments*sample_size rows of signal into one row per segment."""
  signal = np.asarray(signal)
  # All values of a row are kept, in row-major order; for the usual column
  # vector this is one value per row.
  values_per_row = int(np.prod(signal.shape[1:]))
  used = signal[:n_segments * sample_size]
  return used.reshape(n_segments, sample_size * values_per_row)


def acquisition2tensor(file_name, position=None, sample_size=sample_size):
  """
  Convert Matlab file into tensors.
  The file is divided in consecutive, non-overlapping segments of sample_size
  values; an incomplete trailing segment is discarded, a complete one is kept
  (also when the signal length is an exact multiple of sample_size).

  file_name: path of the .mat file, read with scipy.io.loadmat
  position: 'DE' (variable ending in "DE_time"), 'FE' (variable ending in
            "FE_time") or None for both, drive end first
  sample_size: number of signal rows in each segment, must be positive

  Returns a float32 array with one segment per index of the first axis and one
  channel per index of the last axis. With both channels the number of
  segments is limited by the shorter signal so that every segment is complete.

  Raises ValueError for a non-positive sample_size or an unknown position and
  KeyError when the requested variable is not in the file.
  """
  suffixes_by_position = {
    None: ("DE_time", "FE_time"),
    'DE': ("DE_time",),
    'FE': ("FE_time",),
  }
  if sample_size <= 0:
    raise ValueError("sample_size must be positive, got " + repr(sample_size))
  if position not in suffixes_by_position:
    raise ValueError("position must be None, 'DE' or 'FE', got " + repr(position))

  print(file_name, end=' ')
  matlab_file = scipy.io.loadmat(file_name)

  signals = [matlab_file[_find_channel_key(matlab_file, suffix)]
             for suffix in suffixes_by_position[position]]
  # The shortest channel bounds the number of complete segments.
  n_segments = min(len(signal) for signal in signals) // sample_size
  channels = [_segment_signal(signal, sample_size, n_segments) for signal in signals]
  return np.stack(channels, axis=2).astype('float32')

Extract datasets from acquisitions.


In [0]:
def concatenate_datasets(xd,yd,xo,yo):
  """
  Append an origin dataset to a destination dataset.

  xd: destination patterns tensor (None while the destination is still empty)
  yd: destination labels tensor (None while the destination is still empty)
  xo: origin patterns tensor to be concatenated
  yo: origin labels tensor to be concatenated

  Returns the pair (patterns, labels). When either destination tensor is None
  the origin tensors are returned as they are.
  """
  destination_is_empty = xd is None or yd is None
  if destination_is_empty:
    return xo, yo
  return np.concatenate((xd, xo)), np.concatenate((yd, yo))

import urllib.request
import os.path

def acquisitions_from_substr(substr, acquisitions, labels_dict, position=None,
                             url="http://csegroups.case.edu/sites/default/files/bearingdatacenter/files/Datafiles/"):
  """
  Extract samples from every acquisition whose condition name contains substr.

  substr: text searched in the condition names (converted with str(), so a
          number is accepted as well)
  acquisitions: dictionary condition name -> Matlab file name
  labels_dict: dictionary label -> numeric id; the first label contained in a
               condition name gives the id of all its samples
  position: 'DE' or 'FE' to read that channel from every file. With None, both
            channels are read when substr and the condition name start with
            the same two characters; otherwise the channel named by the first
            two characters of the condition name is read.
  url: prefix prepended to a file name to download a file missing locally

  Returns (samples, labels), both None when no condition name matches.
  Raises KeyError when a matching condition contains none of the labels.
  """
  pattern = str(substr)
  samples = None
  labels = None
  for key in acquisitions:
    if pattern not in key:
      continue
    # Resolve the class first, so an unusable labels_dict is reported instead
    # of silently tagging the samples with an unrelated label.
    label = next((name for name in labels_dict if name in key), None)
    if label is None:
      raise KeyError("no label of labels_dict is contained in condition '" + key + "'")
    file_name = acquisitions[key]
    if not os.path.exists(file_name):
      urllib.request.urlretrieve(url+file_name, file_name)
    if position in ('DE', 'FE'):
      channel = position
    elif position is None and pattern[:2] == key[:2]:
      channel = None
    else:
      channel = key[:2]
    acquisition_samples = acquisition2tensor(file_name, channel)
    acquisition_labels = np.ones(acquisition_samples.shape[0])*labels_dict[label]
    samples,labels = concatenate_datasets(samples,labels,acquisition_samples,acquisition_labels)
  print(substr)
  return samples,labels

# Downloading and converting the Matlab files


Extract samples.


In [0]:
# Class ids per fault type: names are cut at the '.' and, with
# detectPosition=False, lose their two-character DE/FE prefix.
labels_dict = get_labels_dict(acquisitions, separator='.', detectPosition=False)
print(labels_dict)
def normal_indenpendent_position_acquisitions(load,acquisitions,labels_dict):
  """
  Gather the normal-condition samples of one load from both channels.

  The "Normal_<load>" acquisitions are extracted once with position 'DE' and
  once with position 'FE'; the two results are concatenated in that order.
  Returns the pair (patterns, labels).
  """
  normal_key = 'Normal_' + str(load)
  x = y = None
  for position in ('DE', 'FE'):
    position_x, position_y = acquisitions_from_substr(normal_key, acquisitions, labels_dict, position)
    x, y = concatenate_datasets(x, y, position_x, position_y)
  return x, y

severities = ['007','014','021','028']

# Normal-condition datasets, one per load. The variable names are spelled out
# because other cells look them up by name ('xn_'/'yn_' + load).
(xn_0,yn_0), (xn_1,yn_1), (xn_2,yn_2), (xn_3,yn_3) = [
  normal_indenpendent_position_acquisitions(load,acquisitions,labels_dict)
  for load in range(4)]

# Fault datasets, one per severity, looked up by name as 'x'/'y' + severity.
(x007,y007), (x014,y014), (x021,y021), (x028,y028) = [
  acquisitions_from_substr(severity,acquisitions,labels_dict)
  for severity in severities]

{'Normal': 0, 'IR': 1, 'B': 2, 'OR': 3}
97.mat Normal_0
97.mat Normal_0
98.mat Normal_1
98.mat Normal_1
99.mat Normal_2
99.mat Normal_2
100.mat Normal_3
100.mat Normal_3
105.mat 106.mat 107.mat 108.mat 118.mat 119.mat 120.mat 121.mat 130.mat 131.mat 132.mat 133.mat 144.mat 145.mat 146.mat 147.mat 156.mat 158.mat 159.mat 160.mat 282.mat 283.mat 284.mat 285.mat 278.mat 279.mat 280.mat 281.mat 294.mat 295.mat 296.mat 297.mat 298.mat 299.mat 300.mat 301.mat 302.mat 305.mat 306.mat 307.mat 007
169.mat 170.mat 171.mat 172.mat 185.mat 186.mat 187.mat 188.mat 197.mat 198.mat 199.mat 200.mat 274.mat 275.mat 276.mat 277.mat 286.mat 287.mat 288.mat 289.mat 310.mat 309.mat 311.mat 312.mat 313.mat 014
222.mat 223.mat 224.mat 225.mat 270.mat 271.mat 272.mat 273.mat 209.mat 210.mat 211.mat 212.mat 234.mat 235.mat 236.mat 237.mat 246.mat 247.mat 248.mat 249.mat 258.mat 259.mat 260.mat 261.mat 290.mat 291.mat 292.mat 293.mat 315.mat 316.mat 317.mat 318.mat 021
3001.mat 3002.mat 3003.mat 3004.mat 3005.m

Count number of samples.



In [0]:
# Print a table with the number of samples per label (rows) and severity
# (columns), followed by the column totals and the grand total.
print("Label", end='\t')
for s in severities:
  print(s, end='\t')
print("total")
# Counts per (label row, column); sized for four labels and four columns.
mat = np.zeros((4,4))
i = 0
for label,value in labels_dict.items():
  print(label, end='\t')
  tsamples = 0
  if label == 'Normal':
    print(4*'\t'+'...')
    # One output line per normal load. With four severities the expression
    # (load+len(severities)+1)%4 visits the loads in the order 1, 2, 3, 0, so
    # column `load` receives the count of the yn_<load+1 mod 4> labels.
    for load in range(4):
      print(' '+str((load+len(severities)+1)%4)+(load)*'\t', end='\t')
      # The label array is a module-level variable resolved by name.
      mat[i][load] = list(eval('yn_'+str((load+len(severities)+1)%4))).count(value)
      print(int(mat[i][load]))
  else:
    for j,severity in enumerate(severities):
      # Module-level label array of this severity (y007, y014, ...).
      tmp = eval('y'+str(severity))
      if tmp is not None:
        nsamples = list(tmp).count(value)
        mat[i][j] = nsamples
        print(nsamples, end='\t')
        tsamples += nsamples
      else:
        # No acquisition matched this severity at all.
        print('0', end='\t')
    print(tsamples)
  i+=1
# Column sums, then the sum of all cells.
total = np.sum(mat,axis=0)
print("Total:", end='\t')
for i in range(len(total)):
  print(int(total[i]), end='\t')
print(int(np.sum(total)))

Label	007	014	021	028	total
Normal					...
 1	1890
 2		1890
 3			1896
 0				952
IR	1900	1895	1894	946	6635
B	1894	1900	1897	944	6635
OR	5694	2131	3798	0	11623
Total:	11378	7816	9485	2842	31521


# Generating CSV files


In [0]:
import os

# Reverse lookup of labels_dict: numeric id -> label.
cond_dict = dict(zip(labels_dict.values(), labels_dict.keys()))

def write_csv(severity, samples, labels):
  """
  Save every sample as a CSV file under cwru/<severity>/<label>/.

  severity: name of the directory created below "cwru"
  samples: indexable collection; samples[i] is written with np.savetxt
  labels: array of numeric ids, translated to directory names with cond_dict

  Files are named after the sample index, zero-padded to the number of digits
  of the total label count. Missing directories are created on the way.
  """
  def ensure_dir(path):
    if not os.path.exists(path):
      os.makedirs(path)
    return path

  ensure_dir("cwru")
  sevdir = ensure_dir('cwru/'+severity)
  for index, value in enumerate(labels):
    condir = ensure_dir(sevdir+'/'+cond_dict[value])
    padded_index = str(index).zfill(len(str(labels.shape[0])))
    np.savetxt(condir+'/'+padded_index+'.csv', samples[index], delimiter=',')

In [0]:
def write_dataset(severity, normal_load):
  """
  Write the fault samples of one severity plus the normal samples of one load.

  The arrays are module-level variables found by name: x<severity> and
  y<severity> for the faults, xn_<normal_load> and yn_<normal_load> for the
  normal condition. The joined dataset goes to write_csv under `severity`.
  """
  fault_x = eval('x'+severity)
  fault_y = eval('y'+severity)
  normal_x = eval('xn_'+str(normal_load))
  normal_y = eval('yn_'+str(normal_load))
  x,y = concatenate_datasets(fault_x, fault_y, normal_x, normal_y)
  write_csv(severity,x,y)

# Each severity is written together with the normal samples of one load.
for severity, normal_load in [('007',1), ('014',2), ('021',3), ('028',0)]:
  write_dataset(severity, normal_load)

In [0]:
import shutil
# Pack the whole "cwru" directory into cwru_segmented.zip.
output_filename, ext = 'cwru_segmented', 'zip'
shutil.make_archive(output_filename, ext, 'cwru')
zipfile_name = '{}.{}'.format(output_filename, ext)

In [0]:
from google.colab import files
import time
# Poll once per second until the archive exists on disk, then pass it to the
# Colab download helper.
archive_ready = os.path.exists(zipfile_name)
while not archive_ready:
  time.sleep(1)
  archive_ready = os.path.exists(zipfile_name)
files.download(zipfile_name)