# Spectrogram and Power Spectrum Generator

This part of the project uses deep learning to automate feature extraction. For that purpose we convert the waveforms into spectrogram images. This notebook can be skipped because the data it generates is provided in the file "spectrograms.zip".

<a href="https://colab.research.google.com/drive/1hP9gDx5AkAymNtzz9ZdeJgjlLtmZ0C-p#scrollTo=3-ruJnWwzFxn">Colab</a>



In [1]:
from scipy import signal
from scipy.fft import fftshift
import matplotlib.pyplot as plt
import pandas as pd
from Scripts.datosadash import datosADASH
import re
import os
import pandas as pd
import numpy as np
import pywt

In [3]:
# Build the two tables used to label the generated images.
#
# ``diag`` holds the diagnoses made by the analyst, loaded from
# "diagnosis.csv". ``spectro_diag`` starts out empty and later receives one
# row per generated image (file path, measurement time and machine); it is
# then merged with ``diag`` so that every image gets a diagnosis.

diag = pd.read_csv('Data/diagnosis.csv')

# 'machine' is declared up front: every row recorded later carries it and the
# merge with ``diag`` is grouped by that column, so it must exist even when
# no image is generated.
spectro_diag = pd.DataFrame(columns=['file', 'time', 'machine'])

diag.head()

Unnamed: 0.1,Unnamed: 0,Nombre,diagnostico,Id_Estado_Activo_fixed,time,max_mms,machine
0,0,S3-Ventilador M38,Motor: Activo en buen estado.\n Ventilador: S...,2.0,2019-06-18 12:38:26.205,9.2617,S3-M38
1,1,S3-Ventilador M38,Motor: Activo en buen estado.\n Ventilador: S...,2.0,2019-06-18 12:38:41.249,6.4828,S3-M38
2,2,S3-Ventilador M38,Motor: Activo en buen estado.\n Ventilador: S...,2.0,2019-06-18 12:38:57.866,7.4407,S3-M38
3,3,S3-Ventilador M38,Motor: Activo en buen estado.\n Ventilador: S...,2.0,2019-06-18 12:39:08.660,6.9886,S3-M38
4,4,S3-Ventilador M38,Motor: Activo en buen estado.\n Ventilador: S...,2.0,2019-06-18 12:39:20.397,7.3701,S3-M38


In [4]:
# Point the project's datosADASH helper at the folder with the exported
# waveforms and ask it for the machines, the points and the files.
# NOTE(review): judging by how they are used further down, ``points`` maps
# each machine to an iterable of point names and ``files`` is a list of file
# paths -- confirm against Scripts.datosadash.
path = 'Data/Export'
folder = datosADASH(path)

machines, points, files = (
    folder.getmachines(),
    folder.getpoints(),
    folder.getfiles(),
)

In [32]:
# Spectrograms are created with SciPy; it takes a few minutes to generate them.
#
# For every acceleration-waveform CSV of every machine/point, one spectrogram
# image is written to the "Spectrograms" folder under ``path`` and one row
# (file, time, machine) is recorded in ``spectro_diag``.

# Folder where the spectrogram images are saved.
spectrogram_dir = os.path.join(path, "Spectrograms")
os.makedirs(spectrogram_dir, exist_ok=True)

# Text contained in the file names of the acceleration waveform exports.
acc_spec_text = "Aceleracion - Forma"

# Sampling frequency handed to scipy.signal.spectrogram.
# NOTE(review): hard-coded; assumed to be valid for every export -- confirm.
fs = 12000

# Rows are collected in a plain list and turned into a dataframe once at the
# end: DataFrame.append was removed in pandas 2.0 (and copied the whole frame
# on every call).
spectro_rows = []

for machine, points_iter in points.items():
    machine_files = [name for name in files if machine in name]

    for point in points_iter:
        # Acceleration waveform CSV files of this machine and point.
        accspec = [
            name for name in machine_files
            if point in name and ".csv" in name and acc_spec_text in name
        ]

        for filetime in accspec:
            # The first line of the export is a header line; the samples
            # start on the second line.
            forma = pd.read_csv(filetime, skiprows=1,
                                delimiter=";", encoding="ISO-8859-1")
            forma = forma.rename(columns={'time[ms]': 'time',
                                          ' amplitude[g]': 'amplitude'})
            # The date is the second field of the first line, without its
            # first six characters.
            date = pd.read_csv(filetime, nrows=0,
                               delimiter=";", encoding="ISO-8859-1").columns[1][6:]

            # Create the spectrogram.
            f, t, Sxx = signal.spectrogram(forma['amplitude'], fs)

            # Image path WITHOUT extension; ".png" is added when saving and
            # again, later on, when the images are sorted into class folders.
            date_sub = re.sub('[^A-Za-z0-9]+', '', date)
            filename = str(date_sub) + "_" + str(machine) + "_" + str(point)
            filename = os.path.join(spectrogram_dir, filename)
            filename = filename.replace('\\', '/')

            fig, ax = plt.subplots(1)
            try:
                fig.subplots_adjust(left=0, right=1, bottom=0, top=1)
                ax.pcolormesh(t, f, Sxx)
                ax.axis('tight')
                ax.axis('off')
                # savefig has no ``figsize`` argument (passing it was
                # deprecated in matplotlib 3.3 and is an error in later
                # releases); the image size comes from the figure itself.
                # The extension is explicit so that a dot inside a machine or
                # point name cannot be mistaken for a file format.
                fig.savefig(filename + '.png', transparent=True,
                            pad_inches=0.0, dpi=96)
            finally:
                # Always release the figure, even when saving fails.
                plt.close(fig)

            spectro_rows.append({"file": filename,
                                 "time": date,
                                 "machine": machine})

# Built from scratch, so running the cell again does not duplicate rows.
spectro_diag = pd.DataFrame(spectro_rows, columns=['file', 'time', 'machine'])

  plt.pcolormesh(t, f, Sxx)
  fig.savefig(filename, transparent=True, pad_inches=0.0,


In [54]:
# Now we need to create a dataframe where images and status 
# are correlated.

def sorted_index_by_time(df):
    """Return a copy of *df* ordered chronologically by its 'time' column.

    The 'time' column is converted with ``pd.to_datetime`` *before* sorting,
    so the rows are ordered by the actual timestamps and not by the text of
    the dates (which gives a different order for formats such as
    "12/31/2019"). Despite the name, the index is not changed: the rows keep
    their original index labels.

    Args:
        df: Dataframe with a 'time' column that ``pd.to_datetime`` can parse.

    Returns:
        A new dataframe sorted by 'time', with 'time' as datetime values.
        *df* itself is left unmodified.
    """
    converted = df.assign(time=pd.to_datetime(df['time']))
    return converted.sort_values(['time'])

# Both tables are put in time order before they are joined.
diag, spectro_diag = sorted_index_by_time(diag), sorted_index_by_time(spectro_diag)

# Give every image the diagnosis of the same machine that is closest in time
# (earlier or later), provided it lies within 10 days.
merged_diagnostic = pd.merge_asof(
    left=spectro_diag,
    right=diag,
    on="time",
    by=['machine'],
    direction='nearest',
    tolerance=pd.Timedelta('10 days'),
)

# Columns of interest (bare expression; the result is not stored).
merged_diagnostic[['file', 'time', 'machine', 'Nombre', 'diagnostico',
                   'Id_Estado_Activo_fixed']]

# Keep only the rows that have no missing value.
merged_diagnostic = merged_diagnostic.dropna()

In [55]:
# For deep learning the images are moved into one folder per label:
# "class_a" when Id_Estado_Activo_fixed equals 1 and "class_b" when it is
# greater than 1.

folder_a = os.path.join(path, 'Spectrograms/class_a/')
folder_b = os.path.join(path, 'Spectrograms/class_b/')
os.makedirs(folder_a, exist_ok=True)
os.makedirs(folder_b, exist_ok=True)

# The recorded paths have no extension while the images on disk end in
# ".png". The extension is added only where it is still missing, so running
# the cell a second time does not produce ".png.png".
has_png = merged_diagnostic['file'].str.endswith('.png')
merged_diagnostic['file'] = merged_diagnostic['file'].where(
    has_png, merged_diagnostic['file'] + '.png')

# Bare file name (last component of the "/"-separated path).
merged_diagnostic['filecut'] = merged_diagnostic['file'].str.split('/').str[-1]

for file, filecut, state in zip(merged_diagnostic['file'],
                                merged_diagnostic['filecut'],
                                merged_diagnostic['Id_Estado_Activo_fixed']):
    if state == 1.0:
        target_dir = folder_a
    elif state > 1:
        target_dir = folder_b
    else:
        # No class is defined for other values; the image stays where it is.
        continue

    final_filename = target_dir + filecut
    if not os.path.exists(file) and os.path.exists(final_filename):
        # Already moved by an earlier run of this cell.
        continue
    os.rename(file, final_filename)
    

In [63]:
# The images still left in the Spectrograms folder whose file name starts
# with "2021" are moved to a separate "prediction" folder.
# NOTE(review): presumably these are the measurements that are not used for
# training -- confirm.

import os
import glob

path_pre = "Data/Export/Spectrograms/prediction/"
os.makedirs(path_pre, exist_ok=True)

for filename in glob.glob("Data/Export/Spectrograms/2021*"):
    # basename copes with both "/" and the "\" separators that glob returns
    # on Windows; splitting on "/" alone would keep part of the folder there.
    filename_end = os.path.basename(filename)
    final_filename = os.path.join(path_pre, filename_end)
    os.rename(filename, final_filename)

## Wavelets

We are also going to use power spectra; they are created in much the same way as the spectrograms. In this case the pywt library is used.

In [None]:
# Wavelet power spectra are generated with pywt.
#
# For every acceleration-waveform CSV of every machine/point, one image is
# written to the "Wavelets" folder under ``path`` and one row (file, time,
# machine) is recorded in ``spectro_diag``.

# Folder where the wavelet images are saved.
wavelet_dir = os.path.join(path, "Wavelets")
os.makedirs(wavelet_dir, exist_ok=True)

# Text contained in the file names of the acceleration waveform exports.
acc_spec_text = "Aceleracion - Forma"

# Settings of the continuous wavelet transform and of the plot.
wavelet = 'cmor1.5-1.0'
scales = np.arange(1, 128)
levels = [0.0625, 0.125, 0.25, 0.5, 1, 2, 4, 8]
dpi = 96

# Rows are collected in a plain list and turned into a dataframe once at the
# end: DataFrame.append was removed in pandas 2.0.
wavelet_rows = []

for machine, points_iter in points.items():
    machine_files = [name for name in files if machine in name]

    for point in points_iter:
        # Acceleration waveform CSV files of this machine and point.
        accspec = [
            name for name in machine_files
            if point in name and ".csv" in name and acc_spec_text in name
        ]

        for filetime in accspec:
            # The first line of the export is a header line; the samples
            # start on the second line.
            forma = pd.read_csv(filetime, skiprows=1,
                                delimiter=";", encoding="ISO-8859-1")
            forma = forma.rename(columns={'time[ms]': 'time',
                                          ' amplitude[g]': 'amplitude'})
            # The date is the second field of the first line, without its
            # first six characters.
            date = pd.read_csv(filetime, nrows=0,
                               delimiter=";", encoding="ISO-8859-1").columns[1][6:]

            time_ms = forma['time']    # exported as 'time[ms]'
            sst = forma['amplitude']
            # Sampling period, taken from the first two samples.
            dt = time_ms[1] - time_ms[0]

            cfs, frequencies = pywt.cwt(sst, scales, wavelet, dt)
            power = (abs(cfs)) ** 2
            period = 1. / frequencies

            # Image path WITHOUT extension; ".png" is added when saving and
            # again, later on, when the images are sorted into class folders.
            date_sub = re.sub('[^A-Za-z0-9]+', '', date)
            filename = str(date_sub) + "_" + str(machine) + "_" + str(point)
            filename = os.path.join(wavelet_dir, filename)
            filename = filename.replace('\\', '/')

            fig, ax = plt.subplots(1, figsize=(400/dpi, 400/dpi))
            try:
                fig.subplots_adjust(left=0, right=1, bottom=0, top=1)
                ax.contourf(time_ms, np.log2(period), np.log2(power),
                            np.log2(levels), extend='both')
                ax.invert_yaxis()
                ax.axis('off')
                # savefig has no ``figsize`` argument (passing it was
                # deprecated in matplotlib 3.3 and is an error in later
                # releases); the size is the one given to plt.subplots.
                # The extension is explicit so that a dot inside a machine or
                # point name cannot be mistaken for a file format.
                fig.savefig(filename + '.png', transparent=True,
                            pad_inches=0.0, dpi=dpi)
            finally:
                # Always release the figure, even when saving fails.
                plt.close(fig)

            wavelet_rows.append({"file": filename,
                                 "time": date,
                                 "machine": machine})

# Start from a fresh table. The rows recorded earlier for the spectrogram
# images (whose 'time' is already a datetime) must not be mixed with the
# wavelet rows: the following cells would otherwise sort mixed types and try
# to move spectrogram files into the Wavelets folders.
spectro_diag = pd.DataFrame(wavelet_rows, columns=['file', 'time', 'machine'])

  f.savefig(filename, transparent=True, pad_inches=0.0,


In [None]:
# Now we need to create a dataframe where images and status 
# are correlated.

def sorted_index_by_time(df):
    """Return a copy of *df* ordered chronologically by its 'time' column.

    The 'time' column is converted with ``pd.to_datetime`` *before* sorting,
    so the rows are ordered by the actual timestamps and not by the text of
    the dates (which gives a different order for formats such as
    "12/31/2019"). Despite the name, the index is not changed: the rows keep
    their original index labels.

    Args:
        df: Dataframe with a 'time' column that ``pd.to_datetime`` can parse.

    Returns:
        A new dataframe sorted by 'time', with 'time' as datetime values.
        *df* itself is left unmodified.
    """
    converted = df.assign(time=pd.to_datetime(df['time']))
    return converted.sort_values(['time'])

# Both tables are put in time order before they are joined.
diag, spectro_diag = sorted_index_by_time(diag), sorted_index_by_time(spectro_diag)

# Give every image the diagnosis of the same machine that is closest in time
# (earlier or later), provided it lies within 10 days.
merged_diagnostic = pd.merge_asof(
    left=spectro_diag,
    right=diag,
    on="time",
    by=['machine'],
    direction='nearest',
    tolerance=pd.Timedelta('10 days'),
)

# Columns of interest (bare expression; the result is not stored).
merged_diagnostic[['file', 'time', 'machine', 'Nombre', 'diagnostico',
                   'Id_Estado_Activo_fixed']]

# Keep only the rows that have no missing value.
merged_diagnostic = merged_diagnostic.dropna()

In [None]:
# For deep learning the images are moved into one folder per label:
# "class_a" when Id_Estado_Activo_fixed equals 1 and "class_b" when it is
# greater than 1.

folder_a = os.path.join(path, 'Wavelets/class_a/')
folder_b = os.path.join(path, 'Wavelets/class_b/')
os.makedirs(folder_a, exist_ok=True)
os.makedirs(folder_b, exist_ok=True)

# The recorded paths have no extension while the images on disk end in
# ".png". The extension is added only where it is still missing, so running
# the cell a second time does not produce ".png.png".
has_png = merged_diagnostic['file'].str.endswith('.png')
merged_diagnostic['file'] = merged_diagnostic['file'].where(
    has_png, merged_diagnostic['file'] + '.png')

# Bare file name (last component of the "/"-separated path).
merged_diagnostic['filecut'] = merged_diagnostic['file'].str.split('/').str[-1]

for file, filecut, state in zip(merged_diagnostic['file'],
                                merged_diagnostic['filecut'],
                                merged_diagnostic['Id_Estado_Activo_fixed']):
    if state == 1.0:
        target_dir = folder_a
    elif state > 1:
        target_dir = folder_b
    else:
        # No class is defined for other values; the image stays where it is.
        continue

    final_filename = target_dir + filecut
    if not os.path.exists(file) and os.path.exists(final_filename):
        # Already moved by an earlier run of this cell.
        continue
    os.rename(file, final_filename)

In [None]:
# The images still left in the Wavelets folder whose file name starts with
# "2021" are moved to a separate "prediction" folder.
# NOTE(review): presumably these are the measurements that are not used for
# training -- confirm.

import os
import glob

path_pre = "Data/Export/Wavelets/prediction/"
os.makedirs(path_pre, exist_ok=True)

for filename in glob.glob("Data/Export/Wavelets/2021*"):
    # basename copes with both "/" and the "\" separators that glob returns
    # on Windows; splitting on "/" alone would keep part of the folder there.
    filename_end = os.path.basename(filename)
    final_filename = os.path.join(path_pre, filename_end)
    os.rename(filename, final_filename)