## Imports:

In [1]:
import scipy.io
import os
import pandas
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.pylab as plt_lab
import matplotlib.gridspec as gridspec
import obspy
import pandas as pd

## Load data:
We are going to use this data to plot spectograms of SCG signals. 

In [2]:
path = "/Users/ecem/Desktop/gyrocardiogram/s-vs-r/"
diseased_s = np.load(path + "data/diseased-10sec-s.npy", allow_pickle= True)
diseased_r = np.load(path + "data/diseased-10sec-r.npy", allow_pickle= True)

print(diseased_s.shape)

(3, 1070, 2560)


In [3]:
def converter(array):
    x,y,z = [],[],[]
    for i in range(array.shape[1]):
        x.append(obspy.core.trace.Trace(array[0][i]))
        y.append(obspy.core.trace.Trace(array[1][i]))
        z.append(obspy.core.trace.Trace(array[2][i]))
        
        x[i].stats.sampling_rate = 256
        y[i].stats.sampling_rate = 256
        z[i].stats.sampling_rate = 256
    return [x,y,z]

In [4]:
diseased_s = converter(diseased_s)

diseased_r = converter(diseased_r)



## comparison of two dataset:

Now we will to try visualize spectograms. So far we have used Obspy to visualize SCG signals. However, with obspy we are not able to plot spectograms side by side. This is important for us because we want to see spectograms of x,y and z components simultaneously for both healty and diseased people. So, from now on I will refer to those plots, for 3 components of healty and diseased (in total six plot, as "PAIR".


I have mentioned that Obpsy has a problem in side by side plotting. So I changed it source code. I have added and deleted some part to operate according to my need. ???????????

Thats why from now on we are going to use **output_spectogram** function to get values needed to plot spectogram.

These are the imports for output_spectogram function to work.

In [5]:
from obspy.imaging.cm import obspy_sequential
import math
from matplotlib import mlab
from matplotlib.colors import Normalize

In [6]:
def _nearest_pow_2(x):
    a = math.pow(2, math.ceil(np.log2(x)))
    b = math.pow(2, math.floor(np.log2(x)))
    if abs(a - x) < abs(b - x):
        return a
    else:
        return b

In [7]:
def output_spectrogram(data, samp_rate, per_lap=0.9, wlen=None, log=False,
                outfile=None, fmt=None, axes=None, dbscale=False,
                mult=8.0, cmap=obspy_sequential, zorder=None, title=None,
                show=True, clip=[0.0, 1.0]):
 
    import matplotlib.pyplot as plt
    
    # enforce float for samp_rate
    samp_rate = float(samp_rate)

    # set wlen from samp_rate if not specified otherwise
    if not wlen:
        wlen = samp_rate / 100.

    npts = len(data)
    # nfft needs to be an integer, otherwise a deprecation will be raised
    # XXX add condition for too many windows => calculation takes for ever
    nfft = int(_nearest_pow_2(wlen * samp_rate))
    if nfft > npts:
        nfft = int(_nearest_pow_2(npts / 8.0))

    if mult is not None:
        mult = int(_nearest_pow_2(mult))
        mult = mult * nfft
    nlap = int(nfft * float(per_lap))

    data = data - np.array(data).mean()
    end = npts / samp_rate

    # Here we call not plt.specgram as this already produces a plot
    # matplotlib.mlab.specgram should be faster as it computes only the
    # arrays
    # XXX mlab.specgram uses fft, would be better and faster use rfft
    specgram, freq, time = mlab.specgram(data, Fs=samp_rate, NFFT=nfft,
                                         pad_to=mult, noverlap=nlap, mode = 'psd')

    # db scale and remove zero/offset for amplitude
    if dbscale:
        specgram = 10 * np.log10(specgram[1:, :])
    else:
        specgram = np.sqrt(specgram[1:, :])
    freq = freq[1:]
    
    vmin, vmax = clip
    if vmin < 0 or vmax > 1 or vmin >= vmax:
        msg = "Invalid parameters for clip option."
        raise ValueError(msg)
    _range = float(specgram.max() - specgram.min())
    vmin = specgram.min() + vmin * _range
    vmax = specgram.min() + vmax * _range
    norm = Normalize(vmin, vmax, clip=True)

    # calculate half bin width
    halfbin_time = (time[1] - time[0]) / 2.0
    halfbin_freq = (freq[1] - freq[0]) / 2.0

    # argument None is not allowed for kwargs on matplotlib python 3.3
    kwargs = {k: v for k, v in (('cmap', cmap), ('zorder', zorder))
              if v is not None}

    if log:
        # pcolor expects one bin more at the right end
        freq = np.concatenate((freq, [freq[-1] + 2 * halfbin_freq]))
        time = np.concatenate((time, [time[-1] + 2 * halfbin_time]))
        # center bin
        time -= halfbin_time
        freq -= halfbin_freq
        # Log scaling for frequency values (y-axis)
        ax.set_yscale('log')
        # Plot times
        ax.pcolormesh(time, freq, specgram, norm=norm, **kwargs)
    else:
        # this method is much much faster!
        specgram = np.flipud(specgram)
        # center bin
        extent = (time[0] - halfbin_time, time[-1] + halfbin_time,
                  freq[0] - halfbin_freq, freq[-1] + halfbin_freq)
    return [specgram, extent, end]

    



## Plotting spectogram:

f is the frequency array, containing the frequencies of every band of the fft. Which can be used as the labels for a graph

t is the time array, containing the time at which this FFT was made relative to the source signal. Again can be used for labels.

The Sxx array contains the amplitudes and is a 2d array whose shape is the length of f by the length of t.


 The purpose of a spectogram is to take the FFT of small, equal-sized time chunks. This produces a 2D fourier transform where the X axis is the start time of the time chunk and the Y axis is the energy (or power, etc.) in each frequency in that time chunk. This allows you to see how the frequency components change over time.
 
  A spectrogram is a representation of frequency over time with the addition of amplitude as a third dimension, denoting the intensity or volume of the signal at a frequency and a time.

In [8]:
def freq_average(array):
    means= []
    for i in range(array.shape[0]):
        means.append(array[i][0:40].mean())
    return np.array(means)
        

In [9]:
output_spectrogram(diseased_s[0][0].data, 256)[0].shape

(2048, 40)

In [10]:
freq_average(array = output_spectrogram(diseased_s[0][0].data, 256)[0])

array([0.00342719, 0.00512656, 0.00567603, ..., 0.42324948, 0.42909873,
       0.43112987])

In [11]:
freq_average(array = output_spectrogram(diseased_r[0][0].data, 256)[0]).shape

(2048,)

In [12]:
def spec_values(array):
    x_, y_, z_ = [],[],[]
    for i in range(len(array[0])):
        _x = output_spectrogram(array[0][i].data, 256)[0]
        _y = output_spectrogram(array[1][i].data, 256)[0]
        _z = output_spectrogram(array[2][i].data, 256)[0]

        x_.append(freq_average(array = _x))
        y_.append(freq_average(array = _y))
        z_.append(freq_average(array = _z)) 
        
        
    return np.stack((x_, y_, z_), axis = 0)

In [13]:
diseased_s = spec_values(diseased_s)
diseased_r = spec_values(diseased_r)

In [14]:
diseased_s[0][0]

array([0.00342719, 0.00512656, 0.00567603, ..., 0.42324948, 0.42909873,
       0.43112987])

In [15]:
diseased_s[0][30]

array([0.00958285, 0.01355766, 0.01349824, ..., 0.12608085, 0.12633624,
       0.12579942])

In [16]:
print(diseased_s.shape)
print(diseased_r.shape)


(3, 1070, 2048)
(3, 2017, 2048)


In [17]:
columns = []
for i in range(diseased_s.shape[2]):
    columns.append("freq_x{}".format(i))
    columns.append("freq_y{}".format(i))
    columns.append("freq_z{}".format(i))
print(columns)

['freq_x0', 'freq_y0', 'freq_z0', 'freq_x1', 'freq_y1', 'freq_z1', 'freq_x2', 'freq_y2', 'freq_z2', 'freq_x3', 'freq_y3', 'freq_z3', 'freq_x4', 'freq_y4', 'freq_z4', 'freq_x5', 'freq_y5', 'freq_z5', 'freq_x6', 'freq_y6', 'freq_z6', 'freq_x7', 'freq_y7', 'freq_z7', 'freq_x8', 'freq_y8', 'freq_z8', 'freq_x9', 'freq_y9', 'freq_z9', 'freq_x10', 'freq_y10', 'freq_z10', 'freq_x11', 'freq_y11', 'freq_z11', 'freq_x12', 'freq_y12', 'freq_z12', 'freq_x13', 'freq_y13', 'freq_z13', 'freq_x14', 'freq_y14', 'freq_z14', 'freq_x15', 'freq_y15', 'freq_z15', 'freq_x16', 'freq_y16', 'freq_z16', 'freq_x17', 'freq_y17', 'freq_z17', 'freq_x18', 'freq_y18', 'freq_z18', 'freq_x19', 'freq_y19', 'freq_z19', 'freq_x20', 'freq_y20', 'freq_z20', 'freq_x21', 'freq_y21', 'freq_z21', 'freq_x22', 'freq_y22', 'freq_z22', 'freq_x23', 'freq_y23', 'freq_z23', 'freq_x24', 'freq_y24', 'freq_z24', 'freq_x25', 'freq_y25', 'freq_z25', 'freq_x26', 'freq_y26', 'freq_z26', 'freq_x27', 'freq_y27', 'freq_z27', 'freq_x28', 'freq_y28

In [18]:
print(len(columns))

6144


In [19]:
def dataframe_creator(array, df = pd.DataFrame()):
    values = []
    for i in range(array.shape[1]):
        values.append(array[0][i][0:2048].tolist() + array[1][i][0:2048].tolist() 
                        + array [2][i][0:2048].tolist())

        
        medium = pd.DataFrame([values[i]], columns = columns)
        
        
        df = pd.concat([df, medium], axis = 0)
    return df 

In [20]:
df = dataframe_creator(diseased_s, 
                         df =pd.DataFrame(columns = columns))
df = dataframe_creator(diseased_r, 
                         df =df )

In [21]:
df = df.reset_index(drop= True)

In [22]:
df

Unnamed: 0,freq_x0,freq_y0,freq_z0,freq_x1,freq_y1,freq_z1,freq_x2,freq_y2,freq_z2,freq_x3,...,freq_z2044,freq_x2045,freq_y2045,freq_z2045,freq_x2046,freq_y2046,freq_z2046,freq_x2047,freq_y2047,freq_z2047
0,0.003427,0.005127,0.005676,0.006353,0.007078,0.007797,0.008472,0.009078,0.00961,0.010098,...,0.048074,0.051574,0.054949,0.057982,0.060547,0.062563,0.063992,0.064843,0.065181,0.065136
1,0.005258,0.007622,0.007945,0.008296,0.008626,0.00891,0.009148,0.009371,0.009616,0.009921,...,0.027374,0.029678,0.032078,0.034552,0.036875,0.038896,0.040515,0.041683,0.042398,0.042708
2,0.007979,0.011349,0.011402,0.011404,0.011338,0.0112,0.010997,0.010755,0.010524,0.010372,...,0.028008,0.030592,0.033098,0.035447,0.037558,0.03935,0.040772,0.0418,0.042438,0.042715
3,0.003781,0.005553,0.005987,0.006505,0.00704,0.007551,0.008016,0.008439,0.008825,0.009131,...,0.028074,0.029409,0.030634,0.031775,0.032806,0.033703,0.034519,0.035153,0.035573,0.03577
4,0.009495,0.013529,0.013682,0.01381,0.013878,0.013879,0.013838,0.013796,0.013739,0.013616,...,0.020864,0.022995,0.02512,0.027146,0.028956,0.030463,0.031618,0.032407,0.032848,0.033003
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3082,0.005453,0.007844,0.008084,0.008326,0.008522,0.008645,0.008687,0.008651,0.008561,0.008455,...,0.091397,0.091212,0.090284,0.088847,0.087394,0.086162,0.084872,0.083445,0.081755,0.080073
3083,0.005401,0.007962,0.008597,0.009344,0.010111,0.010841,0.011494,0.012073,0.012587,0.012953,...,0.266124,0.275595,0.286313,0.296864,0.306311,0.313949,0.319278,0.322037,0.322307,0.320785
3084,0.018487,0.027552,0.030742,0.034638,0.03872,0.042704,0.046408,0.049736,0.052701,0.055224,...,13.706523,14.085274,14.478497,14.826008,15.098279,15.300386,15.459038,15.529028,15.500269,15.411286
3085,0.249094,0.351497,0.34908,0.344997,0.339428,0.332734,0.325438,0.318325,0.312635,0.308642,...,33.853434,34.652292,35.200565,35.515092,35.61452,35.455935,35.078689,34.553846,33.989792,33.52198


In [24]:
df.to_csv(path + 'feature_extr/s-vs-r-spectogram_coef.csv')