# VFT Preprocessing

## Library settings

In [9]:
import os, sys, random, time, glob, warnings, shutil
import urllib
import librosa
import librosa.display
import sklearn
import torch
import torchvision
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import IPython.display as ipd

In [10]:
from PIL import Image
from tqdm import tqdm

In [11]:
# Silence library warnings so they do not clutter the notebook output.
warnings.filterwarnings(action='ignore')

# Record the folder the notebook is running from and report it.
directory = os.getcwd()
print(f"Current working directory is: {directory}")

Current working directory is: C:\Users\user\Jupyter\ML_practices\projects\FNIRS_VFT


## MFCC conversion trials

### Previous Trials

In [None]:
# Path of the single recording that the exploratory cells below load.
audio_path = "E:/RESEARCH/BRAIN/research_data/VFT/1/VerbalFluencyTest-001-1Stimulus-2.wav"

In [None]:
# Sampling rate (Hz) requested from librosa when loading the recording.
sample_rate = 22050

# `sr` and `y` are passed by keyword: librosa 0.10 made these parameters
# keyword-only, so the positional forms raise TypeError there. The keyword
# forms also work on older releases.
x = librosa.load(audio_path, sr=sample_rate)[0]
S = librosa.feature.melspectrogram(y=x, sr=sample_rate, n_mels=128)
log_S = librosa.power_to_db(S, ref=np.max)
mfcc = librosa.feature.mfcc(S=log_S, n_mfcc=5)

# Second-order delta of the 5 MFCC coefficients.
delta2_mfcc = librosa.feature.delta(mfcc, order=2)

In [None]:
# Draw the delta-delta MFCC matrix as a heat map with a colour scale.
delta_fig, delta_ax = plt.subplots(figsize=(12, 4))
delta_img = librosa.display.specshow(delta2_mfcc, ax=delta_ax)
delta_ax.set(xlabel='Time', ylabel='MFCC coeffs', title='MFCC')
delta_fig.colorbar(delta_img, ax=delta_ax)
delta_fig.tight_layout()

### New Trials

In [None]:
# Load the recording; `x` is the time series and `sr` the sampling rate
# that librosa returns alongside it.
x, sr = librosa.load(audio_path)
print(f"Time series x is: {x} \nSampling rate sr is: {sr}")

* Checking waveform

In [None]:
# Wide figure showing the whole recording as a waveform.
wave_fig, wave_ax = plt.subplots(figsize=(20, 5))
librosa.display.waveshow(x, sr=sr, ax=wave_ax)

* Spectrogram

In [None]:
# Short-time Fourier transform, then its magnitude converted to decibels.
X = librosa.stft(x)
Xdb = librosa.amplitude_to_db(np.abs(X))

# Spectrogram with a linear (Hz) frequency axis and a colour scale.
spec_fig, spec_ax = plt.subplots(figsize=(20, 5))
spec_img = librosa.display.specshow(Xdb, sr=sr, x_axis='time', y_axis='hz', ax=spec_ax)
spec_fig.colorbar(spec_img, ax=spec_ax)

* Log frequency axis

In [None]:
# Same dB spectrogram, drawn on the current axes with a logarithmic
# frequency axis.
log_ax = plt.gca()
log_img = librosa.display.specshow(Xdb, sr=sr, x_axis='time', y_axis='log', ax=log_ax)
plt.colorbar(log_img, ax=log_ax)

* Zero Crossing Rate

In [None]:
# Zooming in: sample indices bounding the excerpt inspected below.
n0, n1 = 9000, 9100

# Plot only the samples between n0 and n1, with a grid for readability.
zoom_fig, zoom_ax = plt.subplots(figsize=(20, 5))
zoom_ax.plot(x[n0:n1])
zoom_ax.grid()

In [None]:
# Zero-crossing indicator for the zoomed excerpt; the shape is shown as the
# cell output.
zoomed_samples = x[n0:n1]
zero_crossings = librosa.zero_crossings(zoomed_samples, pad=False)
zero_crossings.shape

In [None]:
# Total number of zero crossings flagged in the excerpt.
print(zero_crossings.sum())

* Spectral Centroid

In [None]:
# Spectral centroid per frame. The audio is passed as `y=` because librosa
# 0.10 made it keyword-only; the positional form raises TypeError there.
# `[0]` takes the first row of the returned array.
spectral_centroids = librosa.feature.spectral_centroid(y=x, sr=sr)[0]
spectral_centroids.shape

In [None]:
# Computing the time variable for visualization
plt.figure(figsize=(20,5))
frames = range(len(spectral_centroids))
# Pass the sampling rate the audio was actually loaded with, so the time
# axis stays correct if `sr` ever differs from librosa's default.
t = librosa.frames_to_time(frames, sr=sr)

# Normalising the spectral centroid for visualisation
def normalize(x, axis=0):
    """Min-max scale `x` along `axis` using scikit-learn's `minmax_scale`.

    Args:
        x: Array-like of values to rescale.
        axis: Axis along which to scale (default 0).

    Returns:
        The rescaled array returned by `minmax_scale`.
    """
    # Import the submodule explicitly: on older scikit-learn releases a bare
    # `import sklearn` does not load `sklearn.preprocessing`, so attribute
    # access only works if another package happened to import it first.
    from sklearn.preprocessing import minmax_scale

    return minmax_scale(x, axis=axis)

# Plotting the Spectral Centroid along the waveform: faint waveform first,
# then the normalised centroid curve in red on the same axes.
centroid_ax = plt.gca()
librosa.display.waveshow(x, sr=sr, alpha=0.4, ax=centroid_ax)
centroid_ax.plot(t, normalize(spectral_centroids), color='r')

* Spectral Rolloff

In [None]:
plt.figure(figsize=(20,5))
# The audio is passed as `y=` because librosa 0.10 made it keyword-only.
# NOTE(review): the +0.01 offset is kept from the original; its purpose is
# not visible here -- confirm it is still wanted.
spectral_rolloff = librosa.feature.spectral_rolloff(y=x+0.01, sr=sr)[0]
# Faint waveform with the normalised roll-off curve drawn over it in red.
librosa.display.waveshow(x, sr=sr, alpha=0.4)
plt.plot(t, normalize(spectral_rolloff), color='r')
plt.grid()

* MFCC

In [None]:
plt.figure(figsize=(20,5))
x, fs = librosa.load(audio_path)
# Plot with the rate returned by this load (`fs`), not with whatever `sr`
# an earlier cell left behind.
librosa.display.waveshow(x, sr=fs)

In [None]:
# MFCC
plt.figure(figsize=(20,5))
# The audio is passed as `y=` because librosa 0.10 made it keyword-only;
# the positional form raises TypeError there.
mfccs = librosa.feature.mfcc(y=x, sr=sr)
print(mfccs.shape)

# librosa.display.specshow(mfccs, sr=sr, x_axis='time')
librosa.display.specshow(mfccs, sr=sr)

* Feature Scaling

In [None]:
# Standardise the MFCC matrix along axis 1.
mfccs = sklearn.preprocessing.scale(mfccs, axis=1)

# Mean and variance along the same axis after scaling.
mfcc_row_mean = mfccs.mean(axis=1)
mfcc_row_var = mfccs.var(axis=1)
print(mfcc_row_mean)
print(mfcc_row_var)

In [None]:
# Heat map of the scaled MFCC matrix on a wide figure.
scaled_fig, scaled_ax = plt.subplots(figsize=(20, 5))
# librosa.display.specshow(mfccs, sr=sr, x_axis='time')
librosa.display.specshow(mfccs, sr=sr, ax=scaled_ax)

* Chroma Frequencies

In [None]:
# Loading the file
x, sr = librosa.load(audio_path)

# Audio widget for the loaded samples, shown as the cell output.
ipd.Audio(data=x, rate=sr)

In [None]:
# Hop length passed to both the chroma computation and the plot.
hop_length = 512
# The audio is passed as `y=` because librosa 0.10 made it keyword-only;
# the positional form raises TypeError there.
chromagram = librosa.feature.chroma_stft(y=x, sr=sr, hop_length=hop_length)
plt.figure(figsize=(20, 5))

# librosa.display.specshow(chromagram, x_axis='time', y_axis='chroma', hop_length=hop_length, cmap='coolwarm')
librosa.display.specshow(chromagram, hop_length=hop_length, cmap='coolwarm')

---

## Dataset settings

In [None]:
# Root folder of the VFT data (note the trailing slash) and the names of
# the entries directly inside it.
vft_dir = "E:/RESEARCH/BRAIN/research_data/VFT/"
vft_file_names = os.listdir(vft_dir)

In [None]:
## removing files that match `patterns` from the numbered sub-folders
def removing_unnec_files(path, patterns=('*.wav',)):
    """Delete files matching `patterns` from the numbered sub-folders of `path`.

    WARNING: destructive. With the default `patterns` every `*.wav` file in
    the visited sub-folders is deleted. To remove other leftovers, pass e.g.
    `patterns=('*.xml', '*.edat3', '*.txt')` instead of editing the function.

    Args:
        path: Directory containing sub-folders named "1", "2", ...; a
            trailing slash is optional.
        patterns: Glob patterns, relative to each sub-folder, of the files
            to delete.

    NOTE(review): as in the original, the visited sub-folders are
    1 .. len(os.listdir(path)) - 2, so the two highest numbers are skipped
    when `path` contains only the numbered folders. Confirm this is intended.
    """
    entry_count = len(os.listdir(path))
    for i in range(1, entry_count - 1):
        for pattern in patterns:
            # os.path.join keeps this correct with or without a trailing
            # separator on `path`.
            for file in glob.glob(os.path.join(path, str(i), pattern)):
                os.remove(file)

In [None]:
# Runs the clean-up on the VFT root folder. The files removed by this call
# are deleted from disk.
removing_unnec_files(vft_dir)

In [None]:
## removing all files including "BaseLine"
def removing_baseline(path):
    """Delete every file whose name contains "BaseLine" in the numbered sub-folders.

    WARNING: destructive; matching files are removed from disk.

    Args:
        path: Directory containing sub-folders named "1", "2", ...; a
            trailing slash is optional.

    NOTE(review): as in the original, the visited sub-folders are
    1 .. len(os.listdir(path)) - 2, so the two highest numbers are skipped
    when `path` contains only the numbered folders. Confirm this is intended.
    """
    entry_count = len(os.listdir(path))
    for i in range(1, entry_count - 1):
        # os.path.join keeps this correct with or without a trailing
        # separator on `path`.
        for file in glob.glob(os.path.join(path, str(i), '*BaseLine*')):
            os.remove(file)

In [None]:
# Runs the "BaseLine" clean-up on the VFT root folder. Matching files are
# deleted from disk.
removing_baseline(vft_dir)

In [None]:
def convert_mfcc(wav):
    """Render the scaled MFCCs of one audio file and save them as a PNG.

    The image is written next to the input as `wav + '.png'`. The figure is
    closed afterwards, so calling this for many files does not accumulate
    open matplotlib figures.

    Args:
        wav: Path of the audio file to load with librosa.
    """
    # Import the submodule explicitly: on older scikit-learn releases a bare
    # `import sklearn` does not load `sklearn.preprocessing`.
    from sklearn.preprocessing import scale

    x, sr = librosa.load(wav)
    # `y=` is keyword-only in librosa 0.10; the positional form fails there.
    mfccs = librosa.feature.mfcc(y=x, sr=sr)
    mfccs = scale(mfccs, axis=1)

    fig = plt.figure(figsize = (10, 10))
    try:
        librosa.display.specshow(mfccs, sr=sr)
        plt.savefig(wav + '.png')
    finally:
        # Release the figure even if drawing or saving fails.
        plt.close(fig)

In [None]:
def wav_to_mfcc(path):
    """Run `convert_mfcc` on every `.wav` file in the numbered sub-folders of `path`.

    Only `.wav` entries are converted. `convert_mfcc` writes its PNG into
    the same folder, so without this filter a second run would try to load
    those images as audio.

    Args:
        path: Directory containing sub-folders named "1", "2", ...; a
            trailing slash is optional.

    NOTE(review): as in the original, the visited sub-folders are
    1 .. len(os.listdir(path)) - 2, so the two highest numbers are skipped
    when `path` contains only the numbered folders. Confirm this is intended.
    """
    entry_count = len(os.listdir(path))
    for i in range(1, entry_count - 1):
        direc = os.path.join(path, str(i))
        for file in os.listdir(direc):
            if file.lower().endswith('.wav'):
                convert_mfcc(os.path.join(direc, file))

In [None]:
# wav_to_mfcc(vft_dir)

In [None]:
# Subject-folder ids (as strings) for each of the three groups. The
# commented-out cell below moves each folder into a DP/, HC/ or SI/
# directory according to these lists.
dp = (
    "7 8 9 12 16 17 18 21 28 30 33 48 49 50 51 52 53 58 64 72 75 77 78 80 81 "
    "93 96 100 102 103 104 107 110 118 119"
).split()
hc = (
    "1 5 11 13 15 20 22 23 24 26 27 29 31 32 34 35 36 37 38 40 41 42 43 44 45 "
    "46 56 57 59 60 61 62 63 65 66 67 68 69 70 74 82 83 85 86 87 88 89 91 94 "
    "95 105 106 108 109 111 112 113 115 117"
).split()
si = (
    "2 3 4 6 10 14 19 25 39 47 54 55 71 73 76 79 84 90 92 97 98 99 101 114 116"
).split()

In [None]:
# Number of ids in the `si` list, shown as the cell output.
len(si)

In [None]:
# for folder in vft_file_names:
#     if folder in dp:
#         shutil.move(vft_dir + folder, "E:/RESEARCH/BRAIN/research_data/VFT/DP/")
#     elif folder in hc:
#         shutil.move(vft_dir + folder, "E:/RESEARCH/BRAIN/research_data/VFT/HC/")
#     else:
#         shutil.move(vft_dir + folder, "E:/RESEARCH/BRAIN/research_data/VFT/SI/")

In [15]:
file_path = "E:/RESEARCH/BRAIN/research_data/VFT_2CLASS/DP"
file_names = os.listdir(file_path)

# Changing file names into numerical order
#
# The rename is done in two passes. Renaming straight to "<i>.png" can hit a
# name that still belongs to a file waiting to be renamed (for example when
# the cell is run twice). That raises on Windows and silently overwrites the
# other file on POSIX.

# Pass 1: move every entry to a temporary name that cannot clash with any
# final "<i>.png" name.
staged = []
for i, name in enumerate(file_names, start=1):
    src = os.path.join(file_path, name)
    tmp = os.path.join(file_path, "__renumber_tmp_" + str(i) + "__" + name)
    os.rename(src, tmp)
    staged.append((tmp, os.path.join(file_path, str(i) + '.png')))

# Pass 2: every target name is free now, so assign the final numbered names.
for tmp, dst in staged:
    os.rename(tmp, dst)