<a href="https://colab.research.google.com/github/bilalProgTech/mtech-nmims/blob/master/speech-recognition/Lab-Work/20220814-Lab-4-MTech-AI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
# Point the Kaggle CLI at /content/ for its configuration
# (presumably where kaggle.json was uploaded — confirm before running).
os.environ['KAGGLE_CONFIG_DIR'] = '/content/'
# Download the competition archive, then unpack every zip in the working directory.
!kaggle competitions download -c tensorflow-speech-recognition-challenge
!unzip *.zip

In [None]:
# Extract the training archive with 7-Zip; later cells read from /content/train/audio/.
!7za x 'train.7z' 

In [None]:
import librosa
import librosa.display
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN
from sklearn.metrics import accuracy_score, log_loss, confusion_matrix
import IPython.display as ipd
import plotly as py
import plotly.graph_objs as go

In [None]:
# Build a shuffled index of the audio clips for the three target words.
# Each clip is labelled with the name of the folder that contains it.
files = []
target_series = []
for dirname, subdirs, filenames in os.walk('/content/train/audio/'):
    # Visit sub-folders in a fixed order: os.walk otherwise follows the
    # arbitrary order in which the filesystem lists them.
    subdirs.sort()
    # Folder name is the label; basename() avoids hard-coding '/' as separator.
    target = os.path.basename(os.path.normpath(dirname))
    if target not in ('happy', 'stop', 'wow'):
        continue
    for filename in sorted(filenames):
        target_series.append(target)
        files.append(os.path.join(dirname, filename))

data = pd.DataFrame({'target': target_series, 'filename': files})
# Fixed seed so the shuffle (and everything derived from the row order)
# is identical after Restart & Run All.
data = data.sample(frac=1, random_state=42).reset_index(drop=True)
data.head()

In [None]:
def get_audio_feature_plots(path, target):
    """Plot the waveform, MFCCs, mel spectrogram and zero-crossing rate of one clip.

    Args:
        path: Path of the audio file; passed to ``librosa.load``.
        target: Label string appended to every figure title.

    Side effects:
        Shows four matplotlib figures and prints the MFCC matrix shape and
        the total number of zero crossings. Returns ``None``.
    """
    x, sr = librosa.load(path)

    plt.figure(figsize=(20, 5))
    plt.title('Waveplot of '+target)
    # `waveplot` was removed in librosa 0.10 in favour of `waveshow`;
    # use whichever one the installed version provides.
    plot_wave = getattr(librosa.display, 'waveshow', None)
    if plot_wave is None:
        plot_wave = librosa.display.waveplot
    plot_wave(x, sr=sr)
    plt.show()

    plt.figure(figsize=(20, 6))
    plt.title('MFCC Spectral of '+target)
    mfccs = librosa.feature.mfcc(y=x, sr=sr) # n_mfcc
    librosa.display.specshow(mfccs, sr=sr, x_axis='time')
    print(mfccs.shape)
    plt.show()

    plt.figure(figsize=(20, 6))
    plt.title('Mel Spectrogram of '+target)
    mel_spec = librosa.feature.melspectrogram(y=x, sr=sr)
    librosa.display.specshow(mel_spec, sr=sr, x_axis='time')
    plt.show()

    # Total count of sign changes over the whole clip ...
    zero_crossings = librosa.zero_crossings(x)
    print("Sum of zero crossing ", zero_crossings.sum())
    # ... and the frame-wise rate, plotted over frame index.
    plt.figure(figsize=(20, 5))
    plt.title('Zero Crossing Rate of '+target)
    zcrs = librosa.feature.zero_crossing_rate(x)
    plt.plot(zcrs[0])
    plt.show()

In [None]:
# Draw one random 'wow' clip, plot its features, then embed a player for it.
sample = data.loc[data['target'] == 'wow'].sample(1)
get_audio_feature_plots(
    sample['filename'].iloc[0],
    sample['target'].iloc[0],
)
ipd.Audio(sample['filename'].iloc[0])

In [None]:
# Draw one random 'stop' clip, plot its features, then embed a player for it.
sample = data.loc[data['target'] == 'stop'].sample(1)
get_audio_feature_plots(
    sample['filename'].iloc[0],
    sample['target'].iloc[0],
)
ipd.Audio(sample['filename'].iloc[0])

In [None]:
# Draw one random 'happy' clip, plot its features, then embed a player for it.
sample = data.loc[data['target'] == 'happy'].sample(1)
get_audio_feature_plots(
    sample['filename'].iloc[0],
    sample['target'].iloc[0],
)
ipd.Audio(sample['filename'].iloc[0])

# Feature Extraction

### Zero-Crossing Rate

In [None]:
def create_zero_cross_rating_features(path):
    """Return the frame-wise zero-crossing rate of one audio file.

    Args:
        path: Path of the audio file; passed to ``librosa.load``.

    Returns:
        The first row of ``librosa.feature.zero_crossing_rate`` for the clip,
        or an empty list when the file cannot be loaded or processed.
    """
    try:
        x, _ = librosa.load(path, res_type='kaiser_fast')
        return librosa.feature.zero_crossing_rate(x)[0]
    except Exception as exc:
        # Best-effort on purpose: one unreadable file must not abort the whole
        # extraction. Catching Exception (not a bare except) lets Ctrl-C and
        # SystemExit through, and the message names the file and the cause so
        # a systematic failure (e.g. a missing resampler) is not hidden.
        print('Error reading audio', path, '-', repr(exc))
        return []

In [None]:
%%time
X_df = pd.DataFrame(data['filename'].apply(lambda x: create_zero_cross_rating_features(x)).tolist())

In [None]:
# Preview the first rows of the zero-crossing-rate feature table.
X_df.head()

In [None]:
# Show both shapes side by side; X_df is built with one row per entry of `data`.
X_df.shape, data.shape

In [None]:
# Count the missing values in each feature column.
X_df.isnull().sum()

In [None]:
# Replace every missing value with 0, leaving X_df itself untouched
# (fillna returns a new frame), then confirm that no NaN is left.
X_impute = X_df.fillna(0)
X_impute.isna().sum().sum()

In [None]:
# Heatmap of the pairwise correlation between the zero-filled ZCR columns.
plt.figure(figsize=(8, 8))
sns.heatmap(
    X_impute.corr(),
    square=True,
    vmax=1,
).set_title('Correlation between different zero cross rating features')
plt.show()

### PCA

In [None]:
# Fit PCA on the imputed ZCR features and project every clip onto the components.
pca = PCA()
X_pca = pca.fit_transform(X_impute)

# Wrap the scores in a DataFrame. Column names are sized from the scores
# themselves: PCA() keeps min(n_samples, n_features) components, so naming
# them after the input column count fails when there are fewer clips than
# features.
X_pca = pd.DataFrame(X_pca, columns=['PCA_'+str(i) for i in range(X_pca.shape[1])])

X_pca.head()

In [None]:
# Heatmap of the pairwise correlation between the principal-component scores.
plt.figure(figsize=(8, 8))
sns.heatmap(
    X_pca.corr(),
    square=True,
    vmax=1,
).set_title('Correlation between PCA Components')
plt.show()

In [None]:
# Component weights as a table: one row per original ZCR column, one column
# per principal component (built component-major, then transposed).
loadings = pd.DataFrame(
    pca.components_,
    index=X_pca.columns,
    columns=X_impute.columns,
).T
plt.figure(figsize=(8, 8))
sns.heatmap(
    loadings,
    square=True,
    vmax=1,
    cmap='cubehelix',
).set_title('Loadings of PCA with zero cross rating')
plt.show()

In [None]:
# Scree plot: share of variance explained by each principal component (1-based).
plt.figure(figsize=(8,8))
# x/y are passed by keyword: seaborn >= 0.12 accepts only `data` positionally
# and raises TypeError for positional x and y.
sns.lineplot(
    x=np.arange(pca.n_components_) + 1,
    y=pca.explained_variance_ratio_,
    marker='o',
)
plt.title('Scree Plot')
plt.xlabel('Principal Component')
plt.ylabel('Variance Explained')
plt.show()

### KMeans

In [None]:
# Elbow method: fit KMeans for k = 1..10 on the first three principal
# components and record the inertia of each fit.
opt_val = []
for i in range(1, 11):
    # n_init is pinned to 10 (the default this notebook was written against).
    # scikit-learn 1.4 changed the default to 'auto', which runs k-means++
    # only once, so leaving it implicit makes the curve version-dependent.
    kmeans = KMeans(n_clusters=i, init='k-means++', n_init=10, random_state=42)
    kmeans.fit(X_pca.iloc[:, :3])
    opt_val.append(kmeans.inertia_)

In [None]:
# Elbow plot: the values in opt_val are the KMeans inertia for k = 1, 2, ...
plt.figure(figsize=(10,5))
# x/y are passed by keyword: seaborn >= 0.12 accepts only `data` positionally
# and raises TypeError for positional x and y. The x range is derived from
# opt_val so the two always have the same length.
sns.lineplot(
    x=list(range(1, len(opt_val) + 1)),
    y=opt_val,
    marker='o',
    color='red',
)
plt.title('The Elbow Method')
plt.xlabel('Number of clusters')
plt.ylabel('Optimal Value')
plt.show()

In [None]:
# Final clustering with k = 3 on the first three principal components;
# y_kmeans holds the cluster label assigned to each clip.
# n_init is pinned to 10 because scikit-learn 1.4 changed the default to
# 'auto' (a single k-means++ run), which would alter the resulting clusters.
kmeans = KMeans(n_clusters=3, init='k-means++', n_init=10, random_state=42)
y_kmeans = kmeans.fit_predict(X_pca.iloc[:, :3])

In [None]:
# Interactive 3-D scatter of the first three principal components, one marker
# per clip, coloured (fill and outline) by its KMeans cluster label.
trace1 = go.Scatter3d(
    x=X_pca['PCA_0'],
    y=X_pca['PCA_1'],
    z=X_pca['PCA_2'],
    mode='markers',
    marker={
        'color': y_kmeans,
        'size': 10,
        'line': {'color': y_kmeans, 'width': 12},
        'opacity': 0.8,
    },
)
data_plotly = [trace1]

layout = go.Layout(
    title='Clusters wrt three PCA Components',
    scene={
        'xaxis': {'title': 'PCA 0'},
        'yaxis': {'title': 'PCA 1'},
        'zaxis': {'title': 'PCA 2'},
    },
)

fig = go.Figure(data=data_plotly, layout=layout)
fig.show()