# Voice Clips Analysis
- Convert clips to vector representations
- Cluster and visualize clips
- Classify clips

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import librosa
import librosa.display
from IPython.display import Audio
import IPython
from tqdm import tqdm
import pickle
from datetime import datetime
import time

## Data Preparation

In [None]:
init_names = {'ya':'researcher1', 'ja':'researcher2', 'uc':'researcher3', 'sk':'researcher4', 'mm':'researcher5'}

In [None]:
# Placeholder: replace A_PATH_TO_YOUR_PROJECT_FOLDER with a string holding the
# path of the project folder; later cells append "/data/<name>/<label>" to it.
path_prefix = A_PATH_TO_YOUR_PROJECT_FOLDER

## Count the Clips for Individual Collectors

In [None]:
# Build one summary row per collector: where the clips live and how many
# files each label folder contains.
records = []
for collector in init_names.values():
    collector_dir = "/data/{}".format(collector)
    engaging_path = path_prefix + collector_dir + "/engaging"
    boring_path = path_prefix + collector_dir + "/boring"

    records.append({
        'name': collector,
        # Hard-coded date-range string, identical for every collector.
        'date': '06/17/2024-07/12/2024',
        # Both folder columns use the same "./data/..." relative form.
        'engaging_folder': '.' + collector_dir + '/engaging',
        'engaging_count': len(os.listdir(engaging_path)),
        'boring_folder': '.' + collector_dir + '/boring',
        'boring_count': len(os.listdir(boring_path)),
    })
pd.DataFrame(records)

In [None]:
# Build one record per clip file. For every collector the engaging folder is
# listed first and the boring folder second, so the record order is
# collector-major, then label, then os.listdir order.
records = []
for collector in init_names.values():
    for label in ('engaging', 'boring'):
        label_path = path_prefix + "/data/{}/{}".format(collector, label)
        for file in tqdm(os.listdir(label_path)):
            records.append({
                'name': collector,
                'file': file,
                'path': label_path + '/' + file,
                'label': label,
            })

In [None]:
all_data = pd.DataFrame(records)

In [None]:
all_data.shape

In [None]:
all_data.head()

In [None]:
all_data.label.value_counts()

In [None]:
all_data['label'].value_counts()

In [None]:
## visualize the share of engaging and boring clips
plt.figure(figsize=(9, 5))
# normalize=True yields fractions in [0, 1]; scale to 0-100 so the bar heights
# agree with the "Percentage of Label" axis label.
(all_data['label'].value_counts(normalize=True) * 100).plot(kind='bar')
#plt.title("Label Distribution in Data Set")
# Rotate the tick labels and set their font size and bold
plt.xticks(rotation=30, fontsize=12, fontweight='bold')
plt.yticks(fontsize=12, fontweight='bold')

# Empty x-axis label: the tick labels already name the classes.
plt.xlabel("", fontsize=12, fontweight='bold')

plt.ylabel("Percentage of Label", fontsize=12, fontweight='bold')

plt.tight_layout()

## Sample an Engaging and Boring Clip

In [None]:
## Retrieve all engaging and boring clip paths so that one of each can be selected at random
engaging_files = np.array(all_data.path[all_data.label == 'engaging'])
boring_files = np.array(all_data.path[all_data.label == 'boring'])

In [None]:
an_engaging_file = np.random.choice(engaging_files)
a_boring_file = np.random.choice(boring_files)

## Plot waveplots and spectrograms
- Waveplots - A waveplot shows the loudness (amplitude) of the audio at a given time.
- Spectrograms - A spectrogram is a visual representation of the spectrum of frequencies of a sound or other signal as it varies with time. It shows how the frequencies of a given audio/music signal change over time.

In [None]:
Audio(an_engaging_file)

In [None]:
Audio(a_boring_file)

In [None]:
data, sample_rate = librosa.load(an_engaging_file)

In [None]:
def create_waveplot(data, sr, label):
    """Draw the waveform of an audio signal in a new 10x3 figure and show it.

    Args:
        data: Audio samples, handed to ``librosa.display.waveshow``.
        sr: Sampling rate of ``data``.
        label: Not used by the current body (the title line that used it is
            disabled); kept so existing calls keep working.
    """
    bold_ticks = {'fontsize': 12, 'fontweight': 'bold'}

    plt.figure(figsize=(10, 3))
    librosa.display.waveshow(data, sr=sr)

    # Bold tick labels on both axes; the x ticks are additionally rotated.
    plt.xticks(rotation=30, **bold_ticks)
    plt.yticks(**bold_ticks)

    # Clear the x-axis label before laying out and showing the figure.
    plt.xlabel("")
    plt.tight_layout()
    plt.show()

In [None]:
data, sampling_rate = librosa.load(an_engaging_file)

In [None]:
sampling_rate

In [None]:
create_waveplot(data, sampling_rate, label='engaging')
#Audio(an_engaging_file)

In [None]:
data, sampling_rate = librosa.load(a_boring_file)
create_waveplot(data, sampling_rate, label="boring")
Audio(a_boring_file)

In [None]:
def create_spectrogram(data, sr, label):
    """Plot the dB-scaled STFT magnitude of an audio signal in a 12x3 figure.

    Args:
        data: Audio samples, handed to ``librosa.stft``.
        sr: Sampling rate of ``data``, used to scale the plot axes.
        label: Text inserted into the figure title.
    """
    # Short-time Fourier transform -> magnitude -> decibels.
    magnitude = np.abs(librosa.stft(data))
    magnitude_db = librosa.amplitude_to_db(magnitude)

    plt.figure(figsize=(12, 3))
    plt.title('Spectrogram for an audio with {} label'.format(label), size=15)
    # Linear frequency axis in Hz (y_axis='log' is the logarithmic alternative).
    librosa.display.specshow(magnitude_db, sr=sr, x_axis='time', y_axis='hz')
    plt.colorbar()

In [None]:
data, sampling_rate = librosa.load(an_engaging_file)
create_spectrogram(data, sampling_rate, "engaging")

In [None]:
data, sampling_rate = librosa.load(a_boring_file)
create_spectrogram(data, sampling_rate, 'boring')

## Visualize the Clustering of  Engaging and Boring Clips

In [None]:
all_data.columns

In [None]:
all_engaging = all_data[all_data.label == 'engaging']
all_boring = all_data[all_data.label == 'boring']

In [None]:
%%time
# Decode every engaging clip with librosa's default settings and keep only the
# sample array (the returned sampling rate is discarded).
engaging_feature_vectors = [
    librosa.load(clip_path)[0] for clip_path in tqdm(all_engaging.path)
]

In [None]:
%%time
# Decode every boring clip with librosa's default settings and keep only the
# sample array (the returned sampling rate is discarded).
boring_feature_vectors = [
    librosa.load(clip_path)[0] for clip_path in tqdm(all_boring.path)
]

In [None]:
def extract_mfcc_features(data, sample_rate=22050):
    """Return the MFCCs of an audio signal averaged into a single vector.

    Args:
        data: Audio samples, passed to ``librosa.feature.mfcc`` as ``y``.
        sample_rate: Sampling rate of ``data``; defaults to 22050.

    Returns:
        The transposed MFCC matrix averaged along axis 0, i.e. one mean value
        per MFCC coefficient.
    """
    coefficients = librosa.feature.mfcc(y=data, sr=sample_rate)
    # After the transpose, axis 0 is the one being averaged away.
    transposed = coefficients.T
    return np.mean(transposed, axis=0)

In [None]:
# Reduce each engaging waveform to its mean-MFCC vector.
engaging_features = [
    extract_mfcc_features(waveform) for waveform in tqdm(engaging_feature_vectors)
]

In [None]:
engaging_features = np.array(engaging_features)
engaging_features.shape

In [None]:
# Reduce each boring waveform to its mean-MFCC vector and stack the vectors
# into one array (one row per clip).
boring_features = np.array(
    [extract_mfcc_features(waveform) for waveform in tqdm(boring_feature_vectors)]
)
boring_features.shape

In [None]:
from sklearn.manifold import TSNE
tsne = TSNE(n_components=2, random_state=0)

In [None]:
embedded_engaging_features = tsne.fit_transform(engaging_features)

In [None]:
embedded_boring_features = tsne.fit_transform(boring_features)

In [None]:
# Scatter plot of the 2-D t-SNE embeddings, one colour per label.
plt.figure(figsize=(8, 6))

# Plot engaging clips in red
plt.scatter(embedded_engaging_features[:, 0], embedded_engaging_features[:, 1], color='red', label='Engaging Clips')

# Plot boring clips in blue
plt.scatter(embedded_boring_features[:, 0], embedded_boring_features[:, 1], color='blue', label='Boring Clips')

# Report the real number of plotted clips instead of a hard-coded count.
plt.title('t-SNE Plot of {} Engaging and {} Boring Clips'.format(
    len(embedded_engaging_features), len(embedded_boring_features)))
plt.xlabel('t-SNE Component 1')
plt.ylabel('t-SNE Component 2')
plt.legend()
plt.grid(True)
plt.show()

## Classify and Visualize by Logistic Regression

In [None]:
# Stack the engaging rows on top of the boring rows to form the design matrix.
X_train = np.vstack((engaging_features, boring_features))
X_train.shape

In [None]:
## 1 for engaging and 0 for boring, in the same row order as X_train
n_engaging_rows = engaging_features.shape[0]
n_boring_rows = boring_features.shape[0]
y_train = np.repeat([1.0, 0.0], [n_engaging_rows, n_boring_rows])
y_train.shape

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from X_train and apply them in one step;
# `sc` keeps the fitted scaler.
sc = StandardScaler()
X_train_std = sc.fit_transform(X_train)
X_train_std.shape

In [None]:
from sklearn.linear_model import LogisticRegression

# The labels are binary (1 = engaging, 0 = boring), so no multi-class strategy
# has to be chosen; the `multi_class` argument is deprecated in recent
# scikit-learn releases and is therefore not passed.
lr = LogisticRegression(C=100.0, random_state=1, solver='lbfgs')
lr.fit(X_train_std, y_train)

In [None]:
# Predict on the same standardized data the model was fitted on (training
# data); no held-out test set is used in this cell.
ypred = lr.predict(X_train_std)

In [None]:
from sklearn.metrics import accuracy_score

accuracy_score(y_train, ypred)

In [None]:
# prompt: compute the confusion matrix given true and predicted labels.

from sklearn.metrics import confusion_matrix

# Compute the confusion matrix of the training labels against the predictions
# made on the training data.
cm = confusion_matrix(y_train, ypred)
print(cm)

In [None]:
import numpy as np
from matplotlib.colors import ListedColormap
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA

def plot_decision_regions(X, y, classifier, test_idx=None, resolution=0.02):
    """Plot a classifier's decision regions on a 2-D PCA projection of ``X``.

    ``X`` is projected onto its first two principal components. A grid over
    that plane is mapped back to the original feature space with the PCA
    inverse transform, classified, and drawn as filled contours; the projected
    samples are scattered on top, one marker/colour per class.

    Args:
        X: 2-D feature array the classifier was trained on.
        y: Class label of every row of ``X``; at most five distinct classes
            are supported by the marker/colour tables.
        classifier: Fitted estimator exposing ``predict``.
        test_idx: Optional row indices of ``X`` to highlight as test samples.
            ``None`` or an empty sequence disables the highlight.
        resolution: Grid step in the projected plane.
    """
    # PCA is used (rather than t-SNE) because the grid points must be mapped
    # back to the classifier's input space, which needs an inverse transform.
    pca = PCA(n_components=2)
    X_2d = pca.fit_transform(X)

    # setup marker generator and color map
    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    classes = np.unique(y)
    cmap = ListedColormap(colors[:len(classes)])

    # plot the decision surface
    x1_min, x1_max = X_2d[:, 0].min() - 1, X_2d[:, 0].max() + 1
    x2_min, x2_max = X_2d[:, 1].min() - 1, X_2d[:, 1].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))

    # Map every grid point back to the original feature space so that the
    # classifier sees inputs of the dimensionality it was trained on.
    mesh_input = np.c_[xx1.ravel(), xx2.ravel()]
    mesh_input_full = pca.inverse_transform(mesh_input)
    Z = classifier.predict(mesh_input_full).reshape(xx1.shape)

    plt.contourf(xx1, xx2, Z, alpha=0.3, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())

    for idx, cl in enumerate(classes):
        plt.scatter(x=X_2d[y == cl, 0],
                    y=X_2d[y == cl, 1],
                    alpha=0.8,
                    color=colors[idx],
                    marker=markers[idx],
                    label=cl,
                    edgecolor='black')

    # highlight test examples
    # `is not None` + len() also works for numpy index arrays, whose plain
    # truth value is ambiguous.
    if test_idx is not None and len(test_idx) > 0:
        # Reuse the fitted PCA so the test points land in the same plane as
        # the decision surface instead of in a freshly fitted embedding.
        X_test_2d = pca.transform(X[test_idx, :])
        plt.scatter(X_test_2d[:, 0],
                    X_test_2d[:, 1],
                    c='none',
                    edgecolor='black',
                    alpha=1.0,
                    linewidth=1,
                    marker='o',
                    s=100,
                    label='test set')


In [None]:
plot_decision_regions(X_train_std, y_train,
                      classifier=lr, test_idx=None)
# plot_decision_regions projects the data with PCA, so the axes are principal
# components rather than t-SNE components.
plt.xlabel('PCA component 1')
plt.ylabel('PCA component 2')
plt.legend(loc='upper left')
plt.tight_layout()
plt.show()