In [None]:
import os
import numpy as np
import librosa
from sklearn.mixture import GaussianMixture
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from scipy.stats import multivariate_normal

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
orig_dir = os.getcwd()
os.chdir('/content/drive/MyDrive')
from mfcc import MFCC
os.chdir(orig_dir)

In [None]:
def extract_features(file_path):
    mfccs = MFCC(file_path)
    return mfccs.T


data_dir = '/content/drive/MyDrive/SPEECH_A6/sre_new_dataset'
speaker_dirs = os.listdir(data_dir)
features = []
labels = []

for label, speaker_dir in enumerate(speaker_dirs):
    speaker_path = os.path.join(data_dir, speaker_dir)
    for wav_file in os.listdir(speaker_path):
        file_path = os.path.join(speaker_path, wav_file)
        features.append(extract_features(file_path))
        labels.append(label)

In [None]:
features[0].shape

(3597, 42)

In [None]:
train_features, test_features, train_labels, test_labels = train_test_split(features, labels, test_size=0.2)

In [None]:
print(len(features), len(train_features), len(test_features))
print(features[0].shape)

700 560 140
(3597, 42)


In [None]:
ubm_features = np.concatenate(train_features)
print(ubm_features.shape)

(2614154, 42)


In [None]:
class KMeans():
    def __init__(self, n_clusters, max_iters = 20, threshold = 1000):
        self.n_clusters = n_clusters
        self.max_iters = max_iters
        self.threshold = threshold

    def fit(self, X):
        self.centroids = X[np.random.choice(X.shape[0], self.n_clusters, replace=False), :]
        prev_centroids = self.centroids.copy()

        for i in range(self.max_iters):
            distances = self._get_distances(X)
            labels = np.argmin(distances, axis=1)

            for j in range(self.n_clusters):
                self.centroids[j] = np.mean(X[labels == j], axis=0)

            new_centroids = self.centroids
            centroid_distance = np.linalg.norm(new_centroids - prev_centroids)
            print(centroid_distance)
            if centroid_distance < self.threshold:
              print(f'Kmeans converged in {i+1} iterations')
              break
            prev_centroids = self.centroids.copy()
        self.labels = labels

    def _get_distances(self, X):
        distances = np.zeros((X.shape[0], self.n_clusters))

        for i in range(self.n_clusters):
            distances[:, i] = np.linalg.norm(X - self.centroids[i], axis=1)

        return distances

    def get_means(self):
      return self.centroids

    def get_labels(self):
      return self.labels


In [None]:
kmeans = KMeans(100)
kmeans.fit(ubm_features)

212472.54303980473
3341689.70959963
2690190.8278467082
260879.36447235712
53941.38250697448
84124.7616146722
544010.3099038219
2184195.0290925214
1029239.1552596877
600469.8273902357
346687.1657076922
207075.9534210261
107771.20970379224
85785.52979051265
60270.486612271445
40821.5374832921
24260.620926993328
12427.97937530196
11542.117986756832
10535.852182134015


In [None]:
data = ubm_features
labels = kmeans.get_labels()
print(data.shape)
print(labels.shape)
print(np.unique(labels))

(2614154, 42)
(2614154,)
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
 96 97 98 99]


In [None]:
data = ubm_features
labels = kmeans.get_labels()
n_clusters = len(np.unique(labels))
n_features = data.shape[1]

cluster_means = {}
for label in np.unique(labels):
    cluster_data = data[labels == label]
    cluster_means[label] = np.mean(cluster_data, axis=0)

covariances = np.zeros((n_clusters, n_features, n_features))
counts = np.zeros(n_clusters)
for label in np.unique(labels):
    covariances[label] = np.zeros((data.shape[1], data.shape[1]))
    counts[label] = 0

for i in range(data.shape[0]):
    label = labels[i]
    x = data[i, :]
    mean = cluster_means[label]
    covariances[label] += np.outer(x - mean, x - mean)
    counts[label] += 1

for label in np.unique(labels):
    covariances[label] /= counts[label]

In [None]:
labels = kmeans.get_labels()
count_list = np.zeros(100)
for i in range(len(labels)):
  count_list[labels[i]] += 1

print(count_list)

[  4483.   3267.  65225.  19992. 182328.   2173.   8133.  10308.   1944.
   9385.  17314.   4241.   5851.  20192.  21446.  10873.   5229.   5732.
   3587.   9363. 191718.  65072. 168760.  13349.  10910.  13229.   9818.
  13312.  10956.  10190.   2959.  38096.   6482.   6684.  10276.  22655.
    304.  45139.  21147.   7700.   6908.  76750.   3989.  18957.  20272.
  35102.   9597.  21974.   6266.   6245.   8916.   5572.   5194.  24467.
   3737.   3226. 107224.   2319.  10331.   6369.   2690.   6511. 106551.
 180425.  13328.   6877.    243.  40889.  20010.   6953.  11325.  28041.
   7409.  12179. 126319.   6328.   8297.  18130.   9112.   5549.  14044.
  19443.   6447.  30577.  51552.  16949.  20128.   8576.  16053.   3448.
   7249. 120059.   9311.  58567. 108936.  17691.   4406.   7428.   5719.
   7168.]


In [None]:
def diag_det(matrix):
  d = matrix.shape[0]
  log_prod = 0
  log_list = []
  for i in range(d):
    log_list.append(np.log(matrix[i,i]+1e-6))
  log_list = np.nan_to_num(log_list, nan=0)
  log_prod = np.sum(log_list)
  prod = np.exp(log_prod)
  return prod

In [None]:
def diag_inv(matrix):
  inverse_matrix = []
  for i in range(len(matrix)):
    row = []
    for j in range(len(matrix)):
      if i == j:
        row.append(1 / (matrix[i][j]+1e-6))
      else:
        row.append(0)
    inverse_matrix.append(row)
  return np.array(inverse_matrix)

In [None]:
# def gaussian_pdf(x, mu, sigma):
#     d = mu.shape[0]
#     inv_sigma = diag_inv(sigma)
#     det_sigma = diag_det(sigma)
#     norm_const = 1 / ((2*np.pi)**(d/2) * np.sqrt(det_sigma))
#     diff = x - mu
#     exponent = -0.5 * np.sum(np.dot(diff, inv_sigma) * diff, axis=1)
#     pdf = norm_const * np.exp(exponent)
#     return pdf

In [None]:
def gaussian_pdf(x, mu, sigma):
    d = mu.shape[0]
    inv_sigma = diag_inv(sigma)
    det_sigma = diag_det(sigma)
    norm_const = 1 / ((2*np.pi)**(d/2) * np.sqrt(det_sigma))
    diff = x - mu
    exponent = -0.5 * np.sum(np.dot(diff, inv_sigma) * diff, axis=1)
    pdf = norm_const * np.exp(exponent)
    return pdf

In [None]:
n_components = 20
init_means = kmeans.get_means().copy()
init_covariances = covariances
init_weights = np.ones(n_components) / n_components

In [None]:
import numpy as np

class GMM():
    def __init__(self, n_components, max_iter = 10, tol=1e-4):
        self.n_components = n_components
        self.max_iter = max_iter
        self.tol = tol

    def _init_params(self, X, means, covariances, weights):
        n_samples, n_features = X.shape
        self.means_ = means.copy()
        self.covariances_ = covariances.copy()
        self.weights_ = weights.copy()

    def _estimate_posterior(self, X):
        n_samples = X.shape[0]
        likelihoods = np.zeros((n_samples, self.n_components))
        for j in range(self.n_components):
          likelihoods[:, j] = self.weights_[j] * gaussian_pdf(X, self.means_[j], self.covariances_[j])
        posteriors = likelihoods/np.sum(likelihoods, axis=1, keepdims= True)
        posteriors = np.nan_to_num(posteriors)
        scores = likelihoods.sum(axis = 1)
        scores[scores == 0] = 1e-150
        log_likelihoods = np.log(scores)
        total_log_likelihood = np.sum(log_likelihoods)
        return total_log_likelihood, posteriors


    def fit(self, X):
        self._init_params(X, init_means, init_covariances, init_weights)
        prev_log_likelihood = -np.inf
        for i in range(self.max_iter):
            # E-step
            log_likelihood, posteriors = self._estimate_posterior(X)
            Ns = np.sum(posteriors, axis= 0)

            # M-step
            for j in range(self.n_components):
                self.means_[j] = np.sum(posteriors[:, j].reshape(-1,1)*X, axis=0)/Ns[j]
                diff = X - self.means_[j]
                self.covariances_[j] = np.sum(posteriors[:, j, np.newaxis] * diff **2, axis=0) / Ns[j]
                self.weights_[j] = Ns[j]/np.sum(Ns)

            if abs(log_likelihood - prev_log_likelihood) < self.tol:
              break

            if i > 0:
              print(f'Change in Log Likelihood: {log_likelihood - prev_log_likelihood}')

            prev_log_likelihood = log_likelihood.copy()

    def predict(self, X):
      likelihood, posteriors = self._estimate_posterior(X)
      return posteriors

    def score(self, X):
      likelihood, _ = self._estimate_posterior(X)
      return likelihood

    def get_means(self):
        return self.means_

    def get_covariances(self):
        return self.covariances_

    def get_weights(self):
        return self.weights_


In [None]:
ubm_gmm = GMM(20)
ubm_gmm.fit(ubm_features)

  posteriors = likelihoods/np.sum(likelihoods, axis=1, keepdims= True)


Change in Log Likelihood: 1506742.7330776155
Change in Log Likelihood: 514593.8558846712
Change in Log Likelihood: 382591.7030055523
Change in Log Likelihood: 309045.28001469374
Change in Log Likelihood: 249095.39559558034
Change in Log Likelihood: 195015.1250398457
Change in Log Likelihood: 153036.85146540403
Change in Log Likelihood: 121529.21259361506
Change in Log Likelihood: 98105.82557073236


In [None]:
posterior_prob = ubm_gmm.predict(ubm_features)
eff_points = posterior_prob.sum(axis=0)
print(eff_points)

[11985.29407075 59989.86518117 32495.79800211 22477.51989604
 32877.83231724 18757.56881645 16271.6320281  19983.07137618
 10331.23240304 44968.42193793  9020.00922832 63277.90818516
 11874.88886629 18547.92069901   165.30175731 25614.43641456
 30789.20623175 21101.2543835   6662.08968178 64047.74852324]


  posteriors = likelihoods/np.sum(likelihoods, axis=1, keepdims= True)


In [None]:
def get_super_vector(ubm_gmm, test_utterance):
    post_probs = ubm_gmm.predict(test_utterance)
    N_k = post_probs.sum(axis=0)
    relevance_factor = 50*np.mean(N_k)
    means = (post_probs.T @ test_utterance)/(N_k[:, np.newaxis])
    means = np.nan_to_num(means, nan=0)
    alpha_k = [[i/(i+relevance_factor)] for i in N_k]
    alpha_k = np.array(alpha_k)
    ubm_means = ubm_gmm.get_means()
    new_means = ubm_means + alpha_k*(means - ubm_means)
    supervector = new_means.reshape(-1)
    return supervector

In [None]:
labels

array([14, 15,  5, ..., 11, 11, 19])

In [None]:
print(len(train_features))

112


In [None]:
train_super_vectors=[]
for i, x in enumerate(train_features):
  train_super_vectors.append(get_super_vector(ubm_gmm,x))

  posteriors = likelihoods/np.sum(likelihoods, axis=1, keepdims= True)


In [None]:
len(train_super_vectors)

112

In [None]:
def map_adaptation(ubm_gmm, speaker_features, relevance_factor=5, eps=1e-8):

    post_probs = ubm_gmm.predict(speaker_features)
    N_k = post_probs.sum(axis=0)
    relevance_factor = 0.05*np.mean(N_k)
    means = (post_probs.T @ speaker_features)/(N_k[:, np.newaxis])
    means = np.nan_to_num(means, nan=0)
    alpha_k = [[i/(i+relevance_factor)] for i in N_k]
    alpha_k = np.array(alpha_k)
    print(alpha_k)
    ubm_means = ubm_gmm.get_means()
    new_means = ubm_means + alpha_k*(means - ubm_means)
    adapted_gmm = GMM(20)
    adapted_gmm._init_params(speaker_features, new_means, ubm_gmm.get_covariances(), ubm_gmm.get_weights())

    return adapted_gmm


speaker_gmms = []
for label in set(train_labels):
    speaker_features = [feat for feat, lbl in zip(train_features, train_labels) if lbl == label]
    speaker_features = np.concatenate(speaker_features)
    adapted_gmm = map_adaptation(ubm_gmm, speaker_features)
    speaker_gmms.append(adapted_gmm)

[[0.87496002]
 [0.98444459]
 [0.96950505]
 [0.93450043]
 [0.95832867]
 [0.94487253]
 [0.83300874]
 [0.92694066]
 [0.80550365]
 [0.94206619]
 [0.69186397]
 [0.98396422]
 [0.8491131 ]
 [0.9463902 ]
 [0.12769354]
 [0.96017255]
 [0.95727447]
 [0.86516626]
 [0.55345311]
 [0.98467166]]
[[0.5293148 ]
 [0.86936453]
 [0.41496166]
 [0.64236062]
 [0.91598916]
 [0.6304223 ]
 [0.98453287]
 [0.39115117]
 [0.95237997]
 [0.99297935]
 [0.63629318]
 [0.94777049]
 [0.55351077]
 [0.74174475]
 [0.08455945]
 [0.3157024 ]
 [0.92970438]
 [0.98752255]
 [0.94287967]
 [0.94718947]]
[[0.91189723]
 [0.97016197]
 [0.95556418]
 [0.93649963]
 [0.96380245]
 [0.93238151]
 [0.91768417]
 [0.93493913]
 [0.91589904]
 [0.97557326]
 [0.90909354]
 [0.98014368]
 [0.91492998]
 [0.94333271]
 [0.15420862]
 [0.94880738]
 [0.96199411]
 [0.93844873]
 [0.88631888]
 [0.98050111]]
[[0.35494533]
 [0.8661611 ]
 [0.2060638 ]
 [0.49411148]
 [0.91110039]
 [0.39333461]
 [0.98525822]
 [0.19995232]
 [0.94994202]
 [0.99315027]
 [0.51289015]
 [0

  posteriors = likelihoods/np.sum(likelihoods, axis=1, keepdims= True)


[[0.89708892]
 [0.99395806]
 [0.9691702 ]
 [0.9322387 ]
 [0.93712462]
 [0.90080598]
 [0.05206757]
 [0.94418053]
 [0.57795305]
 [0.55980561]
 [0.8581453 ]
 [0.97556398]
 [0.87419445]
 [0.89812021]
 [0.04444278]
 [0.95661614]
 [0.92971113]
 [0.00547287]
 [0.41116272]
 [0.97573902]]
[[0.74333704]
 [0.93526746]
 [0.78475152]
 [0.81906375]
 [0.95697028]
 [0.86151282]
 [0.97689743]
 [0.65698453]
 [0.95639848]
 [0.99031574]
 [0.7859742 ]
 [0.96819905]
 [0.77037519]
 [0.9018186 ]
 [0.09558517]
 [0.80516738]
 [0.95800946]
 [0.98164   ]
 [0.94274068]
 [0.97141604]]
[[0.78940912]
 [0.9555058 ]
 [0.82907945]
 [0.86530722]
 [0.94973689]
 [0.89259277]
 [0.97099142]
 [0.74419268]
 [0.94747634]
 [0.99101121]
 [0.73176384]
 [0.97498028]
 [0.80877937]
 [0.91246619]
 [0.11357183]
 [0.83875148]
 [0.94766829]
 [0.9779819 ]
 [0.91900043]
 [0.97302674]]
[[0.92046812]
 [0.96984307]
 [0.97078156]
 [0.95975831]
 [0.97256104]
 [0.94451255]
 [0.73189583]
 [0.95775312]
 [0.85100878]
 [0.88164427]
 [0.91939093]
 [0

In [None]:
def identify_speaker(gmms, test_feature):
    speaker_scores = np.array([gmm.score(test_feature) for gmm in gmms])
    ubm_scores = ubm_gmm.score(test_feature)
    scores = (speaker_scores - ubm_scores)
    return np.argmax(scores)

predictions = [identify_speaker(speaker_gmms, test_feature) for test_feature in test_features]

In [None]:
print(predictions)
print(test_labels)

[0, 16, 9, 4, 17, 4, 1, 8, 7, 18, 19, 7, 18, 9, 13, 1, 5, 11, 13, 10, 9, 2, 14, 0, 0, 13, 18, 9]
[0, 16, 9, 2, 17, 4, 1, 8, 7, 18, 19, 7, 18, 9, 13, 1, 6, 11, 13, 10, 9, 2, 14, 0, 0, 7, 18, 13]


In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix

accuracy = accuracy_score(test_labels, predictions)
conf_matrix = confusion_matrix(test_labels, predictions)

print("Accuracy: ", accuracy*100)
print("Confusion Matrix: \n", conf_matrix)

Accuracy:  85.71428571428571
Confusion Matrix: 
 [[3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 2 0 0 0 0 1 0 0 0 0 0]
 [0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 1 0 0 2 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1]]


In [None]:
speaker_features

array([[-6.23018921e+02,  4.53085846e+02, -1.87472122e+02, ...,
         9.00942032e-05, -3.58281765e-04, -2.32152940e-04],
       [-5.51705994e+02,  5.65153381e+02, -2.36920883e+02, ...,
         3.05704307e-04,  8.71633890e-03, -1.42788527e-04],
       [-5.28438171e+02,  5.88512878e+02, -2.14429092e+02, ...,
         4.63694858e-04,  4.09147238e-03, -2.28414597e-04],
       ...,
       [-5.93146545e+02,  6.18139465e+02, -1.77309097e+02, ...,
         1.06757303e-04, -1.46520083e-06,  5.16698929e-07],
       [-6.60111877e+02,  6.44647705e+02, -2.27034882e+02, ...,
         1.45346465e-04,  1.08848578e-06, -6.90398218e-07],
       [-6.75001953e+02,  6.17803772e+02, -2.55973633e+02, ...,
         2.89551332e-04, -8.51298595e-08,  1.74868881e-07]])

In [None]:
len(train_super_vectors)

112

In [None]:
# import numpy as np
# from sklearn.decomposition import FactorAnalysis

# # Function to compute supervectors for each speaker (Replace this with your supervector extraction method)


# # Dummy data for demonstration (Replace this with your actual speech data)
# # speech_data should be a list where each element corresponds to a speaker's speech data


# # Perform TVS using Factor Analysis
# num_components = 50 # You can set the desired number of components for the TVS
# tvs = FactorAnalysis(n_components=num_components)
# tvs.fit(train_super_vectors)

# # Get the TV matrix
# tvs_matrix = tvs.components_

# # Each row of tvs_matrix represents a factor that defines the variation in the supervectors.

# # Now you can use this TV matrix to transform new supervectors for unseen speakers.
# # For example, if you have a new supervector for a speaker:
# train_transform_vectors=[]
# for i in train_super_vectors:
#   train_transform_vectors.append(tvs.transform([i])[0])





In [None]:
len(train_super_vectors)

112

In [None]:
import numpy as np

class TVS:
    def __init__(self, num_components):
        self.num_components = num_components
        self.transform_matrix = None

    def fit_transform(self, supervectors):
        # Convert list of supervectors to a 2D numpy array
        supervectors = np.array(supervectors)

        # Compute the mean supervector
        mean_supervector = np.mean(supervectors, axis=0, keepdims=True)

        # Center the supervectors
        centered_supervectors = supervectors - mean_supervector

        # Compute the supervector covariance matrix
        covariance_matrix = np.cov(centered_supervectors, rowvar=False)

        # Perform eigenvalue decomposition on the covariance matrix
        eigenvalues, eigenvectors = np.linalg.eigh(covariance_matrix)

        # Sort eigenvalues and eigenvectors in descending order
        sorted_indices = np.argsort(eigenvalues)[::-1]
        sorted_eigenvalues = eigenvalues[sorted_indices]
        sorted_eigenvectors = eigenvectors[:, sorted_indices]

        # Select the top 'num_components' eigenvectors
        self.transform_matrix = sorted_eigenvectors[:, :self.num_components]

        # Transform the supervectors to TVS space
        transformed_supervectors = centered_supervectors.dot(self.transform_matrix)

        return transformed_supervectors

    def transform(self, supervectors):
        # Check if the transform matrix is available
        if self.transform_matrix is None:
            raise ValueError("TVS not fitted. Please call fit_transform first.")

        # Convert list of supervectors to a 2D numpy array
        supervectors = np.array(supervectors)

        # Center the supervectors using the previously computed mean supervector
        centered_supervectors = supervectors - np.mean(supervectors, axis=0, keepdims=True)

        # Transform the centered supervectors to TVS space
        transformed_supervectors = centered_supervectors.dot(self.transform_matrix)

        return transformed_supervectors

# Dummy data for illustration (Replace this with your actual speech data)
# Each element of 'supervectors' list should be a 1D array representing a supervector
supervectors = [np.random.randn(50) for _ in range(5)]  # 5 speakers with 50-dimensional supervectors each

# Create a TVS instance and fit_transform the supervectors
num_components = 50  # You can set the desired number of components for the TVS
tvs = TVS(num_components)
transformed_supervectors = tvs.fit_transform(train_super_vectors)

# Now you can use the transformed supervectors for further processing or speaker verification tasks.


In [None]:
# import numpy as np
# from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# # Dummy data for illustration (Replace this with your actual speaker data)
# # X should be a numpy array of shape (num_samples, num_features) where num_samples is the number of supervectors
# # and num_features is the dimensionality of each supervector.
# X = train_transform_vectors # Assuming 100 supervectors with 840 features each
# y = train_labels  # Assuming 5 different classes (speakers)

# # Perform LDA with desired number of components
# num_components = 10  # You can choose the desired number of components (smaller than the number of classes)
# lda = LinearDiscriminantAnalysis(n_components=num_components)
# X_lda = lda.fit_transform(X, y)

# # X_lda will be the transformed data with reduced dimensionality based on the LDA analysis.

# # Now you can use the transformed data for further tasks, such as speaker recognition or classification.
# # For example, you can use the transformed data for training a classifier like SVM or k-NN.


In [None]:
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# Dummy data for illustration (Replace this with your actual speaker data)
# X should be a numpy array of shape (num_samples, num_features) where num_samples is the number of supervectors
# and num_features is the dimensionality of each supervector.
X = transformed_supervectors # Assuming 100 supervectors with 840 features each
y = train_labels  # Assuming 5 different classes (speakers)

# Perform LDA with desired number of components
num_components = 10  # You can choose the desired number of components (smaller than the number of classes)
lda = LinearDiscriminantAnalysis(n_components=num_components)
X_lda = lda.fit_transform(X, y)

# X_lda will be the transformed data with reduced dimensionality based on the LDA analysis.

# Now you can use the transformed data for further tasks, such as speaker recognition or classification.
# For example, you can use the transformed data for training a classifier like SVM or k-NN.


In [None]:
class LDA():
  def __init__(self, num_components):
    self.num_components = num_components

  def fit(self, X, y):
    class_mean_vectors = []
    for label in np.unique(y):
        class_mean_vectors.append(np.mean(X[y == label], axis=0))
    class_mean_vectors = np.array(class_mean_vectors)
    within_class_scatter_matrix = np.zeros((X.shape[1], X.shape[1]))

    for label in np.unique(y):
        class_indices = np.where(y == label)[0]
        class_scatter_matrix = np.cov(X[class_indices].T)
        within_class_scatter_matrix += class_scatter_matrix

    for i in range(within_class_scatter_matrix.shape[0]):
      within_class_scatter_matrix[i, i] += 1e-3

    overall_mean_vector = np.mean(X, axis=0)
    between_class_scatter_matrix = np.zeros((X.shape[1], X.shape[1]))
    for i, class_mean in enumerate(class_mean_vectors):
        n_samples = X[y == i].shape[0]
        between_class_scatter_matrix += n_samples * np.outer((class_mean - overall_mean_vector), (class_mean - overall_mean_vector))

    matrix_product = np.dot(np.linalg.inv(within_class_scatter_matrix), between_class_scatter_matrix)
    eig_vals, eig_vecs = np.linalg.eig(matrix_product)
    sorted_indices = np.argsort(eig_vals)[::-1]
    sorted_eig_vals = eig_vals[sorted_indices]
    sorted_eig_vecs = eig_vecs[:,sorted_indices]

    self.transform_matrix = sorted_eig_vecs[:,:self.num_components]

  def transform(self, X):
    X_lda = np.dot(X, self.transform_matrix)
    return X_lda

In [None]:
# X = transformed_supervectors # Assuming 100 supervectors with 840 features each
# y = train_labels  # Assuming 5 different classes (speakers)

# # Perform LDA with desired number of components
# num_components = 10  # You can choose the desired number of components (smaller than the number of classes)
# lda = LDA(num_components)
# lda.fit(X, y)
# X_lda = lda.transform(X)


In [None]:
(X_lda[0].shape)

(10,)

In [None]:
# import numpy as np
# from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# # Dummy data for illustration (Replace this with your actual speaker data)
# # X should be a numpy array of shape (num_samples, num_features) where num_samples is the number of supervectors
# # and num_features is the dimensionality of each supervector.
# X = train_super_vectors # Assuming 100 supervectors with 840 features each
# y = train_labels  # Assuming 5 different classes (speakers)

# # Perform LDA with desired number of components
# num_components = 10  # You can choose the desired number of components (smaller than the number of classes)
# lda = LinearDiscriminantAnalysis(n_components=num_components)
# X_lda = lda.fit_transform(X, y)

# # X_lda will be the transformed data with reduced dimensionality based on the LDA analysis.

# # Now you can use the transformed data for further tasks, such as speaker recognition or classification.
# # For example, you can use the transformed data for training a classifier like SVM or k-NN.


In [None]:
# claim_vectors=[]
# for label in set(train_labels):
#     speaker_feature_super_vector=get_super_vector(ubm_gmm,speaker_features)
#     tvs_features=tvs.transform([speaker_feature_super_vector])
#     claim_vectors.append(lda.transform(tvs_features))

In [None]:
# claim_vectors=[]
# for label in set(train_labels):
#     speaker_features = [feat for feat, lbl in zip(train_features, train_labels) if lbl == label]
#     speaker_features = np.concatenate(speaker_features)
#     speaker_feature_super_vector=get_super_vector(ubm_gmm,speaker_features)
#     tvs_features=tvs.transform([speaker_feature_super_vector)
#     claim_vectors.append(lda.transform(tvs_features))

In [None]:
test_super_vectors=[]
for feature in test_features:
  test_speaker_feature_super_vector=get_super_vector(ubm_gmm,feature)
  test_super_vectors.append(test_speaker_feature_super_vector)

In [None]:
test_vectors=[]
transformed_test_supervectors=tvs.transform(test_super_vectors)
for feature in transformed_test_supervectors:
  test_vectors.append(lda.transform([feature])[0])


In [None]:
(test_vectors[0].shape)

(10,)

In [None]:
(test_vectors[0])

array([-5.58626815, -4.74554822, -2.31316677, -1.14749552,  0.82678825,
        1.11439636,  0.25501306, -0.68276513,  4.26039574,  0.83775426])

In [None]:
(X_lda[0])

array([12.78918652, -4.05922756, -1.36915298, -2.58332096, -6.08958021,
       -0.87106843, -7.629604  , -3.7611331 ,  1.29795279, -0.02541987])

In [None]:
# test_vectors=[]
# for feature in test_features:
#   test_speaker_feature_super_vector=get_super_vector(ubm_gmm,feature)
#   test_vectors.append(lda.transform([test_speaker_feature_super_vector]))


In [None]:
import numpy as np
from sklearn.metrics import mean_squared_error

In [None]:
train_super_vectors = []
test_super_vectors = []
for feature in train_features:
  train_super_vectors.append(get_super_vector(ubm_gmm, feature))
for feature in test_features:
  test_super_vectors.append(get_super_vector(ubm_gmm, feature))

  posteriors = likelihoods/np.sum(likelihoods, axis=1, keepdims= True)


In [None]:
print(train_super_vectors[0][:20])

[-5.49201590e+02  5.67159921e+02 -4.04128506e+02  3.80113514e+01
  1.33943010e+02 -4.12435886e+02 -1.34666657e+02 -1.07361075e+02
 -3.52079790e+02 -5.18476894e+00 -1.13543549e+02 -2.61566043e+02
  4.03756642e+01  0.00000000e+00 -4.44451719e+02  2.07816308e+04
  1.86234053e+04 -2.41734577e+04 -2.85304516e+04  1.06782705e+04]


In [None]:
print(test_super_vectors[0][:20])

[-5.49877649e+02  5.67089266e+02 -4.03761138e+02  3.91424251e+01
  1.34869061e+02 -4.12218690e+02 -1.34334034e+02 -1.07864841e+02
 -3.53501427e+02 -5.89195150e+00 -1.13807981e+02 -2.62289011e+02
  3.94676601e+01  0.00000000e+00 -4.47898282e+02  2.07825522e+04
  1.86213581e+04 -2.41339366e+04 -2.84869989e+04  1.06861753e+04]


In [None]:
predictions=[]
for test in test_vectors:
  ind_scores=[]
  for x in X_lda:
    ind_scores.append(mean_squared_error(x,test))
  #print(ind_scores)
  scores=[]
  for label in set(train_labels):
   speaker_scores = [feat for feat, lbl in zip(ind_scores, train_labels) if lbl == label]
   scores.append(sum(speaker_scores))
  #print(len(ind_scores), len(scores))
  #print(scores)
  min_index = scores.index(min(scores))
  predictions.append(min_index)


In [None]:
predictions=[]
for test in test_super_vectors:
  ind_scores=[]
  for x in train_super_vectors:
    ind_scores.append(mean_squared_error(x,test))
  # print(ind_scores)
  scores=[]
  for label in set(train_labels):
   speaker_scores = [feat for feat, lbl in zip(ind_scores, train_labels) if lbl == label]
   scores.append(sum(speaker_scores))
  # print(len(ind_scores), len(scores))
  # print(scores)
  min_index = scores.index(min(scores))
  predictions.append(min_index)

In [None]:
print(predictions)
print(test_labels)

[0, 16, 13, 2, 17, 4, 1, 8, 7, 9, 13, 7, 18, 9, 13, 1, 6, 11, 13, 10, 9, 2, 14, 0, 0, 13, 18, 13]
[0, 16, 9, 2, 17, 4, 1, 8, 7, 18, 19, 7, 18, 9, 13, 1, 6, 11, 13, 10, 9, 2, 14, 0, 0, 7, 18, 13]


In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix

accuracy = accuracy_score(test_labels, predictions)
conf_matrix = confusion_matrix(test_labels, predictions)

print("Accuracy: ", accuracy*100)
print("Confusion Matrix: \n", conf_matrix)

Accuracy:  85.71428571428571
Confusion Matrix: 
 [[3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 2 0 0 0 0 1 0 0 0 0 0]
 [0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 2 0 0 1 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0]
 [0 0 0 0 0 0 0 1 0 0 0 0 0 0 2 0]
 [0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0]]
