In [None]:
import math

def mean_vector(X):
    """Return the per-column arithmetic mean of a row-major 2-D sequence.

    Args:
        X: Non-empty sequence of rows; the number of columns is taken from
            the first row, and every row is indexed up to that width.

    Returns:
        A list with one mean per column.
    """
    row_count = len(X)
    width = len(X[0])
    column_ids = range(width)

    # Accumulate column totals row by row, starting from 0.0.
    totals = [0.0 for _ in column_ids]
    for sample in X:
        for col in column_ids:
            totals[col] = totals[col] + sample[col]

    return [total / row_count for total in totals]

def center_mean(X, mean):
    """Subtract ``mean`` element-wise from every row of ``X``.

    Args:
        X: Sequence of rows (each a sequence of numbers).
        mean: Sequence indexed by column position; must be at least as long
            as each row.

    Returns:
        A new list of lists with ``row[j] - mean[j]`` in every position;
        ``X`` itself is not modified.
    """
    return [
        [sample[col] - mean[col] for col in range(len(sample))]
        for sample in X
    ]

def covarianse_matrix(B):
    """Build the sample covariance matrix of already-centered data.

    Args:
        B: Non-empty sequence of rows. No mean is subtracted here, so the
            caller is expected to pass centered data.

    Returns:
        A square list of lists (features x features) where entry ``[a][b]`` is
        the sum of products of columns ``a`` and ``b`` divided by
        ``len(B) - 1``.
    """
    sample_count = len(B)
    dims = len(B[0])
    denominator = sample_count - 1
    matrix = [[0.0 for _ in range(dims)] for _ in range(dims)]

    # Only the upper triangle is computed; each value is mirrored below the
    # diagonal because the matrix is symmetric.
    for a in range(dims):
        for b in range(a, dims):
            cross_sum = sum(sample[a] * sample[b] for sample in B)
            matrix[a][b] = matrix[b][a] = cross_sum / denominator
    return matrix

def QR_factorization(matrix):
    """Factor a square matrix into ``Q`` and ``R`` via Gram-Schmidt.

    Each column of ``matrix`` has its projections onto the previously built
    unit vectors removed (the projection coefficients are taken against the
    original column), and the remainder is normalised to form the next column
    of ``Q``.

    Args:
        matrix: Square list of lists (``len(matrix)`` is used for both
            dimensions).

    Returns:
        Tuple ``(Q, R)`` of row-major lists of lists, with ``R`` upper
        triangular.

    Raises:
        ZeroDivisionError: If a column's remainder has zero norm.
    """
    size = len(matrix)
    idx = range(size)

    # Work column-wise: source_cols[c] is column c of the input.
    source_cols = [[matrix[r][c] for r in idx] for c in idx]
    basis = []
    upper = [[0.0 for _ in idx] for _ in idx]

    for c, column in enumerate(source_cols):
        residual = list(column)
        # `basis` holds exactly the c unit vectors built so far.
        for b, unit in enumerate(basis):
            coeff = sum(unit[k] * column[k] for k in idx)
            upper[b][c] = coeff
            for k in idx:
                residual[k] -= coeff * unit[k]
        norm = math.sqrt(sum(x**2 for x in residual))
        upper[c][c] = norm
        basis.append([x / norm for x in residual])

    # Transpose the list of basis columns back into row-major form.
    Q = [[basis[c][r] for c in idx] for r in idx]
    return Q, upper

def matrix_multiply(A, B):
    """Multiply two row-major matrices given as lists of lists.

    The operands may be rectangular: ``A`` is ``r x m`` and ``B`` is
    ``m x c``. For square operands of equal size the result is the same as a
    plain triple loop over ``len(A)``.

    Args:
        A: Left operand, a list of rows.
        B: Right operand, a list of rows.

    Returns:
        The ``r x c`` product as a new list of lists (``[]`` when ``A`` is
        empty).

    Raises:
        ValueError: If a row of ``A`` does not have ``len(B)`` entries, or if
            the rows of ``B`` differ in length.
    """
    if not A:
        return []

    inner = len(B)
    if any(len(row) != inner for row in A):
        raise ValueError(
            "matrix_multiply: every row of A must have len(B) entries"
        )

    n_cols = len(B[0]) if B else 0
    if any(len(row) != n_cols for row in B):
        raise ValueError("matrix_multiply: rows of B have differing lengths")

    # Sizing by the real dimensions (not len(A) everywhere) keeps non-square
    # products from being silently truncated.
    return [
        [sum(A[i][k] * B[k][j] for k in range(inner)) for j in range(n_cols)]
        for i in range(len(A))
    ]

def eigen_value_and_vec(matrix, n_iterations=100):
    """Approximate eigenvalues and eigenvectors with repeated QR steps.

    Each step factors the current matrix as ``Q R`` and replaces it with
    ``R Q``, while the ``Q`` factors are multiplied together. There is no
    convergence test: exactly ``n_iterations`` steps are run.
    NOTE(review): presumably intended for symmetric input such as a
    covariance matrix - confirm before using it on general matrices.

    Args:
        matrix: Square list of lists; it is copied, not modified.
        n_iterations: Number of QR steps to run (default 100, the value that
            was previously hard-coded).

    Returns:
        Tuple ``(eigen_values, eigen_vectors)``. ``eigen_values`` is the
        diagonal of the iterated matrix. ``eigen_vectors[j]`` is column ``j``
        of the accumulated ``Q`` product, i.e. vectors are returned as rows.

    Raises:
        ValueError: If ``n_iterations`` is negative.
    """
    if n_iterations < 0:
        raise ValueError("n_iterations must be non-negative")

    n = len(matrix)
    A = [row.copy() for row in matrix]
    # Start the accumulated transform at the identity.
    Q_total = [[1.0 if i == j else 0.0 for j in range(n)] for i in range(n)]

    for _ in range(n_iterations):
        Q, R = QR_factorization(A)
        A = matrix_multiply(R, Q)
        Q_total = matrix_multiply(Q_total, Q)

    eigen_values = [A[i][i] for i in range(n)]
    # Transpose so that each returned inner list is one column of Q_total.
    eigen_vectors = [[Q_total[i][j] for i in range(n)] for j in range(n)]
    return eigen_values, eigen_vectors

def pca(X, n_components):
    """Project ``X`` onto its leading principal components.

    The data is centered, its covariance matrix is built, eigenpairs are
    estimated, and the centered rows are projected onto the eigenvectors with
    the largest eigenvalues.

    Args:
        X: Sequence of rows (samples) of numeric features.
        n_components: Number of components to keep. Values larger than the
            number of features keep every component; ``None`` also keeps
            every component (it is used directly as a slice bound).

    Returns:
        A list with one row per sample, each holding the sample's coordinate
        along every kept component, ordered by decreasing eigenvalue.

    Raises:
        ValueError: If ``n_components`` is negative. Previously a negative
            value was silently treated as a slice offset and dropped
            components from the end.
    """
    if n_components is not None and n_components < 0:
        raise ValueError("n_components must be non-negative")

    mean = mean_vector(X)
    centered_X = center_mean(X, mean)
    cov_matrix = covarianse_matrix(centered_X)
    eigen_values, eigen_vectors = eigen_value_and_vec(cov_matrix)

    # Rank component indices by eigenvalue, largest first.
    sorted_indices = sorted(
        range(len(eigen_values)),
        key=lambda k: eigen_values[k],
        reverse=True,
    )
    top_components = [eigen_vectors[i] for i in sorted_indices[:n_components]]

    # Each projected coordinate is the dot product of a centered row with one
    # of the kept component vectors.
    return [
        [sum(x * c for x, c in zip(row, comp)) for comp in top_components]
        for row in centered_X
    ]

In [44]:
import csv
import math
from collections import Counter
import random

class KNN:
    """k-nearest-neighbours classifier using ``math.dist`` and majority vote."""

    def __init__(self, k=5):
        """Store the number of neighbours that take part in each vote."""
        self.k = k

    def fit(self, X, y):
        """Keep references to the training rows and their labels."""
        self.X_train = X
        self.y_train = y

    def predict(self, X_test):
        """Return a list with the predicted label for every row of ``X_test``."""
        return list(map(self._predict_one, X_test))

    def _predict_one(self, x):
        """Return the most common label among the ``k`` closest training rows."""
        ranked = []
        for index, neighbour in enumerate(self.X_train):
            ranked.append((math.dist(x, neighbour), index))
        # Tuples sort by distance first, then by training index.
        ranked.sort()

        # Labels are counted nearest-first, so the Counter's insertion order
        # decides between labels with equal counts.
        votes = Counter(self.y_train[index] for _, index in ranked[:self.k])
        winner, _count = votes.most_common(1)[0]
        return winner

def read_csv(file_path, label_col='label', exclude_cols=('filename', 'length')):
    """Load numeric features and string labels from a CSV file with a header.

    Args:
        file_path: Path of the UTF-8 encoded CSV file.
        label_col: Header name of the column holding the class label.
        exclude_cols: Header names of columns that are neither features nor
            the label and must be skipped.

    Returns:
        Tuple ``(X, y)``: ``X`` is a list of rows of floats (every column
        except the label and the excluded ones, in file order) and ``y`` is
        the list of label strings.

    Raises:
        ValueError: If a named column is missing from the header or a feature
            value cannot be converted to ``float``.
    """
    # newline='' hands line-ending handling to the csv module, as its
    # documentation requires for file objects.
    with open(file_path, 'r', encoding='utf-8', newline='') as f:
        reader = csv.reader(f)
        headers = next(reader)
        label_idx = headers.index(label_col)
        exclude = {headers.index(name) for name in exclude_cols}
        exclude.add(label_idx)

        X, y = [], []
        for row in reader:
            # csv.reader yields [] for blank lines; skip them instead of
            # failing on the label lookup.
            if not row:
                continue
            features = [float(val) for i, val in enumerate(row) if i not in exclude]
            X.append(features)
            y.append(row[label_idx])
    return X, y

def normalize(X):
    """Standardise every column of ``X`` to zero mean and unit deviation.

    The deviation is the population one (sum of squared differences divided
    by the number of rows). Columns whose deviation is zero are filled with
    the integer ``0``.

    Args:
        X: Non-empty sequence of numeric rows; the width is taken from the
            first row.

    Returns:
        A new list of lists with the scaled values.
    """
    row_count = len(X)
    cols = range(len(X[0]))

    means = [sum(row[c] for row in X) / row_count for c in cols]

    stds = []
    for c in cols:
        squared_total = sum((row[c] - means[c])**2 for row in X)
        stds.append(math.sqrt(squared_total / row_count))

    scaled = []
    for row in X:
        scaled.append(
            [(row[c] - means[c]) / stds[c] if stds[c] else 0 for c in cols]
        )
    return scaled

import random

if __name__ == "__main__":
    # Fixed seed so the shuffle, the train/test split and the reported
    # accuracy are reproducible across kernel restarts.
    SEED = 42
    TRAIN_FRACTION = 0.8

    X, y = read_csv("features_30_sec.csv")
    # NOTE(review): the scaling statistics are computed over all rows before
    # the split, so held-out rows influence the normalisation. Consider
    # fitting the statistics on the training rows only.
    X = normalize(X)

    # Shuffle features and labels together so the pairs stay aligned.
    combined = list(zip(X, y))
    random.Random(SEED).shuffle(combined)
    X, y = zip(*combined)
    X, y = list(X), list(y)

    split_idx = int(TRAIN_FRACTION * len(X))

    X_train, y_train = X[:split_idx], y[:split_idx]
    X_test, y_test = X[split_idx:], y[split_idx:]

    knn = KNN(k=5)
    knn.fit(X_train, y_train)
    predictions = knn.predict(X_test)

    # Share of held-out rows whose predicted label equals the true label.
    acc = sum(p == t for p, t in zip(predictions, y_test)) / len(y_test) * 100

    print(f'Accuracy: {acc:.0f}%')
    print("Real labels:     ", y_test[:10])
    print("Predicted labels:", predictions[:10])

Accuracy: 70%
Real labels:      ['blues', 'rock', 'disco', 'rock', 'jazz', 'disco', 'country', 'jazz', 'disco', 'jazz']
Predicted labels: ['blues', 'rock', 'blues', 'rock', 'jazz', 'jazz', 'blues', 'classical', 'rock', 'jazz']
