In [5]:
import numpy as np

def load_arff(file_path):
    with open(file_path, 'r') as f:
        lines = f.readlines()

    data_start = lines.index('@data\n') + 1
    attributes = [line.split()[1] for line in lines if line.startswith('@attribute')]
    data_list = []

    for line in lines[data_start:]:
        values = line.strip().split(',')
        data_dict = {attr: val if val != 'm' else None for attr, val in zip(attributes, values)}
        data_list.append(data_dict)

    return data_list

def preprocess_data(data_list):
    for entry in data_list:
        for key, value in entry.items():
            if value == 'm':
                entry[key] = None
    return data_list

def linear_interpolation(data_list):
    for i in range(1, len(data_list) - 1):
        for key, value in data_list[i].items():
            if value is None and data_list[i - 1][key] is not None and data_list[i + 1][key] is not None:
                data_list[i][key] = (float(data_list[i - 1][key]) + float(data_list[i + 1][key])) / 2
    return data_list

def z_score_standardization(matrix):
    for i in range(2, len(matrix[0])):
        column = [float(row[i]) for row in matrix if row[i] is not None and row[i] != 'm']
        if len(set(column)) == 1: continue
        mean_val = sum(column) / len(column)
        std_dev = (sum((x - mean_val) ** 2 for x in column) / len(column)) ** 0.5 if len(column) > 1 else 0
        for row in matrix:
            if row[i] is not None and row[i] != 'm':
                row[i] = (float(row[i]) - mean_val) / std_dev if std_dev != 0 else 0
    return matrix

def dot_product(v1, v2):
    return sum(x * y for x, y in zip(v1, v2) if isinstance(x, (int, float)) and isinstance(y, (int, float)))

def multiply_matrix(matrix1, matrix2):
    result = []
    for row in matrix1:
        new_row = [dot_product(row, col) for col in transpose(matrix2)]
        result.append(new_row)
    return result

def transpose(matrix):
    return [[row[i] for row in matrix] for i in range(len(matrix[0]))]

def mean(column):
    values = [float(val) for val in column if val is not None]
    return sum(values) / len(values) if values else 0

def covariance_matrix(matrix):
    n = len(matrix)
    num_features = len(matrix[0])
    transposed_matrix = transpose(matrix)
    cov_matrix = [[0] * num_features for _ in range(num_features)]

    for i in range(num_features):
        for j in range(num_features):
            mean_i = mean(matrix[i])
            mean_j = mean(matrix[j])
            values_i = [float(val) for val in matrix[i] if val is not None]
            values_j = [float(val) for val in matrix[j] if val is not None]
            cov_matrix[i][j] = sum((val_i - mean_i) * (val_j - mean_j) for val_i, val_j in zip(values_i, values_j)) / (n - 1)

    return cov_matrix

def custom_random():
    seed = 1
    while True:
        seed = (seed * 1103515245 + 12345) & 0x7FFFFFFF
        yield seed / 0x7FFFFFFF

def pca(data_matrix, num_components):
    cov_matrix = covariance_matrix(data_matrix)
    num_features = len(data_matrix[0])
    eigenvalues = [0] * num_features
    eigenvectors = [[0] * num_features for _ in range(num_features)]
    random_generator = custom_random()

    for i in range(num_features):
        vector = np.array([next(random_generator) for _ in range(num_features)])
        for _ in range(1000):
            new_vector = np.dot(cov_matrix, vector)
            magnitude = np.linalg.norm(new_vector)
            vector = new_vector / magnitude
        eigenvalues[i] = np.dot(new_vector, vector)
        eigenvectors[i] = vector.tolist()

    sorted_indices = sorted(range(num_features), key=lambda k: eigenvalues[k], reverse=True)
    eigenvalues = [eigenvalues[i] for i in sorted_indices]
    eigenvectors = [[eigenvectors[j][i] for j in sorted_indices] for i in range(num_features)]
    top_eigenvectors = eigenvectors[:num_components]
    pca_result = multiply_matrix(data_matrix, transpose(top_eigenvectors))
    return pca_result

def svd(matrix):
    matrix = [[0.0 if val is None else float(val) for val in row] for row in matrix]
    u, s, vh = np.linalg.svd(matrix, full_matrices=False)
    return u, s, vh

def display_data_table(data_list):
    attributes = list(data_list[0].keys())
    column_widths = {attr: max(len(attr), max(len(str(entry[attr])) for entry in data_list)) for attr in attributes}
    header = "|".join(f"{attr:^{column_widths[attr]}}" for attr in attributes)
    print(header)
    print("-" * sum(column_widths.values()))
    for entry in data_list:
        row = "|".join(f"{str(entry[attr]):^{column_widths[attr]}}" if entry[attr] is not None else 'm' for attr in attributes)
        print(row)

def display_matrix(matrix):
    for row in matrix:
        print("|".join(f"{str(cell):^10}" for cell in row))

# Usage
file_path = './V4-data/2017.arff'
data = load_arff(file_path)
preprocessed_data = preprocess_data(data)
interpolated_data = linear_interpolation(preprocessed_data)
matrix = [[entry[attr] for attr in list(preprocessed_data[0].keys())[2:]] for entry in interpolated_data]  # Modify as per your attribute indices
standardized_data = z_score_standardization(matrix)
num_components = 2
pca_result = pca(standardized_data, num_components)
svd_result = svd(standardized_data)

print("\nPCA Result:")
display_matrix(pca_result)

print("\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nSVD Result:")
display_matrix(svd_result)

print("Finished")



PCA Result:
-0.006386506179314646|-0.048509995267149345
-0.007774483896660703|-0.059052659849146476
-0.000994699947840929|-0.007555443995072707
-0.0025469334648559154|-0.019345746619032225
-0.06553651812092845|-0.4977958361911207
-0.06553651812092845|-0.4977958361911207
-0.0027784281735655798|-0.021104111350635994
0.008112070812624808|0.06161686933017883
0.0009219162771914331|0.0070026009507538275
0.018744578542098676|0.1423782254070401
0.0003825054366388466|0.002905397160830707
-9.46972307794237e-05|-0.0007192919082739252
-0.06553651812092845|-0.4977958361911207
-0.06553651812092845|-0.4977958361911207
0.007936467062464201|0.06028303564239881
0.0007533686710990816|0.005722363627831954
-0.003871073421088272|-0.02940351861617272
0.0014849135826370814|0.011278960490034642
-0.006242037930572026|-0.047412657557611516
0.007539257841542033|0.0572659529236174
-0.08112675374661935|-0.6162146140301745
0.00042124765587797696|0.003199671497872404
0.0008106090498213518|0.006157144464637797
0.0014