In [11]:
def generate_synthetic_data(rows, cols, seed=None):
    if seed is not None:
        import random
        random.seed(seed)
    return [[random.random() for _ in range(cols)] for _ in range(rows)]

def transpose(matrix):
    return [[matrix[j][i] for j in range(len(matrix))] for i in range(len(matrix[0]))]

def dot_product(vec1, vec2):
    return sum(x * y for x, y in zip(vec1, vec2))

def scalar_multiply(scalar, vector):
    return [scalar * x for x in vector]

def matrix_vector_multiply(matrix, vector):
    return [dot_product(row, vector) for row in matrix]

def subtract_vectors(vec1, vec2):
    return [x - y for x, y in zip(vec1, vec2)]

def norm_squared(vector):
    return dot_product(vector, vector)

def norm(vector):
    return dot_product(vector, vector) ** 0.5

def normalize(vector):
    magnitude = norm(vector)
    return [x / magnitude for x in vector]

def covariance_matrix(data):
    num_samples = len(data)
    num_features = len(data[0])
    means = [sum(data[i][j] for i in range(num_samples)) / num_samples for j in range(num_features)]
    centered_data = [[data[i][j] - means[j] for j in range(num_features)] for i in range(num_samples)]
    cov_matrix = [[dot_product(centered_data[i], centered_data[j]) / (num_samples - 1) for j in range(num_features)] for i in range(num_features)]
    return cov_matrix

def custom_sqrt(x):
    if x == 0:
        return 0
    guess = x
    i = 0
    while i < 10:  # Adjust the number of iterations as needed
        guess = 0.5 * (guess + x / guess)
        i += 1
    return guess

def parse_arff(file_path):
    attributes = []
    data = []
    with open(file_path, 'r') as file:
        lines = file.read().splitlines()
        parsing_data = False
        for line in lines:
            line = line.strip()
            if not line or line.startswith('%'):
                continue
            elif line.lower().startswith('@attribute'):
                attribute_name = line.split()[1]
                attributes.append(attribute_name)
            elif line.lower().startswith('@data'):
                parsing_data = True
                break
        if parsing_data:
            for line in lines:
                line = line.strip()
                if line and not line.startswith('%'):
                    values = [value.strip() for value in line.split(',')]
                    numeric_values = []
                    for value in values:
                        try:
                            numeric_values.append(float(value))
                        except ValueError:
                            pass  # Ignore non-numeric values
                    if numeric_values:
                        data.append(numeric_values)
    return data, attributes

def list_files_in_folder(folder_path):
    files = [
        '2017 Q1.arff', '2017 Q2.arff', '2017 Q3.arff', '2017 Q4.arff', '2017.arff', 
        '2018 Q1.arff', '2018 Q2.arff', '2018 Q3.arff', '2018 Q4.arff', '2018.arff', 
        '2019 Q1.arff', '2019 Q2.arff', '2019 Q3.arff', '2019 Q4.arff', '2019.arff', 
        '2020 Q1.arff', '2020 Q2.arff', '2020 Q3.arff', '2020 Q4.arff', '2020.arff', 
        '2021 Q1.arff', 
    ]
    return files

def load_arff_folder(folder_path):
    all_data = []
    all_attributes = []
    folder_content = list_files_in_folder(folder_path)
    for file_name in folder_content:
        file_path = folder_path + "\\" + file_name
        data, attributes = parse_arff(file_path)
        all_data.append(data)
        all_attributes.append(attributes)
    return all_data, all_attributes

def pca(data, num_components):
    num_samples = len(data)
    num_features = len(data[0])
    cov_matrix = covariance_matrix(data)
    eigenvalues, eigenvectors = eigenvalue_decomposition(cov_matrix)
    sorted_indices = sorted(range(num_features), key=lambda i: eigenvalues[i], reverse=True)
    eigenvectors = [eigenvectors[i] for i in sorted_indices]
    principal_components = eigenvectors[:num_components]
    transformed_data = [matrix_vector_multiply(transpose(principal_components), data[i]) for i in range(num_samples)]
    return transformed_data, principal_components

def eigenvalue_decomposition(matrix):
    n = len(matrix)
    eigenvalues = [0.0] * n
    eigenvectors = [[0.0] * n for _ in range(n)]
    for i in range(n):
        eigenvectors[i][i] = 1.0
        eigenvalues[i] = matrix[i][i]
    return eigenvalues, eigenvectors

def svd(data):
    num_samples = len(data)
    num_features = len(data[0])
    means = [sum(data[i]) / num_samples for i in range(num_samples)]
    standardized_data = [[data[i][j] - means[j] for j in range(num_features)] for i in range(num_samples)]
    cov_matrix = [[dot_product(standardized_data[i], standardized_data[j]) / (num_samples - 1) for j in range(num_features)] for i in range(num_features)]
    eigenvalues, eigenvectors = eigenvalue_decomposition(cov_matrix)
    singular_values = [custom_sqrt(val) for val in eigenvalues]
    vt = transpose(eigenvectors)
    transformed_data = [matrix_vector_multiply(transpose(vt), standardized_data[i]) for i in range(num_samples)]
    return transformed_data, vt, singular_values

folder_path = 'dataset'
all_data, all_attributes = load_arff_folder(folder_path)

data = all_data[0]

svd_result, _, _ = svd(data)

num_pca_components = 2
pca_result, _ = pca(data, num_pca_components)

print("\nPCA Result:")
print(pca_result)
print("\nSVD Result:")
print(svd_result)

IndexError: list index out of range