In [1]:
def load_data(file_path):
    data = []
    with open(file_path, 'r') as f:
        for line in f:
            if line.strip() and line.strip()[0].isalpha() == False:
                try:
                    parts = line.strip().split()
                    data.append([float(parts[0]), float(parts[1])])
                except (ValueError, IndexError):
                    pass # Skip lines that don't match the expected format
    return data

def get_class_data(file_name):
    # This assumes class1.txt, class2.txt, etc.
    class_label = file_name.split('.')[0]
    return load_data(file_name), class_label

def split_data(data, split_ratio=0.7):
    # Split data into training and testing sets
    np.random.shuffle(data)
    train_size = int(len(data) * split_ratio)
    train_set = data[:train_size]
    test_set = data[train_size:]
    return train_set, test_set

def mean(data):
    if not data:
        return [0, 0]
    return [sum(col) / len(col) for col in zip(*data)]

def covariance(data):
    n = len(data)
    if n < 2:
        return [[0, 0], [0, 0]]

    features = list(zip(*data))
    cov_matrix = [[0.0, 0.0], [0.0, 0.0]]
    x_mean, y_mean = mean(data)

    for i in range(n):
        cov_matrix[0][0] += (features[0][i] - x_mean) ** 2
        cov_matrix[1][1] += (features[1][i] - y_mean) ** 2
        cov_matrix[0][1] += (features[0][i] - x_mean) * (features[1][i] - y_mean)

    cov_matrix[0][0] /= (n - 1)
    cov_matrix[1][1] /= (n - 1)
    cov_matrix[0][1] /= (n - 1)
    cov_matrix[1][0] = cov_matrix[0][1]

    return cov_matrix

# Invert a 2x2 matrix
def inverse(matrix):
    a, b = matrix[0]
    c, d = matrix[1]
    det = a * d - b * c
    if det == 0:
        return None
    inv_det = 1.0 / det
    return [[d * inv_det, -b * inv_det],
            [-c * inv_det, a * inv_det]]

# Calculate determinant of a 2x2 matrix
def determinant(matrix):
    return matrix[0][0] * matrix[1][1] - matrix[0][1] * matrix[1][0]

# Dot product for 2D vectors and matrices
def dot(vec1, vec2):
    if isinstance(vec2[0], list): # Matrix-vector multiplication
        return [vec1[0] * vec2[0][0] + vec1[1] * vec2[1][0],
                vec1[0] * vec2[0][1] + vec1[1] * vec2[1][1]]
    else: # Vector-vector dot product
        return vec1[0] * vec2[0] + vec1[1] * vec2[1]

# Transpose a 2D vector
def transpose(vec):
    return [[vec[0]], [vec[1]]]

def subtract(vec1, vec2):
    return [vec1[0] - vec2[0], vec1[1] - vec2[1]]

def mat_vec_mult(mat, vec):
    return [mat[0][0] * vec[0] + mat[0][1] * vec[1],
            mat[1][0] * vec[0] + mat[1][1] * vec[1]]

def mat_mult(mat1, mat2):
    return [[mat1[0][0]*mat2[0][0] + mat1[0][1]*mat2[1][0], mat1[0][0]*mat2[0][1] + mat1[0][1]*mat2[1][1]],
            [mat1[1][0]*mat2[0][0] + mat1[1][1]*mat2[1][0], mat1[1][0]*mat2[0][1] + mat1[1][1]*mat2[1][1]]]

def confusion_matrix(true_labels, predicted_labels, class_labels):
    matrix = [[0] * len(class_labels) for _ in range(len(class_labels))]
    label_map = {cls: i for i, cls in enumerate(class_labels)}

    for t, p in zip(true_labels, predicted_labels):
        matrix[label_map[t]][label_map[p]] += 1
    return matrix

def calculate_metrics(cm, class_labels):
    total_correct = sum(cm[i][i] for i in range(len(class_labels)))
    total_samples = sum(sum(row) for row in cm)
    accuracy = total_correct / total_samples if total_samples > 0 else 0

    precision = []
    recall = []
    f1 = []

    for i in range(len(class_labels)):
        TP = cm[i][i]
        FP = sum(cm[j][i] for j in range(len(class_labels)) if i != j)
        FN = sum(cm[i][j] for j in range(len(class_labels)) if i != j)

        prec_val = TP / (TP + FP) if (TP + FP) > 0 else 0
        rec_val = TP / (TP + FN) if (TP + FN) > 0 else 0
        f1_val = (2 * prec_val * rec_val) / (prec_val + rec_val) if (prec_val + rec_val) > 0 else 0

        precision.append(prec_val)
        recall.append(rec_val)
        f1.append(f1_val)

    return accuracy, precision, recall, f1

In [2]:
def main():
    datasets = {
        "Dataset 1 (Linearly Separable)": ['Class1.txt', 'Class2.txt', 'Class3.txt'],
        "Dataset 2 (Nonlinearly Separable)": ['NLS_Group04.txt'],
        "Dataset 3 (Vowel Data)": ['Class3.txt']
    }

    for dataset_name, file_names in datasets.items():
        print(f"--- Processing {dataset_name} ---")
        
        all_train_data = []
        all_test_data = []
        class_labels = []

        for file_name in file_names:
            data, class_label = get_class_data(file_name)
            train_set, test_set = split_data(data)
            
            all_train_data.append({'label': class_label, 'data': train_set})
            all_test_data.append({'label': class_label, 'data': test_set})
            class_labels.append(class_label)

        # SCENARIO 1: Same sigma^2 * I
        print("\n--- Classifier 1: Shared Covariance (sigma^2 * I) ---")
        train_data_all = [item for sublist in [d['data'] for d in all_train_data] for item in sublist]
        avg_var = sum(covariance(d['data'])[0][0] + covariance(d['data'])[1][1] for d in all_train_data) / (2 * len(all_train_data))
        common_cov_sigma2I = [[avg_var, 0], [0, avg_var]]

        class_params = {}
        for d in all_train_data:
            class_params[d['label']] = {
                'mean': mean(d['data']),
                'prior': len(d['data']) / len(train_data_all)
            }

        true_labels = [d['label'] for d in all_test_data for _ in d['data']]
        predicted_labels = []
        for d in all_test_data:
            for x in d['data']:
                scores = {label: g_x_sigma2I(x, class_params[label]['mean'], avg_var, class_params[label]['prior']) for label in class_labels}
                predicted_labels.append(max(scores, key=scores.get))
        
        cm = confusion_matrix(true_labels, predicted_labels, class_labels)
        acc, prec, rec, f1 = calculate_metrics(cm, class_labels)
        print(f"Confusion Matrix: {cm}")
        print(f"Accuracy: {acc}")
        # Add precision, recall, f1 printing here
        # Code for plotting decision regions would be here

        # ... (Repeat similar blocks for scenarios 2, 3, and 4)
        
if __name__ == '__main__':
    main()

--- Processing Dataset 1 (Linearly Separable) ---


FileNotFoundError: [Errno 2] No such file or directory: 'Class1.txt'