## Multivariate Linear Regression Problem using PSO

In [None]:
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

def calculate_mse(weights, X_aug, y_true):
    """Return the mean squared error of a linear model on one data set.

    Args:
        weights: 1-D coefficient vector with one entry per column of
            ``X_aug``.
        X_aug: 2-D design matrix (array-like); callers in this file pass
            a matrix whose first column is all ones so that
            ``weights[0]`` acts as the intercept.
        y_true: 1-D array-like of target values, one per row of ``X_aug``.

    Returns:
        The mean of the squared residuals ``y_true - X_aug @ weights``.
    """
    residuals = y_true - X_aug @ weights
    return np.mean(residuals ** 2)

def _pso_minimize(X_aug, y, w, c1, c2, num_particles, num_iterations,
                  bounds, rng):
    """Minimise ``calculate_mse`` over weight vectors with particle swarm.

    Particles are updated one at a time, so a new global best found by
    particle ``i`` is already visible to particle ``i + 1`` within the
    same iteration.

    Args:
        X_aug: Design matrix passed through to ``calculate_mse``.
        y: Target vector passed through to ``calculate_mse``.
        w: Inertia weight applied to the previous velocity.
        c1: Cognitive coefficient (pull towards the particle's own best).
        c2: Social coefficient (pull towards the swarm's best).
        num_particles: Number of particles in the swarm.
        num_iterations: Number of full sweeps over the swarm.
        bounds: ``(lower, upper)`` box used both to initialise positions
            and to clip them after every move.
        rng: Object exposing ``uniform`` and ``rand`` (the ``np.random``
            module or a ``np.random.RandomState``).

    Returns:
        The best weight vector found, as a 1-D array.
    """
    lower, upper = bounds
    n_dimensions = X_aug.shape[1]

    positions = rng.uniform(lower, upper, (num_particles, n_dimensions))
    velocities = rng.uniform(-1, 1, (num_particles, n_dimensions))

    pbest_positions = positions.copy()
    pbest_fitness = np.array(
        [calculate_mse(p, X_aug, y) for p in positions]
    )

    gbest_index = np.argmin(pbest_fitness)
    gbest_position = pbest_positions[gbest_index].copy()
    gbest_fitness = pbest_fitness[gbest_index]

    for _ in range(num_iterations):
        for i in range(num_particles):
            # Fresh per-dimension random factors for each particle.
            r1 = rng.rand(n_dimensions)
            r2 = rng.rand(n_dimensions)
            velocities[i] = (w * velocities[i] +
                             c1 * r1 * (pbest_positions[i] - positions[i]) +
                             c2 * r2 * (gbest_position - positions[i]))

            # Move, then keep the particle inside the search box.
            positions[i] = np.clip(positions[i] + velocities[i], lower, upper)
            current_fitness = calculate_mse(positions[i], X_aug, y)

            if current_fitness < pbest_fitness[i]:
                pbest_fitness[i] = current_fitness
                pbest_positions[i] = positions[i]

                # A new global best must also be a new personal best,
                # so this check only needs to run inside the branch.
                if current_fitness < gbest_fitness:
                    gbest_fitness = current_fitness
                    gbest_position = positions[i].copy()

    return gbest_position


def solve(w=0.7, c1=1.5, c2=1.5, num_particles=50, num_iterations=2000,
          bounds=(-10.0, 10.0), seed=None):
    """Fit a linear regression to the OpenML 'boston' data set using PSO.

    Loads the data, holds out 20% of the rows for testing
    (``random_state=42``), prepends a column of ones so the first weight
    is the intercept, runs particle swarm optimisation on the training
    MSE, and prints the best weights with the training and testing MSE.

    Called with no arguments this behaves like the original hard-coded
    version.

    Args:
        w: Inertia weight.
        c1: Cognitive coefficient.
        c2: Social coefficient.
        num_particles: Swarm size; converted with ``int()``.
        num_iterations: Number of PSO iterations; converted with ``int()``.
        bounds: ``(lower, upper)`` limits for every weight.
            NOTE(review): features are not scaled before optimisation, so
            weights that end exactly on a bound may indicate the box is
            too tight -- confirm before relying on the fit.
        seed: Optional seed for a private ``np.random.RandomState``. When
            ``None`` the global ``np.random`` state is used.

    Raises:
        ValueError: If ``num_particles`` is less than 1 or ``bounds`` is
            not an increasing ``(lower, upper)`` pair.
    """
    num_particles = int(num_particles)
    num_iterations = int(num_iterations)
    if num_particles < 1:
        raise ValueError("num_particles must be at least 1")
    lower, upper = bounds
    if not lower < upper:
        raise ValueError("bounds must satisfy lower < upper")

    # The np.random module and RandomState share the uniform/rand API, so
    # either can drive the optimiser.
    rng = np.random if seed is None else np.random.RandomState(seed)

    boston = fetch_openml(name='boston', version=1, as_frame=False, parser='liac-arff')
    X = boston.data
    y = boston.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Leading column of ones lets the first weight act as the intercept.
    X_train_aug = np.hstack((np.ones((X_train.shape[0], 1)), X_train))
    X_test_aug = np.hstack((np.ones((X_test.shape[0], 1)), X_test))

    best_weights = _pso_minimize(
        X_train_aug, y_train, w, c1, c2,
        num_particles, num_iterations, (lower, upper), rng,
    )

    final_train_mse = calculate_mse(best_weights, X_train_aug, y_train)
    final_test_mse = calculate_mse(best_weights, X_test_aug, y_test)

    formatted_weights = [f"{weight:.3f}" for weight in best_weights]
    print(f"Best Weights: [{', '.join(formatted_weights)}]")
    print(f"Training MSE: {final_train_mse:.2f}")
    print(f"Testing MSE: {final_test_mse:.2f}")

# Run the experiment only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    solve()


Best Weights: [10.000, -0.092, 0.037, 0.032, 2.795, -10.000, 5.493, -0.011, -1.239, 0.401, -0.006, -0.580, 0.015, -0.457]
Training MSE: 22.37
Testing MSE: 27.08
