# In [6]:
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.metrics import r2_score, mean_squared_error
import random
from sklearn.base import clone

# Load the three input tables in one pass; the first CSV column becomes the
# DataFrame index in each case.
ExE, NxN, ExN = (
    pd.read_csv(csv_path, sep=',', index_col=0)
    for csv_path in (
        'extracted_data/ExE_imputed.csv',
        'extracted_data/NxN_imputed.csv',
        'extracted_data/ExN_filled.csv',
    )
)

# Unfitted estimators to benchmark; the regularisation strengths are fixed here.
predictive_models = [LinearRegression(), Lasso(alpha=0.0004), Ridge(alpha=200.0)]

def evaluate_model(model, X, y):
    """Fit ``model`` on a train split of ``(X, y)`` and score it on the rest.

    The data is split 80/20 with a fixed seed (``random_state=42``), so the
    same rows are held out on every call with the same input. ``model`` is
    fitted in place.

    Returns:
        Tuple ``(r2, rmse)`` measured on the held-out 20%.
    """
    split = train_test_split(X, y, test_size=0.2, random_state=42)
    features_fit, features_eval, target_fit, target_eval = split

    model.fit(features_fit, target_fit)
    predictions = model.predict(features_eval)

    score = r2_score(target_eval, predictions)
    # RMSE is the square root of the mean squared error.
    error = np.sqrt(mean_squared_error(target_eval, predictions))
    return score, error

def iterate_over_proportion(data, proportion, models):
    """Score each model at predicting randomly chosen columns from the others.

    A random subset of the columns of ``data`` is drawn using the global
    ``random`` module state (seed it beforehand for reproducible runs). Each
    drawn column is used in turn as the target, with all remaining columns as
    features, and every model is scored on it via ``evaluate_model``.

    Args:
        data: DataFrame whose columns act as both features and targets.
        proportion: Fraction of the columns to use as targets. The resulting
            count is clamped to the range ``[1, number of columns]``.
        models: Iterable of estimators. Each one is cloned for every target,
            so the objects passed in are never fitted.

    Returns:
        Dict keyed by model class name. Each value is a dict with
        ``'Average R2'``, ``'Average RMSE'`` and ``'r2 array'`` (the
        per-target R2 scores, in the order the targets were drawn).

    Raises:
        ValueError: If ``data`` has no columns.
    """
    column_names = data.columns.tolist()
    if not column_names:
        raise ValueError("data must contain at least one column")

    # The count was already clamped from below; clamp it from above as well so
    # a proportion greater than 1 selects every column instead of failing
    # inside random.sample with "Sample larger than population".
    requested = int(data.shape[1] * proportion)
    num_columns = min(len(column_names), max(1, requested))
    selected_columns = random.sample(column_names, num_columns)

    results = {}

    for model in models:
        # NOTE: results are keyed by class name, so two models of the same
        # class (e.g. two Lasso instances) would overwrite each other.
        model_name = model.__class__.__name__
        r2_scores = []
        rmse_scores = []

        for target_column in selected_columns:
            X = data.drop(columns=[target_column])
            y = data[target_column]

            # Fit a fresh copy so state never leaks between targets.
            model_copy = clone(model)

            r2, rmse = evaluate_model(model_copy, X, y)
            r2_scores.append(r2)
            rmse_scores.append(rmse)

        results[model_name] = {
            'Average R2': np.mean(r2_scores),
            'Average RMSE': np.mean(rmse_scores),
            'r2 array': r2_scores,
        }

    return results

# Seed the global RNG so the random choice of target columns is reproducible.
random.seed(35)
proportion = 0.1  # fraction of columns used as prediction targets
results_ExE = iterate_over_proportion(ExE, proportion, predictive_models)
results_NxN = iterate_over_proportion(NxN, proportion, predictive_models)
# Evaluate the ExN table here; this call previously received ExE, so the
# results reported as "ExN" were computed on the wrong dataset.
results_ExN = iterate_over_proportion(ExN, proportion, predictive_models)


def print_results(results):
    """Print the average R² and RMSE of every model in ``results``.

    ``results`` maps a model name to a dict holding at least the keys
    ``'Average R2'`` and ``'Average RMSE'``. Each model is printed as a block
    preceded by a blank line, with both values shown to four decimals.
    """
    for name, summary in results.items():
        print(f"\nModel: {name}")
        mean_r2 = summary['Average R2']
        print(f"Average R²:   {mean_r2:.4f}")
        mean_rmse = summary['Average RMSE']
        print(f"Average RMSE: {mean_rmse:.4f}")


# Print one report per dataset; every heading after the first is preceded by
# a blank line.
for _heading, _dataset_results in (
    ("Results for ExE:", results_ExE),
    ("\nResults for NxN:", results_NxN),
    ("\nResults for ExN:", results_ExN),
):
    print(_heading)
    print_results(_dataset_results)


# KeyboardInterrupt: