In [4]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import Lasso, Ridge
import random

# Read the three input tables; the first CSV column becomes the row index.
ExE = pd.read_csv('extracted_data/ExE_imputed.csv', index_col=0, sep=',')
NxN = pd.read_csv('extracted_data/NxN_imputed.csv', index_col=0, sep=',')
ExN = pd.read_csv('extracted_data/ExN_filled.csv', index_col=0, sep=',')

# Candidate `alpha` values tried by the grid search for each estimator.
param_grid_lasso = dict(alpha=[0.0001, 0.0004, 0.001, 0.01, 0.1, 1, 10])
param_grid_ridge = dict(alpha=[0.1, 1, 10, 100, 200, 500, 1000])

# Tune a single estimator's hyper-parameters with a cross-validated grid search
def grid_search_for_model(model, param_grid, X, y):
    """Return the best parameter combination found for *model*.

    Args:
        model: Estimator instance handed to ``GridSearchCV``.
        param_grid: Mapping of parameter names to the candidate values to try.
        X: Feature table.
        y: Target values aligned with the rows of ``X``.

    Returns:
        The ``best_params_`` attribute of the fitted ``GridSearchCV``.
    """
    # 80/20 split with a fixed seed; the held-out 20% is deliberately
    # discarded here, so the search only ever sees the training portion.
    X_fit, _, y_fit, _ = train_test_split(X, y, test_size=0.2, random_state=42)

    # 5-fold cross-validation, candidates ranked by R^2.
    searcher = GridSearchCV(
        estimator=model,
        param_grid=param_grid,
        scoring='r2',
        cv=5,
        n_jobs=-1,
    )
    return searcher.fit(X_fit, y_fit).best_params_

# Tune Lasso and Ridge on a random subset of columns, each taken in turn as the target
def iterate_over_proportion_for_best_params(data, proportion):
    """Grid-search Lasso and Ridge for a random sample of target columns.

    Every sampled column is predicted from all remaining columns of ``data``.

    Args:
        data: DataFrame whose columns serve as both targets and features.
        proportion: Fraction of the columns to sample as targets.

    Returns:
        A dict keyed by target column name. Each value is a dict with the keys
        ``'Lasso Best Params'`` and ``'Ridge Best Params'``.
    """
    # int() truncates, so clamp to 1 to always tune at least one column.
    n_targets = max(1, int(data.shape[1] * proportion))
    # Draws from the module-level `random` state (seeded by the caller).
    targets = random.sample(data.columns.tolist(), n_targets)

    def tune(target):
        # All other columns are the predictors for this target.
        features = data.drop(columns=[target])
        response = data[target]
        # Dict values are evaluated in order: Lasso first, then Ridge.
        return {
            'Lasso Best Params': grid_search_for_model(
                Lasso(), param_grid_lasso, features, response
            ),
            'Ridge Best Params': grid_search_for_model(
                Ridge(), param_grid_ridge, features, response
            ),
        }

    return {target: tune(target) for target in targets}

# Seed the stdlib RNG so the sampled target columns are repeatable
random.seed(35)

# Fraction of each dataset's columns to use as targets
proportion = 0.01

# Run the search dataset by dataset, in the order ExE, NxN, ExN
best_params_ExE, best_params_NxN, best_params_ExN = [
    iterate_over_proportion_for_best_params(frame, proportion)
    for frame in (ExE, NxN, ExN)
]

# Pretty-print the tuned parameters collected for one dataset
def print_best_params(results, dataset_name):
    """Print the best Lasso and Ridge parameters for every target column.

    Args:
        results: Dict keyed by target column. Each value is a dict holding
            the keys ``'Lasso Best Params'`` and ``'Ridge Best Params'``.
        dataset_name: Label shown in the heading line.
    """
    print(f"\nBest Parameters for {dataset_name}:")
    for column in results:
        best = results[column]
        print(f"\nTarget Column: {column}")
        # The dictionary key doubles as the printed label.
        for label in ('Lasso Best Params', 'Ridge Best Params'):
            print(f"{label}: {best[label]}")

# Report the tuned parameters, one dataset at a time
print_best_params(results=best_params_ExE, dataset_name="ExE")
print_best_params(results=best_params_NxN, dataset_name="NxN")
print_best_params(results=best_params_ExN, dataset_name="ExN")




Best Parameters for ExE:

Target Column: YPL169C
Lasso Best Params: {'alpha': 0.0001}
Ridge Best Params: {'alpha': 10}

Target Column: YGL112C
Lasso Best Params: {'alpha': 0.0004}
Ridge Best Params: {'alpha': 1}

Target Column: YKL104C
Lasso Best Params: {'alpha': 0.0001}
Ridge Best Params: {'alpha': 10}

Target Column: YPL209C.1
Lasso Best Params: {'alpha': 0.0004}
Ridge Best Params: {'alpha': 10}

Target Column: YLR105C
Lasso Best Params: {'alpha': 0.0001}
Ridge Best Params: {'alpha': 10}

Target Column: YML015C
Lasso Best Params: {'alpha': 0.0004}
Ridge Best Params: {'alpha': 1}

Target Column: YLR045C.2
Lasso Best Params: {'alpha': 0.0004}
Ridge Best Params: {'alpha': 1}

Best Parameters for NxN:

Target Column: YDR493W
Lasso Best Params: {'alpha': 0.0001}
Ridge Best Params: {'alpha': 10}

Target Column: YMR087W
Lasso Best Params: {'alpha': 0.0001}
Ridge Best Params: {'alpha': 10}

Target Column: YMR101C
Lasso Best Params: {'alpha': 0.0001}
Ridge Best Params: {'alpha': 10}

Target