In [22]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import Lasso, Ridge
import random

In [23]:
ExE = pd.read_csv('extracted_data/ExE_imputed.csv', sep=',', index_col=0)
NxN = pd.read_csv('extracted_data/NxN_imputed.csv', sep=',', index_col=0)
ExN = pd.read_csv('extracted_data/ExN_filled.csv', sep=',', index_col=0)

param_grid_lasso = {
    'alpha': [0.00001, 0.00005, 0.0001, 0.0005, 0.001, 0.01]
}

param_grid_ridge = {
    'alpha': [1, 5, 10, 50, 100, 200]
}

In [24]:
def grid_search_for_model(model, param_grid, X, y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    grid_search = GridSearchCV(model, param_grid, cv=5, scoring='r2', n_jobs=-1)
    grid_search.fit(X_train, y_train)
    
    results = pd.DataFrame(grid_search.cv_results_)

    return grid_search.best_params_, results

def iterate_over_proportion_for_best_params(data, proportion):
    num_columns = max(1, int(data.shape[1] * proportion))
    selected_columns = random.sample(data.columns.tolist(), num_columns)

    results = {}

    for target_column in selected_columns:
        X = data.drop(columns=[target_column])
        y = data[target_column]

        lasso_best_params, lasso_results = grid_search_for_model(Lasso(), param_grid_lasso, X, y)
        ridge_best_params, ridge_results = grid_search_for_model(Ridge(), param_grid_ridge, X, y)

        results[target_column] = {
            'Lasso Best Params': lasso_best_params,
            'Lasso Results': lasso_results,
            'Ridge Best Params': ridge_best_params,
            'Ridge Results': ridge_results
        }

    return results

In [25]:
def print_average_r2(results, dataset_name):
    print(f"\nAverage R² Scores for {dataset_name}:")

    lasso_scores = {}
    ridge_scores = {}

    for run in results.values():
        lasso = run['Lasso Results']
        ridge = run['Ridge Results']

        for alpha, r2 in zip(lasso['param_alpha'], lasso['mean_test_score']):
            lasso_scores.setdefault(alpha, []).append(r2)

        for alpha, r2 in zip(ridge['param_alpha'], ridge['mean_test_score']):
            ridge_scores.setdefault(alpha, []).append(r2)

    def print_scores(scores, model_name):
        print(f"\n{model_name} Regression:")
        for alpha in sorted(scores):
            avg_r2 = np.mean(scores[alpha])
            print(f"Alpha: {alpha}, Average R²: {avg_r2:.4f}")

    print_scores(lasso_scores, "Lasso")
    print_scores(ridge_scores, "Ridge")


# print best parameter for every single result
def print_best_params(results, dataset_name):
    print(f"\nBest Parameters for {dataset_name}:")
    for target_column, params in results.items():
        print(f"\nTarget Column: {target_column}")
        print(f"Lasso Best Params: {params['Lasso Best Params']}")
        print(f"Ridge Best Params: {params['Ridge Best Params']}")


In [26]:
random.seed(35)
proportion = 0.01

best_params_ExE = iterate_over_proportion_for_best_params(ExE, proportion)
best_params_NxN = iterate_over_proportion_for_best_params(NxN, proportion)
best_params_ExN = iterate_over_proportion_for_best_params(ExN, proportion)

  model = cd_fast.enet_coordinate_descent(


In [27]:
print_average_r2(best_params_ExE, "ExE")


Average R² Scores for ExE:

Lasso Regression:
Alpha: 1e-05, Average R²: -0.2024
Alpha: 5e-05, Average R²: 0.2250
Alpha: 0.0001, Average R²: 0.3069
Alpha: 0.0005, Average R²: 0.3053
Alpha: 0.001, Average R²: 0.2414
Alpha: 0.01, Average R²: -0.0162

Ridge Regression:
Alpha: 1, Average R²: 0.2847
Alpha: 5, Average R²: 0.3603
Alpha: 10, Average R²: 0.3476
Alpha: 50, Average R²: 0.2431
Alpha: 100, Average R²: 0.1786
Alpha: 200, Average R²: 0.1159


In [28]:
print_average_r2(best_params_NxN, "NxN")



Average R² Scores for NxN:

Lasso Regression:
Alpha: 1e-05, Average R²: 0.1142
Alpha: 5e-05, Average R²: 0.2528
Alpha: 0.0001, Average R²: 0.2139
Alpha: 0.0005, Average R²: 0.0556
Alpha: 0.001, Average R²: 0.0191
Alpha: 0.01, Average R²: -0.0031

Ridge Regression:
Alpha: 1, Average R²: 0.1020
Alpha: 5, Average R²: 0.2210
Alpha: 10, Average R²: 0.2291
Alpha: 50, Average R²: 0.1759
Alpha: 100, Average R²: 0.1353
Alpha: 200, Average R²: 0.0937


In [29]:
print_average_r2(best_params_ExN, "ExN")


Average R² Scores for ExN:

Lasso Regression:
Alpha: 1e-05, Average R²: 0.0162
Alpha: 5e-05, Average R²: 0.1642
Alpha: 0.0001, Average R²: 0.1550
Alpha: 0.0005, Average R²: 0.0355
Alpha: 0.001, Average R²: 0.0117
Alpha: 0.01, Average R²: -0.0044

Ridge Regression:
Alpha: 1, Average R²: 0.1731
Alpha: 5, Average R²: 0.2239
Alpha: 10, Average R²: 0.2260
Alpha: 50, Average R²: 0.1711
Alpha: 100, Average R²: 0.1297
Alpha: 200, Average R²: 0.0878


## Individual results for genes

In [31]:
print_best_params(best_params_ExE, "ExE")


Best Parameters for ExE:

Target Column: YPL169C
Lasso Best Params: {'alpha': 0.0001}
Ridge Best Params: {'alpha': 5}

Target Column: YGL112C
Lasso Best Params: {'alpha': 0.0001}
Ridge Best Params: {'alpha': 5}

Target Column: YKL104C
Lasso Best Params: {'alpha': 0.0001}
Ridge Best Params: {'alpha': 5}

Target Column: YPL209C.1
Lasso Best Params: {'alpha': 0.0005}
Ridge Best Params: {'alpha': 10}

Target Column: YLR105C
Lasso Best Params: {'alpha': 0.0001}
Ridge Best Params: {'alpha': 10}

Target Column: YML015C
Lasso Best Params: {'alpha': 0.0005}
Ridge Best Params: {'alpha': 5}

Target Column: YLR045C.2
Lasso Best Params: {'alpha': 0.0005}
Ridge Best Params: {'alpha': 5}


In [32]:
print_best_params(best_params_NxN, "NxN")


Best Parameters for NxN:

Target Column: YDR493W
Lasso Best Params: {'alpha': 5e-05}
Ridge Best Params: {'alpha': 10}

Target Column: YMR087W
Lasso Best Params: {'alpha': 5e-05}
Ridge Best Params: {'alpha': 10}

Target Column: YMR101C
Lasso Best Params: {'alpha': 5e-05}
Ridge Best Params: {'alpha': 50}

Target Column: YGR254W
Lasso Best Params: {'alpha': 5e-05}
Ridge Best Params: {'alpha': 10}

Target Column: YDL037C
Lasso Best Params: {'alpha': 5e-05}
Ridge Best Params: {'alpha': 10}

Target Column: YBR076W
Lasso Best Params: {'alpha': 5e-05}
Ridge Best Params: {'alpha': 10}

Target Column: YKL081W
Lasso Best Params: {'alpha': 1e-05}
Ridge Best Params: {'alpha': 1}

Target Column: YDR127W
Lasso Best Params: {'alpha': 5e-05}
Ridge Best Params: {'alpha': 1}

Target Column: YPL184C
Lasso Best Params: {'alpha': 5e-05}
Ridge Best Params: {'alpha': 10}

Target Column: YHR047C
Lasso Best Params: {'alpha': 5e-05}
Ridge Best Params: {'alpha': 10}

Target Column: YJL211C
Lasso Best Params: {'a

In [33]:
print_best_params(best_params_ExN, "ExN")


Best Parameters for ExN:

Target Column: YLR410W
Lasso Best Params: {'alpha': 0.0001}
Ridge Best Params: {'alpha': 5}

Target Column: YFL011W
Lasso Best Params: {'alpha': 5e-05}
Ridge Best Params: {'alpha': 10}

Target Column: YBL087C
Lasso Best Params: {'alpha': 0.0001}
Ridge Best Params: {'alpha': 10}

Target Column: YLR241W
Lasso Best Params: {'alpha': 0.0005}
Ridge Best Params: {'alpha': 10}

Target Column: YBR275C
Lasso Best Params: {'alpha': 0.0001}
Ridge Best Params: {'alpha': 5}

Target Column: YLL028W
Lasso Best Params: {'alpha': 5e-05}
Ridge Best Params: {'alpha': 5}

Target Column: YLR164W
Lasso Best Params: {'alpha': 5e-05}
Ridge Best Params: {'alpha': 10}

Target Column: YLL025W
Lasso Best Params: {'alpha': 5e-05}
Ridge Best Params: {'alpha': 5}

Target Column: YGR214W
Lasso Best Params: {'alpha': 0.0001}
Ridge Best Params: {'alpha': 5}

Target Column: YKR034W
Lasso Best Params: {'alpha': 5e-05}
Ridge Best Params: {'alpha': 10}

Target Column: YOR029W
Lasso Best Params: {