In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import ElasticNetCV
from sklearn.model_selection import RepeatedKFold
from sklearn.metrics import r2_score
from scipy.stats import pearsonr
import multiprocessing
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Initialize the Model
# Leave two cores free so the machine stays responsive, but never ask for fewer
# than one worker: on a 2-core machine `cpu_count() - 2` is 0, which joblib
# rejects with a ValueError, and on a 1-core machine it is -1, which joblib
# interprets as "use every core".
n_parallel_jobs = max(1, multiprocessing.cpu_count() - 2)

model = ElasticNetCV(
    l1_ratio = [0, 0.2, 0.5, 0.8, 1.0],  # Mix ratio: Ridge to Lasso (0 is pure ridge, 1 is pure lasso)
    alphas = [0.01, 0.1, 1.0, 10.0],  # Penalty strength to avoid overfitting (higher is more flattening)
    cv = RepeatedKFold(n_splits=5, n_repeats=10, random_state=123),  # 5-fold CV with 10 repeats; fixed seed for reproducibility
    # Upper bound on solver iterations; raise it if the fit does not converge.
    # NOTE: the import cell calls warnings.filterwarnings('ignore'), so a
    # non-convergence warning will NOT be displayed while that filter is active.
    max_iter = 2000,
    n_jobs = n_parallel_jobs  # Run the cross-validation fits in parallel
)

# Fit the Model
# TRAINING_DATASET_COLUMNS / TRAINING_PFACTOR are placeholders: replace them with
# the training feature columns and the training p-factor target.
model.fit(TRAINING_DATASET_COLUMNS, TRAINING_PFACTOR)

# Predict on the held-out test set
# TESTING_DATASET_COLUMNS is a placeholder: the test set minus the p-factor column.
predicted_pfactor = model.predict(TESTING_DATASET_COLUMNS)

# Evaluate the Model
# Pearson correlation between actual and predicted p-factor; the result carries
# both the correlation coefficient and its significance p-value.
corr_final = pearsonr(ACTUAL_TESTING_PFACTOR, predicted_pfactor)
# Coefficient of determination (R^2) of the predictions on the test set.
R_squared_final = r2_score(ACTUAL_TESTING_PFACTOR, predicted_pfactor)

In [None]:
# Collect and print the results of the fit and the held-out evaluation.
results = {
    'model': model,                            # the fitted ElasticNetCV estimator
    'best_alpha': model.alpha_,                # penalty strength selected by cross-validation
    'best_l1_ratio': model.l1_ratio_,          # ridge/lasso mix selected by cross-validation
    # Predictions for the test set. The variable produced by the evaluation cell
    # is `predicted_pfactor`; the key name is kept for anything reading this dict.
    'predicted_testdata': predicted_pfactor,
    'corr_final': corr_final,                  # Pearson correlation result (actual vs. predicted)
    'R_squared_final': R_squared_final         # R^2 on the test set
}

print(results)