# <span style="color:green"> Import libraries<span style="color:green">

In [2]:
#Import all the required libraries
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import sklearn 
import random
from sklearn import linear_model
from sklearn.linear_model import LinearRegression
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import mutual_info_regression
from sklearn.linear_model import Lasso, LassoCV, Ridge, RidgeCV
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectFromModel
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
import tensorflow as tf
from tensorflow import keras
from keras import layers
from tensorflow.keras.layers import Input, Dense, LeakyReLU, ReLU
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Model
from sklearn.metrics import mean_absolute_error, mean_squared_error, explained_variance_score
from sklearn.feature_selection import f_regression
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
import pybiomart
from biomart import BiomartServer
from sklearn.feature_selection import RFECV
import xgboost as xgb
from sklearn.inspection import permutation_importance
import os

2025-01-06 10:01:32.804108: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-01-06 10:01:32.804221: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-01-06 10:01:32.910307: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-06 10:01:33.175267: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Define function for feature selection and association test

In [47]:
def process_file(file_path):
    # Load the data
    df = pd.read_table(file_path, delim_whitespace=True)
    print("Shape of file", df.shape)
    
    #X is Independent feature and y is dependent feature
    X= df.iloc[:, 6:]
    print("Shape of X", X.shape)
    y= df.PHENOTYPE
    print("Shape of y", y.shape)
    
    # Create and fit the Elasticnet model
    elastic_cv = linear_model.ElasticNetCV(l1_ratio=[0.5],alphas=[0.0033], cv=5, random_state= 2000)
    elastic_cv.fit(X, y)

    # Print the optimal alpha value
    print ("Optimal alpha:", elastic_cv.alpha_)
    print("Optimal l1_ratio: ", elastic_cv.l1_ratio_)

    # Indices of the SNPs with non-zero coefficients
    elastic_snps_ldl_arrange = np.flip(np.argsort(elastic_cv.coef_))[:5000]
    elastic_snps_ldl = np.where(elastic_cv.coef_)[0]

    # See how many SNPs have a non-zero coefficient
    print(len(elastic_snps_ldl), "selected SNPs")

    #Final SNPs
    E1_re= X.iloc[:,elastic_snps_ldl[:][:5000]]
    
    # Construct the output path
    output_dir = os.path.dirname(file_path)
    output_file = os.path.join(output_dir, "Elasticnet_SVR_5000")

    # Save the intermediate CSV in the same folder as the .raw file
    E1_re.to_csv(output_file, index=False)
    print(f"Results saved to: {output_file}")
    print("print shape of selected snps by Elastic net", E1_re.shape)
    
    Y= df.PHENOTYPE.values
    print("Y.shape",Y.shape)
    # Split Data
    x_train, x_test, Y_train, Y_test= train_test_split(E1_re, Y, test_size=0.3, random_state=2000)
    print(f"x_train={x_train.shape}, x_test={x_test.shape}, Y_train= {Y_train.shape}, Y_test= {Y_test.shape}")
    # Scale data
    scaler= StandardScaler()
    x_train= scaler.fit_transform(x_train)
    x_test= scaler.transform(x_test)
    
    
    # Association test using SVR
    svr = SVR(kernel = 'sigmoid' , C=100, epsilon= 0.01, gamma= 'scale', cache_size=800)
    svr.fit(x_train, Y_train)
    y_pred = svr.predict(x_test)
    r2_man = r2_score_manual(Y_test, y_pred)
    print("r2 is ",r2_man)
    mae = mean_absolute_error(Y_test, y_pred)
    variance = explained_variance_score(Y_test, y_pred)
    print("mean_absolute_error", mae)
    print("mean_squared_error ", mean_squared_error(Y_test, y_pred))
    print("explained_variance_score", variance)
    
    return r2_man,mae,variance
    

## <span style="color:green"> r2_score_manual <span style="color:green"> 

In [48]:
def r2_score_manual(y_true, y_pred):
    # Convert inputs to NumPy arrays
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)
    
    # Calculate the mean of the actual values
    y_mean = np.mean(y_true)
    
    # Residual sum of squares
    ss_res = np.sum((y_true - y_pred) ** 2)
    
    # Total sum of squares
    ss_tot = np.sum((y_true - y_mean) ** 2)
    
    # R² score
    r2 = 1 - (ss_res / ss_tot)
    return r2

In [49]:
# Define the root directory containing the subdirectories
root_dir = "/home/Vaishnavi/Simulation_rep_G2P"

# Initialize a list to store results
results = []

# Walk through subdirectories to find .raw files
for subdir, dirs, files in os.walk(root_dir):
    for file in files:
        if file.endswith(".raw"):
            file_path = os.path.join(subdir, file)
            print(f"Processing file: {file_path}")
            
            # Process the file and calculate metrics 
            try:
                r2, mae, variance = process_file(file_path)
                results.append({
                    "file": file,
                    "directory": subdir,
                    "r2_score": r2,
                    "mae": mae,
                    "explained_variance": variance
                })
            except Exception as e:
                print(f"Error processing {file_path}: {e}")


Processing file: /home/Vaishnavi/Simulation_rep_G2P/rep_1/GWAStutorialldlraw_1.raw
Shape of file (1124, 184515)
Shape of X (1124, 184509)
Shape of y (1124,)


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Optimal alpha: 0.0033
Optimal l1_ratio:  0.5
6644 selected SNPs
Results saved to: /home/Vaishnavi/Simulation_rep_G2P/rep_1/Elasticnet_SVR_5000
print shape of selected snps by Elastic net (1124, 5000)
Y.shape (1124,)
x_train=(786, 5000), x_test=(338, 5000), Y_train= (786,), Y_test= (338,)
r2 is  0.895129444359299
mean_absolute_error 12.210119342053224
mean_squared_error  226.45916189674676
explained_variance_score 0.8952438470572425
Processing file: /home/Vaishnavi/Simulation_rep_G2P/rep_2/GWAStutorialldlraw_2.raw
Shape of file (1124, 184515)
Shape of X (1124, 184509)
Shape of y (1124,)


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Optimal alpha: 0.0033
Optimal l1_ratio:  0.5
6178 selected SNPs
Results saved to: /home/Vaishnavi/Simulation_rep_G2P/rep_2/Elasticnet_SVR_5000
print shape of selected snps by Elastic net (1124, 5000)
Y.shape (1124,)
x_train=(786, 5000), x_test=(338, 5000), Y_train= (786,), Y_test= (338,)
r2 is  0.9169203621089936
mean_absolute_error 9.520777356910045
mean_squared_error  142.34422718878258
explained_variance_score 0.9171540892478006
Processing file: /home/Vaishnavi/Simulation_rep_G2P/rep_3/GWAStutorialldlraw_3.raw
Shape of file (1124, 184515)
Shape of X (1124, 184509)
Shape of y (1124,)


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Optimal alpha: 0.0033
Optimal l1_ratio:  0.5
6990 selected SNPs
Results saved to: /home/Vaishnavi/Simulation_rep_G2P/rep_3/Elasticnet_SVR_5000
print shape of selected snps by Elastic net (1124, 5000)
Y.shape (1124,)
x_train=(786, 5000), x_test=(338, 5000), Y_train= (786,), Y_test= (338,)
r2 is  0.8883029942881665
mean_absolute_error 13.36671750567825
mean_squared_error  274.9183827236978
explained_variance_score 0.88842790337581
Processing file: /home/Vaishnavi/Simulation_rep_G2P/rep_4/GWAStutorialldlraw_4.raw
Shape of file (1124, 184515)
Shape of X (1124, 184509)
Shape of y (1124,)


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Optimal alpha: 0.0033
Optimal l1_ratio:  0.5
5919 selected SNPs
Results saved to: /home/Vaishnavi/Simulation_rep_G2P/rep_4/Elasticnet_SVR_5000
print shape of selected snps by Elastic net (1124, 5000)
Y.shape (1124,)
x_train=(786, 5000), x_test=(338, 5000), Y_train= (786,), Y_test= (338,)
r2 is  0.9190964460055472
mean_absolute_error 9.045061074219712
mean_squared_error  120.94277571578006
explained_variance_score 0.9192762784830782
Processing file: /home/Vaishnavi/Simulation_rep_G2P/rep_5/GWAStutorialldlraw_5.raw
Shape of file (1124, 184515)
Shape of X (1124, 184509)
Shape of y (1124,)


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Optimal alpha: 0.0033
Optimal l1_ratio:  0.5
6817 selected SNPs
Results saved to: /home/Vaishnavi/Simulation_rep_G2P/rep_5/Elasticnet_SVR_5000
print shape of selected snps by Elastic net (1124, 5000)
Y.shape (1124,)
x_train=(786, 5000), x_test=(338, 5000), Y_train= (786,), Y_test= (338,)
r2 is  0.8747658230766738
mean_absolute_error 13.079018126044643
mean_squared_error  259.5018213534675
explained_variance_score 0.8759241782753961
Processing file: /home/Vaishnavi/Simulation_rep_G2P/rep_6/GWAStutorialldlraw_6.raw
Shape of file (1124, 184515)
Shape of X (1124, 184509)
Shape of y (1124,)


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Optimal alpha: 0.0033
Optimal l1_ratio:  0.5
7851 selected SNPs
Results saved to: /home/Vaishnavi/Simulation_rep_G2P/rep_6/Elasticnet_SVR_5000
print shape of selected snps by Elastic net (1124, 5000)
Y.shape (1124,)
x_train=(786, 5000), x_test=(338, 5000), Y_train= (786,), Y_test= (338,)
r2 is  0.8380645024012637
mean_absolute_error 19.382892199844555
mean_squared_error  610.7301621323463
explained_variance_score 0.8383289358541774
Processing file: /home/Vaishnavi/Simulation_rep_G2P/rep_7/GWAStutorialldlraw_7.raw
Shape of file (1124, 184515)
Shape of X (1124, 184509)
Shape of y (1124,)


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Optimal alpha: 0.0033
Optimal l1_ratio:  0.5
6895 selected SNPs
Results saved to: /home/Vaishnavi/Simulation_rep_G2P/rep_7/Elasticnet_SVR_5000
print shape of selected snps by Elastic net (1124, 5000)
Y.shape (1124,)
x_train=(786, 5000), x_test=(338, 5000), Y_train= (786,), Y_test= (338,)
r2 is  0.8784425151398992
mean_absolute_error 13.095812900925054
mean_squared_error  287.3836253819917
explained_variance_score 0.8785755013574335
Processing file: /home/Vaishnavi/Simulation_rep_G2P/rep_8/GWAStutorialldlraw_8.raw
Shape of file (1124, 184515)
Shape of X (1124, 184509)
Shape of y (1124,)


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Optimal alpha: 0.0033
Optimal l1_ratio:  0.5
5724 selected SNPs
Results saved to: /home/Vaishnavi/Simulation_rep_G2P/rep_8/Elasticnet_SVR_5000
print shape of selected snps by Elastic net (1124, 5000)
Y.shape (1124,)
x_train=(786, 5000), x_test=(338, 5000), Y_train= (786,), Y_test= (338,)
r2 is  0.9286114097767745
mean_absolute_error 7.453028435224109
mean_squared_error  86.4246947848389
explained_variance_score 0.9288846894485902
Processing file: /home/Vaishnavi/Simulation_rep_G2P/rep_9/GWAStutorialldlraw_9.raw
Shape of file (1124, 184515)
Shape of X (1124, 184509)
Shape of y (1124,)


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Optimal alpha: 0.0033
Optimal l1_ratio:  0.5
6279 selected SNPs
Results saved to: /home/Vaishnavi/Simulation_rep_G2P/rep_9/Elasticnet_SVR_5000
print shape of selected snps by Elastic net (1124, 5000)
Y.shape (1124,)
x_train=(786, 5000), x_test=(338, 5000), Y_train= (786,), Y_test= (338,)
r2 is  0.9119069509859029
mean_absolute_error 10.208154197296043
mean_squared_error  162.5582799765794
explained_variance_score 0.9119327485908357
Processing file: /home/Vaishnavi/Simulation_rep_G2P/rep_10/GWAStutorialldlraw_10.raw
Shape of file (1124, 184515)
Shape of X (1124, 184509)
Shape of y (1124,)


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Optimal alpha: 0.0033
Optimal l1_ratio:  0.5
6827 selected SNPs
Results saved to: /home/Vaishnavi/Simulation_rep_G2P/rep_10/Elasticnet_SVR_5000
print shape of selected snps by Elastic net (1124, 5000)
Y.shape (1124,)
x_train=(786, 5000), x_test=(338, 5000), Y_train= (786,), Y_test= (338,)
r2 is  0.8864928144263755
mean_absolute_error 12.46872241703466
mean_squared_error  236.10743025676308
explained_variance_score 0.8865440878863462
Processing file: /home/Vaishnavi/Simulation_rep_G2P/rep_11/GWAStutorialldlraw_11.raw
Shape of file (1124, 184515)
Shape of X (1124, 184509)
Shape of y (1124,)


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Optimal alpha: 0.0033
Optimal l1_ratio:  0.5
6116 selected SNPs
Results saved to: /home/Vaishnavi/Simulation_rep_G2P/rep_11/Elasticnet_SVR_5000
print shape of selected snps by Elastic net (1124, 5000)
Y.shape (1124,)
x_train=(786, 5000), x_test=(338, 5000), Y_train= (786,), Y_test= (338,)
r2 is  0.9240040913327012
mean_absolute_error 9.306451164782976
mean_squared_error  140.78523060728452
explained_variance_score 0.9248875102709139
Processing file: /home/Vaishnavi/Simulation_rep_G2P/rep_12/GWAStutorialldlraw_12.raw
Shape of file (1124, 184515)
Shape of X (1124, 184509)
Shape of y (1124,)


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Optimal alpha: 0.0033
Optimal l1_ratio:  0.5
6200 selected SNPs
Results saved to: /home/Vaishnavi/Simulation_rep_G2P/rep_12/Elasticnet_SVR_5000
print shape of selected snps by Elastic net (1124, 5000)
Y.shape (1124,)
x_train=(786, 5000), x_test=(338, 5000), Y_train= (786,), Y_test= (338,)
r2 is  0.9183566912219969
mean_absolute_error 9.416847628753187
mean_squared_error  143.15367941352673
explained_variance_score 0.9185153283510424
Processing file: /home/Vaishnavi/Simulation_rep_G2P/rep_13/GWAStutorialldlraw_13.raw
Shape of file (1124, 184515)
Shape of X (1124, 184509)
Shape of y (1124,)


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Optimal alpha: 0.0033
Optimal l1_ratio:  0.5
7605 selected SNPs
Results saved to: /home/Vaishnavi/Simulation_rep_G2P/rep_13/Elasticnet_SVR_5000
print shape of selected snps by Elastic net (1124, 5000)
Y.shape (1124,)
x_train=(786, 5000), x_test=(338, 5000), Y_train= (786,), Y_test= (338,)
r2 is  0.8398561002243934
mean_absolute_error 18.519272156069103
mean_squared_error  539.1755207700377
explained_variance_score 0.8401927024576026
Processing file: /home/Vaishnavi/Simulation_rep_G2P/rep_14/GWAStutorialldlraw_14.raw
Shape of file (1124, 184515)
Shape of X (1124, 184509)
Shape of y (1124,)


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Optimal alpha: 0.0033
Optimal l1_ratio:  0.5
6004 selected SNPs
Results saved to: /home/Vaishnavi/Simulation_rep_G2P/rep_14/Elasticnet_SVR_5000
print shape of selected snps by Elastic net (1124, 5000)
Y.shape (1124,)
x_train=(786, 5000), x_test=(338, 5000), Y_train= (786,), Y_test= (338,)
r2 is  0.9048501344525116
mean_absolute_error 8.545431008561517
mean_squared_error  117.14307327697885
explained_variance_score 0.9049685370981717
Processing file: /home/Vaishnavi/Simulation_rep_G2P/rep_15/GWAStutorialldlraw_15.raw
Shape of file (1124, 184515)
Shape of X (1124, 184509)
Shape of y (1124,)


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Optimal alpha: 0.0033
Optimal l1_ratio:  0.5
4470 selected SNPs
Results saved to: /home/Vaishnavi/Simulation_rep_G2P/rep_15/Elasticnet_SVR_5000
print shape of selected snps by Elastic net (1124, 4470)
Y.shape (1124,)
x_train=(786, 4470), x_test=(338, 4470), Y_train= (786,), Y_test= (338,)
r2 is  0.9424139463825986
mean_absolute_error 5.166094961850546
mean_squared_error  41.81568913335869
explained_variance_score 0.9426234704683025
Processing file: /home/Vaishnavi/Simulation_rep_G2P/rep_16/GWAStutorialldlraw_16.raw
Shape of file (1124, 184515)
Shape of X (1124, 184509)
Shape of y (1124,)


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Optimal alpha: 0.0033
Optimal l1_ratio:  0.5
4764 selected SNPs
Results saved to: /home/Vaishnavi/Simulation_rep_G2P/rep_16/Elasticnet_SVR_5000
print shape of selected snps by Elastic net (1124, 4764)
Y.shape (1124,)
x_train=(786, 4764), x_test=(338, 4764), Y_train= (786,), Y_test= (338,)
r2 is  0.9427869434414508
mean_absolute_error 5.10226656726807
mean_squared_error  40.94067682978889
explained_variance_score 0.9431155570478631
Processing file: /home/Vaishnavi/Simulation_rep_G2P/rep_17/GWAStutorialldlraw_17.raw
Shape of file (1124, 184515)
Shape of X (1124, 184509)
Shape of y (1124,)


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Optimal alpha: 0.0033
Optimal l1_ratio:  0.5
6060 selected SNPs
Results saved to: /home/Vaishnavi/Simulation_rep_G2P/rep_17/Elasticnet_SVR_5000
print shape of selected snps by Elastic net (1124, 5000)
Y.shape (1124,)
x_train=(786, 5000), x_test=(338, 5000), Y_train= (786,), Y_test= (338,)
r2 is  0.9095016106458251
mean_absolute_error 9.65133287224008
mean_squared_error  145.61332507658915
explained_variance_score 0.9095445178089284
Processing file: /home/Vaishnavi/Simulation_rep_G2P/rep_18/GWAStutorialldlraw_18.raw
Shape of file (1124, 184515)
Shape of X (1124, 184509)
Shape of y (1124,)


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Optimal alpha: 0.0033
Optimal l1_ratio:  0.5
6747 selected SNPs
Results saved to: /home/Vaishnavi/Simulation_rep_G2P/rep_18/Elasticnet_SVR_5000
print shape of selected snps by Elastic net (1124, 5000)
Y.shape (1124,)
x_train=(786, 5000), x_test=(338, 5000), Y_train= (786,), Y_test= (338,)
r2 is  0.9054616730243485
mean_absolute_error 11.877152263617434
mean_squared_error  209.51374224967105
explained_variance_score 0.9069351448611727
Processing file: /home/Vaishnavi/Simulation_rep_G2P/rep_19/GWAStutorialldlraw_19.raw
Shape of file (1124, 184515)
Shape of X (1124, 184509)
Shape of y (1124,)


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Optimal alpha: 0.0033
Optimal l1_ratio:  0.5
6275 selected SNPs
Results saved to: /home/Vaishnavi/Simulation_rep_G2P/rep_19/Elasticnet_SVR_5000
print shape of selected snps by Elastic net (1124, 5000)
Y.shape (1124,)
x_train=(786, 5000), x_test=(338, 5000), Y_train= (786,), Y_test= (338,)
r2 is  0.9226947374087979
mean_absolute_error 9.538370949379166
mean_squared_error  146.27994321254067
explained_variance_score 0.9227079018788932
Processing file: /home/Vaishnavi/Simulation_rep_G2P/rep_20/GWAStutorialldlraw_20.raw
Shape of file (1124, 184515)
Shape of X (1124, 184509)
Shape of y (1124,)


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Optimal alpha: 0.0033
Optimal l1_ratio:  0.5
5576 selected SNPs
Results saved to: /home/Vaishnavi/Simulation_rep_G2P/rep_20/Elasticnet_SVR_5000
print shape of selected snps by Elastic net (1124, 5000)
Y.shape (1124,)
x_train=(786, 5000), x_test=(338, 5000), Y_train= (786,), Y_test= (338,)
r2 is  0.9177200448213443
mean_absolute_error 7.55813834171887
mean_squared_error  92.7477162586203
explained_variance_score 0.9177200528671674
Processing file: /home/Vaishnavi/Simulation_rep_G2P/rep_21/GWAStutorialldlraw_21.raw
Shape of file (1124, 184515)
Shape of X (1124, 184509)
Shape of y (1124,)


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Optimal alpha: 0.0033
Optimal l1_ratio:  0.5
6346 selected SNPs
Results saved to: /home/Vaishnavi/Simulation_rep_G2P/rep_21/Elasticnet_SVR_5000
print shape of selected snps by Elastic net (1124, 5000)
Y.shape (1124,)
x_train=(786, 5000), x_test=(338, 5000), Y_train= (786,), Y_test= (338,)
r2 is  0.9039280071233531
mean_absolute_error 10.761120030940285
mean_squared_error  181.5978623621526
explained_variance_score 0.9042972685957942
Processing file: /home/Vaishnavi/Simulation_rep_G2P/rep_22/GWAStutorialldlraw_22.raw
Shape of file (1124, 184515)
Shape of X (1124, 184509)
Shape of y (1124,)


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Optimal alpha: 0.0033
Optimal l1_ratio:  0.5
6333 selected SNPs
Results saved to: /home/Vaishnavi/Simulation_rep_G2P/rep_22/Elasticnet_SVR_5000
print shape of selected snps by Elastic net (1124, 5000)
Y.shape (1124,)
x_train=(786, 5000), x_test=(338, 5000), Y_train= (786,), Y_test= (338,)
r2 is  0.9080091400542902
mean_absolute_error 10.535373426388622
mean_squared_error  173.35762020328127
explained_variance_score 0.9086066023030116
Processing file: /home/Vaishnavi/Simulation_rep_G2P/rep_23/GWAStutorialldlraw_23.raw
Shape of file (1124, 184515)
Shape of X (1124, 184509)
Shape of y (1124,)


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Optimal alpha: 0.0033
Optimal l1_ratio:  0.5
6166 selected SNPs
Results saved to: /home/Vaishnavi/Simulation_rep_G2P/rep_23/Elasticnet_SVR_5000
print shape of selected snps by Elastic net (1124, 5000)
Y.shape (1124,)
x_train=(786, 5000), x_test=(338, 5000), Y_train= (786,), Y_test= (338,)
r2 is  0.9104535030267985
mean_absolute_error 9.446290828965674
mean_squared_error  141.53313382406805
explained_variance_score 0.9107758125285818
Processing file: /home/Vaishnavi/Simulation_rep_G2P/rep_24/GWAStutorialldlraw_24.raw
Shape of file (1124, 184515)
Shape of X (1124, 184509)
Shape of y (1124,)


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Optimal alpha: 0.0033
Optimal l1_ratio:  0.5
7316 selected SNPs
Results saved to: /home/Vaishnavi/Simulation_rep_G2P/rep_24/Elasticnet_SVR_5000
print shape of selected snps by Elastic net (1124, 5000)
Y.shape (1124,)
x_train=(786, 5000), x_test=(338, 5000), Y_train= (786,), Y_test= (338,)
r2 is  0.872192627914717
mean_absolute_error 13.63065048545411
mean_squared_error  293.82064311351314
explained_variance_score 0.8722195353772207
Processing file: /home/Vaishnavi/Simulation_rep_G2P/rep_25/GWAStutorialldlraw_25.raw
Shape of file (1124, 184515)
Shape of X (1124, 184509)
Shape of y (1124,)


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Optimal alpha: 0.0033
Optimal l1_ratio:  0.5
5513 selected SNPs
Results saved to: /home/Vaishnavi/Simulation_rep_G2P/rep_25/Elasticnet_SVR_5000
print shape of selected snps by Elastic net (1124, 5000)
Y.shape (1124,)
x_train=(786, 5000), x_test=(338, 5000), Y_train= (786,), Y_test= (338,)
r2 is  0.9316963100573618
mean_absolute_error 7.144710737656088
mean_squared_error  77.61567120448288
explained_variance_score 0.9318061809890856
Processing file: /home/Vaishnavi/Simulation_rep_G2P/rep_26/GWAStutorialldlraw_26.raw
Shape of file (1124, 184515)
Shape of X (1124, 184509)
Shape of y (1124,)


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Optimal alpha: 0.0033
Optimal l1_ratio:  0.5
6398 selected SNPs
Results saved to: /home/Vaishnavi/Simulation_rep_G2P/rep_26/Elasticnet_SVR_5000
print shape of selected snps by Elastic net (1124, 5000)
Y.shape (1124,)
x_train=(786, 5000), x_test=(338, 5000), Y_train= (786,), Y_test= (338,)
r2 is  0.9042444432934815
mean_absolute_error 10.549739957923546
mean_squared_error  178.0541653186787
explained_variance_score 0.9053494814645805
Processing file: /home/Vaishnavi/Simulation_rep_G2P/rep_27/GWAStutorialldlraw_27.raw
Shape of file (1124, 184515)
Shape of X (1124, 184509)
Shape of y (1124,)


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Optimal alpha: 0.0033
Optimal l1_ratio:  0.5
6884 selected SNPs
Results saved to: /home/Vaishnavi/Simulation_rep_G2P/rep_27/Elasticnet_SVR_5000
print shape of selected snps by Elastic net (1124, 5000)
Y.shape (1124,)
x_train=(786, 5000), x_test=(338, 5000), Y_train= (786,), Y_test= (338,)
r2 is  0.874360724909298
mean_absolute_error 14.085515967550968
mean_squared_error  305.97574856065444
explained_variance_score 0.874412853979804
Processing file: /home/Vaishnavi/Simulation_rep_G2P/rep_28/GWAStutorialldlraw_28.raw
Shape of file (1124, 184515)
Shape of X (1124, 184509)
Shape of y (1124,)


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Optimal alpha: 0.0033
Optimal l1_ratio:  0.5
7003 selected SNPs
Results saved to: /home/Vaishnavi/Simulation_rep_G2P/rep_28/Elasticnet_SVR_5000
print shape of selected snps by Elastic net (1124, 5000)
Y.shape (1124,)
x_train=(786, 5000), x_test=(338, 5000), Y_train= (786,), Y_test= (338,)
r2 is  0.8840180870224063
mean_absolute_error 12.94333574768759
mean_squared_error  278.64349046413486
explained_variance_score 0.8858263989466858
Processing file: /home/Vaishnavi/Simulation_rep_G2P/rep_33/GWAStutorialldlraw_33.raw
Shape of file (1124, 184515)
Shape of X (1124, 184509)
Shape of y (1124,)


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Optimal alpha: 0.0033
Optimal l1_ratio:  0.5
6137 selected SNPs
Results saved to: /home/Vaishnavi/Simulation_rep_G2P/rep_33/Elasticnet_SVR_5000
print shape of selected snps by Elastic net (1124, 5000)
Y.shape (1124,)
x_train=(786, 5000), x_test=(338, 5000), Y_train= (786,), Y_test= (338,)
r2 is  0.9097189600491777
mean_absolute_error 9.092927803352081
mean_squared_error  130.37955678557628
explained_variance_score 0.9097193402760708
Processing file: /home/Vaishnavi/Simulation_rep_G2P/rep_34/GWAStutorialldlraw_34.raw
Shape of file (1124, 184515)
Shape of X (1124, 184509)
Shape of y (1124,)


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Optimal alpha: 0.0033
Optimal l1_ratio:  0.5
5859 selected SNPs
Results saved to: /home/Vaishnavi/Simulation_rep_G2P/rep_34/Elasticnet_SVR_5000
print shape of selected snps by Elastic net (1124, 5000)
Y.shape (1124,)
x_train=(786, 5000), x_test=(338, 5000), Y_train= (786,), Y_test= (338,)
r2 is  0.9216568957925517
mean_absolute_error 9.077294682603771
mean_squared_error  130.13720560648838
explained_variance_score 0.9217630957936493
Processing file: /home/Vaishnavi/Simulation_rep_G2P/rep_35/GWAStutorialldlraw_35.raw
Shape of file (1124, 184515)
Shape of X (1124, 184509)
Shape of y (1124,)


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Optimal alpha: 0.0033
Optimal l1_ratio:  0.5
6113 selected SNPs
Results saved to: /home/Vaishnavi/Simulation_rep_G2P/rep_35/Elasticnet_SVR_5000
print shape of selected snps by Elastic net (1124, 5000)
Y.shape (1124,)
x_train=(786, 5000), x_test=(338, 5000), Y_train= (786,), Y_test= (338,)
r2 is  0.9018894919268294
mean_absolute_error 9.527681481559059
mean_squared_error  146.11132511357368
explained_variance_score 0.9026777838150991
Processing file: /home/Vaishnavi/Simulation_rep_G2P/rep_36/GWAStutorialldlraw_36.raw
Shape of file (1124, 184515)
Shape of X (1124, 184509)
Shape of y (1124,)


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Optimal alpha: 0.0033
Optimal l1_ratio:  0.5
6401 selected SNPs
Results saved to: /home/Vaishnavi/Simulation_rep_G2P/rep_36/Elasticnet_SVR_5000
print shape of selected snps by Elastic net (1124, 5000)
Y.shape (1124,)
x_train=(786, 5000), x_test=(338, 5000), Y_train= (786,), Y_test= (338,)
r2 is  0.908154817225945
mean_absolute_error 10.778384562036932
mean_squared_error  181.70225991456314
explained_variance_score 0.9083798490159607
Processing file: /home/Vaishnavi/Simulation_rep_G2P/rep_38/GWAStutorialldlraw_38.raw
Shape of file (1124, 184515)
Shape of X (1124, 184509)
Shape of y (1124,)


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Optimal alpha: 0.0033
Optimal l1_ratio:  0.5
5053 selected SNPs
Results saved to: /home/Vaishnavi/Simulation_rep_G2P/rep_38/Elasticnet_SVR_5000
print shape of selected snps by Elastic net (1124, 5000)
Y.shape (1124,)
x_train=(786, 5000), x_test=(338, 5000), Y_train= (786,), Y_test= (338,)
r2 is  0.9382974386231757
mean_absolute_error 6.215818804849319
mean_squared_error  61.495989646955145
explained_variance_score 0.9386810481946198
Processing file: /home/Vaishnavi/Simulation_rep_G2P/rep_39/GWAStutorialldlraw_39.raw
Shape of file (1124, 184515)
Shape of X (1124, 184509)
Shape of y (1124,)


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Optimal alpha: 0.0033
Optimal l1_ratio:  0.5
5611 selected SNPs
Results saved to: /home/Vaishnavi/Simulation_rep_G2P/rep_39/Elasticnet_SVR_5000
print shape of selected snps by Elastic net (1124, 5000)
Y.shape (1124,)
x_train=(786, 5000), x_test=(338, 5000), Y_train= (786,), Y_test= (338,)
r2 is  0.9236974906624004
mean_absolute_error 7.827605085995307
mean_squared_error  98.31770301486958
explained_variance_score 0.9240426473318414
Processing file: /home/Vaishnavi/Simulation_rep_G2P/rep_40/GWAStutorialldlraw_40.raw
Shape of file (1124, 184515)
Shape of X (1124, 184509)
Shape of y (1124,)


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Optimal alpha: 0.0033
Optimal l1_ratio:  0.5
5823 selected SNPs
Results saved to: /home/Vaishnavi/Simulation_rep_G2P/rep_40/Elasticnet_SVR_5000
print shape of selected snps by Elastic net (1124, 5000)
Y.shape (1124,)
x_train=(786, 5000), x_test=(338, 5000), Y_train= (786,), Y_test= (338,)
r2 is  0.9279738314500846
mean_absolute_error 7.752781979030801
mean_squared_error  96.67430671694484
explained_variance_score 0.9282407000955825
Processing file: /home/Vaishnavi/Simulation_rep_G2P/rep_41/GWAStutorialldlraw.raw
Shape of file (1124, 184515)
Shape of X (1124, 184509)
Shape of y (1124,)


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Optimal alpha: 0.0033
Optimal l1_ratio:  0.5
5732 selected SNPs
Results saved to: /home/Vaishnavi/Simulation_rep_G2P/rep_41/Elasticnet_SVR_5000
print shape of selected snps by Elastic net (1124, 5000)
Y.shape (1124,)
x_train=(786, 5000), x_test=(338, 5000), Y_train= (786,), Y_test= (338,)
r2 is  0.9143581681099152
mean_absolute_error 8.691669992133365
mean_squared_error  116.18006969284879
explained_variance_score 0.9151196154638661
Processing file: /home/Vaishnavi/Simulation_rep_G2P/rep1-run-try/GWAStutorialldlraw.raw
Shape of file (1124, 184515)
Shape of X (1124, 184509)
Shape of y (1124,)


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Optimal alpha: 0.0033
Optimal l1_ratio:  0.5
6882 selected SNPs
Results saved to: /home/Vaishnavi/Simulation_rep_G2P/rep1-run-try/Elasticnet_SVR_5000
print shape of selected snps by Elastic net (1124, 5000)
Y.shape (1124,)
x_train=(786, 5000), x_test=(338, 5000), Y_train= (786,), Y_test= (338,)
r2 is  0.8821323668374238
mean_absolute_error 12.79903698921781
mean_squared_error  273.5286007826961
explained_variance_score 0.8826350694200411


In [50]:
# Save results to a CSV file
results_df = pd.DataFrame(results)
results_df.to_csv("r2_scores.csv", index=False)

print("Processing completed. R-square scores saved to r2_scores.csv.")

Processing completed. R-square scores saved to r2_scores.csv.


In [None]:
# TRY

In [18]:
# Define the root directory containing the subdirectories
root_dir = "/home/Vaishnavi/Simulation_rep_G2P"

# Initialize a list to store results
results = []

# Walk through subdirectories to find .raw files
for subdir, dirs, files in os.walk(root_dir):
    for file in files:
        if file.endswith(".raw"):
            file_path = os.path.join(subdir, file)
            print(f"Processing file: {file_path}")
            


Processing file: /home/Vaishnavi/Simulation_rep_G2P/rep_1/GWAStutorialldlraw_1.raw
Processing file: /home/Vaishnavi/Simulation_rep_G2P/rep_2/GWAStutorialldlraw_2.raw
Processing file: /home/Vaishnavi/Simulation_rep_G2P/rep_3/GWAStutorialldlraw_3.raw
Processing file: /home/Vaishnavi/Simulation_rep_G2P/rep_4/GWAStutorialldlraw_4.raw
Processing file: /home/Vaishnavi/Simulation_rep_G2P/rep_5/GWAStutorialldlraw_5.raw
Processing file: /home/Vaishnavi/Simulation_rep_G2P/rep_6/GWAStutorialldlraw_6.raw
Processing file: /home/Vaishnavi/Simulation_rep_G2P/rep_7/GWAStutorialldlraw_7.raw
Processing file: /home/Vaishnavi/Simulation_rep_G2P/rep_8/GWAStutorialldlraw_8.raw
Processing file: /home/Vaishnavi/Simulation_rep_G2P/rep_9/GWAStutorialldlraw_9.raw
Processing file: /home/Vaishnavi/Simulation_rep_G2P/rep_10/GWAStutorialldlraw_10.raw
Processing file: /home/Vaishnavi/Simulation_rep_G2P/rep_11/GWAStutorialldlraw_11.raw
Processing file: /home/Vaishnavi/Simulation_rep_G2P/rep_12/GWAStutorialldlraw_12.ra