In [1]:
import pandas as pd
import os
import numpy as np
import sys
sys.path.append("../")
from data_perturbations import *
from competing_methods import *
from PCS_confidence_intervals import *
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
from imodels import FIGSRegressor
import scipy.stats as st


# Load Data 

In [2]:
# Fixed seed so the train/test partition (and every result derived from it)
# is identical on each Restart & Run All.
RANDOM_STATE = 42

# Feature matrix and response are read from CSV files relative to the notebook.
X = pd.read_csv("../data/X_uncorrelated_enhancer.csv")
y = pd.read_csv("../data/y_enhancer.csv")

# Hold out 20% of the rows for the predictability screening step.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# PCS Confidence Intervals

In [3]:
# Screen the candidate models using the train/test split. The displayed result
# maps each model name (OLS, RidgeCV, LassoCV) to a score.
# NOTE(review): the score looks like a test-set R^2 — confirm in the helper.
predictability_screening(X_train,y_train,X_test,y_test)

  y = column_or_1d(y, warn=True)


{'OLS': 0.25846304464557524,
 'RidgeCV': 0.2585100770233232,
 'LassoCV': 0.23757326244547738}

## Generate perturbed data. 

In [4]:
def generate_perturbed_data(X, y):
    """Produce one perturbed copy of the data set.

    The pair returned by ``bootstrap(X, y)`` is taken as the resampled
    features and response; the response is then passed through
    ``add_laplace_measurement_noise``.

    Parameters
    ----------
    X : features, forwarded unchanged to ``bootstrap``.
    y : response, forwarded unchanged to ``bootstrap``.

    Returns
    -------
    tuple
        ``(X_resampled, y_noisy)`` — the features returned by ``bootstrap``
        and the noise-perturbed response.
    """
    resampled_X, resampled_y = bootstrap(X, y)
    return resampled_X, add_laplace_measurement_noise(resampled_y)

In [5]:
# Build the collections of perturbed datasets consumed by the model-fitting step.
# NOTE(review): the lists are named "train" but the draws are taken from the
# full X, y rather than X_train, y_train — confirm this is intended.
num_perturbations = 50
perturbed_draws = [generate_perturbed_data(X, y) for _ in range(num_perturbations)]
X_train_perturbed_list = [drawn_X for drawn_X, _ in perturbed_draws]
y_train_perturbed_list = [drawn_y for _, drawn_y in perturbed_draws]

In [11]:
import warnings

# Silence every warning category. The filter is installed process-wide, so it
# also applies to all cells executed after this one.
warnings.simplefilter("ignore")

# Models fitted on each perturbed dataset.
screened_model_names = ["OLS", "RidgeCV", "LassoCV"]
p_screened_coefficients = fit_all_model_perturbed_datasets(
    screened_model_names,
    X_train_perturbed_list,
    y_train_perturbed_list,
)

In [12]:
# Turn the coefficients fitted on the perturbed datasets into intervals.
# The displayed array has two columns — presumably (lower, upper) bounds with
# one row per coefficient; confirm against the helper.
compute_confidence_intervals(X_train,p_screened_coefficients)

array([[-2.57851114e-03,  6.91407640e-04],
       [-7.19717706e-04,  1.08050169e-03],
       [-6.15211228e-04,  8.48843522e-04],
       [-5.50010643e-04,  5.14123632e-04],
       [ 0.00000000e+00,  7.66968232e-04],
       [-2.40638507e-04,  1.59797817e-03],
       [-3.07236662e-04,  1.38745944e-04],
       [-1.63864224e-03,  1.75031076e-03],
       [-5.31132241e-04,  2.10704439e-03],
       [-2.34474291e-03,  3.37267192e-03],
       [-1.42044746e-03,  1.97618215e-03],
       [-1.66194875e-03,  1.02902182e-03],
       [-4.06268084e-04,  2.19338656e-03],
       [-2.29692640e-03,  2.03083655e-03],
       [-2.38666158e-03, -3.07404972e-04],
       [-3.52348752e-05,  6.73439423e-04],
       [-3.08902088e-04,  3.10198465e-04],
       [ 1.68068359e-04,  4.12502297e-04],
       [ 0.00000000e+00,  3.35343743e-02],
       [-1.44270064e-02,  9.95585084e-03],
       [-1.15243855e-02,  1.10076826e-02],
       [-4.61093722e-03,  1.07121296e-02],
       [-2.34014636e-02,  0.00000000e+00],
       [ 0.

# Bootstrap Confidence Intervals

In [14]:
# Baseline for comparison, computed on the full X, y (not the train split).
# The result displays as a list of 2-tuples — presumably (lower, upper) per
# coefficient; confirm against the helper.
bootstrap_confidence_intervals = bootstrap_confidence_interval(X,y)
bootstrap_confidence_intervals

[(-0.0029577437594025696, 0.001317963558925357),
 (-0.0009463784333888613, 0.0009181489524403647),
 (-0.0005334498191931396, 0.0008674032438958426),
 (-0.00048663909699446875, 0.00045238718508216135),
 (6.551198999910795e-05, 0.00078020304726879),
 (-0.00023350743956469713, 0.0015530541389838853),
 (-0.0002876551379366537, 0.0001436414075280586),
 (-0.0014265613347920986, 0.0018167578145201941),
 (-0.0008186975432101118, 0.002428879005797955),
 (-0.0019986327456318407, 0.003218948577026593),
 (-0.0016465463090242268, 0.0013597748522247866),
 (-0.001192222572230253, 0.0013342785213208011),
 (-0.0010339648170726245, 0.0022825279773608207),
 (-0.002151890187251639, 0.0020360120912398592),
 (-0.002105569736431298, -0.0003408133299825214),
 (-0.00011797730236034532, 0.0005577247044332667),
 (-0.0003861575891828396, 0.00023066468865268772),
 (0.0001410015954692217, 0.00031336823982839674),
 (0.002117025576824416, 0.032996496081781804),
 (-0.0144901127574335, 0.008028101674179423),
 (-0.01183

# Classic Confidence Intervals

In [15]:
# Second baseline, also computed on the full X, y. The result displays as a
# table indexed by term name (including "const") with columns 0 and 1 —
# presumably the lower and upper bounds; confirm against the helper.
classic_confidence_intervals = classic_confidence_interval(X,y)
classic_confidence_intervals

Unnamed: 0,0,1
const,-0.04073879,0.028672
H3_c12,-0.002709611,0.001018
H3K18ac_c12,-0.0006775015,0.000776
H3K27ac_c12,-0.0004750742,0.000745
H3K27me3_c12,-0.0004616982,0.000394
H3K36me3_c12,4.840772e-05,0.000801
H3K4me1_c12,-0.000296031,0.001569
H3K4me3_c12,-0.0002918256,0.000155
H3K9ac_c12,-0.001159198,0.001594
H4K5ac_c12,-0.0007900484,0.002478
