In [83]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

pd.set_option("display.max_columns", 85)

from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

In [84]:
def fetch_data(drop_some=True, path="abnormal_writeout.data.csv"):
    """Load the dataset from a CSV file, optionally discarding unused columns.

    Parameters
    ----------
    drop_some : bool, default True
        When true, drop every column positioned from "ACC" through "UVM"
        (inclusive), plus "oldest_phylostratum_factor" and "Unnamed: 0".
        When false, the frame is returned exactly as read.
    path : str or path-like, default "abnormal_writeout.data.csv"
        Location of the CSV file to read.

    Returns
    -------
    pandas.DataFrame
        The loaded frame, with the columns above removed if requested.

    Raises
    ------
    KeyError
        If ``drop_some`` is true and one of the expected columns is missing.
    """
    df = pd.read_csv(path)

    if not drop_some:
        return df

    # Ignore the contiguous run of columns from ACC to UVM (inclusive).
    start_drop = df.columns.get_loc("ACC")
    end_drop = df.columns.get_loc("UVM")
    unwanted = list(df.columns[start_drop : end_drop + 1])

    # Also ignore oldest_phylostratum_factor and the first column
    # ("Unnamed: 0").
    unwanted += ["oldest_phylostratum_factor", "Unnamed: 0"]

    # A single non-mutating drop instead of three in-place ones.
    return df.drop(columns=unwanted)

In [92]:
# Load the dataset with the unused columns dropped (drop_some defaults to True).
df = fetch_data(drop_some=True)
df  # 85 columns remain.

Unnamed: 0,response,cds_length,occ_total_sum,oldest_phylostratum,gc_cds,dnase_gene,dnase_cds,H3k4me1_gene,H3k4me3_gene,H3k27ac_gene,H3k4me1_cds,H3k4me3_cds,H3k27ac_cds,lamin_gene,repli_gene,nsome_gene,nsome_cds,transcription_gene,repeat_gene,repeat_cds,recomb_gene,AAA_freq,AAC_freq,AAG_freq,AAT_freq,ACA_freq,ACC_freq,ACG_freq,ACT_freq,AGA_freq,AGC_freq,AGG_freq,AGT_freq,ATA_freq,ATC_freq,ATG_freq,ATT_freq,CAA_freq,CAC_freq,CAG_freq,CAT_freq,CCA_freq,CCC_freq,CCG_freq,CCT_freq,CGA_freq,CGC_freq,CGG_freq,CGT_freq,CTA_freq,CTC_freq,CTG_freq,CTT_freq,GAA_freq,GAC_freq,GAG_freq,GAT_freq,GCA_freq,GCC_freq,GCG_freq,GCT_freq,GGA_freq,GGC_freq,GGG_freq,GGT_freq,GTA_freq,GTC_freq,GTG_freq,GTT_freq,TAA_freq,TAC_freq,TAG_freq,TAT_freq,TCA_freq,TCC_freq,TCG_freq,TCT_freq,TGA_freq,TGC_freq,TGG_freq,TGT_freq,TTA_freq,TTC_freq,TTG_freq,TTT_freq
0,0,-0.231676,-0.130244,1.901010,1.447504,1.190150,1.406066,0.022281,1.801757,-0.591137,0.472681,1.949397,-0.455623,-0.479855,0.577770,-0.254277,-0.303028,-0.093107,0.311667,-0.187624,-1.174934,-0.962971,-0.909913,-1.508122,-1.087512,-0.872773,1.602265,0.829956,-1.047882,-1.270695,1.036989,0.069038,-0.612966,-1.112302,-1.409381,-0.649559,-1.176062,-1.312306,0.391324,0.058646,-1.265710,0.384585,0.694620,1.409963,0.829268,1.797335,1.548229,0.903817,0.933719,-0.397981,-0.086160,1.414753,-0.346887,-1.243235,1.238157,0.943669,-0.824115,-0.356196,0.875956,1.696366,1.730229,0.054776,0.825449,1.145792,0.554808,-1.577040,0.447125,1.275182,-1.257541,-1.204038,-0.352835,-1.386701,-1.119012,-1.408061,0.539373,0.823120,-0.884699,0.161293,0.846379,0.539268,-0.628905,-1.111766,-0.365622,-1.157372,-0.412003
1,0,-0.479485,-0.225925,-0.683952,-1.218667,-0.855190,-0.428028,0.270554,-0.960246,-1.130393,-1.107698,-0.782452,-0.930966,2.172908,-0.728317,-0.208823,0.333337,-0.119886,-0.398704,-0.187624,0.584057,0.296533,1.149042,-0.065966,1.388289,1.197949,0.253892,-0.778624,-0.132201,1.166816,-1.541005,-0.402150,-1.145869,1.333728,0.803758,-0.693085,2.705568,0.155822,-0.765681,-0.887983,1.251116,-0.837808,-0.990130,-0.957495,-0.087426,-0.863659,-0.832224,-0.722976,-0.935078,-0.265076,0.174855,-0.599223,0.643374,1.017265,-0.225132,-0.370956,-0.282280,-0.689413,-1.277163,-0.978026,-0.930006,-0.676247,-0.838694,-0.038881,1.621418,-0.204153,-0.754478,-0.854915,0.639916,0.874857,1.208705,0.421527,1.247031,-0.065484,-0.327448,-0.151612,0.993725,0.200062,-0.895657,0.323194,-0.540768,2.205386,0.249155,1.918490,1.857967
2,1,-0.391241,-0.072836,-0.683952,0.482389,0.672705,0.928634,0.768924,0.019239,0.408770,0.695625,-0.699896,-0.723082,-0.479855,0.541861,0.772266,0.752547,-0.095956,0.083483,-0.187624,-0.427398,-0.104715,-0.181644,0.094350,-0.968550,0.069944,1.245460,0.097957,0.348878,0.087571,-0.020092,0.793797,-0.262386,-0.625139,-0.015744,0.063679,-0.847768,-0.097626,0.058707,0.230716,0.410751,1.302120,0.057532,-0.127896,0.624825,-0.388801,0.004821,0.294804,-0.559719,1.592316,-0.331791,0.390118,-0.739430,-0.674981,1.372832,0.379733,-0.626983,-0.528783,0.706419,-0.258796,0.598618,0.637801,0.690812,0.387350,0.448131,0.524565,-0.196243,-0.236192,-1.185901,-0.125129,0.573989,0.087986,-0.064738,-0.423978,-0.208042,0.784968,-1.144029,-0.571964,-0.053218,0.515054,-0.711053,-0.483593,-0.921655,-1.267833,-0.522241
3,0,0.296983,1.649415,-0.683952,-0.794498,-0.525282,-0.152416,0.003209,-0.815461,-0.437862,-0.461587,-0.723912,-0.569118,-0.479855,-1.137745,0.007207,0.798077,-0.111170,-0.178076,-0.187624,-0.190946,0.099659,0.298522,-0.005526,1.484987,0.713881,-0.454725,-0.193328,0.464364,0.145263,-0.500734,-0.893309,0.686977,0.905230,1.622440,0.609473,0.770784,1.164331,-0.696708,-0.068439,0.339796,-0.022302,-0.340180,-0.520962,-0.388488,-0.352416,-0.573491,-0.361055,-0.499743,-0.710679,-1.045508,-0.383306,0.040561,0.011920,1.428825,-0.734995,0.908960,-0.647483,-0.473808,-0.526752,-0.708639,-0.813613,-0.925574,-0.740244,0.185416,0.788469,-0.953331,0.313504,0.496344,1.057425,-0.315237,0.226605,1.158869,0.707721,-0.020972,-0.420635,-1.054114,1.616000,-0.070003,-0.723890,0.253554,1.335076,-0.278880,0.378629,0.510510
4,1,-0.233288,0.290750,-0.683952,-1.458067,-0.633030,-0.965933,-0.391393,-0.666217,0.008395,0.584838,-0.697772,0.645437,-0.479855,-0.547933,0.098886,0.496031,-0.115838,-0.271790,-0.187624,2.455216,1.191996,0.474943,0.779322,1.538955,1.013696,-0.743015,-0.929633,1.358504,2.364951,-1.849695,-0.438100,-0.591726,2.053663,-0.711763,1.087081,1.470753,0.214475,-0.630416,-0.730091,0.086822,-1.097176,-1.324178,-0.768358,-1.082116,-0.415774,-0.811596,-0.995602,-0.996150,1.078210,-0.542113,-1.385397,-0.321958,1.113882,0.524697,0.212556,1.873450,-0.437169,-1.278794,-0.714242,-1.709055,-0.095598,-0.701969,-0.722531,-0.637239,0.846933,-1.551057,-0.692779,0.443985,1.848496,0.617875,0.776714,1.546713,-0.403397,-0.895191,-1.103011,-0.060126,0.658087,-1.092099,0.133426,0.477238,1.045356,0.008145,0.866053,1.720497
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19742,0,-0.008044,-0.053700,-0.448955,-0.982427,-0.552147,-0.984021,0.505295,1.002051,1.039536,0.768614,1.365052,1.458597,-0.479855,0.663963,-0.122193,0.575389,-0.089952,-0.186945,-0.187624,0.941305,0.247694,-0.939651,-0.201191,1.429151,-0.279492,0.022358,-0.875852,-0.264560,-0.571756,-0.436544,-1.322671,0.497995,0.762950,1.351383,0.453043,0.988812,-0.144056,-0.075994,-0.370951,0.521573,-0.316267,-0.667558,-0.781508,0.326564,-0.634507,-0.526259,-0.787409,-0.624621,1.370757,0.379016,-0.415046,2.719502,-0.075607,-1.105610,-1.383187,0.286833,-0.842614,-0.713383,-0.834216,0.137907,-1.134824,-0.795022,-0.803196,-0.805166,-0.407059,-0.150692,-0.803457,0.712130,1.097060,1.105630,0.185906,1.135716,1.333007,0.268711,0.042214,2.536203,-0.030048,-0.494799,-0.470321,-0.036806,1.680893,3.162962,-0.270327,1.338705
19743,1,-0.681359,-0.493831,1.901010,-0.767611,-0.544373,0.800940,0.783741,1.420323,1.356322,1.282143,1.949397,1.458597,-0.479855,-0.060838,0.837506,1.426474,-0.119591,-0.073143,-0.187624,0.548324,-0.092900,0.061328,0.458257,1.413067,1.465101,-0.552020,0.231724,1.154246,0.863690,-0.964231,-0.734662,-0.898042,-0.790804,2.757474,0.660989,0.920277,1.654027,0.375548,-1.748655,1.472459,1.362420,-0.396488,-0.915360,-0.722720,-0.435239,-0.442829,-0.635440,-0.129710,1.025711,-1.050458,-0.473807,0.092115,1.187293,0.223116,-0.135217,-0.391375,-1.793045,-0.535497,-0.459014,-1.149705,-0.601998,-0.425905,-1.206344,-0.064308,1.171202,-0.070138,-0.725401,-0.289256,-1.204038,2.129405,0.032187,0.693950,-0.226363,0.934897,-0.238250,0.399073,1.431014,-1.760851,0.254423,0.531283,0.478399,0.079103,0.423915,0.471658
19744,1,-0.027788,0.252478,0.491031,1.033839,0.216291,0.999358,1.151672,1.219472,0.664152,1.282143,-0.053593,0.291378,-0.479855,0.723738,3.691876,2.921017,-0.072679,0.063230,-0.187624,0.909979,-1.005636,-0.915411,-0.983445,-1.108265,-1.237069,1.164689,1.884213,-0.223508,-0.982068,0.684651,-0.116311,-0.585774,-1.058933,-1.110493,-0.460700,-0.861815,-1.218203,0.743927,0.412363,-0.252961,0.974854,0.600214,0.435618,0.720624,1.599806,0.827066,0.804827,-0.214999,-0.125386,0.214140,0.932036,-0.157570,-0.839763,1.161089,0.172240,-1.076924,0.674472,0.798632,0.922822,0.610677,0.134308,0.461517,0.566644,0.354854,-0.935152,0.017858,0.789543,-0.182111,-0.953362,-0.008744,-1.120209,-1.018815,-0.790571,0.242229,0.070321,0.080266,-0.814153,1.117985,0.105973,0.365179,-0.979377,0.255464,-0.614369,-0.144619
19745,0,1.586797,0.730881,,-1.791608,-0.710496,-0.626706,-0.437992,-0.339529,1.356322,-0.213250,0.240000,1.458597,2.172908,0.241114,-0.063052,0.337597,-0.100412,0.823720,-0.170479,1.173826,2.586235,1.338772,1.826043,1.635902,0.918387,-0.880112,-0.872814,0.328257,1.297233,-0.960317,-0.709130,2.415034,2.047422,0.770368,0.177542,1.311578,0.971775,-0.962400,-0.759275,0.671462,-0.981605,-1.257913,-1.022202,-1.367668,-0.849065,-0.814894,-1.091438,-1.053468,0.008093,-1.018916,-1.173959,0.092417,1.432271,-0.606222,-0.740705,1.332382,-0.804953,-1.319717,-0.881704,-1.425642,-0.763700,-1.373148,-1.112675,-0.983782,1.186647,-0.427305,-0.525210,0.862068,2.786691,0.119046,1.466022,0.673393,0.790606,-1.200644,-1.073769,0.419875,1.321714,-1.226774,-1.329099,0.117893,1.615740,0.164249,0.595836,0.910856


In [89]:
# Hold out 20% of the rows as a test set; the fixed random_state keeps the
# split reproducible across runs.
train_set, test_set = train_test_split(df, test_size=0.2, random_state=42)

In [91]:
# Separate the target column from the predictors of the training split.
labels = train_set["response"].copy()  # target values
data = train_set.drop(columns="response")  # training features without the target
data

Unnamed: 0,cds_length,occ_total_sum,oldest_phylostratum,gc_cds,dnase_gene,dnase_cds,H3k4me1_gene,H3k4me3_gene,H3k27ac_gene,H3k4me1_cds,H3k4me3_cds,H3k27ac_cds,lamin_gene,repli_gene,nsome_gene,nsome_cds,transcription_gene,repeat_gene,repeat_cds,recomb_gene,AAA_freq,AAC_freq,AAG_freq,AAT_freq,ACA_freq,ACC_freq,ACG_freq,ACT_freq,AGA_freq,AGC_freq,AGG_freq,AGT_freq,ATA_freq,ATC_freq,ATG_freq,ATT_freq,CAA_freq,CAC_freq,CAG_freq,CAT_freq,CCA_freq,CCC_freq,CCG_freq,CCT_freq,CGA_freq,CGC_freq,CGG_freq,CGT_freq,CTA_freq,CTC_freq,CTG_freq,CTT_freq,GAA_freq,GAC_freq,GAG_freq,GAT_freq,GCA_freq,GCC_freq,GCG_freq,GCT_freq,GGA_freq,GGC_freq,GGG_freq,GGT_freq,GTA_freq,GTC_freq,GTG_freq,GTT_freq,TAA_freq,TAC_freq,TAG_freq,TAT_freq,TCA_freq,TCC_freq,TCG_freq,TCT_freq,TGA_freq,TGC_freq,TGG_freq,TGT_freq,TTA_freq,TTC_freq,TTG_freq,TTT_freq
13300,0.175294,0.118525,-0.448955,-0.945011,-0.954265,-0.791490,-0.969238,-0.913798,-0.921833,-0.706840,-0.719022,-0.530154,1.742704,-0.733096,-0.093588,0.756527,-0.119712,-0.230090,-0.187624,-0.439347,1.011802,1.359812,1.328939,0.475220,0.935366,-0.910335,-1.173391,0.602358,1.759545,-0.009164,-0.435713,1.080259,0.132863,1.654689,0.212061,0.434113,0.148035,-0.847332,1.260937,1.275854,-0.512488,-0.572756,-0.943306,-0.998354,-1.261749,-0.640511,-0.911815,-0.794583,0.421274,-0.289151,-0.659877,0.363680,1.800193,-0.074192,-0.694977,0.621173,0.308043,-1.018806,-0.627598,-0.643064,0.074813,-1.097190,-0.979161,-0.937861,-0.111330,0.164004,-1.041603,0.044562,0.598269,-0.736450,0.848652,0.008999,1.591273,-0.615150,-0.796999,0.837193,0.532584,-0.319801,-0.625726,-0.734367,0.245486,0.285616,0.484034,0.205866
15976,-0.218379,0.214206,-0.448955,-0.896574,-0.906374,-1.067059,-1.259419,-0.778248,-1.086890,-1.107698,-0.782452,-0.930966,-0.479855,0.305307,-0.070784,0.952663,-0.119933,0.526177,5.249018,-0.282281,0.856195,1.564466,-0.531522,2.269556,1.775385,0.386247,-0.665688,-0.281972,1.620325,-1.619972,0.085543,-0.531024,0.145000,1.323112,1.363707,-0.033826,-0.671348,1.326346,-0.193461,0.634490,-1.897710,-0.566064,-0.784450,0.212270,-0.460995,-0.816330,-1.003960,-0.502652,-0.134163,-0.153518,-1.313721,0.364127,2.337934,-1.299553,0.996900,0.323301,-1.179687,-1.089154,-0.660719,-1.497621,0.861177,-1.032914,-0.238461,-1.001279,0.748671,-1.432690,1.360930,-0.868948,0.319128,-0.099011,-0.172252,-0.040092,2.784178,-1.472052,-1.293949,-0.244860,1.073666,-0.904200,-0.167087,1.710238,-0.388501,0.242866,-0.209590,-0.057372
4069,-0.670479,-0.589511,1.901010,1.241097,0.363426,0.429239,1.151672,1.801757,1.356322,1.282143,1.949397,1.458597,-0.479855,0.645800,0.742975,0.544949,-0.119284,-0.451617,-0.187624,-1.088147,-1.255100,-1.464816,-0.724319,-0.813038,-1.464159,-0.260989,0.128925,0.927787,0.193197,-0.052473,1.476165,-2.028698,-1.220246,-1.106632,-0.086475,-1.429155,-1.587724,-0.184758,1.524292,-0.412676,0.172267,1.012029,0.387018,1.768584,-0.504348,0.924649,-0.669502,0.447451,0.239612,-0.781410,3.611991,-0.066344,-0.648563,0.516346,0.293236,-1.322691,2.249418,2.221575,0.243567,2.770548,-1.095668,1.044105,-0.231981,-0.772869,0.309354,-1.481646,-0.468575,-1.432688,-0.785181,0.114240,-0.050852,-1.321669,-1.192579,-0.840111,-0.977862,-1.260715,-0.187565,5.210743,-0.615234,-0.728566,-1.218392,-1.372600,-0.461679,-1.049092
7479,-0.101123,0.003709,-0.448955,-1.368561,-0.595264,-0.375685,-0.476991,-0.181827,-0.458217,-0.346694,-0.259300,-0.381050,-0.479855,-0.101504,0.061116,0.599643,-0.088464,0.119760,-0.187624,-0.292981,1.502725,0.866358,1.182712,1.715321,0.942791,-1.397733,-0.430665,-0.534509,1.036302,-0.297339,-1.032131,1.673783,1.365566,0.167677,0.517057,1.706812,1.244160,-0.374710,0.204160,0.893817,0.079666,-1.206862,-1.079173,-1.353048,-0.880549,-0.885908,-0.854920,-0.657713,-0.043077,-1.466234,-0.388349,-0.460444,0.948207,-1.210187,-1.111731,1.045870,0.437350,-1.021737,-0.762499,-0.286237,-0.883716,-1.007920,-1.075076,-1.348929,0.978663,0.446937,-1.142983,0.466323,1.936683,-0.550414,1.412060,0.687164,0.680840,0.046633,-1.055383,-0.072484,0.060851,0.145773,-1.074287,-0.092613,1.071609,1.322789,-0.143767,1.108575
15978,-0.243764,0.041981,-0.448955,-0.986891,-0.634923,-0.460669,-0.570915,-0.225084,-0.514820,-1.107698,-0.782452,-0.930966,1.594385,-1.068212,0.913259,1.353427,-0.107112,-0.147522,-0.187624,-1.174934,1.241045,0.732353,-0.043384,1.706465,0.607600,0.176996,-1.226391,0.524559,2.377660,-0.195376,0.014830,1.233162,0.421218,1.077416,0.368093,0.623478,-0.585801,0.325854,1.349053,0.317706,-0.161573,-0.973602,-0.981345,-0.999875,-0.713377,-0.811250,-1.069146,-1.351467,-0.211405,-0.148842,-0.747660,-0.200648,1.631730,-0.732014,1.609272,0.090954,-0.318325,-1.058244,-0.845771,-0.458824,0.471469,-0.938988,-0.719727,-1.092946,0.480484,-1.038096,-0.082066,-0.964839,0.836591,-0.009389,0.601893,0.116473,1.697190,-1.100719,-0.923496,-0.155017,0.480015,-0.758526,-0.306126,-0.508059,0.727308,-0.217476,-0.641339,0.436397
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11284,-0.272373,-0.149380,-0.683952,-0.401324,0.185894,-0.380234,1.151672,0.673233,1.356322,1.282143,-0.341259,1.458597,-0.479855,0.978437,1.080897,1.362244,-0.010561,-0.169114,-0.169080,1.049719,0.542429,0.793776,0.388671,0.121886,0.366144,-0.123833,0.059970,0.127530,0.769966,-1.107806,-0.114478,-0.464503,-0.290363,-1.170053,0.960872,-0.429866,0.297401,-0.057950,-0.910015,0.253935,-0.527098,-0.723937,-0.133931,-0.570589,-0.018006,0.066405,-0.351031,-0.582429,-0.607520,-0.204072,-0.490736,1.537352,0.441735,0.200587,0.147025,0.111291,0.565544,-0.603410,-0.275632,-0.511417,0.103887,-0.053963,-0.078048,-0.028153,-0.372286,0.872167,-0.347539,-0.151507,0.361389,-0.545776,-0.042545,-0.557887,-0.831274,-0.324364,-0.122734,1.274618,-0.023501,-0.305482,0.545682,0.334939,0.274213,0.677039,-0.130521,1.351994
11964,0.005656,0.137661,-0.448955,-1.130603,-0.686902,-0.069641,-0.159816,0.289780,1.356322,-0.224021,0.878754,1.458597,-0.479855,-0.305310,0.543760,0.736043,-0.058159,-0.423864,-0.187624,-0.857777,0.714799,1.098764,0.369763,1.167253,0.661537,-0.010996,-0.990411,-0.091770,-0.070854,-0.201171,-0.631615,1.364286,1.131412,0.616192,0.469564,1.450303,0.549375,-1.164757,-0.109690,1.047168,-0.711053,-0.964725,-0.581861,-0.491499,-0.861679,-0.486001,-0.739381,-0.890917,0.049967,-0.627894,-0.714783,0.349917,0.630282,-0.309139,-0.457335,0.980666,-0.150618,-0.608862,-0.646442,-0.667694,-0.691944,-0.721375,-1.106750,-0.934961,1.182776,-1.398343,-0.320213,0.376368,1.394651,0.611156,0.426284,0.738698,0.652266,-1.201565,-0.615359,0.010986,1.949450,-0.694718,-0.889353,0.008741,1.104546,0.173374,1.156872,1.541760
5390,-0.278820,-0.015428,-0.448955,0.269463,1.147547,0.672422,1.151672,1.801757,1.356322,1.282143,1.949397,1.458597,-0.479855,0.630760,1.107368,1.053363,-0.002215,0.607039,1.782216,0.223014,0.238790,-0.384266,1.149236,-0.549157,-0.057100,-0.226447,-1.205501,-0.605134,0.155770,0.475460,1.434516,0.599156,-0.751879,-1.159312,-0.275982,-0.514016,0.319545,-0.831238,0.970348,-0.444363,0.938971,0.565925,-0.126108,0.113159,-0.650179,0.073801,-0.265300,-1.148095,-0.075045,0.228196,-0.270817,0.706661,0.460165,0.086947,-0.328484,-0.591718,-0.123192,0.987653,-0.128942,0.037134,-0.270553,0.858573,0.079820,-0.831832,0.637427,-0.737363,-0.006401,-0.441117,-0.476209,-0.703242,0.741109,-0.882006,-0.698688,-0.531710,-0.111585,0.651281,0.295776,-0.466509,-0.675125,0.247372,-0.987068,0.577012,0.099084,0.125744
860,-0.377944,-0.206789,-0.448955,-1.385522,-0.390536,-0.150681,-0.384072,0.169188,-0.769039,-0.139016,0.715816,-0.841755,-0.479855,-0.411839,-0.419155,0.212046,0.211946,-0.417410,-0.187624,-0.760659,1.306827,0.405344,1.022525,0.906316,0.806313,-1.290082,-0.739162,1.364235,0.187442,-0.107187,-1.312324,1.139135,0.364369,0.623945,0.527828,1.385539,-0.047436,-0.301565,-0.647369,0.319025,-1.505427,-1.047015,-0.913582,-1.122609,-0.625805,-0.771387,-0.729364,-0.590503,-0.449804,-1.551148,-0.626536,1.774439,1.207897,-0.257844,-0.627787,1.538178,-0.170240,-1.104019,-0.716873,-0.593713,-0.463204,-1.049212,-1.382106,-1.262132,0.943885,-0.500523,0.216587,0.466637,1.333026,0.484941,-0.196790,0.706742,0.370414,-1.538244,0.005003,-0.249972,2.811037,-0.888052,-0.550573,0.809938,1.627540,-0.056839,1.901697,1.926688


In [93]:
# Preprocessing: fill in missing values, then scale every feature.
pipeline = Pipeline(
    [
        # Replace missing values with the column median (rows are kept).
        ("imputer", SimpleImputer(strategy="median")),
        ("std_scaler", StandardScaler()),  # Feature scaling
    ]
)

data_tr = pipeline.fit_transform(data)
# Rebuild a DataFrame from the transformed values. The original row index is
# restored along with the column names: without it the frame would get a fresh
# 0..n-1 index and no longer line up with `labels`, which keeps the shuffled
# index produced by the train/test split.
data_prepared = pd.DataFrame(data_tr, columns=data.columns, index=data.index)

In [None]:
def split_for_PCA(X):
    """Placeholder for a PCA-related split of ``X``; not implemented yet.

    The body is still a stub: ``X`` is accepted but never read, and the
    function always returns ``None``.
    """
    # TODO: implement the split; until then callers receive None.
    return None