# Toxikind: Data Preprocessing

In [1]:
# OS I/O
import os

# Data handling
import numpy as np
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.compose import ColumnTransformer

# Feature Scaling Pipeline

**Notebook Codeblock Version (reference only, unaltered)**

# Question: Has a MinMaxScaler to be trained on training data (being a stateless scaler)?

**Function**

In [2]:
def minmax_scale_features(X: pd.DataFrame) -> pd.DataFrame:
    """
    This function scales feature tables  with a MinMaxScaler trained on the training data
    """
    # ColumnTransformer with MinMaxScaler
    column_transformer = ColumnTransformer([
        ("minmax_scaler", MinMaxScaler(), X.columns)
        ])

    # Pipeline
    pipeline = Pipeline([
        ("column_transformer", column_transformer)
        ])

    # Transform data while preserving column names and index
    X = pd.DataFrame(pipeline.fit_transform(X), columns=X.columns, index=X.index)

    return X    

# Training Features

**Load data**

In [3]:
path_features_train = "../raw_data/tox21_dense_train.csv.gz"
X_train_raw = pd.read_csv(path_features_train).set_index("Unnamed: 0")
X_train_raw

Unnamed: 0_level_0,AW,AWeight,Arto,BertzCT,Chi0,Chi1,Chi10,Chi2,Chi3,Chi3c,...,W3D,W3DH,WNSA1,WNSA2,WNSA3,WPSA1,WPSA2,WPSA3,grav,rygr
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
NCGC00178831-03,5.436720e+07,13.053,2.176,3.194,23.112,15.868,1.496,15.127,12.592,2.619,...,2687.469,9241.018,115.371,-915.496,-39.983,290.078,2301.941,59.492,88.147,3.708
NCGC00166114-03,1.268818e+07,22.123,2.065,3.137,21.033,13.718,1.937,13.187,11.951,2.502,...,2184.384,3234.199,194.740,-1029.609,-34.205,235.360,1244.323,82.906,134.852,4.131
NCGC00263563-01,3.076932e+06,13.085,2.154,3.207,46.896,29.958,3.806,30.105,25.569,7.819,...,13803.524,76582.899,238.004,-4358.946,-106.537,868.685,15909.444,135.335,216.852,5.075
NCGC00013058-02,7.168569e+07,12.832,2.029,3.380,51.086,32.045,1.806,29.090,21.603,5.222,...,13807.345,50498.175,226.312,-2785.555,-61.923,763.288,9394.859,125.509,238.265,4.640
NCGC00167516-01,7.989702e+06,12.936,2.124,3.573,70.295,46.402,3.604,42.132,32.570,7.002,...,43231.286,163659.229,850.869,-21136.699,-367.122,1798.703,44681.209,362.168,317.901,7.845
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
NCGC00261292-01,1.428572e+07,14.255,2.000,2.628,9.259,6.309,0.157,5.468,4.484,0.758,...,306.364,1435.590,76.419,-190.192,-13.757,100.624,250.402,19.275,29.148,2.581
NCGC00261245-01,1.193182e+07,13.674,2.061,2.920,21.142,15.382,1.201,12.713,10.576,1.082,...,2528.642,12293.627,94.878,-595.491,-22.275,324.131,2034.439,49.446,93.636,3.666
NCGC00260828-01,1.081800e+01,12.374,2.045,3.128,33.242,20.457,0.806,19.711,14.799,4.733,...,9171.300,44070.070,267.400,-2656.568,-104.039,874.679,8689.849,144.294,91.670,8.054
NCGC00260687-01,3.229000e+00,12.543,2.267,2.700,10.251,7.381,0.587,6.455,5.857,0.810,...,391.790,1815.417,39.578,-105.234,-9.967,146.565,389.732,23.879,28.201,2.954


**Scale data**

In [4]:
X_train = minmax_scale_features(X_train_raw)
print(X_train.min())
print(X_train.max())
print(X_train)

AW         0.0
AWeight    0.0
Arto       0.0
BertzCT    0.0
Chi0       0.0
          ... 
WPSA1      0.0
WPSA2      0.0
WPSA3      0.0
grav       0.0
rygr       0.0
Length: 801, dtype: float64
AW         1.0
AWeight    1.0
Arto       1.0
BertzCT    1.0
Chi0       1.0
          ... 
WPSA1      1.0
WPSA2      1.0
WPSA3      1.0
grav       1.0
rygr       1.0
Length: 801, dtype: float64
                           AW   AWeight      Arto   BertzCT      Chi0  \
Unnamed: 0                                                              
NCGC00178831-03  5.436720e-01  0.012041  0.815898  0.832518  0.244068   
NCGC00166114-03  1.268818e-01  0.076781  0.774278  0.816115  0.222113   
NCGC00263563-01  3.076931e-02  0.012270  0.807649  0.836259  0.495232   
NCGC00013058-02  7.168569e-01  0.010464  0.760780  0.886043  0.539479   
NCGC00167516-01  7.989701e-02  0.011206  0.796400  0.941583  0.742331   
...                       ...       ...       ...       ...       ...   
NCGC00261292-01  1.428572e-01 

**Save data**

In [5]:
X_train.to_csv("../data/X_train.csv")

# Training Targets

**Load data**

In [6]:
path_targets_train = "../raw_data/tox21_labels_train.csv.gz"  
y_train = pd.read_csv(path_targets_train).set_index("Unnamed: 0")
y_train

Unnamed: 0_level_0,NR.AhR,NR.AR,NR.AR.LBD,NR.Aromatase,NR.ER,NR.ER.LBD,NR.PPAR.gamma,SR.ARE,SR.ATAD5,SR.HSE,SR.MMP,SR.p53
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
NCGC00178831-03,,,,,,,,,,0.0,,
NCGC00166114-03,,,,,,,,,,0.0,,
NCGC00263563-01,,,,,,,,,,0.0,,
NCGC00013058-02,,,,,,,,,,1.0,,
NCGC00167516-01,,0.0,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
NCGC00261292-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
NCGC00261245-01,0.0,0.0,0.0,,,0.0,0.0,,,,,
NCGC00260828-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
NCGC00260687-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


**Save data**

In [7]:
y_train.to_csv("../data/y_train.csv")

# Testing Features

**Load data**

In [8]:
path_features_test = "../raw_data/tox21_dense_test.csv.gz"
X_test_raw = pd.read_csv(path_features_test).set_index("Unnamed: 0")
X_test_raw

Unnamed: 0_level_0,AW,AWeight,Arto,BertzCT,Chi0,Chi1,Chi10,Chi2,Chi3,Chi3c,...,W3D,W3DH,WNSA1,WNSA2,WNSA3,WPSA1,WPSA2,WPSA3,grav,rygr
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
NCGC00261900-01,2.612482e+07,12.688,2.226,3.226,37.329,25.440,3.663,24.200,20.222,4.565,...,9687.312,42351.907,194.444,-2518.829,-83.110,772.051,10001.075,131.633,145.967,5.499
NCGC00260869-01,8.333337e+06,17.500,2.167,2.923,16.353,10.872,1.193,11.116,9.279,2.693,...,1256.410,2621.885,104.011,-475.829,-33.456,219.411,1003.763,76.703,76.043,3.728
NCGC00261776-01,4.074000e+00,12.464,2.364,3.043,14.681,10.826,2.149,9.980,9.469,1.342,...,1072.430,3152.648,93.486,-341.628,-21.327,174.791,638.757,32.885,45.933,3.657
NCGC00261380-01,8.000005e+06,13.827,2.080,2.845,16.778,11.720,0.777,10.139,8.207,1.251,...,1408.177,4596.402,127.215,-519.799,-27.729,199.061,813.323,35.712,58.214,3.659
NCGC00261842-01,4.838000e+00,14.509,2.087,2.880,16.872,10.920,0.413,10.035,7.719,2.090,...,1217.075,4343.460,134.802,-816.522,-55.496,192.858,1168.142,33.190,57.065,3.635
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
NCGC00357168-01,2.000000e+00,16.820,1.600,1.157,4.121,2.414,0.000,1.354,0.707,0.000,...,25.161,207.065,25.378,-34.751,-8.135,45.801,62.715,7.705,6.893,1.868
NCGC00357283-01,3.714000e+00,13.208,2.000,2.134,11.096,7.296,0.072,5.671,4.091,0.612,...,406.748,2632.001,51.600,-186.503,-25.487,151.500,547.566,24.878,29.311,2.976
NCGC00357210-01,1.905000e+00,16.017,2.000,2.295,5.276,3.305,0.000,2.885,2.290,0.471,...,49.621,146.562,34.623,-39.767,-6.189,38.729,44.481,7.121,12.809,1.779
NCGC00357118-01,4.186000e+00,15.674,2.190,2.851,15.088,9.935,0.366,9.720,8.086,2.262,...,902.831,2855.278,112.538,-443.321,-28.525,142.805,562.539,28.106,55.845,3.282


**Scale data**

In [9]:
X_test = minmax_scale_features(X_test_raw)
print(X_test.min())
print(X_test.max())
print(X_test)

AW         0.0
AWeight    0.0
Arto       0.0
BertzCT    0.0
Chi0       0.0
          ... 
WPSA1      0.0
WPSA2      0.0
WPSA3      0.0
grav       0.0
rygr       0.0
Length: 801, dtype: float64
AW         1.0
AWeight    1.0
Arto       1.0
BertzCT    1.0
Chi0       1.0
          ... 
WPSA1      1.0
WPSA2      1.0
WPSA3      1.0
grav       1.0
rygr       1.0
Length: 801, dtype: float64
                           AW   AWeight      Arto   BertzCT      Chi0  \
Unnamed: 0                                                              
NCGC00261900-01  3.014403e-01  0.019417  0.876550  0.898342  0.749894   
NCGC00260869-01  9.615388e-02  0.157427  0.844744  0.805283  0.304657   
NCGC00261776-01  3.546923e-08  0.012992  0.950943  0.842138  0.269167   
NCGC00261380-01  9.230774e-02  0.052084  0.797844  0.781327  0.313678   
NCGC00261842-01  4.428462e-08  0.071644  0.801617  0.792076  0.315673   
...                       ...       ...       ...       ...       ...   
NCGC00357168-01  1.153846e-08 

**Save data**

In [10]:
X_test.to_csv("../data/X_test.csv")

# Testing Targets

**Load data**

In [11]:
path_targets_test = "../raw_data/tox21_labels_test.csv.gz"  
y_test = pd.read_csv(path_targets_test).set_index("Unnamed: 0")
y_test

Unnamed: 0_level_0,NR.AhR,NR.AR,NR.AR.LBD,NR.Aromatase,NR.ER,NR.ER.LBD,NR.PPAR.gamma,SR.ARE,SR.ATAD5,SR.HSE,SR.MMP,SR.p53
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
NCGC00261900-01,0.0,1.0,,0.0,0.0,0.0,0.0,,0.0,0.0,,0.0
NCGC00260869-01,0.0,1.0,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
NCGC00261776-01,1.0,1.0,0.0,,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0
NCGC00261380-01,,0.0,,1.0,0.0,,,1.0,0.0,,0.0,
NCGC00261842-01,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...
NCGC00357168-01,0.0,0.0,0.0,0.0,,0.0,0.0,1.0,0.0,0.0,0.0,0.0
NCGC00357283-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
NCGC00357210-01,0.0,0.0,0.0,0.0,0.0,0.0,,,,0.0,,0.0
NCGC00357118-01,0.0,0.0,0.0,,,0.0,,1.0,0.0,,,1.0


In [12]:
y_test.to_csv("../data/y_test.csv")

# Load processed data (double check)

**Training Features**

In [13]:
path_x_train = "../data/X_train.csv"
X_train = pd.read_csv(path_x_train).set_index("Unnamed: 0")
X_train

Unnamed: 0_level_0,AW,AWeight,Arto,BertzCT,Chi0,Chi1,Chi10,Chi2,Chi3,Chi3c,...,W3D,W3DH,WNSA1,WNSA2,WNSA3,WPSA1,WPSA2,WPSA3,grav,rygr
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
NCGC00178831-03,5.436720e-01,0.012041,0.815898,0.832518,0.244068,0.251849,0.122744,0.255645,0.247884,0.131926,...,0.028705,0.025992,0.084419,0.992430,0.967627,0.086494,0.022640,0.042745,0.000218,0.231025
NCGC00166114-03,1.268818e-01,0.076781,0.774278,0.816115,0.222113,0.217725,0.158927,0.222859,0.235265,0.126033,...,0.023329,0.009094,0.142494,0.991487,0.972305,0.070178,0.012238,0.059568,0.000336,0.258584
NCGC00263563-01,3.076931e-02,0.012270,0.807649,0.836259,0.495232,0.475479,0.312274,0.508771,0.503347,0.393865,...,0.147491,0.215437,0.174151,0.963958,0.913740,0.259019,0.156472,0.097238,0.000542,0.320086
NCGC00013058-02,7.168569e-01,0.010464,0.760780,0.886043,0.539479,0.508602,0.148179,0.491618,0.425273,0.263047,...,0.147532,0.142056,0.165596,0.976968,0.949863,0.227592,0.092400,0.090178,0.000596,0.291745
NCGC00167516-01,7.989701e-02,0.011206,0.796400,0.941583,0.742331,0.736470,0.295701,0.712026,0.641167,0.352710,...,0.461955,0.460399,0.622595,0.825232,0.702753,0.536326,0.439447,0.260218,0.000796,0.500554
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
NCGC00261292-01,1.428572e-01,0.020621,0.749906,0.669640,0.097777,0.100133,0.012882,0.092409,0.088271,0.038183,...,0.003261,0.004034,0.055917,0.998427,0.988861,0.030003,0.002463,0.013849,0.000069,0.157600
NCGC00261245-01,1.193182e-01,0.016474,0.772778,0.753669,0.223264,0.244135,0.098540,0.214848,0.208197,0.054503,...,0.027008,0.034580,0.069424,0.995076,0.981965,0.096647,0.020009,0.035527,0.000232,0.228288
NCGC00260828-01,9.818000e-08,0.007195,0.766779,0.813525,0.351043,0.324683,0.066131,0.333114,0.291330,0.238414,...,0.097991,0.123973,0.195661,0.978034,0.915763,0.260806,0.085466,0.103675,0.000227,0.514170
NCGC00260687-01,2.229000e-08,0.008401,0.850019,0.690360,0.108253,0.117148,0.048162,0.109089,0.115300,0.040802,...,0.004174,0.005102,0.028960,0.999130,0.991930,0.043702,0.003833,0.017157,0.000067,0.181901


**Training Targets**

In [14]:
path_y_train = "../data/y_train.csv" 
y_train = pd.read_csv(path_y_train).set_index("Unnamed: 0")
y_train

Unnamed: 0_level_0,NR.AhR,NR.AR,NR.AR.LBD,NR.Aromatase,NR.ER,NR.ER.LBD,NR.PPAR.gamma,SR.ARE,SR.ATAD5,SR.HSE,SR.MMP,SR.p53
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
NCGC00178831-03,,,,,,,,,,0.0,,
NCGC00166114-03,,,,,,,,,,0.0,,
NCGC00263563-01,,,,,,,,,,0.0,,
NCGC00013058-02,,,,,,,,,,1.0,,
NCGC00167516-01,,0.0,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
NCGC00261292-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
NCGC00261245-01,0.0,0.0,0.0,,,0.0,0.0,,,,,
NCGC00260828-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
NCGC00260687-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


**Testing Features**

In [15]:
path_x_test = "../data/X_test.csv"
X_test = pd.read_csv(path_x_test).set_index("Unnamed: 0")
X_test

Unnamed: 0_level_0,AW,AWeight,Arto,BertzCT,Chi0,Chi1,Chi10,Chi2,Chi3,Chi3c,...,W3D,W3DH,WNSA1,WNSA2,WNSA3,WPSA1,WPSA2,WPSA3,grav,rygr
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
NCGC00261900-01,3.014403e-01,0.019417,0.876550,0.898342,0.749894,0.836184,0.631552,0.718016,0.827313,0.404483,...,0.529858,1.000000,0.326114,0.839733,0.812187,0.969037,0.744098,0.438166,0.000621,0.894202
NCGC00260869-01,9.615388e-02,0.157427,0.844744,0.805283,0.304657,0.337758,0.205690,0.329812,0.379618,0.238614,...,0.068624,0.061862,0.171498,0.969833,0.925921,0.275393,0.074682,0.255321,0.000322,0.560806
NCGC00261776-01,3.546923e-08,0.012992,0.950943,0.842138,0.269167,0.336184,0.370517,0.296107,0.387391,0.118908,...,0.058559,0.074395,0.153503,0.978379,0.953703,0.219388,0.047525,0.109464,0.000194,0.547440
NCGC00261380-01,9.230774e-02,0.052084,0.797844,0.781327,0.313678,0.366772,0.133966,0.300825,0.335761,0.110845,...,0.076927,0.108486,0.211171,0.967033,0.939039,0.249851,0.060513,0.118874,0.000246,0.547816
NCGC00261842-01,4.428462e-08,0.071644,0.801617,0.792076,0.315673,0.339401,0.071207,0.297739,0.315796,0.185185,...,0.066472,0.102514,0.224142,0.948138,0.875438,0.242065,0.086912,0.110479,0.000241,0.543298
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
NCGC00357168-01,1.153846e-08,0.137924,0.539084,0.262899,0.045020,0.048378,0.000000,0.040173,0.028924,0.000000,...,0.001266,0.004842,0.037057,0.997922,0.983920,0.057487,0.004666,0.025648,0.000027,0.210655
NCGC00357283-01,3.131538e-08,0.034330,0.754717,0.562961,0.193072,0.215410,0.012414,0.168259,0.167369,0.054226,...,0.022141,0.062101,0.081889,0.988258,0.944175,0.190155,0.040740,0.082811,0.000123,0.419239
NCGC00357210-01,1.044231e-08,0.114894,0.754717,0.612408,0.069536,0.078863,0.000000,0.085598,0.093687,0.041733,...,0.002604,0.003413,0.052863,0.997602,0.988378,0.048611,0.003309,0.023704,0.000052,0.193901
NCGC00357118-01,3.676154e-08,0.105056,0.857143,0.783170,0.277806,0.305700,0.063103,0.288393,0.330810,0.200425,...,0.049281,0.067373,0.186077,0.971903,0.937216,0.179241,0.041854,0.093556,0.000236,0.476845


**Test Targets**

In [16]:
path_y_test = "../data/y_test.csv" 
y_test = pd.read_csv(path_y_test).set_index("Unnamed: 0")
y_test

Unnamed: 0_level_0,NR.AhR,NR.AR,NR.AR.LBD,NR.Aromatase,NR.ER,NR.ER.LBD,NR.PPAR.gamma,SR.ARE,SR.ATAD5,SR.HSE,SR.MMP,SR.p53
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
NCGC00261900-01,0.0,1.0,,0.0,0.0,0.0,0.0,,0.0,0.0,,0.0
NCGC00260869-01,0.0,1.0,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
NCGC00261776-01,1.0,1.0,0.0,,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0
NCGC00261380-01,,0.0,,1.0,0.0,,,1.0,0.0,,0.0,
NCGC00261842-01,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...
NCGC00357168-01,0.0,0.0,0.0,0.0,,0.0,0.0,1.0,0.0,0.0,0.0,0.0
NCGC00357283-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
NCGC00357210-01,0.0,0.0,0.0,0.0,0.0,0.0,,,,0.0,,0.0
NCGC00357118-01,0.0,0.0,0.0,,,0.0,,1.0,0.0,,,1.0
