# Parse Raw Data: LCIA QSAR Project
**Author:** Jacob Kvasnicka <br>
**Date:** February 24, 2023

This module should, in principle, be run once, after which any feature engineering can be done separately.

In [1]:
import pandas as pd
import numpy as np
from os import path 
import parse
from configuration import LciaQsarConfiguration
# Enable modules to be imported from the parent directory.
import sys
sys.path.append('..')
from common import (
    comptox, 
    opera,
    features)

# Name of the chemical-identifier column shared by the parsing steps.
index_col = 'DTXSID'

# Do not truncate columns when a DataFrame is displayed.
pd.options.display.max_columns = None

# Project configuration, constructed from the path and model JSON files.
config = LciaQsarConfiguration(
    'Input/path-configuration.json',
    'Input/model-configuration.json',
)

## Chemical identifiers from CompTox

In [2]:
# Read the CompTox identifiers table, using `index_col` as the row index.
chem_identifiers = pd.read_csv(
    filepath_or_buffer=config.comptox_identifiers_file,
    index_col=index_col,
)

chem_identifiers

Unnamed: 0_level_0,INPUT,FOUND_BY,PREFERRED_NAME,DTXCID,CASRN,INCHIKEY,IUPAC_NAME,SMILES,INCHI_STRING,QSAR_READY_SMILES,MOLECULAR_FORMULA
DTXSID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
DTXSID5020281,DTXSID5020281,DSSTox_Substance_Id,1-Chloro-4-nitrobenzene,DTXCID10281,100-00-5,CZGCEKJOLUNIFY-UHFFFAOYSA-N,1-Chloro-4-nitrobenzene,[O-][N+](=O)C1=CC=C(Cl)C=C1,InChI=1S/C6H4ClNO2/c7-5-1-3-6(4-2-5)8(9)10/h1-...,[O-][N+](=O)C1=CC=C(Cl)C=C1,C6H4ClNO2
DTXSID8020961,DTXSID8020961,DSSTox_Substance_Id,4-Nitrobenzenamine,DTXCID40961,100-01-6,TYMLOMAKGOJONV-UHFFFAOYSA-N,4-Nitroaniline,NC1=CC=C(C=C1)[N+]([O-])=O,InChI=1S/C6H6N2O2/c7-5-1-3-6(4-2-5)8(9)10/h1-4...,NC1=CC=C(C=C1)[N+]([O-])=O,C6H6N2O2
DTXSID0021834,DTXSID0021834,DSSTox_Substance_Id,4-Nitrophenol,DTXCID201834,100-02-7,BTJIUGUIPKRLHP-UHFFFAOYSA-N,4-Nitrophenol,OC1=CC=C(C=C1)[N+]([O-])=O,"InChI=1S/C6H5NO3/c8-6-3-1-5(2-4-6)7(9)10/h1-4,...",OC1=CC=C(C=C1)[N+]([O-])=O,C6H5NO3
DTXSID3032622,DTXSID3032622,DSSTox_Substance_Id,Hymexazol,DTXCID1012622,10004-44-1,KGVPNLBXJKTABS-UHFFFAOYSA-N,"5-Methyl-1,2-oxazol-3(2H)-one",CC1=CC(=O)NO1,"InChI=1S/C4H5NO2/c1-3-2-4(6)5-7-3/h2H,1H3,(H,5...",CC1=CC(=O)NO1,C4H5NO2
DTXSID2044347,DTXSID2044347,DSSTox_Substance_Id,4'-Methoxyacetophenone,DTXCID0024347,100-06-1,NTPLXRHDUXRPNE-UHFFFAOYSA-N,1-(4-Methoxyphenyl)ethan-1-one,COC1=CC=C(C=C1)C(C)=O,InChI=1S/C9H10O2/c1-7(10)8-3-5-9(11-2)6-4-8/h3...,COC1=CC=C(C=C1)C(C)=O,C9H10O2
...,...,...,...,...,...,...,...,...,...,...,...
DTXSID5057882,DTXSID5057882,DSSTox_Substance_Id,CP-939689,DTXCID2031669,NOCAS_57882,PYUQQPZQUSFCHK-UHFFFAOYSA-N,"1-[2-(3,4-Dichlorophenoxy)-5-fluorophenyl]etha...",CC(N)C1=CC(F)=CC=C1OC1=CC=C(Cl)C(Cl)=C1,InChI=1/C14H12Cl2FNO/c1-8(18)11-6-9(17)2-5-14(...,CC(N)C1=CC(F)=CC=C1OC1=CC=C(Cl)C(Cl)=C1,C14H12Cl2FNO
DTXSID5057884,DTXSID5057884,DSSTox_Substance_Id,CJ-013974,DTXCID6031671,NOCAS_57884,LMPBXMBUTBQPJJ-QFBILLFUSA-N,"2-(3,4-Dichlorophenyl)-N-hydroxy-N-{(1S)-2-[(3...",O[C@H]1CCN(C[C@@H](N(O)C(=O)CC2=CC(Cl)=C(Cl)C=...,InChI=1S/C20H22Cl2N2O3/c21-17-7-6-14(10-18(17)...,O[C@H]1CCN(C[C@@H](N(O)C(=O)CC2=CC(Cl)=C(Cl)C=...,C20H22Cl2N2O3
DTXSID0057885,DTXSID0057885,DSSTox_Substance_Id,CP-395919,DTXCID1031672,NOCAS_57885,DJLMIXIBPMWLNC-UHFFFAOYSA-N,"N-[2-(2-Acetamidoethyl)-1,2,3,4-tetrahydroisoq...",CC(=O)NCCN1CCC2=CC(NC(=O)C3=CC=CC=C3C3=CC=C(C=...,InChI=1S/C27H26F3N3O2/c1-18(34)31-13-15-33-14-...,CC(=O)NCCN1CCC2=CC(NC(=O)C3=CC=CC=C3C3=CC=C(C=...,C27H26F3N3O2
DTXSID1057905,DTXSID1057905,DSSTox_Substance_Id,MK 0493,,1021945-00-5,,,,,,


### Define chemicals to exclude from QSAR

In [3]:
# The exclusion list is derived from the QSAR-ready SMILES column; the
# exclusion criteria themselves live in the `comptox` module.
chemicals_to_exclude = comptox.chemicals_to_exclude_from_qsar(
    chem_identifiers['QSAR_READY_SMILES']
)

print(f'{len(chemicals_to_exclude)} chemicals to exclude from QSAR modeling')

1847 chemicals to exclude from QSAR modeling


## Target variable: Surrogate POD [mg/(kg-d)]

In [4]:
# Sheet name and toxicity-metric label passed to the Excel parser.
sheet_name = 'ORAL'
tox_metric = 'POD [mg/kg-d]'

# Map original keys to preferred keys for the return.
effect_mapper = {
    'non-reproductive/developmental effects' : 'general',
    'reproductive/developmental effects' : 'repro_dev'
}

# Parse the raw surrogate PODs. The identifier column name is passed in
# lowercase ('dtxsid').
# NOTE(review): presumably `log10=True` log10-transforms the PODs,
# `chemicals_to_exclude` drops the flagged chemicals, and `write_path` saves
# the parsed table -- confirm against `parse.surrogate_toxicity_values_from_excel`.
surrogate_pods = parse.surrogate_toxicity_values_from_excel(
    config.raw_surrogate_pods_file, 
    sheet_name,
    tox_metric, 
    index_col.lower(), 
    log10=True,
    chemicals_to_exclude=chemicals_to_exclude, 
    effect_mapper=effect_mapper,
    write_path=config.surrogate_pods_file
)

# Show the parsed table as the cell output.
surrogate_pods

Unnamed: 0_level_0,general,repro_dev
DTXSID,Unnamed: 1_level_1,Unnamed: 2_level_1
DTXSID001005033,,2.124155
DTXSID001006300,1.706121,
DTXSID001014636,,2.411296
DTXSID0020076,0.768667,0.435383
DTXSID0020107,,2.452717
...,...,...
DTXSID90894036,,2.013356
DTXSID90904982,,2.236554
DTXSID90916166,1.610567,
DTXSID90946069,,2.168963


## Regulatory PODs (fully adjusted to human equivalent dose) [mg/(kg-d)]

See the tab “Data for Figure 5” – the CASRN are in columns A (general non-cancer) and G (repro/dev effects), and the regulatory PODs (fully adjusted to human equivalent dose) are in columns F and L.

In [5]:
%%time

# Integer positions of the (CASRN, regulatory POD) column pair per effect.
ilocs_for_effect = dict(
    general=[0, 5],
    repro_dev=[6, 11],
)

# CASRN -> index_col lookup, used to replace the original index.
chem_id_for_casrn = (
    chem_identifiers
    .reset_index()
    .loc[:, ['CASRN', index_col]]
    .set_index('CASRN')
    .loc[:, index_col]
    .to_dict()
)

reg_pods = parse.regulatory_toxicity_values_from_csv(
    config.raw_regulatory_pods_file,
    ilocs_for_effect,
    chem_id_for_casrn=chem_id_for_casrn,
    new_chem_id=index_col,
    write_path=config.regulatory_pods_file,
)

reg_pods

CPU times: total: 46.9 ms
Wall time: 35.9 ms


Unnamed: 0_level_0,general,repro_dev
DTXSID,Unnamed: 1_level_1,Unnamed: 2_level_1
DTXSID5020281,-0.891,
DTXSID8020961,0.286,
DTXSID6026080,2.193,
DTXSID0021836,-0.534,
DTXSID3020596,2.028,
...,...,...
DTXSID9021762,,2.682
DTXSID0039229,,1.438
DTXSID5021386,,0.191
DTXSID3020207,,-0.763


## Oral equivalent doses for active ToxCast assays [mg/(kg-d)]

Data prepared by En-Hsuan Lu on May 28, 2023.

In [6]:
%%time

# Data columns requested from the raw ToxCast file.
# NOTE(review): the displayed result names these columns `tox_httk_50` and
# `tox_httk_95`, so the helper presumably replaces '.' with '_' -- confirm.
oed_columns = [
    'tox_httk.50',
    'tox_httk.95'
]

# Parse the oral equivalent doses.
# NOTE(review): presumably `log10=True` log10-transforms the values and
# `write_path` saves the parsed table -- confirm against
# `parse.toxcast_expocast_from_csv`.
oeds = parse.toxcast_expocast_from_csv(
    config.raw_toxcast_oeds_file, 
    index_col, 
    data_columns=oed_columns,
    log10=True,
    write_path=config.toxcast_oeds_file
)
    
# Show the parsed table as the cell output.
oeds

CPU times: total: 15.6 ms
Wall time: 17.9 ms


Unnamed: 0_level_0,tox_httk_50,tox_httk_95
DTXSID,Unnamed: 1_level_1,Unnamed: 2_level_1
DTXSID9034650,-4.465181,-5.456664
DTXSID6024177,-0.965089,-1.341372
DTXSID0020606,0.694738,0.187224
DTXSID7032555,-1.380711,-2.126687
DTXSID5034270,-0.389368,-0.780627
...,...,...
DTXSID3020964,1.435501,0.856183
DTXSID0034930,-1.244328,-1.659792
DTXSID4032615,-1.797070,-2.601392
DTXSID5021831,-0.871298,-1.521485


## Experimental LD50 values

In [7]:
%%time 

# Data column(s) requested from the raw LD50 file.
ld50_columns = [
    'median_LD50'
]

# NOTE(review): the original comment here said an inverse-log10
# transformation is applied to get the original scale. No such argument is
# visible in this call, so the helper presumably does it internally --
# confirm against `parse.experimental_ld50s_from_excel`.
# `chem_identifiers` is passed along with `index_col`; presumably it is used
# to map the raw identifiers onto `index_col` -- verify in the helper.
ld50s = parse.experimental_ld50s_from_excel(
    config.raw_ld50_experimental_file, 
    chem_identifiers, 
    index_col, 
    ld50_columns=ld50_columns, 
    write_path=config.ld50_experimental_file
)

# Show the parsed table as the cell output.
ld50s

CPU times: total: 875 ms
Wall time: 886 ms


Unnamed: 0_level_0,median_LD50
DTXSID,Unnamed: 1_level_1
DTXSID5020281,138.120754
DTXSID8020961,126.969502
DTXSID0021834,69.543651
DTXSID2044347,420.473799
DTXSID9059204,518.581177
...,...
DTXSID60469235,73.338453
DTXSID2033447,134.034040
DTXSID3041794,366.640543
DTXSID6052667,345.720785


## CompTox features: OPERA + TEST predictions

In [8]:
%%time

# Columns of the raw CompTox file that are not kept as features.
columns_to_exclude = [
    'INPUT', 
    'FOUND_BY',
    'PREFERRED_NAME',
    'MONOISOTOPIC_MASS',
    'OPERA_PKAA_OPERA_PRED',
    'OPERA_PKAB_OPERA_PRED'
]
    
# Parse the OPERA + TEST predictions downloaded from CompTox.
# NOTE(review): presumably `log10_pat='LOG'` identifies log10-scale columns
# by name so they can be transformed, `chemicals_to_exclude` drops the
# flagged chemicals, and `write_path` saves the parsed table -- confirm
# against `comptox.opera_test_predictions_from_csv`.
comptox_features = comptox.opera_test_predictions_from_csv(
    config.raw_comptox_features_file, 
    index_col, 
    chemicals_to_exclude=chemicals_to_exclude,
    columns_to_exclude=columns_to_exclude,
    log10_pat='LOG', 
    write_path=config.file_for_features_source['comptox']
)

# Show the parsed table as the cell output.
comptox_features

CPU times: total: 141 ms
Wall time: 142 ms


Unnamed: 0_level_0,AVERAGE_MASS,48HR_DAPHNIA_LC50_MOL/L_TEST_PRED,DENSITY_G/CM^3_TEST_PRED,DEVTOX_TEST_PRED,96HR_FATHEAD_MINNOW_MOL/L_TEST_PRED,FLASH_POINT_DEGC_TEST_PRED,AMES_MUTAGENICITY_TEST_PRED,ORAL_RAT_LD50_MOL/KG_TEST_PRED,SURFACE_TENSION_DYN/CM_TEST_PRED,THERMAL_CONDUCTIVITY_MW/(M*K)_TEST_PRED,TETRAHYMENA_PYRIFORMIS_IGC50_MOL/L_TEST_PRED,VISCOSITY_CP_CP_TEST_PRED,ATMOSPHERIC_HYDROXYLATION_RATE_(AOH)_CM3/MOLECULE*SEC_OPERA_PRED,BIOCONCENTRATION_FACTOR_OPERA_PRED,BIODEGRADATION_HALF_LIFE_DAYS_DAYS_OPERA_PRED,BOILING_POINT_DEGC_OPERA_PRED,HENRYS_LAW_ATM-M3/MOLE_OPERA_PRED,OPERA_KM_DAYS_OPERA_PRED,OCTANOL_AIR_PARTITION_COEFF_KOA_OPERA_PRED,SOIL_ADSORPTION_COEFFICIENT_KOC_L/KG_OPERA_PRED,OCTANOL_WATER_PARTITION_P_OPERA_PRED,MELTING_POINT_DEGC_OPERA_PRED,VAPOR_PRESSURE_MMHG_OPERA_PRED,WATER_SOLUBILITY_MOL/L_OPERA_PRED
DTXSID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
DTXSID5020281,157.550,1.081430e-04,1.372,0.323,5.714790e-05,109.227,0.966,0.007079,,137.950,0.000179,2.46604,7.429490e-13,20.11010,4.84081,242.050,4.947790e-06,0.332365,2.124320e+04,507.6930,246.342872,82.9942,2.203470e-02,1.317610e-03
DTXSID8020961,138.126,6.486340e-05,1.309,0.250,4.385310e-04,141.229,0.642,0.009141,,162.043,0.000520,4.89779,1.480110e-12,3.47311,6.64833,331.672,1.265030e-09,0.306715,3.734737e+06,75.9962,24.436556,145.5400,3.265100e-06,4.815930e-03
DTXSID0021834,139.110,5.395110e-05,1.376,0.281,1.472310e-04,117.588,0.490,0.005140,,152.108,0.000184,7.97995,1.155450e-12,11.35610,4.09570,278.910,4.207650e-10,0.251436,1.297807e+06,233.1200,81.678923,113.5640,9.970590e-05,9.679290e-02
DTXSID3032622,99.089,,1.276,0.639,,36.844,0.372,0.004457,,155.108,,,2.727420e-11,1.46093,4.27009,215.029,9.095360e-08,0.288388,5.393492e+04,10.0605,2.852542,86.5110,1.396390e-03,8.066960e-01
DTXSID2044347,150.177,1.312200e-04,1.072,0.621,2.904020e-04,98.358,0.305,0.004887,36.672,139.705,0.000793,2.58821,7.147800e-12,7.55058,4.91679,248.981,9.683370e-08,0.279812,1.183477e+05,98.5113,55.324819,37.4772,6.500820e-03,1.201570e-02
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
DTXSID5057880,353.860,7.144960e-06,1.256,0.846,1.023290e-06,261.844,0.204,,,,,,1.382820e-11,96.57770,3.35362,415.812,2.941970e-07,1.655430,4.403520e+10,961.6230,892.585855,125.4470,6.420140e-08,5.643740e-07
DTXSID5057882,300.150,2.065380e-07,1.363,0.679,7.211070e-07,187.442,0.313,,,138.484,0.000009,,1.594230e-11,225.96200,3.54031,337.893,2.333810e-07,3.627750,4.228633e+10,8156.8700,7006.481829,161.2890,2.468280e-07,1.982760e-06
DTXSID5057884,409.310,,1.434,0.933,,297.064,0.200,,,,,,2.432130e-11,82.99380,6.80176,375.225,1.494930e-10,0.148214,3.513581e+09,2186.0300,808.946872,181.9450,1.335850e-09,3.015700e-05
DTXSID0057885,481.519,8.035260e-06,1.289,1.027,1.009250e-07,324.706,0.049,,,,,,2.491050e-11,19.04870,5.27344,346.531,6.479200e-10,1.686170,4.748320e+09,161483.0000,309592.192625,168.4210,2.338530e-09,3.245060e-08


## OPERA 2.9 features

### Training chemicals

In [9]:
%%time

# TODO: Move to input_config
def opera_file_namer(name):
    """Return the OPERA 2.9 CSV file name built from `name`.

    Parameters
    ----------
    name : str
        Text placed between the 'OPERA2.9_' prefix and the '.csv' extension.

    Returns
    -------
    str
        The string 'OPERA2.9_' + name + '.csv'.
    """
    return 'OPERA2.9_' + name + '.csv'


# Pattern passed as `log10_pat` to the OPERA parsing helpers.
opera_log10_pat = 'Log'

# Parse the OPERA 2.9 output for the training chemicals. The helper returns
# two objects, unpacked here as the AD flags and the feature table.
# NOTE(review): presumably `opera_file_namer` builds the file names looked up
# under `config.raw_opera_features_dir`, `log10_pat` marks log10-scale
# columns, and `discrete_suffix` is appended to the discrete column names --
# confirm against `opera.parse_data_with_applicability_domains`.
AD_flags_train, opera_features_train = opera.parse_data_with_applicability_domains(
    config.raw_opera_features_dir, 
    config.opera_mapper_file, 
    opera_file_namer, 
    index_name=index_col, 
    discrete_columns=config.discrete_columns_for_source['opera'],
    discrete_suffix=config.discrete_column_suffix,
    log10_pat=opera_log10_pat
)

# Show the feature table as the cell output.
opera_features_train

CPU times: total: 1.95 s
Wall time: 2.21 s


Unnamed: 0_level_0,CERAPP_Ago_pred_discrete,CERAPP_Anta_pred_discrete,CERAPP_Bind_pred_discrete,CoMPARA_Ago_pred_discrete,CoMPARA_Anta_pred_discrete,CoMPARA_Bind_pred_discrete,CATMoS_LD50_pred,FUB_pred,Clint_pred,CACO2_pred,OH_pred,BCF_pred,BioDeg_HalfLife_pred,ReadyBiodeg_pred_discrete,HL_pred,KM_pred,KOA_pred,Koc_pred,P_pred,MP_pred,MolWeight,nbAtoms_discrete,nbHeavyAtoms_discrete,nbC_discrete,nbO_discrete,nbN_discrete,nbAromAtom_discrete,nbRing_discrete,nbHeteroRing_discrete,Sp3Sp2HybRatio,nbRotBd_discrete,nbHBdAcc_discrete,ndHBdDon_discrete,nbLipinskiFailures_discrete,TopoPolSurfAir,MolarRefract,CombDipolPolariz,VP_pred,WS_pred
DTXSID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1
DTXSID5020281,0.0,0.0,0.0,0.0,0.0,0.0,236.0,0.20,399.37,,7.413102e-13,19.952623,,0.0,4.897788e-06,0.331131,2.137962e+04,512.861384,245.470892,83.0,156.993056,14,10,6,2,1,6,1,0,0.000000,1,0,0,0,43.14,9.5056,0.900,2.187762e-02,1.318257e-03
DTXSID8020961,0.0,0.0,0.0,0.0,0.0,0.0,787.0,0.33,23.59,,1.479108e-12,3.467369,,0.0,1.258925e-09,0.309030,3.715352e+06,75.857758,24.547089,146.0,138.042927,16,10,6,2,2,6,1,0,0.000000,1,1,1,0,69.16,8.1121,1.201,3.311311e-06,4.786301e-03
DTXSID0021834,0.0,0.0,0.0,0.0,0.0,0.0,228.0,0.15,25.94,,1.148154e-12,11.481536,,0.0,4.265795e-10,0.251189,1.288250e+06,234.422882,81.283052,113.0,139.026943,15,10,6,3,1,6,1,0,0.000000,1,0,1,0,63.37,6.4546,1.065,1.000000e-04,9.772372e-02
DTXSID3032622,0.0,0.0,0.0,0.0,0.0,0.0,2446.0,0.93,4.35,,2.754229e-11,1.445440,,1.0,1.380384e-09,0.288403,5.370318e+04,10.000000,2.818383,87.0,99.032028,12,7,4,2,1,0,1,1,0.250000,0,1,1,0,38.33,24.3643,0.909,1.380384e-03,8.128305e-01
DTXSID2044347,0.0,0.0,0.0,0.0,0.0,0.0,2606.0,0.15,0.00,-4.92,7.079458e-12,7.585776,4.897788,1.0,6.456542e-07,0.281838,1.174898e+05,97.723722,56.234133,18.0,150.068080,21,11,9,2,0,6,1,0,0.222222,2,1,0,0,26.30,18.0862,1.185,6.606934e-03,1.412538e-02
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
DTXSID6057879,0.0,0.0,0.0,0.0,1.0,1.0,2410.0,0.17,25.37,-4.62,,20.417379,,0.0,1.621810e-08,1.148154,3.311311e+10,891.250938,5248.074602,213.0,345.139865,47,24,19,3,1,12,3,0,0.368421,5,3,1,0,63.78,45.2203,2.828,7.079458e-09,7.943282e-07
DTXSID5057880,0.0,0.0,0.0,0.0,1.0,1.0,2659.0,0.14,20.48,-4.67,,95.499259,,0.0,1.230269e-08,1.659587,4.365158e+10,954.992586,891.250938,132.0,353.085242,43,23,17,3,1,12,2,0,0.294118,5,3,1,0,63.78,46.2861,2.811,5.754399e-10,1.148154e-06
DTXSID5057882,0.0,0.0,0.0,0.0,1.0,1.0,1138.0,0.08,12.07,-5.33,1.584893e-11,223.872114,,0.0,3.467369e-10,3.630781,4.265795e+10,8128.305162,13182.567386,87.0,299.027998,31,19,14,1,1,12,2,0,0.142857,3,1,1,0,35.25,27.2415,1.587,9.120108e-05,1.584893e-06
DTXSID5057884,0.0,0.0,0.0,0.0,1.0,1.0,1296.0,0.03,5.93,-5.26,,83.176377,,0.0,1.548817e-09,0.147911,,2187.761624,354.813389,229.0,408.100748,49,27,20,3,2,12,3,1,0.350000,7,3,2,0,64.01,55.0931,2.670,2.754229e-08,4.265795e-05


### Application chemicals

In [10]:
%%time 

# Unsure where these data belong.
# File names passed to the batch processor below.
# NOTE(review): presumably these are looked up inside each batch
# directory -- confirm against `opera.process_all_batches`.
structures_file_name = "chemical-identifiers.smi"
log_file_name = "log-batch-run.txt"

# Process the OPERA output for the application chemicals, reusing
# `opera_file_namer` and `opera_log10_pat` from the training-chemicals cell.
# The helper returns two objects, unpacked as the AD flags and the features.
AD_flags_app, opera_features_app = opera.process_all_batches(
    config.opera_application_batches_dir, 
    config.opera_mapper_file,
    opera_file_namer,
    structures_file_name, 
    log_file_name, 
    index_name=index_col, 
    discrete_columns=config.discrete_columns_for_source['opera'],
    discrete_suffix=config.discrete_column_suffix,
    log10_pat=opera_log10_pat
)

CPU times: total: 1min 7s
Wall time: 1min 35s


In [22]:
# Compare the number of application chemicals that have OPERA features with
# the number of identifiers extracted from the structures file.
all_chem_ids_file = 'Input/Raw/OPERA/Input/Application/chemical-identifiers.smi'
all_chem_ids = opera.extract_dtxsid_from_structures_file(
    all_chem_ids_file, index_col
)

'{}% ({}) of all chemicals processed'.format(
    round(len(opera_features_app) / len(all_chem_ids) * 100),
    len(opera_features_app),
)

'93% (446602) of all chemicals processed'

### Merge all chemicals

In [13]:
# Remove from the application tables every chemical that is already present
# in the training features.
chem_intersection = [
    chem_id
    for chem_id in opera_features_train.index.intersection(
        opera_features_app.index
    )
]
AD_flags_app = AD_flags_app.drop(index=chem_intersection)
opera_features_app = opera_features_app.drop(index=chem_intersection)

In [14]:
# Destinations for the merged OPERA features and the merged AD flags.
data_write_path = config.file_for_features_source['opera']
flags_write_path = config.opera_AD_file

# Stack the training rows on top of the application rows and write to CSV.
pd.concat(
    objs=[opera_features_train, opera_features_app]
).to_csv(path_or_buf=data_write_path)
pd.concat(
    objs=[AD_flags_train, AD_flags_app]
).to_csv(path_or_buf=flags_write_path)

In [14]:
# Load the saved table of OPERA discrepancies: the first column becomes the
# index and the first two rows form a two-level column header.
discrepancies = pd.read_csv(
    filepath_or_buffer='OPERA-discrepancies.csv',
    header=[0, 1],
    index_col=0,
)

discrepancies

Unnamed: 0_level_0,CERAPP_Ago_pred,CERAPP_Ago_pred,AD_CERAPP_Ago,AD_CERAPP_Ago,AD_index_CERAPP_Ago,AD_index_CERAPP_Ago,Conf_index_CERAPP_Ago,Conf_index_CERAPP_Ago,CERAPP_Anta_pred,CERAPP_Anta_pred,AD_CERAPP_Anta,AD_CERAPP_Anta,AD_index_CERAPP_Anta,AD_index_CERAPP_Anta,Conf_index_CERAPP_Anta,Conf_index_CERAPP_Anta,CERAPP_Bind_pred,CERAPP_Bind_pred,AD_CERAPP_Bind,AD_CERAPP_Bind,AD_index_CERAPP_Bind,AD_index_CERAPP_Bind,Conf_index_CERAPP_Bind,Conf_index_CERAPP_Bind,CoMPARA_Ago_pred,CoMPARA_Ago_pred,AD_CoMPARA_Ago,AD_CoMPARA_Ago,AD_index_CoMPARA_Ago,AD_index_CoMPARA_Ago,Conf_index_CoMPARA_Ago,Conf_index_CoMPARA_Ago,CoMPARA_Anta_pred,CoMPARA_Anta_pred,AD_CoMPARA_Anta,AD_CoMPARA_Anta,AD_index_CoMPARA_Anta,AD_index_CoMPARA_Anta,Conf_index_CoMPARA_Anta,Conf_index_CoMPARA_Anta,CoMPARA_Bind_pred,CoMPARA_Bind_pred,AD_index_CoMPARA_Bind,AD_index_CoMPARA_Bind,Conf_index_CoMPARA_Bind,Conf_index_CoMPARA_Bind,CATMoS_EPA_pred,CATMoS_EPA_pred,CATMoS_GHS_pred,CATMoS_GHS_pred,CATMoS_LD50_pred,CATMoS_LD50_pred,CATMoS_LD50_predRange,CATMoS_LD50_predRange,AD_CATMoS,AD_CATMoS,AD_index_CATMoS,AD_index_CATMoS,Conf_index_CATMoS,Conf_index_CATMoS,FUB_pred,FUB_pred,FUB_predRange,FUB_predRange,AD_FUB,AD_FUB,AD_index_FUB,AD_index_FUB,Conf_index_FUB,Conf_index_FUB,Clint_pred,Clint_pred,Clint_predRange,Clint_predRange,AD_Clint,AD_Clint,AD_index_Clint,AD_index_Clint,Conf_index_Clint,Conf_index_Clint,CACO2_pred,CACO2_pred,CACO2_predRange,CACO2_predRange,AD_CACO2,AD_CACO2,AD_index_CACO2,AD_index_CACO2,Conf_index_CACO2,Conf_index_CACO2,LogOH_pred,LogOH_pred,LogOH_predRange,LogOH_predRange,AD_AOH,AD_AOH,AD_index_AOH,AD_index_AOH,Conf_index_AOH,Conf_index_AOH,LogBCF_pred,LogBCF_pred,BCF_predRange,BCF_predRange,AD_BCF,AD_BCF,AD_index_BCF,AD_index_BCF,Conf_index_BCF,Conf_index_BCF,BioDeg_LogHalfLife_pred,BioDeg_LogHalfLife_pred,BioDeg_predRange,BioDeg_predRange,AD_BioDeg,AD_BioDeg,AD_index_BioDeg,AD_index_BioDeg,Conf_index_BioDeg,Conf_index_BioDeg,ReadyBiodeg_pred,ReadyBiodeg_pred,AD_ReadyBiodeg,AD_ReadyBi
odeg,AD_index_ReadyBiodeg,AD_index_ReadyBiodeg,Conf_index_ReadyBiodeg,Conf_index_ReadyBiodeg,LogHL_pred,LogHL_pred,HL_predRange,HL_predRange,AD_HL,AD_HL,AD_index_HL,AD_index_HL,Conf_index_HL,Conf_index_HL,LogKM_pred,LogKM_pred,KM_predRange,KM_predRange,AD_KM,AD_KM,AD_index_KM,AD_index_KM,Conf_index_KM,Conf_index_KM,LogKOA_pred,LogKOA_pred,KOA_predRange,KOA_predRange,AD_index_KOA,AD_index_KOA,Conf_index_KOA,Conf_index_KOA,LogKoc_pred,LogKoc_pred,Koc_predRange,Koc_predRange,AD_Koc,AD_Koc,AD_index_Koc,AD_index_Koc,Conf_index_Koc,Conf_index_Koc,LogP_pred,LogP_pred,LogP_predRange,LogP_predRange,AD_LogP,AD_LogP,AD_index_LogP,AD_index_LogP,Conf_index_LogP,Conf_index_LogP,MP_pred,MP_pred,MP_predRange,MP_predRange,AD_MP,AD_MP,AD_index_MP,AD_index_MP,Conf_index_MP,Conf_index_MP,MolWeight,MolWeight,nbAtoms,nbAtoms,nbHeavyAtoms,nbHeavyAtoms,nbC,nbC,nbO,nbO,nbAromAtom,nbAromAtom,nbRing,nbRing,nbHeteroRing,nbHeteroRing,Sp3Sp2HybRatio,Sp3Sp2HybRatio,nbRotBd,nbRotBd,nbHBdAcc,nbHBdAcc,ndHBdDon,ndHBdDon,nbLipinskiFailures,nbLipinskiFailures,TopoPolSurfAir,TopoPolSurfAir,MolarRefract,MolarRefract,CombDipolPolariz,CombDipolPolariz,LogVP_exp,LogVP_exp,LogVP_pred,LogVP_pred,VP_predRange,VP_predRange,AD_VP,AD_VP,AD_index_VP,AD_index_VP,Conf_index_VP,Conf_index_VP,LogWS_pred,LogWS_pred,WS_predRange,WS_predRange,AD_WS,AD_WS,AD_index_WS,AD_index_WS,Conf_index_WS,Conf_index_WS
Unnamed: 0_level_1,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL,UI,CL
DTXSID,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2,Unnamed: 33_level_2,Unnamed: 34_level_2,Unnamed: 35_level_2,Unnamed: 36_level_2,Unnamed: 37_level_2,Unnamed: 38_level_2,Unnamed: 39_level_2,Unnamed: 40_level_2,Unnamed: 41_level_2,Unnamed: 42_level_2,Unnamed: 43_level_2,Unnamed: 44_level_2,Unnamed: 45_level_2,Unnamed: 46_level_2,Unnamed: 47_level_2,Unnamed: 48_level_2,Unnamed: 49_level_2,Unnamed: 50_level_2,Unnamed: 51_level_2,Unnamed: 52_level_2,Unnamed: 53_level_2,Unnamed: 54_level_2,Unnamed: 55_level_2,Unnamed: 56_level_2,Unnamed: 57_level_2,Unnamed: 58_level_2,Unnamed: 59_level_2,Unnamed: 60_level_2,Unnamed: 61_level_2,Unnamed: 62_level_2,Unnamed: 63_level_2,Unnamed: 64_level_2,Unnamed: 65_level_2,Unnamed: 66_level_2,Unnamed: 67_level_2,Unnamed: 68_level_2,Unnamed: 69_level_2,Unnamed: 70_level_2,Unnamed: 71_level_2,Unnamed: 72_level_2,Unnamed: 73_level_2,Unnamed: 74_level_2,Unnamed: 75_level_2,Unnamed: 76_level_2,Unnamed: 77_level_2,Unnamed: 78_level_2,Unnamed: 79_level_2,Unnamed: 80_level_2,Unnamed: 81_level_2,Unnamed: 82_level_2,Unnamed: 83_level_2,Unnamed: 84_level_2,Unnamed: 85_level_2,Unnamed: 86_level_2,Unnamed: 87_level_2,Unnamed: 88_level_2,Unnamed: 89_level_2,Unnamed: 90_level_2,Unnamed: 91_level_2,Unnamed: 92_level_2,Unnamed: 93_level_2,Unnamed: 94_level_2,Unnamed: 95_level_2,Unnamed: 96_level_2,Unnamed: 97_level_2,Unnamed: 98_level_2,Unnamed: 99_level_2,Unnamed: 
100_level_2,Unnamed: 101_level_2,Unnamed: 102_level_2,Unnamed: 103_level_2,Unnamed: 104_level_2,Unnamed: 105_level_2,Unnamed: 106_level_2,Unnamed: 107_level_2,Unnamed: 108_level_2,Unnamed: 109_level_2,Unnamed: 110_level_2,Unnamed: 111_level_2,Unnamed: 112_level_2,Unnamed: 113_level_2,Unnamed: 114_level_2,Unnamed: 115_level_2,Unnamed: 116_level_2,Unnamed: 117_level_2,Unnamed: 118_level_2,Unnamed: 119_level_2,Unnamed: 120_level_2,Unnamed: 121_level_2,Unnamed: 122_level_2,Unnamed: 123_level_2,Unnamed: 124_level_2,Unnamed: 125_level_2,Unnamed: 126_level_2,Unnamed: 127_level_2,Unnamed: 128_level_2,Unnamed: 129_level_2,Unnamed: 130_level_2,Unnamed: 131_level_2,Unnamed: 132_level_2,Unnamed: 133_level_2,Unnamed: 134_level_2,Unnamed: 135_level_2,Unnamed: 136_level_2,Unnamed: 137_level_2,Unnamed: 138_level_2,Unnamed: 139_level_2,Unnamed: 140_level_2,Unnamed: 141_level_2,Unnamed: 142_level_2,Unnamed: 143_level_2,Unnamed: 144_level_2,Unnamed: 145_level_2,Unnamed: 146_level_2,Unnamed: 147_level_2,Unnamed: 148_level_2,Unnamed: 149_level_2,Unnamed: 150_level_2,Unnamed: 151_level_2,Unnamed: 152_level_2,Unnamed: 153_level_2,Unnamed: 154_level_2,Unnamed: 155_level_2,Unnamed: 156_level_2,Unnamed: 157_level_2,Unnamed: 158_level_2,Unnamed: 159_level_2,Unnamed: 160_level_2,Unnamed: 161_level_2,Unnamed: 162_level_2,Unnamed: 163_level_2,Unnamed: 164_level_2,Unnamed: 165_level_2,Unnamed: 166_level_2,Unnamed: 167_level_2,Unnamed: 168_level_2,Unnamed: 169_level_2,Unnamed: 170_level_2,Unnamed: 171_level_2,Unnamed: 172_level_2,Unnamed: 173_level_2,Unnamed: 174_level_2,Unnamed: 175_level_2,Unnamed: 176_level_2,Unnamed: 177_level_2,Unnamed: 178_level_2,Unnamed: 179_level_2,Unnamed: 180_level_2,Unnamed: 181_level_2,Unnamed: 182_level_2,Unnamed: 183_level_2,Unnamed: 184_level_2,Unnamed: 185_level_2,Unnamed: 186_level_2,Unnamed: 187_level_2,Unnamed: 188_level_2,Unnamed: 189_level_2,Unnamed: 190_level_2,Unnamed: 191_level_2,Unnamed: 192_level_2,Unnamed: 193_level_2,Unnamed: 194_level_2,Unnamed: 
195_level_2,Unnamed: 196_level_2,Unnamed: 197_level_2,Unnamed: 198_level_2,Unnamed: 199_level_2,Unnamed: 200_level_2,Unnamed: 201_level_2,Unnamed: 202_level_2,Unnamed: 203_level_2,Unnamed: 204_level_2,Unnamed: 205_level_2,Unnamed: 206_level_2,Unnamed: 207_level_2,Unnamed: 208_level_2,Unnamed: 209_level_2,Unnamed: 210_level_2,Unnamed: 211_level_2,Unnamed: 212_level_2,Unnamed: 213_level_2,Unnamed: 214_level_2,Unnamed: 215_level_2,Unnamed: 216_level_2,Unnamed: 217_level_2,Unnamed: 218_level_2,Unnamed: 219_level_2,Unnamed: 220_level_2,Unnamed: 221_level_2,Unnamed: 222_level_2,Unnamed: 223_level_2,Unnamed: 224_level_2,Unnamed: 225_level_2,Unnamed: 226_level_2,Unnamed: 227_level_2,Unnamed: 228_level_2,Unnamed: 229_level_2,Unnamed: 230_level_2,Unnamed: 231_level_2,Unnamed: 232_level_2,Unnamed: 233_level_2,Unnamed: 234_level_2,Unnamed: 235_level_2,Unnamed: 236_level_2,Unnamed: 237_level_2,Unnamed: 238_level_2,Unnamed: 239_level_2,Unnamed: 240_level_2
DTXSID3041663,1.0,,,,0.52,1.0,0.607,0.87,,,,,,,,,,,,,,,,,,,,,,,1.0,0.985,,,,,,,1.0,0.884,,,,,1.0,0.959,,,,,,,[3500-11000],[3500:11000],,,,,0.987,0.623,0.03,,[0.01:0.06],,1.0,0.0,0.432,0.0,0.765,0.0,85.08,,[15.7:441.37],,1.0,0.0,0.372,0.0,0.311,0.0,-4.6,,[-5.05:-4.15],,1.0,0.0,0.234,0.0,0.426,0.0,-10.43,,[-10.57:-10.29],,1.0,0.0,0.251,0.0,0.658,0.0,2.07,,[1.43:2.71],,1.0,0.0,0.48,0.0,0.537,0.0,0.57,,[0.49:0.65],,,,0.443,0.0,0.751,0.0,1.0,,1.0,0.0,0.612138,0.0,0.612356,0.0,-6.0,-5.39,[-6.7:-5.3],[-6.51:-4.27],,,0.698,0.695,0.655,0.618,-0.4,,[-1.16:0.36],,1.0,0.0,0.475,0.0,0.492,0.0,,,,,,,,,2.29,,[2.12:2.46],,1.0,0.0,0.501,0.0,0.748,0.0,4.18,,[4.15:4.21],,1.0,0.0,1.0,0.0,0.898,0.0,40.0,,[39:46],,1.0,0.0,1.0,0.0,0.883,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,-4.99,,[-5:-4.8],,1.0,0.0,1.0,0.0,0.76,0.0,-4.82,-4.69,[-4.89:-4.75],[-5.28:-4.1],,,,,0.948,0.789
DTXSID1020566,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,[70-220],[70:220],,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.590385,0.590385,,,,,,,,,,,,,,,,,,,,,,,2.83,2.72,[2.7:2.96],[2.56:2.88],,,0.915,0.909,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,1.22,,,[1.22:1],[1.2:1.24],,,,,0.984,0.954,,,,,,,,,,
DTXSID5020023,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,[20-72],[20:72],,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.559817,0.559817,,,,,,,,,,,,,,,,,,,,,,,2.11,2.21,[1.8:2.31],[1.78:2.64],,,0.862,0.83,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.0,2.44,,,[2.37:2],[2.37:2.44],,,,,0.977,0.897,,,,,,,,,,
DTXSID2020268,,,,,,,1.0,0.87,,,,,,,,,,,,,,,,,0.0,,,,0.266,0.92,0.603,0.945,0.0,,1.0,0.0,0.216,0.58,0.549,0.674,,,,,0.935,0.959,,,,,,,[140-440],[140:440],,,,,0.89,0.623,0.03,,[0:0.08],,,,0.22,0.0,0.649,0.0,0.0,,[0:7.95],,,,0.184,0.0,0.308,0.0,-4.82,,[-5.51:-4.13],,,,0.162,0.0,0.324,0.0,-11.04,,[-11.68:-10.4],,,,0.235,0.0,0.5,0.0,0.32,,[0.31:0.33],,1.0,0.0,1.0,0.0,0.943,0.0,1.78,,[0.9:2.66],,,,0.222,0.0,0.228,0.0,0.0,,1.0,0.0,0.233433,0.0,0.616717,0.0,-9.15,-7.42,[-10.63:-7.67],[-9.07:-5.81],0.0,1.0,0.193,0.29,,,-0.12,,[-1.39:1.15],,1.0,0.0,0.225,0.0,0.219,0.0,,,,,,,,,2.79,,[2.78:2.83],,1.0,0.0,1.0,0.0,0.867,0.0,2.78,,[1.85:3.71],,1.0,0.0,0.379,0.0,0.472,0.0,209.0,,[206:209],,1.0,0.0,1.0,0.0,0.799,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,-10.15,,[-11.11:-9.19],,1.0,0.0,0.447,0.0,0.507,0.0,-2.05,-2.63,[-2.11:-1.99],[-3.25:-2.01],,,,,0.83,0.793
DTXSID5044786,,,,,,,1.0,0.87,0.0,,,,0.768,1.0,0.884,0.857,,,,,,,0.917,0.847,,,,,,,0.972,0.985,,,,,,,0.889,0.884,,,,,0.821,0.959,,,,,,,[150-480],[150:480],,,,,0.966,0.623,0.08,,[0.02:0.15],,1.0,0.0,0.527,0.0,0.731,0.0,7.81,,[1.34:19.12],,1.0,0.0,0.399,0.0,0.475,0.0,-4.63,,[-4.81:-4.45],,1.0,0.0,0.325,0.0,0.621,0.0,-10.63,,[-10.93:-10.33],,,,0.137,0.0,0.562,0.0,1.95,,[1.27:2.63],,1.0,0.0,0.39,0.0,0.484,0.0,0.53,,[0.48:0.59],,,,0.219,0.0,0.687,0.0,0.0,,1.0,0.0,0.345039,0.0,0.518011,0.0,-8.9,,[-9.92:-7.88],,1.0,0.0,0.569,0.0,0.554,0.0,0.38,,[0.08:0.68],,1.0,0.0,0.321,0.0,0.588,0.0,,,,,,,,,3.08,,[2.37:3.79],,,,0.223,0.0,0.391,0.0,2.91,,[2.91:2.98],,1.0,0.0,1.0,0.0,0.823,0.0,113.0,,[109:117],,1.0,0.0,1.0,0.0,0.88,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,-8.0,-8.24,-8.22,,[-8:-7.91],,1.0,0.0,1.0,0.0,0.722,0.0,-4.8,-4.47,[-4.88:-4.72],[-6.25:-2.69],,,,,0.849,0.509
DTXSID3044338,0.0,,0.0,1.0,0.297,1.0,0.608,0.87,,,,,,,,,,,,,,,,,,,,,,,0.974,0.985,,,,,,,0.975,0.884,,,,,0.897,0.959,,,,,,,[660-2100],[660:2100],,,,,0.64,0.623,0.09,,[0.05:0.13],,1.0,0.0,0.431,0.0,0.757,0.0,4.16,,[1.54:9.26],,1.0,0.0,0.529,0.0,0.615,0.0,-4.63,,[-4.84:-4.42],,1.0,0.0,0.268,0.0,0.597,0.0,-10.78,,[-10.96:-10.56],,,,0.213,0.0,0.598,0.0,1.88,,[1.2:2.56],,1.0,0.0,0.433,0.0,0.487,0.0,0.71,,[0.32:1.1],,1.0,0.0,0.364,0.0,0.538,0.0,0.0,,1.0,0.0,0.444811,0.0,0.722405,0.0,-9.62,-8.02,[-10.46:-8.62],[-9.57:-6.47],,,0.586,0.531,0.583,0.498,0.31,,[0:0.62],,1.0,0.0,0.497,0.0,0.63,0.0,,,,,,,,,2.44,,[1.69:3.19],,1.0,0.0,0.585,0.0,0.504,0.0,3.08,,[2.78:3.38],,1.0,0.0,0.694,0.0,0.778,0.0,123.0,,[108:140],,1.0,0.0,0.602,0.0,0.714,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,-6.87,,[-8:-5.52],,1.0,0.0,0.65,0.0,0.547,0.0,-3.79,-2.84,[-4.28:-3.36],[-3.45:-2.23],,,0.762,0.59,0.743,0.64
DTXSID60746295,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,[1100-3600],[1100:3600],,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,151.0,224.0,[151:153],[184:259],,,,,0.887,0.649,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,[-9.59:-5.31],[-9.43:-5.47],,,,,0.306,0.324,,,,,,,,,,
DTXSID8027032,,,,,,,1.0,0.87,,,,,,,,,,,,,,,,,,,,,,,1.0,0.985,,,,,,,1.0,0.884,,,,,1.0,0.959,,,,,,,[960-3000],[960:3000],,,,,0.938,0.623,0.87,,[0.83:0.91],,,,0.24,0.0,0.666,0.0,0.52,,[0.04:1.19],,1.0,0.0,0.214,0.0,0.568,0.0,-5.15,,[-5.53:-4.77],,,,0.194,0.0,0.472,0.0,-10.55,,[-10.57:-10.53],,1.0,0.0,1.0,0.0,0.988,0.0,0.61,,[0.44:0.78],,1.0,0.0,0.593,0.0,0.794,0.0,0.85,,[0.58:1.12],,,,0.391,0.0,0.575,0.0,1.0,,1.0,0.0,0.628783,0.0,0.659883,0.0,-8.06,-8.22,[-9.09:-7.03],[-8.95:-7.49],,,0.741,0.607,0.595,0.634,-1.18,,[-1.4:-0.88],,1.0,0.0,0.306,0.0,0.581,0.0,,,,,,,,,0.96,,[0.45:1.47],,1.0,0.0,0.617,0.0,0.598,0.0,-0.63,,[-0.69:-0.57],,1.0,0.0,1.0,0.0,0.949,0.0,25.0,,[22:26],,1.0,0.0,1.0,0.0,0.894,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,-0.01,,[-0.11:0],,1.0,0.0,1.0,0.0,0.824,0.0,1.05,-0.45,[1.05:1.05],[-1.45:0.55],,,,,0.998,0.731
DTXSID7034834,,,,,,,1.0,0.87,,,,,,,1.0,0.857,,,,,,,0.972,0.847,,,,,,,1.0,0.985,,,,,,,0.952,0.884,0.0,1.0,0.678,0.88,0.836,0.899,,,,,,,[450-1400],[450:1400],,,,,0.949,0.623,0.85,,[0.8:0.9],,,,0.327,0.0,0.683,0.0,0.68,,[0.02:4.78],,1.0,0.0,0.268,0.0,0.498,0.0,-5.02,,[-5.81:-4.32],,,,0.199,0.0,0.226,0.0,-11.01,,[-11.16:-10.86],,1.0,0.0,0.392,0.0,0.689,0.0,0.83,,[0.31:1.35],,1.0,0.0,0.634,0.0,0.629,0.0,1.37,,[1.08:1.66],,,,0.311,0.0,0.553,0.0,0.0,,1.0,0.0,0.387055,0.0,0.539019,0.0,-7.25,,[-7.34:-7.25],,1.0,0.0,0.998,0.0,0.864,0.0,-0.51,,[-1.09:0.07],,1.0,0.0,0.547,0.0,0.58,0.0,,,,,,,,,1.77,,[1.44:2.1],,1.0,0.0,0.356,0.0,0.611,0.0,-0.81,,[-2.26:0.64],,1.0,0.0,0.637,0.0,0.42,0.0,158.0,,[102:214],,1.0,0.0,0.67,0.0,0.539,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,-6.0,-6.18,-6.37,,[-6.87:-5.87],,1.0,0.0,1.0,0.0,0.815,0.0,-3.51,-1.85,[-3.69:-3.33],[-2.13:-1.57],,,0.998,0.597,0.799,0.746
DTXSID4044531,0.0,,0.0,1.0,0.143,1.0,0.506,0.87,0.0,,0.0,1.0,0.122,1.0,0.47,0.857,,,0.0,1.0,0.318,0.96,0.479,0.827,0.0,,,,0.401,0.92,0.542,0.945,,,,,,,0.941,0.966,,,,,0.891,0.869,,,,,,,[5100-16000],[5100:16000],,,,,0.941,0.612,0.01,,[0.01:0.01],,,,0.154,0.0,0.711,0.0,0.0,,[0:248.04],,,,0.272,0.0,0.1,0.0,-4.48,,[-4.75:-4.21],,,,0.053,0.0,0.485,0.0,-10.86,,[-11.07:-10.65],,,,0.156,0.0,0.579,0.0,2.88,,[2.28:3.48],,1.0,0.0,0.241,0.0,0.461,0.0,2.53,,[2.4:2.66],,,,0.189,0.0,0.614,0.0,0.0,,,,0.225167,0.0,0.612583,0.0,-6.66,,[-7.64:-5.68],,1.0,0.0,0.332,0.0,0.46,0.0,0.11,,[-0.37:0.59],,,,0.077,0.0,0.422,0.0,11.67,,[11.39:11.95],,0.248,0.0,0.595,0.0,4.88,,[4.02:5.74],,,,0.08,0.0,0.275,0.0,3.6,,[2.1:5.9],,1.0,0.0,0.407,0.0,0.207,0.0,451.0,,[374:500],,,,0.322,0.0,0.299,0.0,516.136159,516.136159,,,,,,,,,32.0,0.0,9.0,,0.0,,,,,,2.0,4.0,,,1.0,0.0,52.6,,25.3236,,4.197,,,,-6.72,,[-7.53:-6],,,,0.274,0.0,0.495,0.0,-5.6,-6.04,[-5.67:-5.53],[-7.23:-4.85],,,,,0.837,0.596
