### Predict LQTS Diagnosis Probability Using Structure, Function, and *In Silico* Features

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [7]:
# Load the residue-pair table from 'updated_h2dist.csv' (path is relative to
# the notebook's working directory).
# NOTE(review): `updated_h2dist` is also assigned from
# 'data/Covariates/herg-correlation-analysis.csv' in another cell, and the
# execution counts are not sequential -- confirm which file is in memory
# before trusting the outputs of the cells that follow.
updated_h2dist = pd.read_csv('updated_h2dist.csv')

In [11]:
# Display the loaded table; the rendered output abbreviates the middle rows.
# NOTE(review): the 'Unnamed: 0.1' / 'Unnamed: 0' columns in the output look
# like index columns written out by earlier saves -- confirm they are not needed.
updated_h2dist

Unnamed: 0.1,Unnamed: 0,V1,V2,V3,V4,V5,V7,V8,V5.2,V7.2
0,2,1,1,BC,right,1.0000,1.0000,0.0,2.910000,0.878500
1,3,1,2,DD,intra,0.5183,0.1449,3.9,5.614509,6.062802
2,4,1,3,DD,intra,0.3910,0.1069,5.7,7.442455,8.217961
3,5,1,4,BB,intra,0.2733,0.0837,10.2,10.647640,10.495818
4,6,1,5,DD,intra,0.2727,0.0714,9.4,10.671067,12.303922
...,...,...,...,...,...,...,...,...,...,...
56194,388126,863,859,CC,intra,0.3262,0.0924,12.5,8.920907,9.507576
56195,388127,863,860,CC,intra,0.3266,0.0927,9.5,8.909982,9.476807
56196,388128,863,861,CC,intra,0.5123,0.1406,5.8,5.680265,6.248222
56197,388129,863,862,CC,intra,0.5118,0.1430,3.9,5.685815,6.143357


In [22]:
# Column labels of the table, as a plain Python list.
updated_h2dist.columns.tolist()

['Unnamed: 0', 'V1', 'V2', 'V3', 'V4', 'V5', 'V7', 'V8', 'V5.2', 'V7.2']

In [12]:
# V1 and V2 hold the residue numbers of each pair; list the distinct V1 values.
pd.unique(updated_h2dist["V1"])

array([  1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,
        14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,
        27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,
        40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,
        53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,  65,
        66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,
        79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,
        92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103, 104,
       105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117,
       118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130,
       131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 381, 382, 383,
       384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396,
       397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409,
       410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 42

In [23]:
# Distinct residue numbers in V2, in order of first appearance (not sorted).
pd.unique(updated_h2dist["V2"])

array([  1,   2,   3,   4,   5,   6, 399, 400, 401, 402, 403, 404, 405,
       406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 466, 469, 470,
       471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483,
       484, 489, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543,
       544, 545, 546, 547, 548, 549, 550, 552, 668, 669, 670, 671, 672,
       673, 674, 675, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685,
       689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701,
       702, 703, 704, 705, 706, 707, 708,   7, 398, 488, 687, 709, 710,
       720, 724, 727, 728, 764, 827,   8, 485, 486, 493, 765, 767,   9,
        10,  12,  13, 487, 490, 492, 686, 711, 721, 723, 725, 726, 761,
       762, 763, 766, 768, 782, 783, 784, 785, 786, 787, 788, 823, 824,
       825, 826, 828, 829, 830,  11,  14, 822,  15,  16,  17,  18,  19,
        20, 118, 388, 391, 394, 769, 789, 790, 795, 797, 798, 799, 800,
        21,  22,  23,  31,  32,  33,  41,  42,  43,  44,  45, 11

In [24]:
# Distinct two-letter codes stored in V3 (e.g. 'BC', 'DD').
# NOTE(review): V3 lines up with the `chains` column of
# herg-correlation-analysis.csv -- presumably a chain pair; confirm.
pd.unique(updated_h2dist["V3"])

array(['BC', 'DD', 'BB', 'CC', 'AA', 'CD', 'DA', 'BD', 'CB', 'DC', 'BA',
       'AD', 'AB', 'DB'], dtype=object)

In [25]:
# Distinct labels stored in V4 (this column lines up with `type` in
# herg-correlation-analysis.csv).
pd.unique(updated_h2dist["V4"])

array(['right', 'intra', 'oppo', 'left'], dtype=object)

In [31]:
# V5 and V7 are the degree-of-correlation columns. In the previewed rows
# (apart from the residue 1-1 row) V5 equals `max` and V7 equals `mean` --
# not `median` -- from herg-correlation-analysis.csv, e.g. 0.5183 / 0.1449
# for residues 1 and 2.
pd.unique(updated_h2dist["V5"])

array([1.    , 0.5183, 0.391 , ..., 0.3957, 0.4207, 0.5391])

In [27]:
# Distinct values of the V7 correlation column.
pd.unique(updated_h2dist["V7"])

array([1.    , 0.1449, 0.1069, ..., 0.1714, 0.1801, 0.1618])

In [28]:
# V8 is the distance between residues V1 and V2 (it matches the `distance`
# column of herg-correlation-analysis.csv in the previewed rows).
# NOTE(review): units are not stated in this notebook -- confirm.
pd.unique(updated_h2dist["V8"])

array([ 0. ,  3.9,  5.7, 10.2,  9.4, 12. , 18.2, 15.5, 12.4,  9.2,  7.9,
       11.6, 13. , 15.2, 12.7, 11. , 14.6, 12.5, 13.7, 17.3, 17.6, 16.5,
       19. , 18. , 16.8, 19.3, 17.9, 14. , 10.8, 10. , 13.3, 11.3, 15.1,
       13.9, 12.8, 15. , 16.7, 19.8, 17.8, 14.9, 16. , 14.1, 10.5,  8.3,
        6.4,  8.9, 10.1,  6.1,  8. , 17.4, 16.1, 12.1, 13.4,  9.9,  9.1,
       12.3, 11.9,  8.6, 11.1, 14.8, 16.2, 16.3, 17.5, 10.7, 11.2,  7.1,
       13.8, 12.9,  6.7,  7.6, 13.1,  8.5, 11.7, 11.4, 14.4,  7.2,  8.7,
       15.7, 20.8, 15.4,  9.3,  8.8, 12.6, 16.9, 13.5, 20.4, 18.6, 17. ,
       18.8, 13.6,  6.8,  9.8,  9. , 10.6, 15.9, 14.2,  9.5,  9.7, 14.3,
       10.3,  8.1, 14.7,  4.7,  6.3,  6.2,  5.3, 11.8, 11.5, 15.3, 19.5,
       18.9,  3.8,  6.9, 12.2, 18.4, 19.7,  7.3,  7.7, 10.9, 19.6, 20.5,
       20. , 20.3, 14.5,  6.6, 17.1, 13.2,  8.4,  5.8, 16.4, 20.1, 20.2,
       15.8, 17.2, 18.3,  5.6, 18.7, 20.7, 18.5, 15.6, 19.1, 19.9, 19.2,
       17.7,  7.8,  5.1, 16.6, 20.6,  8.2, 10.4,  5

In [29]:
# Distinct values of V5.2.
# NOTE(review): how V5.2 is derived is not shown in these cells -- confirm.
pd.unique(updated_h2dist["V5.2"])

array([2.91      , 5.61450897, 7.44245524, ..., 7.3540561 , 6.91704302,
       5.39788536])

In [30]:
# Distinct values of V7.2.
# NOTE(review): how V7.2 is derived is not shown in these cells -- confirm.
pd.unique(updated_h2dist["V7.2"])

array([0.8785    , 6.06280193, 8.21796071, ..., 5.12543757, 4.87784564,
       5.42954265])

In [3]:
# Load the correlation-analysis table, whose columns carry descriptive names
# (resA, resB, chains, type, max, median, mean, distance).
# NOTE(review): this rebinds `updated_h2dist`, the same name used for the
# frame read from 'updated_h2dist.csv'; a distinct variable name would avoid
# ambiguity about which table later cells operate on.
updated_h2dist = pd.read_csv('data/Covariates/herg-correlation-analysis.csv')

In [4]:
# Display the loaded table; the rendered output abbreviates the middle rows.
updated_h2dist

Unnamed: 0,resA,resB,chains,type,max,median,mean,distance
0,1,1,BC,right,0.0516,0.01695,0.0204,40.0
1,1,2,DD,intra,0.5183,0.03140,0.1449,3.9
2,1,3,DD,intra,0.3910,0.03400,0.1069,5.7
3,1,4,BB,intra,0.2733,0.03255,0.0837,10.2
4,1,5,DD,intra,0.2727,0.03200,0.0714,9.4
...,...,...,...,...,...,...,...,...
388124,863,859,CC,intra,0.3262,0.03200,0.0924,12.5
388125,863,860,CC,intra,0.3266,0.02580,0.0927,9.5
388126,863,861,CC,intra,0.5123,0.03205,0.1406,5.8
388127,863,862,CC,intra,0.5118,0.03395,0.1430,3.9
