# `targets`

Transform the data set into targets for machine learning and visualization. 

In [6]:
import pandas 
import numpy as np 

# Let cell outputs show up to 1000 rows before pandas truncates the display.
pandas.set_option( 'display.max_rows', 1000 )

In [7]:
# Load the raw data set. The first CSV column becomes the row index,
# which is what the later df.loc[ 'BglB' ] lookups rely on.
df = pandas.read_csv(
    'data_sets/data_set.csv',
    index_col=0,
)


## Targets needed 

The targets we need are: 

- `expression`. Unchanged. 
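- `tm`. We want the $\Delta T_m$ relative to native BglB. 
- `kcat`, `km`, and `kcatkm`. We want the $\log_{10}$ fold change from the native (BglB) value. For `km` the ratio is taken on $1/K_M$, so a positive target means a lower $K_M$ than native. 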

In [8]:
# Index label of the native enzyme row; every target below is relative to it.
native = 'BglB'

# Expression is carried over unchanged.
df[ 'target_expression' ] = df[ 'expression' ]

# Delta Tm: each row's tm minus the native row's tm.
# Native values are read with a single-step .loc[ row, column ] lookup rather
# than chained df.loc[ row ][ column ] indexing, which builds the whole row
# Series first and is the pattern pandas advises against.
df[ 'target_tm' ] = df[ 'tm' ] - df.loc[ native, 'tm' ]

# Kinetic constants: log10 of the ratio to the native value
# (0 means equal to native).
df[ 'target_kcat' ] = np.log10( df[ 'kcat' ] / df.loc[ native, 'kcat' ] )

# km is inverted (1/km) before taking the ratio, so the sign is flipped
# relative to a plain km ratio: positive means a smaller km than native.
df[ 'target_km' ] = np.log10( (1/df[ 'km' ]) / (1/df.loc[ native, 'km' ]) )

df[ 'target_kcatkm' ] = np.log10( df[ 'kcatkm' ] / df.loc[ native, 'kcatkm' ] )

In [12]:
# Lookup table keyed by integer position (1 through 445); every value is a
# float between 0 and 1. The next cell maps it onto df[ 'sequence_pos' ] to
# fill the 'conserved' column.
# NOTE(review): the values look like per-position conservation fractions
# (presumably derived from a sequence alignment), but the source of these
# numbers is not recorded in this notebook — confirm and document provenance.
conserved = {1: 0.057595392368610512,
 2: 0.026334519572953737,
 3: 0.01889763779527559,
 4: 0.0092735703245749607,
 5: 0.81195335276967928,
 6: 0.82278481012658233,
 7: 0.12653061224489795,
 8: 0.018111254851228976,
 9: 0.97104247104247099,
 10: 0.050706033376123234,
 11: 0.64769820971867009,
 12: 0.9621794871794872,
 13: 0.15930902111324377,
 14: 0.15738963531669867,
 15: 0.61583387410772228,
 16: 0.40233614536015572,
 17: 0.2046783625730994,
 18: 0.45884640311082309,
 19: 0.99028497409326421,
 20: 0.35568326947637291,
 21: 0.98849104859335035,
 22: 0.96151379089159716,
 23: 0.22065426555484285,
 24: 0.063480741797432239,
 25: 0.15975103734439833,
 26: 0.48862853204686424,
 27: 0.27194244604316548,
 28: 0.82138200782268578,
 29: 0.37219730941704038,
 30: 0.06458333333333334,
 31: 0.38386648122392214,
 32: 0.71747448979591832,
 33: 0.47770700636942676,
 34: 0.62277353689567427,
 35: 0.89108280254777072,
 36: 0.25308241401687215,
 37: 0.42012987012987013,
 38: 0.1152832674571805,
 39: 0.077844311377245512,
 40: 0.045685279187817257,
 41: 0.41052631578947368,
 42: 0.34670231729055256,
 43: 0.34592680047225499,
 44: 0.22620169651272384,
 45: 0.031847133757961783,
 46: 0.098730606488011283,
 47: 0.36049926578560942,
 48: 0.16547696301103179,
 49: 0.0077269800386349004,
 50: 0.4829090909090909,
 51: 0.42877906976744184,
 52: 0.59745762711864403,
 53: 0.72560202788339667,
 54: 0.27485750474984166,
 55: 0.82583913869537684,
 56: 0.33058898036732109,
 57: 0.036732108929702342,
 58: 0.6901140684410646,
 59: 0.15842839036755388,
 60: 0.097839898348157567,
 61: 0.43690551680405831,
 62: 0.67997465145754121,
 63: 0.98353388220392657,
 64: 0.30754597336715284,
 65: 0.091312618896639192,
 66: 0.77157360406091369,
 67: 0.44233206590621038,
 68: 0.34474017743979724,
 69: 0.063371356147021551,
 70: 0.41038632045598478,
 71: 0.89043698543381888,
 72: 0.26299112801013941,
 73: 0.0019011406844106464,
 74: 0.029784537389100127,
 75: 0.61406844106463876,
 76: 0.98479087452471481,
 77: 0.58961367954401522,
 78: 0.91956934768841037,
 79: 0.12286257124762508,
 80: 0.35104364326375709,
 81: 0.98608475648323846,
 82: 0.23354430379746835,
 83: 0.98797468354430384,
 84: 0.6335443037974684,
 85: 0.058823529411764705,
 86: 0.90177439797211656,
 87: 0.024453024453024452,
 88: 0.021390374331550801,
 89: 0.88288288288288286,
 90: 0.043859649122807015,
 91: 0.13479623824451412,
 92: 0.84040531982267253,
 93: 0.36311787072243346,
 94: 0.26742712294043092,
 95: 0.83533882203926535,
 96: 0.48385053831538949,
 97: 0.0094876660341555973,
 98: 0.64791403286978511,
 99: 0.97661188369152974,
 100: 0.06637168141592921,
 101: 0.012642225031605562,
 102: 0.53413400758533502,
 103: 0.089759797724399501,
 104: 0.60493046776232617,
 105: 0.50695322376738305,
 106: 0.038558786346396964,
 107: 0.025284450063211124,
 108: 0.015782828282828284,
 109: 0.12878787878787878,
 110: 0.71780303030303028,
 111: 0.07575757575757576,
 112: 0.028409090909090908,
 113: 0.90593434343434343,
 114: 0.13320707070707072,
 115: 0.084595959595959599,
 116: 0.84217171717171713,
 117: 0.75820707070707072,
 118: 0.42297979797979796,
 119: 0.9867424242424242,
 120: 0.45265151515151514,
 121: 0.68982943777637395,
 122: 0.47664141414141414,
 123: 0.99873817034700318,
 124: 0.39078282828282829,
 125: 0.1167929292929293,
 126: 0.041692987997473153,
 127: 0.38036410923276981,
 128: 0.33115183246073299,
 129: 0.22492401215805471,
 130: 0.84803605924018033,
 131: 0.9213483146067416,
 132: 0.78164556962025311,
 133: 0.10484927916120576,
 134: 0.006333122229259025,
 135: 0.58592263792010146,
 136: 0.24065864471184295,
 137: 0.25316455696202533,
 138: 0.32237673830594182,
 139: 0.02718078381795196,
 140: 0.059418457648546141,
 141: 0.75079063883617958,
 142: 0.032890575585072739,
 143: 0.021505376344086023,
 144: 0.75822784810126587,
 145: 0.70886075949367089,
 146: 0.030341340075853349,
 147: 0.26422250316055623,
 148: 0.1080227416298168,
 149: 0.04421983575489577,
 150: 0.087121212121212127,
 151: 0.38005050505050503,
 152: 0.45299684542586749,
 153: 0.6145431145431145,
 154: 0.041348600508905854,
 155: 0.48732572877059571,
 156: 0.06683480453972257,
 157: 0.027742749054224466,
 158: 0.013240857503152586,
 159: 0.86893509766855703,
 160: 0.0069313169502205419,
 161: 0.88909892879647134,
 162: 0.17076244486452427,
 163: 0.98299748110831231,
 164: 1.0,
 165: 0.59357277882797732,
 166: 0.029392971246006389,
 167: 0.16133162612035851,
 168: 0.04158669225847729,
 169: 0.1329073482428115,
 170: 0.060903732809430254,
 171: 0.25463851567498402,
 172: 0.70089858793324777,
 173: 0.50835475578406175,
 174: 0.17945383615084526,
 175: 0.088621444201312904,
 176: 0.86482758620689659,
 177: 0.10699588477366255,
 178: 0.46393210749646391,
 179: 0.52744425385934823,
 180: 0.86461538461538456,
 181: 0.58497011101622542,
 182: 0.068627450980392163,
 183: 0.039473684210526314,
 184: 0.13895993179880647,
 185: 0.02004008016032064,
 186: 0.045210727969348656,
 187: 0.4021887824897401,
 188: 0.24317295188556567,
 189: 0.10102960102960103,
 190: 0.060952380952380952,
 191: 0.4674256799493991,
 192: 0.33080328905755851,
 193: 0.7462025316455696,
 194: 0.5104364326375711,
 195: 0.085281111813013261,
 196: 0.46809854706253951,
 197: 0.011378002528445006,
 198: 0.023388116308470291,
 199: 0.60910815939278939,
 200: 0.23023402909550916,
 201: 0.015812776723592662,
 202: 0.71979759645793806,
 203: 0.010113780025284451,
 204: 0.04743833017077799,
 205: 0.23402909550917142,
 206: 0.0018975332068311196,
 207: 0.14953933380581147,
 208: 0.1683673469387755,
 209: 0.12847222222222221,
 210: 0.077288941736028544,
 211: 0.044499381953028432,
 212: 0.016694490818030049,
 213: 0.18992248062015504,
 214: 0.30872913992297818,
 215: 0.56943563728598601,
 216: 0.88227848101265827,
 217: 0.38544303797468354,
 218: 0.22278481012658227,
 219: 0.43074003795066412,
 220: 0.36185044359949303,
 221: 0.027027027027027029,
 222: 0.0567741935483871,
 223: 0.014857881136950904,
 224: 0.078024337866857557,
 225: 0.031383737517831668,
 226: 0.11476466795615732,
 227: 0.23901808785529716,
 228: 0.21171171171171171,
 229: 0.12698412698412698,
 230: 0.030678851174934726,
 231: 0.090196078431372548,
 232: 0.30870712401055411,
 233: 0.83368128044537226,
 234: 0.2919614147909968,
 235: 0.14678899082568808,
 236: 0.73185088293001965,
 237: 0.6086387434554974,
 238: 0.021625163826998691,
 239: 0.34512115258677145,
 240: 0.047405509288917361,
 241: 0.28920308483290491,
 242: 0.087651951375559825,
 243: 0.1318051575931232,
 244: 0.032303370786516857,
 245: 0.43258426966292135,
 246: 0.30410183875530411,
 247: 0.59463276836158196,
 248: 0.54148195060164661,
 249: 0.1390644753476612,
 250: 0.10183428209993675,
 251: 0.50641025641025639,
 252: 0.19138149556400508,
 253: 0.14586070959264127,
 254: 0.050228310502283102,
 255: 0.92833662064431299,
 256: 0.09526938239159001,
 257: 0.84407894736842104,
 258: 0.87123862841147659,
 259: 0.1543046357615894,
 260: 0.12881806108897742,
 261: 0.30587440918298447,
 262: 0.091823056300268102,
 263: 0.16838487972508592,
 264: 0.047058823529411764,
 265: 0.10882352941176471,
 266: 0.079518072289156624,
 267: 0.012437810945273632,
 268: 0.027355623100303952,
 269: 0.0845771144278607,
 270: 0.04924242424242424,
 271: 0.019138755980861243,
 272: 0.13202933985330073,
 273: 0.036876355748373099,
 274: 0.050932568149210905,
 275: 0.14047287899860919,
 276: 0.080711354309165526,
 277: 0.21907894736842104,
 278: 0.32200000000000001,
 279: 0.63786279683377312,
 280: 0.10178453403833443,
 281: 0.31639871382636658,
 282: 0.26611796982167352,
 283: 0.39243986254295532,
 284: 0.047193877551020405,
 285: 0.14456140350877192,
 286: 0.37862796833773088,
 287: 0.0052805280528052806,
 288: 0.95523329129886503,
 289: 0.65321563682219419,
 290: 0.33459357277882795,
 291: 0.73282923755513552,
 292: 0.22936357908002519,
 293: 0.65994962216624686,
 294: 0.68998109640831762,
 295: 1.0,
 296: 0.28085642317380355,
 297: 0.16539923954372623,
 298: 0.11064425770308123,
 299: 0.05182072829131653,
 300: 0.12800565770862801,
 301: 0.15051311288483465,
 302: 0.093877551020408165,
 303: 0.032626427406199018,
 304: 0.071523178807947022,
 305: 0.07672849915682968,
 306: 0.086021505376344093,
 307: 0.081871345029239762,
 308: 0.1044776119402985,
 309: 0.27659574468085107,
 310: 0.053140096618357488,
 311: 0.082840236686390539,
 312: 0.11521547933157432,
 313: 0.026839826839826841,
 314: 0.11065989847715736,
 315: 0.012326656394453005,
 316: 0.017730496453900711,
 317: 0.046015712682379348,
 318: 0.11363636363636363,
 319: 0.24216959511077157,
 320: 0.090019569471624261,
 321: 0.65325670498084287,
 322: 0.15379008746355685,
 323: 0.22559366754617413,
 324: 0.23350923482849603,
 325: 0.85640695428203473,
 326: 0.25410958904109587,
 327: 0.59004092769440653,
 328: 0.021949644932214331,
 329: 0.89929441949967925,
 330: 0.19252691576947434,
 331: 0.045512010113780026,
 332: 0.14032869785082175,
 333: 0.23175416133162613,
 334: 0.11794546607482562,
 335: 0.26405559065066331,
 336: 0.70202020202020199,
 337: 0.075173720783322809,
 338: 0.16560913705583757,
 339: 0.2484197218710493,
 340: 0.0070558050032071837,
 341: 0.068769716088328076,
 342: 0.065189873417721519,
 343: 0.046954314720812185,
 344: 0.020338983050847456,
 345: 0.048192771084337352,
 346: 0.16610398379473329,
 347: 0.26481084939329053,
 348: 0.74804177545691908,
 349: 0.35964353914704011,
 350: 0.080655324511657217,
 351: 0.65680100755667503,
 352: 0.57997481108312343,
 353: 1.0,
 354: 0.93131695022054195,
 355: 0.98358585858585856,
 356: 0.13074901445466491,
 357: 0.45942720763723149,
 358: 0.053619302949061663,
 359: 0.026392961876832845,
 360: 0.59699248120300752,
 361: 0.14193548387096774,
 362: 0.25366876310272535,
 363: 0.07716049382716049,
 364: 0.077889447236180909,
 365: 0.60851063829787233,
 366: 0.058035714285714288,
 367: 0.11074380165289256,
 368: 0.11506140917905623,
 369: 0.9267676767676768,
 370: 0.12216884008236102,
 371: 0.030710172744721688,
 372: 0.97220467466835125,
 373: 0.0012634238787113076,
 374: 0.014529374605180037,
 375: 0.72411616161616166,
 376: 0.14015151515151514,
 377: 0.060606060606060608,
 378: 0.14574132492113565,
 379: 0.70870113493064313,
 380: 0.5100882723833543,
 381: 0.12113564668769716,
 382: 0.36593059936908517,
 383: 0.034722222222222224,
 384: 0.13969658659924147,
 385: 0.17405063291139242,
 386: 0.0050632911392405064,
 387: 0.54446577095329496,
 388: 0.22073342736248236,
 389: 0.20331421287444232,
 390: 0.92639593908629436,
 391: 0.0050697084917617234,
 392: 0.017121116043119847,
 393: 0.2758402029169309,
 394: 0.17501585288522511,
 395: 0.93468611287254277,
 396: 0.93274111675126903,
 397: 0.36104060913705582,
 398: 0.16740646797717185,
 399: 0.98732572877059566,
 400: 0.59378960709759188,
 401: 0.17184527584020293,
 402: 0.26125554850982879,
 403: 0.99176172370088722,
 404: 0.53138871274571975,
 405: 0.43599493029150826,
 406: 0.55323193916349811,
 407: 0.64661177960734639,
 408: 0.25380710659898476,
 409: 0.017408123791102514,
 410: 0.80257234726688098,
 411: 0.56618112729575676,
 412: 0.21659278024065864,
 413: 0.50126903553299496,
 414: 0.89017341040462428,
 415: 0.51614946168461051,
 416: 0.99619530754597341,
 417: 0.1534559289790742,
 418: 0.33861762840837034,
 419: 0.15619047619047619,
 420: 0.072380952380952379,
 421: 0.12515883100381195,
 422: 0.27902501603592045,
 423: 0.15985663082437276,
 424: 0.24943820224719102,
 425: 0.24145454545454545,
 426: 0.145236508994004,
 427: 0.96770025839793283,
 428: 0.28526048284625161,
 429: 0.41142857142857142,
 430: 0.87936507936507935,
 431: 0.03619047619047619,
 432: 0.94349206349206349,
 433: 0.35768742058449809,
 434: 0.091968911917098439,
 435: 0.73583662714097497,
 436: 0.20216836734693877,
 437: 0.39436619718309857,
 438: 0.098360655737704916,
 439: 0.040055248618784532,
 440: 0.017482517482517484,
 441: 0.29377593360995852,
 442: 0.033950617283950615,
 443: 0.49693721286370596,
 444: 0.54430379746835444,
 445: 0.16573816155988857}

In [13]:
# Look up each row's sequence position in the `conserved` table; positions
# that are missing from the table (or are NaN) come out as NaN.
conserved_by_row = df[ 'sequence_pos' ].map( conserved )
df[ 'conserved' ] = conserved_by_row

In [14]:
# Sanity check: summary statistics for the frame, including the new
# target_* and conserved columns (by default describe() summarizes the
# numeric columns). Kept as the cell's last expression so it is displayed.
df.describe()



Unnamed: 0,sequence_pos,expression,tm,k,err_tm,err_k,kcat,err_kcat,km,err_km,...,ki,ki_percent_err,ki_err,gel_number,target_expression,target_tm,target_kcat,target_km,target_kcatkm,conserved
count,128.0,129.0,78.0,78.0,78.0,78.0,75.0,75.0,75.0,75.0,...,8.0,8.0,8.0,128.0,129.0,78.0,75.0,75.0,91.0,128.0
mean,241.53125,0.705426,39.525256,-0.855,0.258077,0.15859,546.626667,13.242667,11.096133,1.002267,...,227.7725,35.595,111.18625,10.390625,0.705426,-0.404744,-0.709503,-0.169143,-1.482423,0.49922
std,122.181737,0.457628,1.666047,0.373265,0.142656,0.157115,1270.32611,31.536556,13.526521,1.19479,...,167.270111,22.658287,165.916493,5.749679,0.457628,1.666047,0.848192,0.392397,1.58674,0.355953
min,12.0,0.0,34.91,-1.73,0.04,0.02,1.0,0.1,0.4,0.02,...,95.24,13.7,13.05,1.0,0.0,-5.02,-2.944483,-1.251297,-4.240549,0.001263
25%,,0.0,,,,,,,,,...,,,,,0.0,,,,,
50%,,1.0,,,,,,,,,...,,,,,1.0,,,,,
75%,,1.0,,,,,,,,,...,,,,,1.0,,,,,
max,423.0,1.0,45.99,-0.26,0.71,0.86,11011.0,258.0,89.18,5.89,...,590.71,86.56,511.32,21.0,1.0,6.06,1.097344,1.09691,0.95535,1.0


In [15]:
# Persist the frame, index included, with the added target_* and conserved
# columns, to the current working directory.
df.to_csv( path_or_buf='targets.csv' )