In [1]:
import numpy as np
import pandas as pd

In [2]:
# Print the README shipped with the dataset. Done in pure Python rather than
# `!cat` so the cell also runs where no POSIX shell tools are available.
# errors="replace" keeps the cell from failing on any byte that is not valid
# UTF-8 (the file's encoding is not known from here).
with open("pdtc/readme.txt", encoding="utf-8", errors="replace") as readme_file:
    print(readme_file.read())

Bruna et al, Cell 2016 PDTX data v1.1 14th September 2016.
This repository contains summarised data and plots from the Caldas Lab PDTX cohort described in the paper Bruna et al, Cell 2016. Note that these data is updated with new models/new analysis, therefore there might be slight differences with the paper.

Scripts:
Please note that the scripts are included for illustrative purposes only, and they might need tweaking (directory names, etc) to make them run as they are.

Supplementary Items:

SNVsPlots.pdf:Whole exome sequencing data. Page 1: Pearson correlation of SNVs. Page 2: Spearman correlation of SNVs. Pages 3-32: Pairwise scatterplots showing variant allele frequencies in each model.Pages 33-66: Mutational profiles in each sample.CNPlots.pdf:Shallow whole genome sequencing data. Page 1: Pearson correlation of copy number log-ratios. Page 2: Spearman correlation of copy number log-ratios. Pages 3-134: Copy number plots for each of the samples.GeneExpression

In [3]:
!tree pdtc | grep txt

├── [00mCNAModels.txt[0m
├── [00mCNASamples.txt[0m
├── [00mDrugResponsesAUCModels.txt[0m
├── [00mDrugResponsesAUCSamples.txt[0m
├── [00mExpressionModels.txt[0m
├── [00mExpressionSamples.txt[0m
├── [00mPromoterMethylationModels.txt[0m
├── [00mPromoterMethylationSamples.txt[0m
├── [00mRawDataDrugsSingleAgents.txt[0m
├── [00mSNVsModels.txt[0m
├── [00mSNVsSamples.txt[0m
└── [00mreadme.txt[0m


# Drug response (AUC / IC50)

In [4]:
# Load the model-level drug-response table (tab-separated).
dr = pd.read_csv("pdtc/DrugResponsesAUCModels.txt", sep="\t")
print(dr.shape)

# Collect the distinct model identifiers once and report both the values and
# how many there are.
unique_models = dr["Model"].unique()
print("Unique model: ", unique_models)
print("Unique model num:", len(unique_models))
dr

(1636, 7)
Unique model:  ['HCI001' 'HCI002' 'HCI005' 'HCI008' 'HCI009' 'HCI010' 'HCI011' 'IC007'
 'STG139' 'STG139M' 'STG143' 'STG195' 'STG201' 'STG282' 'STG316' 'STG335'
 'VHIO098' 'VHIO169' 'VHIO179' 'VHIO244']
Unique model num: 20


Unnamed: 0,Model,Drug,AUC,iC50,D1_CONC,D5_CONC,perc.iC50
0,HCI001,(5Z)-7-Oxozeaenol,0.217813,5.483270e+00,7.701322,2.337740,71.507443
1,HCI001,17-AAG,0.301919,1.834974e+04,1.000000,0.003906,183.038003
2,HCI001,5-Fluorouracil,0.494210,1.350264e+00,20.000000,0.078125,51.391421
3,HCI001,681640,0.102932,1.521258e+02,2.000000,0.007812,150.681089
4,HCI001,ABT-263,0.497307,1.246564e-01,2.000000,0.007812,49.591347
...,...,...,...,...,...,...,...
1631,VHIO244,Vorinostat,0.560835,4.783231e-01,10.000000,0.039062,44.636232
1632,VHIO244,XAV 939,0.080441,1.033271e+05,5.581430,1.175631,702.942004
1633,VHIO244,YK 4-279,0.216032,8.690911e+00,7.701322,2.337740,79.903643
1634,VHIO244,ZM-447439,0.127893,1.493730e+06,3.275568,0.740057,587.743723


# Expression

In [5]:
# Load the per-sample expression table, keyed by the "Gene" column.
exp = pd.read_csv(
    "pdtc/ExpressionSamples.txt",
    sep="\t",
    index_col="Gene",
)
# Rebuild the index from a plain list of its labels, which leaves the index
# without the "Gene" name it got from read_csv.
exp.index = exp.index.tolist()
exp

Unnamed: 0,AB521-T,AB521M-T,AB521M-TR,AB521M-X1,AB521M-X1R1,AB521M-XC7,AB551-T,AB551-TR,AB551-X0R,AB551-X1,...,VHIO131-X,VHIO161-X6,VHIO169-X5,VHIO169-XC7,VHIO179-X10C7,VHIO179-X7,VHIO179-X7C,VHIO179-X7R,VHIO179-X9,VHIO244-X4
RNF165,7.962514,7.510755,8.474643,8.705916,8.794421,8.000047,6.195396,6.254949,6.071341,6.123061,...,6.215167,6.156368,7.171175,7.511222,6.103678,6.963911,6.595537,6.590126,6.863109,6.676043
RMRP,6.269633,5.834819,5.886496,5.996797,6.030305,6.014519,6.232428,5.755409,5.964700,6.284483,...,5.860998,6.085601,6.012846,6.104872,7.270124,6.039016,5.874704,6.006936,5.739858,5.707880
BC033982,5.861072,5.835638,5.862216,5.759493,5.982095,6.014935,6.019519,5.931477,6.002527,5.776786,...,6.074940,5.916300,5.748262,5.710017,5.804132,5.711673,5.768193,5.857741,5.840177,5.538693
PHF7,6.220150,6.093939,6.472200,6.302795,6.346807,6.200309,6.523679,6.099005,6.525307,6.678793,...,6.109771,6.238810,6.478422,6.513064,6.672457,6.519513,6.449631,6.345546,6.345090,6.444590
BCAP29,7.604451,7.644064,6.987816,7.230650,7.278334,8.180922,8.199991,8.616634,8.497809,8.744787,...,7.603956,7.765299,8.743755,8.602192,8.466519,8.517818,8.667432,8.531301,8.653383,8.988260
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
UGCGL1,7.976069,7.564156,7.796486,7.525729,7.529645,7.085914,7.257368,7.226156,7.614684,7.251649,...,8.361275,7.682693,8.236507,7.492431,7.791859,7.422430,7.237058,7.789192,8.610931,8.038738
VPS72,7.839631,7.427590,7.781041,7.785813,7.610787,8.382814,8.360376,8.403276,8.089161,8.454691,...,8.105059,8.695151,8.949787,8.744461,9.168366,9.368211,8.887872,8.858159,8.515512,8.141914
CSMD3,5.619762,5.843024,5.872439,5.636650,5.567546,5.648225,5.933260,5.745459,5.782602,5.936025,...,7.137538,5.711487,5.694987,5.698292,5.848356,5.920681,5.783432,5.823674,5.792328,5.873695
CC2D1A,6.587251,6.590437,6.992506,6.947622,6.888878,6.809409,6.415244,6.573713,6.696129,6.889566,...,6.936255,6.381853,6.714279,6.758739,6.831259,6.829676,7.024869,6.788477,6.698296,6.816372


In [6]:
# Show every column label of the expression table as a plain Python list
# (the DataFrame display above elides the middle columns).
print(exp.columns.tolist())

['AB521-T', 'AB521M-T', 'AB521M-TR', 'AB521M-X1', 'AB521M-X1R1', 'AB521M-XC7', 'AB551-T', 'AB551-TR', 'AB551-X0R', 'AB551-X1', 'AB551-X1C7', 'AB555-T', 'AB555-X0', 'AB559-T', 'AB559-X1', 'AB580-T', 'AB580-X0', 'AB630-T', 'AB630-X0', 'HCI001-X0', 'HCI001-X0R', 'HCI001-X3', 'HCI001-X3C7', 'HCI001-X4', 'HCI001-X5', 'HCI001-X5R', 'HCI002-X0', 'HCI002-X0R', 'HCI002-X2', 'HCI002-X3R', 'HCI002-X4C7', 'HCI002-X6', 'HCI002-X6R', 'HCI004-X1', 'HCI004-X1R', 'HCI004-X2', 'HCI004-X2R', 'HCI004-X3', 'HCI004-X4', 'HCI005-X2', 'HCI005-X2R', 'HCI006-X1', 'HCI008-X1', 'HCI008-X1R', 'HCI008-X1R1', 'HCI009-X1', 'HCI009-X1R', 'HCI009-X1R1', 'HCI009-X4', 'HCI009-X4C', 'HCI009-X4CR', 'HCI009-X4R', 'HCI009-X6', 'HCI009-X8', 'HCI010-X0', 'HCI010-X1', 'HCI010-X1R', 'HCI010-X2', 'HCI010-X2R', 'HCI010-X3', 'HCI010-X3C7', 'HCI011-X1', 'HCI014-X0', 'IC006-X0', 'IC007-X0', 'IC007-X1', 'IC007-X3', 'STG139-T', 'STG139-TR', 'STG139-TR1', 'STG139-X0', 'STG139-X12', 'STG139-X13', 'STG139-X13R', 'STG139-X14', 'STG139-X2',

# Methylation

In [7]:
# Load the per-sample promoter-methylation table, keyed by the "Gene" column.
met = pd.read_csv(
    "pdtc/PromoterMethylationSamples.txt",
    sep="\t",
    index_col="Gene",
)
# Rebuild the index from a plain list of its labels, which leaves the index
# without the "Gene" name it got from read_csv.
met.index = met.index.tolist()
met

Unnamed: 0,AB521-N,AB521-T,AB521M-T,AB521M-X1R,AB551-N,AB555-N,AB555-T,AB555-X0,AB559-N,AB559-X2C7,...,VHIO039-X13,VHIO039-X14,VHIO089-X11,VHIO093-X7C7,VHIO098-X11,VHIO102-X7,VHIO102-X9,VHIO124-X13R,VHIO169-X4,VHIO179-X8
A1BG,85.389277,67.852564,73.479491,70.125343,71.111111,84.503968,64.025735,70.104119,76.889719,52.499903,...,71.683139,75.224909,61.428571,16.481482,69.007035,67.785481,37.336310,68.551587,62.619048,60.662229
A1BG-AS1,84.257115,77.237835,82.297932,76.425657,69.730812,82.006897,72.491811,80.026196,74.449855,69.423246,...,77.934896,80.027015,80.307540,8.599034,82.965978,74.209833,56.300305,71.003401,76.958238,53.173346
A1CF,,,,,,,,,,,...,,,,,,,,,,
A2M,,,,,,,,,,,...,,,,,,,,,,
A2M-AS1,30.944923,20.127218,16.944444,12.278258,15.086996,18.082289,1.608560,18.148486,22.907600,1.135531,...,0.000000,3.562091,0.427350,17.031280,0.000000,16.138349,8.172140,0.534188,0.370370,15.850916
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZYG11A,2.645503,20.075758,0.000000,1.769663,8.333333,3.864097,25.262308,33.193277,11.018519,0.000000,...,1.644737,1.973684,0.000000,37.239734,4.000000,2.171717,3.035714,0.000000,0.793651,1.345455
ZYG11B,79.723502,25.471216,20.238095,21.317237,83.116883,29.126984,24.877345,23.171565,22.619048,23.227513,...,45.502645,12.698413,50.739348,14.781746,48.214286,18.657358,60.714286,7.756892,56.368103,19.164835
ZYX,9.212608,5.742691,3.618003,3.364002,7.647185,10.326873,10.209585,9.001303,5.177524,7.170039,...,21.120371,7.662224,9.145833,7.662189,12.247265,5.498908,7.460901,3.610029,3.605776,5.208364
ZZEF1,2.688172,2.546458,0.895979,0.951199,1.397059,2.708333,1.432881,1.570921,0.737752,0.940860,...,1.023392,1.696429,3.854875,1.693548,2.138889,0.850340,0.559180,18.750000,4.370915,1.518743


In [8]:
# Show every column label of the methylation table as a plain Python list
# (the DataFrame display above elides the middle columns).
print(met.columns.tolist())

['AB521-N', 'AB521-T', 'AB521M-T', 'AB521M-X1R', 'AB551-N', 'AB555-N', 'AB555-T', 'AB555-X0', 'AB559-N', 'AB559-X2C7', 'AB564-X0', 'AB569-N', 'AB572-X0', 'AB580-N', 'AB580-X0', 'AB582-N', 'AB630-N', 'HCI001-X0', 'HCI001-X3', 'HCI001-X4', 'HCI001-X5', 'HCI002-X0', 'HCI002-X6', 'HCI004-X3', 'HCI004-X4', 'HCI005-X2', 'HCI006-X1', 'HCI008-X1', 'HCI009-X1', 'HCI009-X4', 'HCI009-X6', 'HCI009-X7', 'HCI010-C', 'HCI010-X0', 'HCI011-X1', 'HCI012-X0', 'HCI014-X0', 'IC006-X0', 'STG139-X0', 'STG139-X10', 'STG139-X13', 'STG139-X2', 'STG139-X3', 'STG139-X9', 'STG139M-X2R', 'STG139M-X5', 'STG195-X0', 'STG201-T', 'STG201-X2', 'STG201-X6', 'STG282-T', 'STG282-X1', 'STG282-X3', 'STG316-X1', 'STG316-X4', 'STG335-N', 'STG335-T', 'STG335-X1', 'VHIO039-X13', 'VHIO039-X14', 'VHIO089-X11', 'VHIO093-X7C7', 'VHIO098-X11', 'VHIO102-X7', 'VHIO102-X9', 'VHIO124-X13R', 'VHIO169-X4', 'VHIO179-X8']


# CNA

In [9]:
# Load the per-sample copy-number segment table (one row per segment, with
# ID / chrom / loc.start / loc.end / num.mark / seg.mean / call columns, as
# shown in the output below).
#
# Kept under its own name: the next cell binds `cna` to the gene-by-model call
# matrix from CNAModels.txt, so reusing `cna` here would leave one name meaning
# two different tables depending on which cell ran last. The default integer
# index from read_csv is left as is.
cna_samples = pd.read_csv("pdtc/CNASamples.txt", sep="\t")
cna_samples

Unnamed: 0,ID,chrom,loc.start,loc.end,num.mark,seg.mean,call
0,AB521-T,1,900001,31900000,274,-0.632732,HETD
1,AB521-T,1,31900001,78000000,441,-0.037411,NEUT
2,AB521-T,1,78000001,115200000,344,-0.508919,HETD
3,AB521-T,1,115200001,119600000,43,0.026450,NEUT
4,AB521-T,1,119600001,120500000,9,0.609891,GAIN
...,...,...,...,...,...,...,...
21200,VHIO244-X4,X,31900001,33300000,13,-0.643000,HETD
21201,VHIO244-X4,X,33300001,52200000,183,-1.645000,HOMD
21202,VHIO244-X4,X,52200001,52500000,3,-10.000000,HOMD
21203,VHIO244-X4,X,52500001,150500000,915,-1.603000,HOMD


In [10]:
# Load the model-level copy-number call table and key it by gene symbol.
cna = pd.read_csv("pdtc/CNAModels.txt", sep="\t")
# Use the values of the "Symbol" column as row labels (unnamed index) ...
cna.index = cna["Symbol"].tolist()
# ... and remove the now-redundant column so only the per-model columns remain.
cna = cna.drop(columns="Symbol")
cna

Unnamed: 0,AB521M,AB551,AB555,AB559,AB580,AB630,CAMBMT1,HCI001,HCI002,HCI004,...,STG335,VHIO039,VHIO089,VHIO093,VHIO098,VHIO102,VHIO124,VHIO131IGFRES,VHIO179,VHIO244
DDX11L1,,,,,,,,,,,...,,,,,,,,,,
WASH7P,,,,,,,,,,,...,,,,,,,,,,
MIR6859-1,LOSS,LOSS,UNKNOWN,UNKNOWN,GAIN,GAIN,LOSS,LOSS,LOSS,UNKNOWN,...,LOSS,LOSS,UNKNOWN,LOSS,LOSS,LOSS,UNKNOWN,UNKNOWN,UNKNOWN,LOSS
MIR6859-2,LOSS,LOSS,UNKNOWN,UNKNOWN,GAIN,GAIN,LOSS,LOSS,LOSS,UNKNOWN,...,LOSS,LOSS,UNKNOWN,LOSS,LOSS,LOSS,UNKNOWN,UNKNOWN,UNKNOWN,LOSS
FAM138A,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TMLHE,NEUT,NEUT,NEUT,GAIN,GAIN,LOSS,LOSS,NEUT,NEUT,LOSS,...,LOSS,LOSS,UNKNOWN,LOSS,LOSS,UNKNOWN,GAIN,NEUT,NEUT,NEUT
SPRY3,LOSS,LOSS,LOSS,UNKNOWN,UNKNOWN,LOSS,LOSS,LOSS,LOSS,LOSS,...,LOSS,LOSS,LOSS,LOSS,LOSS,UNKNOWN,UNKNOWN,LOSS,LOSS,NEUT
VAMP7,LOSS,LOSS,LOSS,LOSS,LOSS,LOSS,LOSS,LOSS,LOSS,LOSS,...,LOSS,LOSS,LOSS,LOSS,LOSS,LOSS,LOSS,LOSS,LOSS,NEUT
IL9R,,,,,,,,,,,...,,,,,,,,,,


In [11]:
# Enumerate the distinct non-missing call labels that occur anywhere in the
# CNA table: take the raw value array, keep non-null entries, deduplicate.
tmp = cna.to_numpy()
np.unique(tmp[pd.notnull(tmp)])

array(['GAIN', 'LOSS', 'NEUT', 'UNKNOWN'], dtype=object)

# No mutation