# GDSC2 - Docetaxel Drug Sensitivity
## Data Prep

### DepMap Data

In [1]:
import pandas as pd

# Sanger GDSC1/GDSC2 dose-response table, downloaded from the DepMap portal:
# https://depmap.org/portal/download/all/?releasename=Sanger+GDSC1+and+GDSC2&filename=sanger-dose-response.csv
depm = pd.read_csv(filepath_or_buffer='sanger-dose-response.csv')

# Bare last expression -> rich display of the loaded frame.
depm

Unnamed: 0,DATASET,COSMIC_ID,DRUG_ID,MIN_CONC,MAX_CONC,RMSE_PUBLISHED,Z_SCORE_PUBLISHED,IC50_PUBLISHED,AUC_PUBLISHED,ARXSPAN_ID,DRUG_NAME,BROAD_ID,upper_limit,ec50,slope,lower_limit,auc,log2.ic50,mse,R2
0,GDSC1,683665,1,0.007813,2.0,0.022518,-0.192056,10.977393,0.982116,ACH-002270,ERLOTINIB,BRD-K70401845,0.992788,2.839376e+00,-5.670993,0.514389,0.990834,,0.000034,0.904675
1,GDSC1,684055,1,0.007813,2.0,0.031831,0.505823,23.133991,0.984820,ACH-002104,ERLOTINIB,BRD-K70401845,1.006405,2.864875e-02,-0.186377,0.990054,0.997138,,0.000057,0.028903
2,GDSC1,684057,1,0.007813,2.0,0.026047,1.280750,52.935278,0.985696,ACH-002106,ERLOTINIB,BRD-K70401845,,,,,,,,
3,GDSC1,684059,1,0.007813,2.0,0.110056,0.086028,14.774223,0.972701,ACH-002108,ERLOTINIB,BRD-K70401845,,,,,,,,
4,GDSC1,684062,1,0.007813,2.0,0.087010,-0.114395,11.926884,0.944463,ACH-002111,ERLOTINIB,BRD-K70401845,0.989580,7.580375e-02,-12.222777,0.894027,0.933185,,0.000623,0.777093
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
387621,GDSC2,905951,2172,0.009766,10.0,0.143863,0.042524,25.410793,0.877741,ACH-000288,JQ1,"BRD-K54606188, NA",3.929045,3.287745e+11,-0.012190,-3.449953,0.856099,,0.006178,0.292447
387622,GDSC2,906862,2172,0.009766,10.0,0.088631,-2.223819,0.339325,0.510590,ACH-001065,JQ1,"BRD-K54606188, NA",0.935866,4.096968e-01,-2.014115,0.176515,0.585800,-1.073816,0.003198,0.971991
387623,GDSC2,907046,2172,0.009766,10.0,0.114748,-0.578942,7.780877,0.843211,ACH-000930,JQ1,"BRD-K54606188, NA",0.960799,3.695946e-01,-1.161533,0.687792,0.830671,,0.002792,0.795935
387624,GDSC2,749709,2172,0.009766,10.0,0.047625,1.642265,534.688321,0.983634,ACH-000859,JQ1,"BRD-K54606188, NA",0.977260,1.161541e+30,-0.031889,0.900234,0.969870,,0.000879,0.000540


### Cell Line Name Translation

In [2]:
## Translation table (DepMap ID -> stripped cell line name):
## https://ndownloader.figshare.com/files/20274744
id_columns = ['DepMap_ID', 'stripped_cell_line_name']

# Load the full sheet, then keep only the two columns needed for the lookup.
sample_info = pd.read_csv('sample_info.csv')
sample_info = sample_info[id_columns]

sample_info

Unnamed: 0,DepMap_ID,stripped_cell_line_name
0,ACH-000001,NIHOVCAR3
1,ACH-000002,HL60
2,ACH-000003,CACO2
3,ACH-000004,HEL
4,ACH-000005,HEL9217
...,...,...
1751,ACH-002397,KMHDASH2
1752,ACH-002458,HT144SKINFV1
1753,ACH-002459,HT144SKINFV3
1754,ACH-002460,HT144SKINFV2


In [3]:
## Get Docetaxel IC50 values
import numpy as np

# Keep only the Docetaxel rows and the two columns used downstream. The explicit
# copy means the column assignment below writes to an independent frame rather
# than to the result of chained indexing.
# NOTE(review): rows are not filtered on DATASET, so GDSC1 and GDSC2 measurements
# are averaged together further down - confirm this is intended given the title.
doce = depm.loc[depm.DRUG_NAME == 'DOCETAXEL', ['ARXSPAN_ID', 'IC50_PUBLISHED']].copy()
doce['L10_IC_50'] = np.log10(doce['IC50_PUBLISHED'])

## Join the sample info with the Docetaxel data to get appropriate cell line names
doce = doce.set_index('ARXSPAN_ID').join(sample_info.set_index('DepMap_ID'), how='inner')
doce = doce.rename(columns={'stripped_cell_line_name': 'CELL_LINE_NAME', 'IC50_PUBLISHED': 'IC_50'})

# Normalise names: lower-case, then remove hyphens, spaces and dots wherever they
# occur. The previous Series.replace({'.': ''}, regex=False) only rewrote values
# that were exactly '.', so dots inside a name were never stripped; .str.replace
# operates on substrings.
doce['CELL_LINE_NAME'] = doce['CELL_LINE_NAME'].str.lower().str.replace(r'[-. ]', '', regex=True)
doce['ACH_NAME'] = doce.index

# Average the log10 IC50 per (cell line name, ACH id) pair, then key the result
# by ACH id so it can be joined against the expression table.
doce = doce.drop(['IC_50'], axis='columns').groupby(['CELL_LINE_NAME', 'ACH_NAME']).mean().reset_index()
doce = doce.set_index('ACH_NAME')

doce

Unnamed: 0_level_0,CELL_LINE_NAME,L10_IC_50
ACH_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1
ACH-002089,201t,-2.583563
ACH-000956,22rv1,-1.664372
ACH-000948,2313287,-2.265796
ACH-000323,42mgba,-2.194771
ACH-001002,451lu,-2.180313
...,...,...
ACH-000332,yapc,-0.244988
ACH-000469,yh13,-2.664806
ACH-000570,ykg1,-3.034370
ACH-002317,yt,-1.808243


### CCLE Data

In [4]:
# Load the CCLE expression matrix (one row per cell line, one column per gene).
ccle = pd.read_csv('CCLE_expression.csv')

# The first column holds the cell line identifier; give it a usable name.
# rename() returns a new frame instead of writing into the Index's backing array,
# which is not a supported way to relabel columns.
ccle = ccle.rename(columns={ccle.columns[0]: 'cell_line'})

## Replace " (0000)" suffix values in column name
# regex=True is required: the pattern is a regular expression, and pandas >= 2.0
# treats the pattern as a literal string by default, which would leave every
# suffix in place.
ccle.columns = ccle.columns.str.replace(r" \([0-9]+\)", "", regex=True)

ccle

  ccle.columns = ccle.columns.str.replace(r" \([0-9]+\)", "")


Unnamed: 0,cell_line,TSPAN6,TNMD,DPM1,SCYL3,C1orf112,FGR,CFH,FUCA2,GCLC,...,ARHGAP11B,AC004593.2,AC090517.4,AL160269.1,ABCF2-H2BE1,POLR2J3,H2BE1,AL445238.1,GET1-SH3BGR,AC113348.1
0,ACH-001113,4.990501,0.000000,7.273702,2.765535,4.480265,0.028569,1.269033,3.058316,6.483171,...,1.214125,0.000000,0.111031,0.150560,1.427606,5.781884,0.000000,0.000000,0.799087,0.000000
1,ACH-001289,5.209843,0.545968,7.070604,2.538538,3.510962,0.000000,0.176323,3.836934,4.200850,...,1.835924,0.000000,0.310340,0.000000,0.807355,4.704319,0.000000,0.000000,0.464668,0.070389
2,ACH-001339,3.779260,0.000000,7.346425,2.339137,4.254745,0.056584,1.339137,6.724241,3.671293,...,1.823749,0.084064,0.176323,0.042644,1.384050,4.931683,0.000000,0.028569,0.263034,0.000000
3,ACH-001538,5.726831,0.000000,7.086189,2.543496,3.102658,0.000000,5.914565,6.099716,4.475733,...,0.871844,0.137504,0.263034,2.485427,0.713696,3.858976,0.000000,0.000000,0.000000,0.000000
4,ACH-000242,7.465648,0.000000,6.435462,2.414136,3.864929,0.831877,7.198003,5.452530,7.112492,...,2.324811,0.163499,0.163499,0.000000,1.117695,4.990501,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1388,ACH-000750,4.173127,0.000000,6.400879,1.807355,3.303050,0.014355,0.137504,4.944858,4.528571,...,1.280956,0.176323,0.084064,0.000000,2.615887,5.102658,0.000000,0.097611,0.214125,0.000000
1389,ACH-000285,0.097611,0.000000,6.428276,3.257011,4.980482,0.411426,0.124328,0.704872,4.829342,...,2.939227,0.000000,0.014355,0.000000,3.326250,6.341630,0.000000,0.000000,0.310340,0.000000
1390,ACH-001858,5.045268,0.000000,6.991749,1.807355,3.270529,0.028569,3.333424,6.819796,5.395063,...,1.510962,0.056584,0.097611,0.286881,1.925999,4.607626,0.111031,0.000000,1.090853,0.000000
1391,ACH-001997,5.805292,0.000000,7.792855,2.482848,3.903038,0.028569,4.124328,6.816600,4.458119,...,0.757023,0.150560,0.432959,0.111031,0.613532,4.787119,0.000000,0.163499,0.084064,0.000000


In [5]:
## Join Together
# Both frames are matched on their index: `doce` is indexed by ACH_NAME and the
# expression table is re-indexed by its 'cell_line' column. The inner join keeps
# only ids present in both; the result is then re-keyed by cell line name.
doce_ccle = (
    doce
    .join(ccle.set_index('cell_line'), how='inner')
    .set_index('CELL_LINE_NAME')
)

doce_ccle

Unnamed: 0_level_0,L10_IC_50,TSPAN6,TNMD,DPM1,SCYL3,C1orf112,FGR,CFH,FUCA2,GCLC,...,ARHGAP11B,AC004593.2,AC090517.4,AL160269.1,ABCF2-H2BE1,POLR2J3,H2BE1,AL445238.1,GET1-SH3BGR,AC113348.1
CELL_LINE_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
22rv1,-1.664372,2.643856,0.0,6.219556,3.418190,4.659925,0.014355,0.111031,5.846243,7.057883,...,2.111031,0.056584,0.042644,0.056584,1.124328,5.069960,0.0,0.000000,1.803227,0.0
2313287,-2.265796,2.985500,0.0,6.778734,4.130931,3.778209,0.000000,0.298658,7.433794,6.689299,...,1.735522,0.344828,0.367371,0.124328,2.192194,4.358959,0.0,0.042644,0.097611,0.0
42mgba,-2.194771,4.574707,0.0,6.632414,1.937344,3.401903,0.028569,0.575312,5.775577,3.320485,...,2.477677,1.220330,0.111031,0.000000,2.841973,3.615887,0.0,0.000000,1.189034,0.0
5637,-2.816851,5.868637,0.0,6.636045,2.046142,4.996389,0.176323,1.655352,6.200457,3.498251,...,0.773996,0.028569,0.137504,0.070389,1.176323,6.022812,0.0,0.000000,0.056584,0.0
639v,-2.370916,5.026800,0.0,6.966130,1.899176,3.531069,0.000000,3.910733,6.371385,4.693208,...,1.952334,0.042644,0.042644,0.042644,3.104337,5.934281,0.0,0.000000,0.378512,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
wsunhl,-2.450142,0.000000,0.0,6.231125,2.684819,4.898208,3.026800,0.028569,3.347666,3.772941,...,2.533563,0.000000,0.056584,0.000000,1.906891,5.387500,0.0,0.056584,0.400538,0.0
yapc,-0.244988,5.649328,0.0,6.674828,3.085765,3.790772,0.226509,3.878725,6.167519,5.101818,...,2.084064,0.201634,0.000000,0.000000,2.375735,4.361768,0.0,0.000000,0.333424,0.0
yh13,-2.664806,4.863938,0.0,5.977967,2.553361,3.772941,0.275007,7.178814,6.867279,3.959770,...,3.310340,0.000000,0.000000,0.000000,3.748461,6.263973,0.0,0.000000,0.111031,0.0
ykg1,-3.034370,5.914086,0.0,6.749668,2.809414,4.175525,0.176323,5.859224,6.535275,4.598127,...,2.263034,0.163499,0.097611,0.495695,3.379898,6.094869,0.0,0.000000,1.028569,0.0


In [6]:
# Persist the joined sensitivity + expression table one directory above the notebook.
doce_ccle.to_csv(path_or_buf='../docetaxel_sensitivity.csv')