In [1]:
# import libraries

import os
import torch 
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import math
import mygene
import sklearn.preprocessing as sk
import seaborn as sns
from sklearn import metrics
from sklearn.feature_selection import VarianceThreshold
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from torch.utils.data.sampler import WeightedRandomSampler
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import roc_auc_score
from sklearn.metrics import average_precision_score, precision_recall_curve
import random
from random import randint
from sklearn.model_selection import StratifiedKFold
import warnings

# Suppress all warnings
warnings.filterwarnings("ignore")

In [2]:
# define directories

cell_line_dir = "/Volumes/Expansion/Thesis Work/Supplementary Files/GDSC/"
models_dir = "/Volumes/Expansion/Thesis Work/Results/Transfer Learning/Drugs with same pathways/PDX_EGFRi/Expression_Mutation/Models/"
DEGs_dir = "/Volumes/Expansion/Thesis Work/Results/GDSC_DEGs_inhibitors/EGFRi/"
dataset_dir = "/Volumes/Expansion/Thesis Work/Results/preprocessed_results2/"
pdx_data_dir = "/Volumes/Expansion/Thesis Work/Datasets/PDX/Expression/"
save_results_to = "/Volumes/Expansion/Thesis Work/Results/Transfer Learning/Drugs with same pathways/PDX_EGFRi/Expression_Mutation/Predictions/"

In [3]:
# set random seeds

torch.manual_seed(42)
random.seed(42)

In [4]:
# change directory to read GDSC cell line details

os.chdir(cell_line_dir)
print(os.getcwd())

/Volumes/Expansion/Thesis Work/Supplementary Files/GDSC


In [5]:
# read GDSC cell line details table

GDSC_cell_line_details = pd.read_excel("GDSC_Cell_Lines_Details.xlsx", keep_default_na = False)
GDSC_cell_line_details.set_index("COSMIC identifier", inplace = True)
GDSC_cell_line_details.tail()

Unnamed: 0_level_0,Sample Name,Whole Exome Sequencing (WES),Copy Number Alterations (CNA),Gene Expression,Methylation,Drug\nResponse,GDSC\nTissue descriptor 1,GDSC\nTissue\ndescriptor 2,Cancer Type\n(matching TCGA label),Microsatellite \ninstability Status (MSI),Screen Medium,Growth Properties
COSMIC identifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1298218.0,KP-2,Y,N,Y,N,Y,pancreas,pancreas,PAAD,,D/F12,Adherent
1330932.0,KO52,Y,Y,N,N,N,leukemia,acute_myeloid_leukaemia,LAML,MSS/MSI-L,D/F12,
1331030.0,SC-1,Y,Y,N,N,N,lymphoma,B_cell_lymphoma,DLBC,MSS/MSI-L,R,
1503373.0,U-CH2,Y,Y,N,N,N,bone,bone_other,,MSS/MSI-L,D/F12,Adherent
,TOTAL:,1001,996,968,957,990,,,,,,


In [6]:
# change directory to read GDSC expression dataset (EGFRi)

os.chdir(dataset_dir + "/exprs/")
print(os.getcwd())

/Volumes/Expansion/Thesis Work/Results/preprocessed_results2/exprs


In [7]:
# read GDSC expression dataset (EGFRi)

GDSCE = pd.read_csv("GDSC_exprs.z.EGFRi.tsv", 
                    sep = "\t", index_col=0, decimal = ",")
GDSCE = pd.DataFrame.transpose(GDSCE)
GDSCE.head(3)

ENTREZID,1,2,9,10,12,13,14,15,16,18,...,107984199,107984208,107984325,107984411,107984648,107984889,107984989,107986313,107986782,107986810
683665,-0.4711563767951986,-0.1857522957766964,0.9702409548662462,-0.4081281805960821,-0.4161511169999803,-0.4384159723467665,-1.154692246005183,0.5262132484107975,-0.7950924576033422,-0.6337442716864214,...,1.283534690590172,-0.3367265464586691,-0.0039762988723447,0.7378729872604209,0.9757614264575688,0.1494417647095801,2.062294063911948,-0.8140636761719959,0.2870333828833719,0.9524265085537148
683667,0.8924335646616343,-0.2118047645974752,-1.5249963829654445,-0.5512338119420259,-0.1375180480622428,-0.5276262991742051,-0.0305604198225781,0.5226216566753464,0.4877783097403741,0.2252883573642275,...,-0.0156845434045267,-0.1612071417519457,1.2033249282722671,2.7714836839186687,-0.3165072496327769,-0.8849200030571541,-0.5104734344593952,0.2984361940989172,-0.4349407065496763,0.4747633022523675
684057,0.6510004960254864,1.6578876382433665,-0.4977106390881842,-0.1212172092933644,-0.398455281697677,-0.2804901106672752,0.6455898116420014,0.215833091774836,-0.1723595788875009,-0.2170870770552223,...,-0.024863133395904,0.4073439017665206,-0.4440356638831038,0.8630347078663075,1.4486171428489227,-1.4777214830771732,0.5220400480464272,0.6441211958947753,0.9057518606131394,0.3018783753479829


In [8]:
# change directory to read GDSC mutation dataset (EGFRi)

os.chdir(dataset_dir + "/mutations/")
print(os.getcwd())

/Volumes/Expansion/Thesis Work/Results/preprocessed_results2/mutations


In [9]:
# read GDSC mutation dataset (EGFRi)

GDSCM = pd.read_csv("GDSC_mutations.EGFRi.tsv",
                    sep="\t", index_col=0, decimal=".")
GDSCM.drop_duplicates(keep='last')
GDSCM = pd.DataFrame.transpose(GDSCM)
GDSCM = GDSCM.loc[:, ~GDSCM.columns.duplicated()]

GDSCM.head()

Unnamed: 0,143872,728577,2,2050,10243,79365,256006,4609,118788,113675,...,26517,28972,53917,283507,514,339210,64981,7335,5739,83442
683665,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
683667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
684057,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
684059,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
684062,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [10]:
# change directory to read GDSC response dataset (EGFRi)

os.chdir(dataset_dir + "/response/")
print(os.getcwd())

/Volumes/Expansion/Thesis Work/Results/preprocessed_results2/response


In [11]:
# read GDSC response dataset (EGFRi)

GDSCR = pd.read_csv("GDSC_response.EGFRi.tsv",
                    sep="\t",
                    index_col=0,
                    decimal=",")
GDSCR.dropna(inplace=True)
GDSCR.rename(mapper=str, axis='index', inplace=True)
d = {"R": 0, "S": 1}
GDSCR["response"] = GDSCR.loc[:, "response"].apply(lambda x: d[x])

GDSCR.head()

Unnamed: 0_level_0,response,logIC50,drug,exprs,CNA,mutations
sample_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
683665,0,1.49001536063508,Afatinib,1,1,1
683665,0,3.70042086392846,Afatinib (rescreen),1,1,1
683665,0,6.29444657935625,Cetuximab,1,1,1
683665,0,-1.50094791622811,CUDC-101,1,1,1
683665,1,-0.966773876200563,EKB-569,1,1,1


In [12]:
# print count of cell lines for each drug (EGFRi)

GDSCR["drug"].value_counts()

drug
EKB-569                770
Afatinib (rescreen)    757
CUDC-101               754
Afatinib               738
Gefitinib              734
Cetuximab              725
HG-5-88-01             422
Lapatinib              371
Erlotinib              362
Name: count, dtype: int64

In [13]:
# change directory to read PDX expression dataset homogenized with GDSC expression dataset (Cetuximab)

os.chdir(dataset_dir + "/exprs_homogenized/")
print(os.getcwd())

/Volumes/Expansion/Thesis Work/Results/preprocessed_results2/exprs_homogenized


In [14]:
# read PDX expression dataset homogenized with GDSC expression dataset (Cetuximab)

PDXEcetuximab = pd.read_csv("PDX_exprs.Cetuximab.eb_with.GDSC_exprs.Cetuximab.tsv",
                   sep = "\t", index_col=0, decimal = ",")
PDXEcetuximab = pd.DataFrame.transpose(PDXEcetuximab)
PDXEcetuximab.head(3)

ENTREZID,1,2,9,10,12,13,14,15,16,18,...,100507206,100507254,100507436,100507472,100526773,100527978,100532746,100820829,102724473,105375355
X-1027,3.60061063639454,3.07817969365007,6.32765157689639,2.93005596217687,2.64304491832882,2.56618657438719,8.09168714940745,2.55483284192774,8.59989127217117,2.80664192172726,...,2.5997416042245,3.06591431154376,5.50994883395808,2.93037262394265,2.93460472043208,2.44987572619243,2.48989773232543,3.88458609037183,3.95227909263729,2.46055443755884
X-1119,3.54613916595946,3.07817969365007,7.46956729139981,4.20521626673769,2.929569339555,5.57576806896234,8.20319464923893,2.41443776874882,7.7754694329959,3.34200309260696,...,2.67274979347372,3.11395605082,8.98094314882696,2.93037262394265,2.93460472043208,2.44987572619243,2.48989773232543,4.81457724903364,3.95227909263729,2.84309989182372
X-1156,6.99909567844156,8.47130432034951,6.15135176832441,2.36952374925443,3.6713478754664,2.53437362934482,8.43809032488933,2.53376073162296,7.11948011162002,3.28816715734563,...,2.72150643954151,3.17396413482904,6.58328775150283,2.93037262394265,2.93460472043208,2.44987572619243,2.48989773232543,4.74727922278015,4.15062701412473,2.46987656241725


In [15]:
# change directory to read PDX mutation dataset (Cetuximab)

os.chdir(dataset_dir + "mutations")
os.getcwd()

'/Volumes/Expansion/Thesis Work/Results/preprocessed_results2/mutations'

In [16]:
# read PDX mutation dataset (Cetuximab)

PDXMcetuximab = pd.read_csv("PDX_mutations.Cetuximab.tsv",
                        sep="\t", index_col=0, decimal=",")
PDXMcetuximab.drop_duplicates(keep='last')
PDXMcetuximab = pd.DataFrame.transpose(PDXMcetuximab)
PDXMcetuximab = PDXMcetuximab.loc[:, ~PDXMcetuximab.columns.duplicated()]

PDXMcetuximab.head(3)

ENTREZID,3075.0,171017.0,2058.0,79890.0,7701.0,3104.0,4642.0,9778.0,100132406.0,23607.0,...,253738.0,154807.0,79140.0,6307.0,6352.0,56005.0,115650.0,64946.0,5777.0,253639.0
X-1027,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
X-1119,0.0,-0.05,0.0,-0.05,0.0,0.0,0.0,0.0,-0.05,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
X-1156,0.0,-0.05,0.0,-0.05,0.0,0.0,0.0,-0.05,-0.05,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [17]:
# variance threshold for GDSC expression dataset (EGFRi)

selector = VarianceThreshold(0.05)
selector.fit_transform(GDSCE)
GDSCE = GDSCE[GDSCE.columns[selector.get_support(indices=True)]]

In [18]:
# fill NA values and binarize GDSC mutation dataset (EGFRi)

GDSCM = GDSCM.fillna(0)
GDSCM[GDSCM != 0.0] = 1

In [19]:
# select shared genes between GDSC and PDX expression and mutation datasets 

ls = GDSCE.columns.intersection(GDSCM.columns)
ls = ls.intersection(PDXEcetuximab.columns)
ls = ls.intersection(PDXMcetuximab.columns)

ls = pd.unique(ls)

print(f"GDSC and PDX # of common genes: {ls.shape[0]}")

GDSC and PDX # of common genes: 13861


In [20]:
# select shared samples between GDSC expression, mutation, and response datasets (EGFRi)

ls2 = GDSCE.index.intersection(GDSCM.index)
ls2 = ls2.intersection(GDSCR.index)

print(f"GDSC # of common cell lines: {ls2.shape[0]}")

GDSC # of common cell lines: 800


In [21]:
# subset shared genes and samples in GDSC expression, mutation, and response datasets

GDSCE = GDSCE.loc[ls2, ls]
GDSCM = GDSCM.loc[ls2, ls]
GDSCR = GDSCR.loc[ls2, :]

print(f"GDSC # of common samples and genes (Expression): {GDSCE.shape}")
print(f"GDSC # of common samples and genes (Mutation): {GDSCM.shape}")
print(f"GDSC # of samples (Response): {GDSCR.shape[0]}")

# There exists same sample names for different drugs, so row shape is different in GDSC response data

GDSC # of common samples and genes (Expression): (800, 13861)
GDSC # of common samples and genes (Mutation): (800, 13861)
GDSC # of samples (Response): 5633


In [22]:
# select shared samples between PDX expression and mutation datasets (Cetuximab) 

ls3 = PDXEcetuximab.index.intersection(PDXMcetuximab.index)

print(f"PDX # of common samples (Cetuximab): {ls3.shape[0]}")

PDX # of common samples (Cetuximab): 60


In [23]:
# select shared genes and samples between PDX expression and mutation datasets (Cetuximab)

PDXEcetuximab = PDXEcetuximab.loc[ls3, ls]
PDXMcetuximab = PDXMcetuximab.loc[ls3, ls]

print(f"PDX # of common samples and genes for Expression (Cetuximab): {PDXEcetuximab.shape}")
print(f"PDX # of common samples and genes for Mutation (Cetuximab): {PDXMcetuximab.shape}\n")

PDX # of common samples and genes for Expression (Cetuximab): (60, 13861)
PDX # of common samples and genes for Mutation (Cetuximab): (60, 13861)



In [24]:
# change directory to read DEGs (EGFRi)

os.chdir(DEGs_dir)
print(os.getcwd())

/Volumes/Expansion/Thesis Work/Results/GDSC_DEGs_inhibitors/EGFRi


In [25]:
# read DEGs (EGFRi)

DEGs_filtered_data = pd.read_excel("EGFRi_Differentially_Expressed_Genes (EnsemblID).xlsx",
                                    sheet_name = "Common DEGs")
print(f"There are {DEGs_filtered_data.shape[0]} DEGs (p-adjusted < 0.05 and |logFC| > 1) among EGFR signaling inhibitors\n")

DEGs_filtered_data.head()

There are 1028 DEGs (p-adjusted < 0.05 and |logFC| > 1) among EGFR signaling inhibitors



Unnamed: 0,Gene Symbol,Frequency
0,MYOF,9
1,TM4SF1,8
2,KRT18,8
3,WWC1,8
4,CAV1,8


In [26]:
# get gene symbol from mygene database

mg = mygene.MyGeneInfo()
DEGs_entrez_id = mg.querymany(DEGs_filtered_data["Gene Symbol"], 
                              species="human", 
                              scopes = "symbol",
                              field = "entrezgene",
                              as_dataframe=True)["entrezgene"]
DEGs_entrez_id = pd.unique(DEGs_entrez_id.dropna())
DEGs_entrez_id = pd.Series(DEGs_entrez_id)

result = mg.query("SLC22A18", species="human", scopes="symbol", fields="entrezgene")
DEGs_entrez_id = pd.concat([DEGs_entrez_id, pd.Series(str(result["hits"][1]["entrezgene"]))], ignore_index=True)
DEGs_entrez_id = pd.Series(DEGs_entrez_id)

result = mg.query("SLC22A18", species="human", scopes="symbol", fields="entrezgene")
DEGs_entrez_id = pd.concat([DEGs_entrez_id, pd.Series(str(result["hits"][1]["entrezgene"]))], ignore_index=True)

DEGs_entrez_id.shape[0]

2 input query terms found dup hits:	[('CLECL1P', 2), ('CAST', 2)]
1 input query terms found no hit:	['SLC22A18']


1029

In [27]:
# assign GDSC datasets to new variables

exprs_z = GDSCE
mut = GDSCM
responses = GDSCR

In [28]:
# GDSC response dataset

responses.head(3)

Unnamed: 0,response,logIC50,drug,exprs,CNA,mutations
683665,0,1.49001536063508,Afatinib,1,1,1
683665,0,3.70042086392846,Afatinib (rescreen),1,1,1
683665,0,6.29444657935625,Cetuximab,1,1,1


In [29]:
# list drug names in GDSC response dataset

drugs = set(responses["drug"].values)
drugs

{'Afatinib',
 'Afatinib (rescreen)',
 'CUDC-101',
 'Cetuximab',
 'EKB-569',
 'Erlotinib',
 'Gefitinib',
 'HG-5-88-01',
 'Lapatinib'}

In [30]:
# convert Entrez gene IDs to integer

responses.index = responses.index.astype(int)
responses.index

Index([683665, 683665, 683665, 683665, 683665, 683665, 683665, 683665, 683665,
       683667,
       ...
       910930, 910941, 910941, 924248, 924248, 924248, 906849, 906849, 949162,
       949162],
      dtype='int64', length=5633)

In [31]:
# merge GDSC cell line details with response dataset

dat = pd.merge(GDSC_cell_line_details,
         responses,
         left_index = True, 
         right_index=True,
         how = "inner")

dat["GDSC\nTissue descriptor 1"].value_counts()
#dat.head()

GDSC\nTissue descriptor 1
lung_NSCLC           594
leukemia             538
urogenital_system    531
lymphoma             487
aero_dig_tract       408
nervous_system       369
lung_SCLC            347
skin                 334
breast               299
large_intestine      279
digestive_system     268
bone                 261
neuroblastoma        212
pancreas             154
kidney               138
myeloma              113
soft_tissue          112
lung                  99
thyroid               90
Name: count, dtype: int64

In [32]:
# list count of cell lines as to tissues treated with cetuximab

filter = (dat["drug"] == "Cetuximab")
dat.loc[filter,"GDSC\nTissue descriptor 1"].value_counts()

GDSC\nTissue descriptor 1
lung_NSCLC           87
urogenital_system    76
leukemia             66
lymphoma             56
aero_dig_tract       56
nervous_system       47
skin                 44
breast               41
lung_SCLC            39
digestive_system     37
bone                 34
large_intestine      33
pancreas             20
kidney               19
neuroblastoma        19
soft_tissue          16
myeloma              13
lung                 11
thyroid              11
Name: count, dtype: int64

In [33]:
# convert Entrez gene IDs to string

responses.index = responses.index.astype(str)
responses.index

Index(['683665', '683665', '683665', '683665', '683665', '683665', '683665',
       '683665', '683665', '683667',
       ...
       '910930', '910941', '910941', '924248', '924248', '924248', '906849',
       '906849', '949162', '949162'],
      dtype='object', length=5633)

In [34]:
# filter GDSC expression and mutation datasets (EGFRi) as to drugs
# subset selected DEGs

expression_zscores = []
mutations = []
for drug in drugs:
    samples = responses.loc[responses["drug"] == drug, :].index.values
    e_z = exprs_z.loc[samples, :]
    m = mut.loc[samples, :]
    expression_zscores.append(e_z)
    mutations.append(m)

GDSCEv2 = pd.concat(expression_zscores, axis=0)
GDSCMv2 = pd.concat(mutations, axis=0)
GDSCRv2 = responses

ls4 = list(set(GDSCE.columns).intersection(set(DEGs_entrez_id.astype(int))))
ls5 = GDSCEv2.index.intersection(GDSCMv2.index)

GDSCEv2 = GDSCEv2.loc[ls5, ls4]
GDSCMv2 = GDSCMv2.loc[ls5, ls4]
GDSCRv2 = GDSCRv2.loc[ls5, :]

PDXEcetuximab = PDXEcetuximab.loc[:,ls4]
PDXMcetuximab = PDXMcetuximab.loc[:,ls4]

responses.index = responses.index.values + "_" + responses["drug"].values

print(f"GDSC # of common samples and genes (Expression): {GDSCEv2.shape}")
print(f"GDSC # of common samples and genes (Mutation): {GDSCMv2.shape}")
print(f"GDSC # of common samples (Response): {GDSCRv2.shape[0]}\n")

print(f"PDX # of common samples and genes for Cetuximab (Expression): {PDXEcetuximab.shape}")
print(f"PDX # of common samples and genes for Cetuximab (Mutation): {PDXMcetuximab.shape}\n")

GDSC # of common samples and genes (Expression): (5633, 873)
GDSC # of common samples and genes (Mutation): (5633, 873)
GDSC # of common samples (Response): 5633

PDX # of common samples and genes for Cetuximab (Expression): (60, 873)
PDX # of common samples and genes for Cetuximab (Mutation): (60, 873)



In [35]:
# GDSC response values

Y = GDSCRv2['response'].values
print(Y)

[0 0 0 ... 0 0 0]


In [36]:
# change directory to read PDX response dataset (Cetuximab)

os.chdir(dataset_dir + "response")
os.getcwd()

'/Volumes/Expansion/Thesis Work/Results/preprocessed_results2/response'

In [37]:
# read PDX response dataset (Cetuximab)

PDXRcetuximab = pd.read_csv("PDX_response.Cetuximab.tsv", 
                      sep = "\t", 
                      index_col=0, 
                      decimal = ",")
PDXRcetuximab.dropna(inplace=True)
PDXRcetuximab.rename(mapper=str, axis='index', inplace=True)
d = {"R": 0, "S": 1}
PDXRcetuximab["response"] = PDXRcetuximab.loc[:, "response"].apply(lambda x: d[x])
Ytscetuximab = PDXRcetuximab["response"].values    

print(f"There are {Ytscetuximab.shape[0]} samples with response data in the PDX (Cetuximab)\n")
Ytscetuximab

There are 60 samples with response data in the PDX (Cetuximab)



array([0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [38]:
# read PDX dataset to select cohorts (Cetuximab)

pdx_cohort = pd.read_excel(pdx_data_dir + "/nm.3954-S2.xlsx",
                              "PCT raw data")
pdx_cohort = pdx_cohort.loc[pdx_cohort["Treatment"] == "cetuximab",]
pdx_cohort = pdx_cohort.loc[:,["Model","Tumor Type"]]
pdx_cohort = pdx_cohort.drop_duplicates()
pdx_cohort.set_index("Model", inplace = True) 
pdx_cohort.head()

Unnamed: 0_level_0,Tumor Type
Model,Unnamed: 1_level_1
X-0933,CRC
X-1027,CRC
X-1055,CRC
X-1119,CRC
X-1156,NSCLC


In [39]:
# merge PDX cohort table with PDX response dataset (Cetuximab)

dat_pdx = pd.merge(pdx_cohort,
                   PDXRcetuximab,
                   left_index = True,
                   right_index = True)
dat_pdx["Tumor Type"].value_counts()

Tumor Type
CRC      36
NSCLC    24
Name: count, dtype: int64

In [40]:
# filter CRC cohort samples (Cetuximab)

filter = (dat_pdx["Tumor Type"] == "CRC")
pdx_crc_samples = dat_pdx.loc[filter,].index
pdx_crc_samples

Index(['X-1027', 'X-1119', 'X-1167', 'X-1173', 'X-1270', 'X-1290', 'X-1303',
       'X-1329', 'X-1443', 'X-1479', 'X-1500', 'X-1536', 'X-1538', 'X-2145',
       'X-2182', 'X-2239', 'X-2403', 'X-2483', 'X-2484', 'X-2538', 'X-2573',
       'X-2659', 'X-2822', 'X-2846', 'X-2861', 'X-3093', 'X-3205', 'X-3224',
       'X-3671', 'X-3792', 'X-4087', 'X-5254', 'X-5405', 'X-5446', 'X-5495',
       'X-5578'],
      dtype='object')

In [41]:
# filter responses of CRC cohort samples (Cetuximab)

Ytscetuximab_crc = PDXRcetuximab.loc[pdx_crc_samples,"response"].values
Ytscetuximab_crc

array([0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0])

In [42]:
# count of sensitive and resistance CRC samples (Cetuximab)

PDXRcetuximab.loc[pdx_crc_samples,"response"].value_counts()

response
0    32
1     4
Name: count, dtype: int64

In [43]:
# filter NSCLC cohort samples (Cetuximab)

filter = (dat_pdx["Tumor Type"] == "NSCLC")
pdx_nsclc_samples = dat_pdx.loc[filter,].index
pdx_nsclc_samples

Index(['X-1156', 'X-1172', 'X-1323', 'X-1442', 'X-1499', 'X-1586', 'X-1658',
       'X-1725', 'X-1787', 'X-1823', 'X-1834', 'X-1835', 'X-1934', 'X-1980',
       'X-1993', 'X-2017', 'X-2042', 'X-2082', 'X-2088', 'X-2094', 'X-3029',
       'X-3237', 'X-3843', 'X-4819'],
      dtype='object')

In [44]:
# filter responses of NSCLC cohort samples (Cetuximab)

Ytscetuximab_nsclc = PDXRcetuximab.loc[pdx_nsclc_samples,"response"].values
Ytscetuximab_nsclc

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       0, 0])

In [45]:
# count of sensitive and resistance NSCLC samples (Cetuximab)

PDXRcetuximab.loc[pdx_nsclc_samples,"response"].value_counts()

response
0    23
1     1
Name: count, dtype: int64

In [46]:
# define maximum iteration

max_iter = 50

In [47]:
# assign number of neurons and dropout rates found for layers from the saved models

hdm1 = 64
hdm2 = 32
rate1 = 0.5
rate2 = 0.6
rate3 = 0.6

In [48]:
# load pre-trained models and make predictions

scalerGDSC = sk.StandardScaler()
scalerGDSC.fit(GDSCEv2.values)
X_trainE = scalerGDSC.transform(GDSCEv2.values)
X_testEcetuximab = scalerGDSC.transform(PDXEcetuximab.values) 
X_testEcetuximab_crc = scalerGDSC.transform(PDXEcetuximab.loc[pdx_crc_samples,].values) 
X_testEcetuximab_nsclc = scalerGDSC.transform(PDXEcetuximab.loc[pdx_nsclc_samples,].values) 

X_trainM = np.nan_to_num(GDSCMv2.values)
X_testMcetuximab = np.nan_to_num(PDXMcetuximab.values.astype(np.float32))
X_testMcetuximab_crc = np.nan_to_num(PDXMcetuximab.loc[pdx_crc_samples,].values.astype(np.float32))
X_testMcetuximab_nsclc = np.nan_to_num(PDXMcetuximab.loc[pdx_nsclc_samples,].values.astype(np.float32))

TX_testEcetuximab = torch.FloatTensor(X_testEcetuximab)
TX_testEcetuximab_crc = torch.FloatTensor(X_testEcetuximab_crc)
TX_testEcetuximab_nsclc = torch.FloatTensor(X_testEcetuximab_nsclc)

TX_testMcetuximab = torch.FloatTensor(X_testMcetuximab.astype(np.float32))
TX_testMcetuximab_crc = torch.FloatTensor(X_testMcetuximab_crc.astype(np.float32))
TX_testMcetuximab_nsclc = torch.FloatTensor(X_testMcetuximab_nsclc.astype(np.float32))

ty_testEcetuximab = torch.FloatTensor(Ytscetuximab.astype(int))
ty_testEcetuximab_crc = torch.FloatTensor(Ytscetuximab_crc.astype(int))
ty_testEcetuximab_nsclc = torch.FloatTensor(Ytscetuximab_nsclc.astype(int))

n_sampE, IE_dim = X_trainE.shape
n_sampM, IM_dim = X_trainM.shape

h_dim1 = hdm1
h_dim2 = hdm2
Z_in = h_dim1 + h_dim2 

class AEE(nn.Module):
    def __init__(self):
        super(AEE, self).__init__()
        self.EnE = torch.nn.Sequential(
            nn.Linear(IE_dim, h_dim1),
            nn.BatchNorm1d(h_dim1),
            nn.ReLU(),
            nn.Dropout(rate1))
    def forward(self, x):
        output = self.EnE(x)
        return output  


class AEM(nn.Module):
    def __init__(self):
        super(AEM, self).__init__()
        self.EnM = torch.nn.Sequential(
            nn.Linear(IM_dim, h_dim2),
            nn.BatchNorm1d(h_dim2),
            nn.ReLU(),
            nn.Dropout(rate2))
    def forward(self, x):
        output = self.EnM(x)
        return output       

class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        self.FC = torch.nn.Sequential(
            nn.Linear(Z_in, 1),
            nn.Dropout(rate3),
            nn.Sigmoid())
    def forward(self, x):
        return self.FC(x)

torch.cuda.manual_seed_all(42)

AutoencoderE = torch.load(models_dir + "Exprs_Cetuximab_GDSC_PDX_EGFRi_Expression_Mutation.pt")
AutoencoderM = torch.load(models_dir + "Mut_Cetuximab_GDSC_PDX_EGFRi_Expression_Mutation.pt")

Clas = torch.load(models_dir + "Class_Cetuximab_GDSC_PDX_EGFRi_Expression_Mutation.pt")

AutoencoderE.eval()
AutoencoderM.eval()
Clas.eval()

ZEX = AutoencoderE(torch.FloatTensor(X_trainE))
ZMX = AutoencoderM(torch.FloatTensor(X_trainM))
ZTX = torch.cat((ZEX, ZMX), 1)
ZTX = F.normalize(ZTX, p=2, dim=0)
PredX = Clas(ZTX)
AUCT = roc_auc_score(Y, PredX.detach().numpy())
#print(f"GDSC - Training AUC (PDX_EGFRi): {round(AUCT,2)}")
AUCTaucpr = average_precision_score(Y, PredX.detach().numpy())
#print(f"GDSC - Training AUCPR (PDX_EGFRi): {round(AUCTaucpr,2)}\n")

ZETcetuximab = AutoencoderE(TX_testEcetuximab)
ZMTcetuximab = AutoencoderM(TX_testMcetuximab)
ZTTcetuximab = torch.cat((ZETcetuximab, ZMTcetuximab), 1)
ZTTcetuximab = F.normalize(ZTTcetuximab, p=2, dim=0)
PredTcetuximab = Clas(ZTTcetuximab)
AUCTcetuximab = roc_auc_score(Ytscetuximab.astype(int), PredTcetuximab.detach().numpy())
#print(f"PDX - Test AUC (Cetuximab): {round(AUCTcetuximab,2)}")
AUCTcetuximab_aucpr = average_precision_score(Ytscetuximab.astype(int), PredTcetuximab.detach().numpy())
#print(f"PDX - Test AUCPR (Cetuximab): {round(AUCTcetuximab_aucpr,2)}\n")

ZETcetuximab_crc = AutoencoderE(TX_testEcetuximab_crc)
ZMTcetuximab_crc = AutoencoderM(TX_testMcetuximab_crc)
ZTTcetuximab_crc = torch.cat((ZETcetuximab_crc, ZMTcetuximab_crc), 1)
ZTTcetuximab_crc = F.normalize(ZTTcetuximab_crc, p=2, dim=0)
PredTcetuximab_crc = Clas(ZTTcetuximab_crc)
AUCTcetuximab_crc = roc_auc_score(Ytscetuximab_crc.astype(int), PredTcetuximab_crc.detach().numpy())
#print(f"PDX - Test AUC (Cetuximab, CRC): {round(AUCTcetuximab_crc,2)}")
AUCTcetuximab_crc_aucpr = average_precision_score(Ytscetuximab_crc.astype(int), PredTcetuximab_crc.detach().numpy())
#print(f"PDX - Test AUCPR (Cetuximab, CRC): {round(AUCTcetuximab_crc_aucpr,2)}\n")

ZETcetuximab_nsclc = AutoencoderE(TX_testEcetuximab_nsclc)
ZMTcetuximab_nsclc = AutoencoderM(TX_testMcetuximab_nsclc)
ZTTcetuximab_nsclc = torch.cat((ZETcetuximab_nsclc, ZMTcetuximab_nsclc), 1)
ZTTcetuximab_nsclc = F.normalize(ZTTcetuximab_nsclc, p=2, dim=0)
PredTcetuximab_nsclc = Clas(ZTTcetuximab_nsclc)
AUCTcetuximab_nsclc = roc_auc_score(Ytscetuximab_nsclc.astype(int), PredTcetuximab_nsclc.detach().numpy())
#print(f"PDX - Test AUC (Cetuximab, NSCLC): {round(AUCTcetuximab_nsclc,2)}")
AUCTcetuximab_nsclc_aucpr = average_precision_score(Ytscetuximab_nsclc.astype(int), PredTcetuximab_nsclc.detach().numpy())
#print(f"PDX - Test AUCPR (Cetuximab, NSCLC): {round(AUCTcetuximab_nsclc_aucpr,2)}\n")

AUCT_finetuned = dict()
AUCTaucpr_finetuned = dict()
AUCTcetuximab_finetuned = dict()
AUCTcetuximab_crc_finetuned = dict()
AUCTcetuximab_nsclc_finetuned = dict()
AUCTcetuximab_aucpr_finetuned = dict()
AUCTcetuximab_crc_aucpr_finetuned = dict()
AUCTcetuximab_nsclc_aucpr_finetuned = dict()
for iter in range(max_iter):
   iter += 1  

   AutoencoderE_finetuned = torch.load(models_dir + f"Finetuned_Models/Exprs_Cetuximab_GDSC_PDX_EGFRi_Expression_Mutation_Finetuned_{iter}.pt")
   AutoencoderM_finetuned = torch.load(models_dir + f"Finetuned_Models/Mut_Cetuximab_GDSC_PDX_EGFRi_Expression_Mutation_Finetuned_{iter}.pt")
   Clas_finetuned = torch.load(models_dir + f"Finetuned_Models/Class_Cetuximab_GDSC_PDX_EGFRi_Expression_Mutation_Finetuned_{iter}.pt")
   
   AutoencoderE_finetuned.eval()
   AutoencoderM_finetuned.eval()
   Clas_finetuned.eval()
      
   ZEX = AutoencoderE_finetuned(torch.FloatTensor(X_trainE))
   ZMX = AutoencoderM_finetuned(torch.FloatTensor(X_trainM))
   ZTX = torch.cat((ZEX, ZMX), 1)
   ZTX = F.normalize(ZTX, p=2, dim=0)
   PredX = Clas_finetuned(ZTX)
   AUCT_finetuned[iter] = roc_auc_score(Y, PredX.detach().numpy())
   AUCTaucpr_finetuned[iter] = average_precision_score(Y, PredX.detach().numpy())
   
   ZETcetuximab = AutoencoderE_finetuned(TX_testEcetuximab)
   ZMTcetuximab = AutoencoderM_finetuned(TX_testMcetuximab)
   ZTTcetuximab = torch.cat((ZETcetuximab, ZMTcetuximab), 1)
   ZTTcetuximab = F.normalize(ZTTcetuximab, p=2, dim=0)
   PredTcetuximab = Clas_finetuned(ZTTcetuximab)
   AUCTcetuximab_finetuned[iter] = roc_auc_score(Ytscetuximab.astype(int), PredTcetuximab.detach().numpy())
   AUCTcetuximab_aucpr_finetuned[iter] = average_precision_score(Ytscetuximab.astype(int), PredTcetuximab.detach().numpy())

   ZETcetuximab_crc = AutoencoderE_finetuned(TX_testEcetuximab_crc)
   ZMTcetuximab_crc = AutoencoderM_finetuned(TX_testMcetuximab_crc)
   ZTTcetuximab_crc = torch.cat((ZETcetuximab_crc, ZMTcetuximab_crc), 1)
   ZTTcetuximab_crc = F.normalize(ZTTcetuximab_crc, p=2, dim=0)
   PredTcetuximab_crc = Clas_finetuned(ZTTcetuximab_crc)
   AUCTcetuximab_crc_finetuned[iter] = roc_auc_score(Ytscetuximab_crc.astype(int), PredTcetuximab_crc.detach().numpy())
   AUCTcetuximab_crc_aucpr_finetuned[iter] = average_precision_score(Ytscetuximab_crc.astype(int), PredTcetuximab_crc.detach().numpy())

   ZETcetuximab_nsclc = AutoencoderE_finetuned(TX_testEcetuximab_nsclc)
   ZMTcetuximab_nsclc = AutoencoderM_finetuned(TX_testMcetuximab_nsclc)
   ZTTcetuximab_nsclc = torch.cat((ZETcetuximab_nsclc, ZMTcetuximab_nsclc), 1)
   ZTTcetuximab_nsclc = F.normalize(ZTTcetuximab_nsclc, p=2, dim=0)
   PredTcetuximab_nsclc = Clas_finetuned(ZTTcetuximab_nsclc)
   AUCTcetuximab_nsclc_finetuned[iter] = roc_auc_score(Ytscetuximab_nsclc.astype(int), PredTcetuximab_nsclc.detach().numpy())
   AUCTcetuximab_nsclc_aucpr_finetuned[iter] = average_precision_score(Ytscetuximab_nsclc.astype(int), PredTcetuximab_nsclc.detach().numpy())

max_key = max(AUCTcetuximab_aucpr_finetuned, key=AUCTcetuximab_aucpr_finetuned.get)
max_key_crc = max(AUCTcetuximab_crc_aucpr_finetuned, key=AUCTcetuximab_crc_aucpr_finetuned.get)
max_key_nsclc = max(AUCTcetuximab_nsclc_aucpr_finetuned, key=AUCTcetuximab_nsclc_aucpr_finetuned.get)

print(f"GDSC - Training AUC (EGFRi): {round(AUCT,2)}")
print(f"GDSC - Training AUCPR (EGFRi): {round(AUCTaucpr,2)}")
print(f"PDX - Test AUC (Cetuximab): {round(AUCTcetuximab,2)}")
print(f"PDX - Test AUCPR (Cetuximab): {round(AUCTcetuximab_aucpr,2)}")
print(f"PDX - Test AUC (Cetuximab, CRC): {round(AUCTcetuximab_crc,2)}")
print(f"PDX - Test AUCPR (Cetuximab, CRC): {round(AUCTcetuximab_crc_aucpr,2)}")
print(f"PDX - Test AUC (Cetuximab, NSCLC): {round(AUCTcetuximab_nsclc,2)}")
print(f"PDX - Test AUCPR (Cetuximab, NSCLC): {round(AUCTcetuximab_nsclc_aucpr,2)}\n")

print(f"GDSC - Training AUC (EGFRi, Finetuned): {round(AUCT_finetuned[max_key],2)}")
print(f"GDSC - Training AUCPR (EGFRi, Finetuned): {round(AUCTaucpr_finetuned[max_key],2)}")
print(f"PDX - Test AUC (Cetuximab, Finetuned): {round(AUCTcetuximab_finetuned[max_key],2)}")
print(f"PDX - Test AUCPR (Cetuximab, Finetuned): {round(AUCTcetuximab_aucpr_finetuned[max_key],2)}\n")

print(f"GDSC - Training AUC (EGFRi, Finetuned): {round(AUCT_finetuned[max_key_crc],2)}")
print(f"GDSC - Training AUCPR (EGFRi, Finetuned): {round(AUCTaucpr_finetuned[max_key_crc],2)}")
print(f"PDX - Test AUC (Cetuximab, CRC, Finetuned): {round(AUCTcetuximab_crc_finetuned[max_key_crc],2)}")
print(f"PDX - Test AUCPR (Cetuximab, CRC, Finetuned): {round(AUCTcetuximab_crc_aucpr_finetuned[max_key_crc],2)}\n\n")

print(f"GDSC - Training AUC (EGFRi, Finetuned): {round(AUCT_finetuned[max_key_nsclc],2)}")
print(f"GDSC - Training AUCPR (EGFRi, Finetuned): {round(AUCTaucpr_finetuned[max_key_nsclc],2)}")
print(f"PDX - Test AUC (Cetuximab, NSCLC, Finetuned): {round(AUCTcetuximab_nsclc_finetuned[max_key_nsclc],2)}")
print(f"PDX - Test AUCPR (Cetuximab, NSCLC, Finetuned): {round(AUCTcetuximab_nsclc_aucpr_finetuned[max_key_nsclc],2)}\n\n")

print("Models for maximum finetuning (Cetuximab)")
print(f"Exprs_Cetuximab_GDSC_PDX_EGFRi_Expression_Mutation_Finetuned_{max_key}.pt")
print(f"CNA_Cetuximab_GDSC_PDX_EGFRi_Expression_Mutation_Finetuned_{max_key}.pt")
print(f"Class_Cetuximab_GDSC_PDX_EGFRi_Expression_Mutation_Finetuned_{max_key}.pt")

print("\nModels for maximum finetuning (Cetuximab, CRC)")
print(f"Exprs_Cetuximab_GDSC_PDX_EGFRi_Expression_Mutation_Finetuned_{max_key_crc}.pt")
print(f"CNA_Cetuximab_GDSC_PDX_EGFRi_Expression_Mutation_Finetuned_{max_key_crc}.pt")
print(f"Class_Cetuximab_GDSC_PDX_EGFRi_Expression_Mutation_Finetuned_{max_key_crc}.pt")

print("\nModels for maximum finetuning (Cetuximab, NSCLC)")
print(f"Exprs_Cetuximab_GDSC_PDX_EGFRi_Expression_Mutation_Finetuned_{max_key_nsclc}.pt")
print(f"CNA_Cetuximab_GDSC_PDX_EGFRi_Expression_Mutation_Finetuned_{max_key_nsclc}.pt")
print(f"Class_Cetuximab_GDSC_PDX_EGFRi_Expression_Mutation_Finetuned_{max_key_nsclc}.pt")

GDSC - Training AUC (EGFRi): 0.82
GDSC - Training AUCPR (EGFRi): 0.4
PDX - Test AUC (Cetuximab): 0.56
PDX - Test AUCPR (Cetuximab): 0.11
PDX - Test AUC (Cetuximab, CRC): 0.65
PDX - Test AUCPR (Cetuximab, CRC): 0.18
PDX - Test AUC (Cetuximab, NSCLC): 0.83
PDX - Test AUCPR (Cetuximab, NSCLC): 0.2

GDSC - Training AUC (EGFRi, Finetuned): 0.73
GDSC - Training AUCPR (EGFRi, Finetuned): 0.36
PDX - Test AUC (Cetuximab, Finetuned): 0.73
PDX - Test AUCPR (Cetuximab, Finetuned): 0.21

GDSC - Training AUC (EGFRi, Finetuned): 0.73
GDSC - Training AUCPR (EGFRi, Finetuned): 0.36
PDX - Test AUC (Cetuximab, CRC, Finetuned): 0.67
PDX - Test AUCPR (Cetuximab, CRC, Finetuned): 0.3


GDSC - Training AUC (EGFRi, Finetuned): 0.8
GDSC - Training AUCPR (EGFRi, Finetuned): 0.41
PDX - Test AUC (Cetuximab, NSCLC, Finetuned): 0.91
PDX - Test AUCPR (Cetuximab, NSCLC, Finetuned): 0.33


Models for maximum finetuning (Cetuximab)
Exprs_Cetuximab_GDSC_PDX_EGFRi_Expression_Mutation_Finetuned_35.pt
CNA_Cetuximab_GDSC_PD

In [49]:
# create a dataframe with the prediction results 

em = {"Data": ["Expr + Mut", "Expr + Mut (Fine-tuned)"],
       "AUC (GDSC)": [round(AUCT, 2), round(AUCT_finetuned[max_key], 2)],
       "AUCPR (GDSC)": [round(AUCTaucpr,2), round(AUCTaucpr_finetuned[max_key],2)],
       "AUC (PDX)": [round(AUCTcetuximab,2), round(AUCTcetuximab_finetuned[max_key],2)],
       "AUCPR (PDX)":  [round(AUCTcetuximab_aucpr,2), round(AUCTcetuximab_aucpr_finetuned[max_key],2)],
       "AUC (PDX-CRC)":  [round(AUCTcetuximab_crc,2), round(AUCTcetuximab_crc_finetuned[max_key_crc],2)],
       "AUCPR (PDX-CRC)": [round(AUCTcetuximab_crc_aucpr,2), round(AUCTcetuximab_crc_aucpr_finetuned[max_key_crc],2)],
       "AUC (PDX-NSCLC)":  [round(AUCTcetuximab_nsclc,2), round(AUCTcetuximab_nsclc_finetuned[max_key_nsclc],2)],
       "AUCPR (PDX-NSCLC)": [round(AUCTcetuximab_nsclc_aucpr,2), round(AUCTcetuximab_nsclc_aucpr_finetuned[max_key_nsclc],2)],
       "Sample Size (GDSC)": [GDSCEv2.shape[0], GDSCEv2.shape[0]],
       "Feature Size (GDSC)": [GDSCEv2.shape[1], GDSCEv2.shape[1]] 
}
em_dataframe = pd.DataFrame.from_dict(em, orient='index').transpose() 

em_dataframe.to_csv(save_results_to + "GDSC_PDX_Expression_Mutation_EGFRi_Cetuximab.tsv",
                    sep = "\t",
                    index = False)

em_dataframe

Unnamed: 0,Data,AUC (GDSC),AUCPR (GDSC),AUC (PDX),AUCPR (PDX),AUC (PDX-CRC),AUCPR (PDX-CRC),AUC (PDX-NSCLC),AUCPR (PDX-NSCLC),Sample Size (GDSC),Feature Size (GDSC)
0,Expr + Mut,0.82,0.4,0.56,0.11,0.65,0.18,0.83,0.2,5633,873
1,Expr + Mut (Fine-tuned),0.73,0.36,0.73,0.21,0.67,0.3,0.91,0.33,5633,873


In [50]:
# show expression layer parameters

AutoencoderE

AEE(
  (EnE): Sequential(
    (0): Linear(in_features=873, out_features=512, bias=True)
    (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.5, inplace=False)
  )
)

In [51]:
# show mutation layer parameters

AutoencoderM

AEM(
  (EnM): Sequential(
    (0): Linear(in_features=873, out_features=256, bias=True)
    (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.8, inplace=False)
  )
)

In [52]:
# show classification layer parameters

Clas

Classifier(
  (FC): Sequential(
    (0): Linear(in_features=768, out_features=1, bias=True)
    (1): Dropout(p=0.8, inplace=False)
    (2): Sigmoid()
  )
)

In [53]:
# change directory

os.chdir(save_results_to)
print(os.getcwd())

/Volumes/Expansion/Thesis Work/Results/Transfer Learning/Drugs with same pathways/PDX_EGFRi/Expression_Mutation/Predictions


In [54]:
# save predictions

file = open('GDSC_PDX_DEGs - PDX_EGFRi_Cetuximab Predictions (Expression and Mutation).txt', 'w')
file.write(f"GDSC Training (EM) AUC (PDX_EGFRi): {round(AUCT,2)}\n")
file.write(f"GDSC Training (EM) AUCPR (PDX_EGFRi): {round(AUCTaucpr,2)}\n\n")

file.write(f"PDX Test (EM) AUC (Cetuximab): {round(AUCTcetuximab,2)}\n")
file.write(f"PDX Test (EM) AUCPR (Cetuximab): {round(AUCTcetuximab_aucpr,2)}\n\n")

file.close()