In [1]:
import pandas as pd

In [2]:
# Execute the project's enrichment script inside this notebook's namespace.
# NOTE(review): enrichmentAnalysisGprofiler, called in the cells below, is not defined in the
# visible cells, so it is presumably provided by this script — confirm.
%run ../scripts/ResultsAnalysis/enrichementAnalysis.py

### VI

In [None]:
# Read the VI gene set and collapse the table into a flat 1-D array.
VI = (
    pd.read_csv('../Data/GeneSets/VIsGenes.csv')
    .values
    .flatten()
)
# Enrichment analysis of the VI genes.
dfVIEnrichments = enrichmentAnalysisGprofiler(list(VI))

In [None]:
# Names of the enriched terms that contain the substring "viral".
[termName for termName in dfVIEnrichments['term_name'].values if 'viral' in termName]

### DEG

In [None]:
# Read the DEG gene set and collapse the table into a flat 1-D array.
DEG = (
    pd.read_csv('../Data/GeneSets/DEGsGenes.csv')
    .values
    .flatten()
)
# Enrichment analysis of the DEG genes.
dfDEGEnrichments = enrichmentAnalysisGprofiler(list(DEG))

In [None]:
# Names of the enriched terms that contain the substring "viral".
[termName for termName in dfDEGEnrichments['term_name'].values if 'viral' in termName]

### VI-unique neighbors

In [None]:
# Read the VI-unique neighbours gene set and collapse the table into a flat 1-D array.
VINeigh = (
    pd.read_csv('../Data/GeneSets/VINeighboursUniqueGenes.csv')
    .values
    .flatten()
)
# Enrichment analysis of the VI-unique neighbours.
dfVINeighEnrichments = enrichmentAnalysisGprofiler(list(VINeigh))

In [None]:
# Names of the enriched terms that contain the substring "viral".
[termName for termName in dfVINeighEnrichments['term_name'].values if 'viral' in termName]

In [None]:
# Save the enrichment table ordered by annotation source, then by p-value (index not written).
(
    dfVINeighEnrichments
    .sort_values(by=['source', 'p_value'])
    .to_csv('../Data/Results/VINeighborsUnique_enrichment.csv', index=False)
)

In [None]:
# Size of the flat query gene array next to the (rows, columns) of its enrichment table.
(VINeigh.shape, dfVINeighEnrichments.shape)

### DEG-unique neighbors

In [None]:
# Read the DEG-unique neighbours gene set and collapse the table into a flat 1-D array.
DEGNeigh = (
    pd.read_csv('../Data/GeneSets/DEGNeighboursUniqueGenes.csv')
    .values
    .flatten()
)
# Enrichment analysis of the DEG-unique neighbours.
dfDEGNeighnrichments = enrichmentAnalysisGprofiler(list(DEGNeigh))

In [None]:
# Names of the enriched terms that contain the substring "viral".
[termName for termName in dfDEGNeighnrichments['term_name'].values if 'viral' in termName]

In [None]:
# Save the enrichment table ordered by annotation source, then by p-value (index not written).
(
    dfDEGNeighnrichments
    .sort_values(by=['source', 'p_value'])
    .to_csv('../Data/Results/DEGNeighborsUnique_enrichment.csv', index=False)
)


In [None]:
# Size of the flat query gene array next to the (rows, columns) of its enrichment table.
(DEGNeigh.shape, dfDEGNeighnrichments.shape)

### Background

In [None]:
# Background genes are the rows of allGenes.csv whose 'Gene Description' is 'Background'.
dfTmp = pd.read_csv('../Data/GeneSets/allGenes.csv')
Background = dfTmp.loc[dfTmp['Gene Description'] == 'Background', 'Gene'].values
# Enrichment analysis of the background genes.
dfBackgroundEhnrichments = enrichmentAnalysisGprofiler(list(Background))

In [None]:
# Names of the enriched terms that contain the substring "viral".
[termName for termName in dfBackgroundEhnrichments['term_name'].values if 'viral' in termName]

In [None]:
# Save the enrichment table ordered by annotation source, then by p-value (index not written).
(
    dfBackgroundEhnrichments
    .sort_values(by=['source', 'p_value'])
    .to_csv('../Data/Results/Background_enrichment.csv', index=False)
)


In [None]:
# Size of the background gene array next to the (rows, columns) of its enrichment table.
(Background.shape, dfBackgroundEhnrichments.shape)

### All Common Neighbors

In [4]:
# Read the common-neighbours gene set and collapse the table into a flat 1-D array.
CN = (
    pd.read_csv('../Data/GeneSets/CommonNeighboursGenes.csv')
    .values
    .flatten()
)
# Enrichment analysis of the common neighbours.
dfCNEnrichments = enrichmentAnalysisGprofiler(list(CN))

In [5]:
# Term names containing "viral", then the matching rows reduced to the columns of interest.
viralTerms = [termName for termName in dfCNEnrichments['term_name'].values if 'viral' in termName]
dfCNEnrichments.loc[
    dfCNEnrichments['term_name'].isin(viralTerms),
    ['p_value', 'source', 'term_name'],
]

Unnamed: 0,p_value,source,term_name
57,5.932951e-52,GO:BP,viral process
334,1.272541e-15,GO:BP,viral transcription
354,1.579887e-14,GO:BP,viral gene expression
1005,2.358386e-05,GO:BP,viral life cycle
1956,0.04717683,GO:BP,positive regulation of viral process


In [None]:
# Write the common-neighbours enrichment table to disk, without the index column.
dfCNEnrichments.to_csv(
    path_or_buf='../Data/Results/CommonNeighbours_enrichment.csv',
    index=False,
)

##### Plot 

In [20]:
# Load the rpy2 extension, which provides the %%R cell magic used by the plotting cell below.
%load_ext rpy2.ipython
# Same gene list as the common-neighbours enrichment above, this time with toPlot=True.
# NOTE(review): presumably toPlot=True makes the helper return the extra columns read by the
# R code (query_size, term_size, effective_domain_size, ...) — confirm in enrichementAnalysis.py.
dfCNEnrichmentsPlot = enrichmentAnalysisGprofiler(list(CN), toPlot=True)
# Show which columns are available for plotting.
dfCNEnrichmentsPlot.columns

In [None]:
%%R -i dfCNEnrichmentsPlot -w 8 -h 2 --units in -r 400
# dfCNEnrichmentsPlot is passed in from the Python namespace through -i.
# Bar plot of the selected viral-process terms enriched among the common neighbours;
# the figure is saved as a PNG under ../Plots.

#Import libraries
library(enrichplot)
library(DOSE)
library(grid)
library(ggplot2)


#Prepare the data for plotting
gp_mod <- dfCNEnrichmentsPlot[,c("source", "term_id",
                            "term_name", "p_value", "query_size",
                            "intersection_size", "term_size",
                            "effective_domain_size")]
gp_mod$GeneRatio = paste0(gp_mod$intersection_size,  "/", gp_mod$query_size)
gp_mod$BgRatio = paste0(gp_mod$term_size, "/", gp_mod$effective_domain_size)
# Rename the columns to the names used in an enrichResult table
# (p_value becomes p.adjust, intersection_size becomes Count).
names(gp_mod) = c("Category", "ID", "Description", "p.adjust",
                  "query_size", "Count", "term_size", "effective_domain_size",
                  "GeneRatio", "BgRatio")
row.names(gp_mod) = gp_mod$ID

#Selecting the list of terms we want to plot
termsToPlot <- c('GO:0016032', 'GO:0019083', 'GO:0019080', 'GO:0019058', 'GO:0048524')

# Indexing a data frame by a row name that does not exist yields an all-NA row,
# so only the terms actually present in the results are kept; the others are reported.
missingTerms <- setdiff(termsToPlot, row.names(gp_mod))
if (length(missingTerms) > 0) {
    warning("Terms absent from the enrichment results, not plotted: ",
            paste(missingTerms, collapse = ", "))
}
termsToPlot <- intersect(termsToPlot, row.names(gp_mod))
if (length(termsToPlot) == 0) {
    stop("None of the selected terms is present in the enrichment results")
}

#Define as enrichResult object
gp_mod_enrich = new("enrichResult", result = gp_mod[termsToPlot,])


#Create plot
barplot(gp_mod_enrich, showCategory = 40, font.size = 8) + ggplot2::ylab("Intersection size")+ ggplot2::theme(plot.margin = unit(c(0,0,0,2), "cm"))

#Save plot
ggsave("../Plots/Enrichments_CommonNeighbors_ViralProcesses.png")

### Enrichment of targeted common neighbours

#### Adding gene set information

In [9]:
# Annotate every predicted drug-target interaction with the gene-set label of its gene.
allGenes = pd.read_csv('../Data/GeneSets/allGenes.csv')
predictedDTIs = pd.read_csv('../Data/Results/predictedDTIs.csv')

# Inner join on the gene symbol, then assign the final column names by position.
dfDTIsGeneSet = predictedDTIs.merge(allGenes, on='Gene')
dfDTIsGeneSet.columns = ['Gene', 'DrugBank ID', 'Score', 'Gene Description']
dfDTIsGeneSet

Unnamed: 0,Gene,DrugBank ID,Score,Gene Description
0,KIT,DB09079,0.915800,DEG-unique neighbor
1,KIT,DB06626,0.329777,DEG-unique neighbor
2,PDGFRB,DB08901,0.859385,Common neighbor
3,PDGFRB,DB09078,0.816084,Common neighbor
4,UQCR11,DB04141,0.843258,Background
...,...,...,...,...
809,CHRNA7,DB00981,0.298938,DEG-unique neighbor
810,CHRNA7,DB00572,0.296752,DEG-unique neighbor
811,PDE10A,DB00651,0.298120,Common neighbor
812,ANXA1,DB01185,0.297246,Common neighbor


#### Adding drugs information

In [10]:
# Attach the drug name and status to every predicted drug-target interaction.
dfDrugGroup = pd.read_csv('../Data/Raw/Drug_Groups.csv')[['DrugBank ID', 'DrugBank Name', 'Group']]
dfDrugGroup.columns = ['DrugBank ID', 'DrugBank Name', 'Drug Status']
dfDTIsGeneSetDrugInfo = pd.merge(dfDTIsGeneSet, dfDrugGroup, on='DrugBank ID')

# Some drugs are listed under several statuses; only approved/experimental rows are kept.
dfApprovedOrExperimental = dfDTIsGeneSetDrugInfo[
    dfDTIsGeneSetDrugInfo['Drug Status'].isin(['approved', 'experimental'])
]

# When the same (gene, drug) pair is both approved and experimental, keep the approved row.
# The stable sort places 'approved' before 'experimental', so keep='first' always retains the
# approved row regardless of the row order in Drug_Groups.csv; sort_index() then restores the
# original merge order.
dfDTIsGeneSetDrugInfo = (
    dfApprovedOrExperimental
    .sort_values('Drug Status', kind='mergesort')
    .drop_duplicates(subset=['Gene', 'DrugBank ID'], keep='first')
    .sort_index()
)

# Index labels of the rows removed as duplicates (name kept for backward compatibility).
indexDrugExperimentalAlsoApproved = dfApprovedOrExperimental.index.difference(dfDTIsGeneSetDrugInfo.index)

dfDTIsGeneSetDrugInfo.to_csv('../Data/Results/predictedDTIsInfo.csv')

In [11]:
# Keep only interactions whose drug is approved and whose gene is a common neighbour.
dfDTIsCommonNeighApproved = dfDTIsGeneSetDrugInfo[
    (dfDTIsGeneSetDrugInfo['Drug Status'] == 'approved')
    & (dfDTIsGeneSetDrugInfo['Gene Description'] == 'Common neighbor')
]
dfDTIsCommonNeighApproved

Unnamed: 0,Gene,DrugBank ID,Score,Gene Description,DrugBank Name,Drug Status
2,ABL1,DB09079,0.445404,Common neighbor,Nintedanib,approved
3,FGFR4,DB09079,0.302215,Common neighbor,Nintedanib,approved
6,PDGFRA,DB06626,0.297646,Common neighbor,Axitinib,approved
8,PDGFRB,DB08901,0.859385,Common neighbor,Ponatinib,approved
12,FLT4,DB08901,0.628299,Common neighbor,Ponatinib,approved
...,...,...,...,...,...,...
1145,CASP3,DB01169,0.361553,Common neighbor,Arsenic trioxide,approved
1147,AKR1B1,DB03147,0.357395,Common neighbor,Flavin adenine dinucleotide,approved
1150,CA9,DB00819,0.316179,Common neighbor,Acetazolamide,approved
1152,TLR4,DB02325,0.351178,Common neighbor,Isopropyl alcohol,approved


In [12]:
# Distinct common-neighbour genes targeted by at least one approved drug.
CNTargeted = dfDTIsCommonNeighApproved['Gene'].unique()
print(len(CNTargeted), ' targeted common neighbors')

# Enrichment analysis of the targeted common neighbours.
dfCNTargeted = enrichmentAnalysisGprofiler(list(CNTargeted))

49  targeted common neighbors


In [None]:
# dfCNTargeted was already computed from CNTargeted in the previous cell, so it is saved
# directly instead of repeating the identical enrichment query.
dfCNTargeted.to_csv('../Data/Results/TargetedCommonNeighbours_enrichment.csv', index=False)

##### Plot 

In [None]:
# Load the rpy2 extension, which provides the %%R cell magic used by the plotting cell below.
# The same extension is also loaded by the earlier plot section of this notebook.
%load_ext rpy2.ipython
# Same targeted-common-neighbour gene list as above, this time with toPlot=True.
# NOTE(review): presumably toPlot=True makes the helper return the extra columns read by the
# R code (query_size, term_size, effective_domain_size, ...) — confirm in enrichementAnalysis.py.
dfCNTargetedPlot = enrichmentAnalysisGprofiler(list(CNTargeted), toPlot=True)
# Show which columns are available for plotting.
dfCNTargetedPlot.columns

In [None]:
%%R -i dfCNTargetedPlot -w 8 -h 5 --units in -r 400
# dfCNTargetedPlot is passed in from the Python namespace through -i.
# Bar plot of a hand-picked list of GO / KEGG / Reactome terms enriched among the
# targeted common neighbours; the figure is saved as a PNG under ../Plots.

#Import libraries
library(enrichplot)
library(DOSE)
library(grid)
library(ggplot2)


#Prepare the data for plotting
gp_mod <- dfCNTargetedPlot[,c("source", "term_id",
                            "term_name", "p_value", "query_size",
                            "intersection_size", "term_size",
                            "effective_domain_size")]
gp_mod$GeneRatio = paste0(gp_mod$intersection_size,  "/", gp_mod$query_size)
gp_mod$BgRatio = paste0(gp_mod$term_size, "/", gp_mod$effective_domain_size)
# Rename the columns to the names used in an enrichResult table
# (p_value becomes p.adjust, intersection_size becomes Count).
names(gp_mod) = c("Category", "ID", "Description", "p.adjust",
                  "query_size", "Count", "term_size", "effective_domain_size",
                  "GeneRatio", "BgRatio")
row.names(gp_mod) = gp_mod$ID

#Selecting the list of terms we want to plot
termsToPlot <- c('GO:0007186', 'GO:0098664', 'GO:0007188', 'GO:0007193', 'GO:0007200', 'GO:0008227', 'GO:0004993',
                 'GO:0000165', 'GO:0043408', 'GO:0043410',
                 'GO:0038086', 'GO:0038091',
                 'GO:0070371', 'GO:0070372', 'GO:0070374',
                 'GO:0006198', 'GO:0046058', 'GO:0043951', 'GO:0043949', 'GO:0019933', 'GO:0030552',
                 'GO:0045834', 'GO:0008015', 'GO:0034702', 'GO:0099589', 'GO:0051378', 'GO:0042417', 'GO:1903351',
                 'GO:1903350', 'GO:0004517',
                 'KEGG:04151', 'KEGG:04014', 'KEGG:04010', 'KEGG:04024', 'KEGG:04370',
                 'REAC:R-HSA-375280', 'REAC:R-HSA-1280215', 'REAC:R-HSA-392154')

# Indexing a data frame by a row name that does not exist yields an all-NA row,
# so only the terms actually present in the results are kept; the others are reported.
missingTerms <- setdiff(termsToPlot, row.names(gp_mod))
if (length(missingTerms) > 0) {
    warning("Terms absent from the enrichment results, not plotted: ",
            paste(missingTerms, collapse = ", "))
}
termsToPlot <- intersect(termsToPlot, row.names(gp_mod))
if (length(termsToPlot) == 0) {
    stop("None of the selected terms is present in the enrichment results")
}

#Define as enrichResult object
gp_mod_enrich = new("enrichResult", result = gp_mod[termsToPlot,])


#Create plot
barplot(gp_mod_enrich, showCategory = 40, font.size = 8) + ggplot2::ylab("Intersection size")+ ggplot2::theme(plot.margin = unit(c(0,0,0,2), "cm"))

#Save plot
ggsave("../Plots/Enrichments_targetedCommonNeighbors.png")

In [None]:
# Write the targeted-common-neighbour enrichment table to disk, without the index column.
dfCNTargeted.to_csv(
    path_or_buf='../Data/Results/TargetedCommonNeighbours_enrichment.csv',
    index=False,
)