In [84]:
#Purpose: find out mutations that co-occur with gene of interest from MC3 MAF

#Using mutation file derived from MC3 MAF with cancer type added to last column
#https://www.synapse.org/#!Synapse:syn7214402/wiki/405297

#Gene of interest: every other gene is reported relative to samples mutated in this gene
main_gene = "ERBB2"

#Shared containers filled in by the sections below
mutations = dict()          #gene -> {sample -> number of retained mutations}
cancertype_lookup = dict()  #sample -> cancer type (last column of the mutation file)
finallist = dict()          #gene -> {sample -> count}, only for samples mutated in main_gene
finalsamples = list()       #samples mutated in main_gene, in the order they are first seen

############################################################
#GO THROUGH MUTATION FILE TO DETERMINE MUTATIONAL LANDSCAPE#
############################################################

#Mutation File

import re

#Location of the MC3 MAF (cancer type appended as the last column).
#For a quick dry run, point MAF_PATH at a small test file instead.
MAF_PATH = "/Users/rjayasin/Desktop/OneDrive/Working Directory/co_occuring/mc3.v0.2.8.CONTROLLED.filter.ct.maf"

#Values of data[8] (the mutation type) that are ignored
SKIPPED_MUTATION_TYPES = {"Silent", "Intron", "RNA", "5'Flank", "3'Flank", "IGR"}

#The with-block guarantees the file is closed, even if a row fails to parse
with open(MAF_PATH) as og_maf:
    for line in og_maf:
        stripped = line.strip()
        #Blank lines and '#'-prefixed comment lines carry no mutation record;
        #without this guard they would raise IndexError on data[8] below
        if not stripped or stripped.startswith("#"):
            continue

        data = stripped.split('\t')

        #Skip header
        if data[0] == 'Hugo_Symbol':
            continue

        #Skip certain mutation types
        if data[8] in SKIPPED_MUTATION_TYPES:
            continue

        #Maintain relevant data (Gene, Sample, CancerType)
        gene = data[0]
        sample = data[15]
        #The cancer type is the last element of the row in our case
        cancer = data[-1]
        cancertype_lookup[sample] = cancer #doesn't matter if it's overwritten

        #Store Mutation Data: mutations[gene][sample] counts the retained
        #mutations of this gene in this sample (starts at 1, incremented after)
        sample_counts = mutations.setdefault(gene, {})
        sample_counts[sample] = sample_counts.get(sample, 0) + 1

print("Finished Processing MC3 Mutation File")

################################################
#DETERMINE GENES THAT CO-MUTATED WITH MAIN_GENE#
################################################
#Go through the per-gene mutation counts and keep only the (gene, sample) pairs whose sample
#also carries a mutation in the main gene

#Samples that carry at least one retained mutation in main_gene.
#Looked up once; an empty dict (main_gene never seen) simply yields no co-mutated genes.
main_gene_samples = mutations.get(main_gene, {})

#Set mirror of finalsamples so the "already recorded?" check is O(1);
#finalsamples itself stays a list to preserve first-seen order for the matrix columns.
seen_samples = set(finalsamples)

for genei, sample_counts in mutations.items():
    for sn, count in sample_counts.items():
        #If the sample has no mutation in main gene then skip this sample,gene mutation pair
        if sn not in main_gene_samples:
            continue
        #Both main_gene and the current gene are mutated in this sample: store the count.
        #The gene's entry is created on first use, so genes without any co-mutated
        #sample never appear in finallist. main_gene itself is included as a row.
        finallist.setdefault(genei, {})[sn] = count
        if sn not in seen_samples:
            seen_samples.add(sn)
            finalsamples.append(sn)

print("Finished evaluating genes that are co-mutated with gene of interest")

####################
#PRINT FINAL MATRIX#
####################
#Print header
#One "<cancer type>_<sample>" label per co-mutated sample. No label is written for the
#gene column, so the header row has one field fewer than the data rows.
header = [cancertype_lookup[sample] + "_" + sample for sample in finalsamples]

#Print final matrix with only genes that have mutations co-occuring with main_gene.
#The with-block closes the file even if a write fails part-way through.
with open("co_occuring.txt", "w") as fh:
    fh.write("\t".join(header)+"\n")

    for genex in finallist:
        sample_counts = finallist[genex]
        #Gene name followed by its count in every final sample; 0 where the gene
        #is not mutated in that sample
        row = [genex] + [str(sample_counts.get(sample1, 0)) for sample1 in finalsamples]
        fh.write("\t".join(row) + "\n")

print("Finished final matrix")

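#Final Format (the header row is written without the leading "Gene" label; it is shown here only for readability)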
#Gene    GBM_TCGA-06-5416-01A-01D-1486-08        LUAD_TCGA-17-Z049-01A-01W-0746-08       GBM_TCGA-19-5956-01A-11D-1696-08        LUAD_TCGA-86-6562-01A-11D-1753-08       UCEC_TCGA-A5-A0G1-01A-11D-A122-09
#TACC2   7       1       2       1       3       12      1       2       1       2       1       3       2       2       1       1       2       2       1       1       1       1       2       1       1
#PANX3   1       0       1       0       2       1       0       1       0       0       0       1       2       1       0       0       0       1       0       0       0       1       0       0       0
#SPI1    2       0       0       0       0       1       0       0       0       0       0       0       1       0       0       1       0       0       0       0       0       0       0       0       0



Finished Processing MC3 Mutation File
Finished evaluating genes that are co-mutated with gene of interest
Finished final matrix


In [None]:
#Runs the script plot_mutual_exclusive_mutation.py with "co_occuring_top20" as its only argument.
#NOTE(review): no cell shown here creates "co_occuring_top20" - presumably a top-20 subset of
#co_occuring.txt prepared separately; confirm where it comes from before re-running.
%run plot_mutual_exclusive_mutation.py co_occuring_top20 
#Below plotting script written by Wen-Wei Liao

In [92]:
#Plotting below for above matrix

import argparse
from collections import defaultdict
from os.path import splitext, basename

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches


#Matrix written by the co-occurrence cell above. The previous bare open() of this
#file was dropped: the handle was never read and never closed.
MATRIX_PATH = "co_occuring.txt"

parser = argparse.ArgumentParser()
parser.add_argument("input")
#Parse an explicit argument list. Inside a notebook sys.argv holds the kernel's own
#launch options (e.g. "-f <connection file>"), so a bare parse_args() would reject
#them and abort the cell instead of picking up the matrix path.
args = parser.parse_args([MATRIX_PATH])

#Helvetica text, and TrueType (type 42) fonts embedded in PDF/PS output
plt.rc('font', family='Helvetica')
mpl.rcParams['pdf.fonttype'] = 42
mpl.rcParams['ps.fonttype'] = 42

#Cancer type abbreviation -> hex colour used for the column colour bar and the legend
cancer_lut = {
    'ACC': '#C1A72F',
    'BLCA': '#FAD2D9',
    'BRCA': '#ED2891',
    'CESC': '#F6B667',
    'CHOL': '#104A7F',
    'COAD': '#9EDDF9',
    'DLBC': '#3953A4',
    'ESCA': '#007EB5',
    'GBM': '#B2509E',
    'HNSC': '#97D1A9',
    'KICH': '#ED1C24',
    'KIRC': '#F8AFB3',
    'KIRP': '#EA7075',
    'LAML': '#754C29',
    'LGG': '#D49DC7',
    'LIHC': '#CACCDB',
    'LUAD': '#D3C3E0',
    'LUSC': '#A084BD',
    'MESO': '#542C88',
    'OV': '#D97D25',
    'PAAD': '#6E7BA2',
    'PCPG': '#E8C51D',
    'PRAD': '#7E1918',
    'READ': '#DAF1FC',
    'SARC': '#00A99D',
    'SKCM': '#BBD642',
    'STAD': '#00AEEF',
    'TGCT': '#BE1E2D',
    'THCA': '#F9ED32',
    'THYM': '#CEAC8F',
    'UCEC': '#FBE3C7',
    'UCS': '#F89420',
    'UVM': '#009444',
}

#Load the matrix; the first column becomes the row index
df = pd.read_table(args.input, index_col=0)

#Sort the columns by their values in each row, taken in row order, largest first
row_labels = list(df.index.values)
res = df.T.sort_values(row_labels, ascending=False)
df = res.T
#Zeros become NaN
df = df.replace(0, np.nan)

#The cancer type is the part of each column label before the first underscore
cancers = [column_label.split("_")[0] for column_label in df.columns.values]
#Column label -> colour of its cancer type, used for the colour bar above the heatmap
cancer_colors = pd.Series(
    {
        column_label: cancer_lut[cancer_type]
        for column_label, cancer_type in zip(df.columns.values, cancers)
    }
)

sns.set()
#Row and column clustering are both off, so the existing order of df is kept
cm = sns.clustermap(df, cmap=sns.color_palette("Set2", 5), figsize=(24, 6),
                    col_colors=cancer_colors, row_cluster=False,
                    col_cluster=False)
#Hide the colour bar axis
cm.cax.set_visible(False)
ax = cm.ax_heatmap
#Tick labels on the left only. These flags must be real booleans: the strings
#"on"/"off" are both truthy, so "off" would not hide the labels.
ax.tick_params(labelleft=True, labelright=False, labeltop=False,
               labelbottom=False, labelsize="large")

#One legend patch per cancer type present in the matrix, in alphabetical order
handles = [mpatches.Patch(color=cancer_lut[cancer], label=cancer)
           for cancer in sorted(set(cancers))]
l = ax.legend(loc='center left', bbox_to_anchor=(1.08,0.58),
              handles=handles,frameon=False, fontsize=10,
              mode="expand", ncol=2)
l.set_title(title='Cancer type\n',prop={'size':14})
frame = l.get_frame()
frame.set_facecolor("white")

#Save next to the notebook as "<input file name without extension>.pdf"
prefix = splitext(basename(args.input))[0]
plt.savefig("{0}.pdf".format(prefix), transparent=True)
plt.close()




Finished final matrix
