# MAF file and miRNA hit table

Read the MAF file in the same folder. Give me a table listing all miRNAs, both hit and non-hit, and the number of mutations on each. For example:

|miR|#hits|
|---|---|
|miR1|2|
|miR2|0|
|miR3|5|

## Reading MAF

In [1]:
import pandas as pd
from pylab import *

# Path of the MAF file to read; it is relative, so it is resolved against the current working directory.
FILE = "hgsc.bcm.edu__Mixed_curated_DNA_sequencing_level2.maf"

In [2]:
# Load only the columns the hit search reads (gene symbol, chromosome and
# start/end position) from the tab-separated MAF file.
maf = pd.read_csv(
    FILE,
    sep="\t",
    usecols=["Hugo_Symbol", "Chrom", "Start_Position", "End_Position"],
)
maf.head()

Unnamed: 0,Hugo_Symbol,Chrom,Start_Position,End_Position
0,A1BG,19,58864353,58864353
1,A1CF,10,52573773,52573773
2,A2ML1,12,8995922,8995922
3,A4GALT,22,43089055,43089055
4,A4GALT,22,43089757,43089757


## Reading miRBase

In [3]:
FILE = "../hw6/hsa.gff3"
# Tab-separated miRBase annotation table; anything from a "#" to the end of a
# line is ignored by the parser. Only the four columns used by the search are kept.
df = pd.read_csv(
    FILE,
    sep="\t",
    comment="#",
    usecols=["Chromosome", "Start", "End", "miR Name"],
)
df.head()

Unnamed: 0,Chromosome,Start,End,miR Name
0,chr1,17369,17436,ID=MI0022705;Alias=MI0022705;Name=hsa-mir-6859-1
1,chr1,17409,17431,ID=MIMAT0027618;Alias=MIMAT0027618;Name=hsa-mi...
2,chr1,17369,17391,ID=MIMAT0027619;Alias=MIMAT0027619;Name=hsa-mi...
3,chr1,30366,30503,ID=MI0006363;Alias=MI0006363;Name=hsa-mir-1302-2
4,chr1,30438,30458,ID=MIMAT0005890;Alias=MIMAT0005890;Name=hsa-mi...


## Defining a searching function

In [4]:
def pos2miR(chr, start, end, mirs=None):
    """Return the names of all miRNA entries that overlap a genomic interval.

    Parameters
    ----------
    chr : int or str
        Chromosome label without the "chr" prefix (e.g. 1 or "Y"). It is
        converted with str() and prefixed with "chr" before being compared
        with the "Chromosome" column. The parameter name shadows the builtin
        chr() inside this function; it is kept so existing calls still work.
    start, end : int
        Interval bounds, treated as inclusive on both sides. They are
        normalised, so passing them in reverse order is tolerated.
    mirs : pandas.DataFrame, optional
        Table with the columns "Chromosome", "Start", "End" and "miR Name".
        Defaults to the module-level ``df``.

    Returns
    -------
    list of str
        The value of the ``Name=`` attribute of every overlapping row, in
        table order. Empty when nothing overlaps.

    Raises
    ------
    IndexError
        If an overlapping row's "miR Name" field contains no ``Name=`` attribute.
    """
    table = df if mirs is None else mirs
    lo, hi = min(start, end), max(start, end)

    on_chrom = table["Chromosome"] == "chr" + str(chr)
    # Two closed intervals overlap iff each one starts no later than the other
    # ends. Testing only whether an endpoint of the query lies inside the
    # miRNA would miss a query that spans the whole miRNA.
    overlaps = (table["Start"] <= hi) & (table["End"] >= lo)

    attributes = table.loc[on_chrom & overlaps, "miR Name"]
    # get the name (either hsa-mir-### or hsa-miR-###)
    return [attrs.split("Name=")[1].split(";")[0] for attrs in attributes]

In [5]:
# Sanity check: a single-base interval (start == end) on chromosome 1, with the chromosome given as an int.
pos2miR(1, 17370, 17370)

['hsa-mir-6859-1', 'hsa-miR-6859-3p']

In [6]:
# Sanity check: the chromosome may also be given as a string label ("Y").
pos2miR("Y", 2609229, 2609229)

['hsa-mir-6089-2', 'hsa-miR-6089']

## Searching for hits

In [7]:
import sys
from collections import Counter

# Tally, per miRNA name, how many MAF rows overlap it. Names that are never
# hit get no key, but looking one up on a Counter still yields 0.
miR2hits = Counter()
for idx, row in maf.iterrows():
    # Progress indicator: the trailing carriage return lets the next write
    # overwrite this row index. sys.stdout.write / print(...) with a single
    # formatted string behave the same on Python 2 and Python 3, unlike the
    # Python 2-only print statement.
    sys.stdout.write("%s \r" % idx)
    hits = pos2miR(row["Chrom"], row["Start_Position"], row["End_Position"])
    for hit in hits:
        print("%s %s %s %s hits on %s" % (
            row["Hugo_Symbol"], row["Chrom"],
            row["Start_Position"], row["End_Position"], hit))
        miR2hits[hit] += 1

MAGEC3 X 140926205 140926205 hits on hsa-mir-320d-2
VARS2 6 30890885 30890885 hits on hsa-mir-4640


In [8]:
# Show the accumulated hit counts; only miRNAs hit at least once have an entry.
miR2hits

Counter({'hsa-mir-4640': 1, 'hsa-mir-320d-2': 1})

In [2]:
import pandas as pd
from pylab import *

# NOTE(review): absolute, user-specific path - this cell only runs on a machine
# that has this directory.
FILE = "C:/Users/jtso/My Documents/LAML1.txt"

# Read the gene, position and sample-barcode columns from the tab-separated file.
# This rebinds `maf`, replacing any table loaded under that name earlier in the session.
maf = pd.read_csv(
    FILE,
    sep="\t",
    usecols=["Hugo_Symbol", "Chromosome", "Start_Position", "End_Position", "Tumor_Sample_Barcode"],
)
maf.head()

Unnamed: 0,Hugo_Symbol,Chromosome,Start_Position,End_Position,Tumor_Sample_Barcode
0,MT-CYB,MT,14767,14767,TCGA-AB-2802-03B-01W-0728-08
1,TBX15,1,119270684,119270684,TCGA-AB-2802-03B-01W-0728-08
2,TCHHL1,1,150324146,150324146,TCGA-AB-2802-03B-01W-0728-08
3,DNMT3A,2,25310747,25310747,TCGA-AB-2802-03B-01W-0728-08
4,IDH1,2,208821357,208821357,TCGA-AB-2802-03B-01W-0728-08


In [3]:
# NOTE(review): absolute, user-specific path - this cell only runs on a machine
# that has this directory.
FILE = "C:/Users/jtso/My Documents/LUAD1.txt"

# Read the gene, position and sample-barcode columns from the tab-separated file.
# This rebinds `maf`, replacing any table loaded under that name earlier in the session.
maf = pd.read_csv(
    FILE,
    sep="\t",
    usecols=["Hugo_Symbol", "Chromosome", "Start_Position", "End_Position", "Tumor_Sample_Barcode"],
)
maf.head()

Unnamed: 0,Hugo_Symbol,Chromosome,Start_Position,End_Position,Tumor_Sample_Barcode
0,KDM5C,X,53230770,53230770,TCGA-05-4244-01A-01D-1105-08
1,HTR6,1,20005133,20005133,TCGA-05-4244-01A-01D-1105-08
2,OR4S2,11,55419045,55419045,TCGA-05-4244-01A-01D-1105-08
3,RTN1,14,60074059,60074059,TCGA-05-4244-01A-01D-1105-08
4,TNS4,17,38644981,38644981,TCGA-05-4244-01A-01D-1105-08


In [4]:
# NOTE(review): absolute, user-specific path - this cell only runs on a machine
# that has this directory.
FILE = "C:/Users/jtso/My Documents/LUSC1.txt"

# Read the gene, position and sample-barcode columns from the tab-separated file.
# This rebinds `maf`, replacing any table loaded under that name earlier in the session.
maf = pd.read_csv(
    FILE,
    sep="\t",
    usecols=["Hugo_Symbol", "Chromosome", "Start_Position", "End_Position", "Tumor_Sample_Barcode"],
)
maf.head()

Unnamed: 0,Hugo_Symbol,Chromosome,Start_Position,End_Position,Tumor_Sample_Barcode
0,AGRN,1,957793,957793,TCGA-18-3406-01A-01D-0983-08
1,AGRN,1,981368,981368,TCGA-18-3406-01A-01D-0983-08
2,GLTPD1,1,1263143,1263143,TCGA-18-3406-01A-01D-0983-08
3,ACTRT2,1,2939345,2939345,TCGA-18-3406-01A-01D-0983-08
4,CASP9,1,15844888,15844888,TCGA-18-3406-01A-01D-0983-08
