# Disease gene annotations
This script annotates constrained genes with disease annotations from OMIM.

## Preliminaries

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns

# Apply seaborn's "talk" plotting context to any figures drawn later on.
sns.set_context("talk")

### Read the data

In [133]:
# Column labels assigned to ../data/omim_genemap2.txt, in file order. They are
# supplied here (header=None) rather than read from the file.
names = [
    "chr",
    "start",
    "end",
    "Cyto Location",
    "Computed Cyto Location",
    "mim",
    "Gene Symbols",
    "Gene Name",
    "hgnc",
    "Entrez Gene ID",
    "ensg",
    "Comments",
    "phenotypes",
    "Mouse Gene Symbol/ID",
]

# Only these six columns are loaded.
usecols = ["chr", "mim", "hgnc", "ensg", "Comments", "phenotypes"]

# Tab-separated read; lines starting with "#" are skipped.
# NOTE(review): pandas' comment="#" also cuts a data line at the first "#" it
# finds, so confirm that no retained field can contain that character.
# Missing values become empty strings so the string tests applied to
# `phenotypes` afterwards see plain strings.
gene = pd.read_csv(
    "../data/omim_genemap2.txt",
    sep="\t",
    comment="#",
    header=None,
    names=names,
    usecols=usecols,
).fillna("")

In [134]:
# Build one boolean mask per exclusion rule on the `phenotypes` text.
# The prefix tests look only at the very start of the field, so if a field
# holds several phenotypes only the first one is examined.
# NOTE(review): "[", "{" and "?" appear to be OMIM's phenotype-prefix
# symbols; confirm their exact meaning against the OMIM documentation.
phenotype_text = gene.phenotypes.str
has_bracket_prefix = phenotype_text.startswith("[")
has_brace_prefix = phenotype_text.startswith("{")
has_question_prefix = phenotype_text.startswith("?")
mentions_somatic = phenotype_text.contains(", somatic", regex=False)

# A row is dropped when any one of the rules matches; rows with an empty
# phenotypes field match none of them and are therefore kept.
excluded = (
    has_bracket_prefix
    | has_brace_prefix
    | has_question_prefix
    | mentions_somatic
)
gene = gene.loc[~excluded]

gene.shape

(16618, 6)

In [173]:
# Column labels assigned to ../data/omim_morbidmap.txt, in file order; only
# the phenotype text and the MIM number are loaded.
names = ["phenotype", "Gene Symbols", "mim", "Cyto Location"]
usecols = ["phenotype", "mim"]

# Tab-separated read with caller-supplied column names (header=None); lines
# starting with "#" are skipped.
morbid = pd.read_csv(
    "../data/omim_morbidmap.txt",
    sep="\t",
    comment="#",
    header=None,
    names=names,
    usecols=usecols,
)

# One mask per rule on the phenotype text.
# NOTE(review): "[", "{" and "?" appear to be OMIM's phenotype-prefix
# symbols; confirm their exact meaning against the OMIM documentation.
phenotype_text = morbid.phenotype.str
m1 = phenotype_text.startswith("[")
m2 = phenotype_text.startswith("{")
m3 = phenotype_text.startswith("?")
# "(3)": the molecular basis of the disorder is known.
m4 = phenotype_text.endswith("(3)")
m5 = phenotype_text.contains(", somatic", regex=False)

# Keep rows that end in "(3)" and match none of the exclusion rules.
morbid = morbid.loc[m4 & ~(m1 | m2 | m3 | m5)]



In [158]:
# Column labels assigned to ../data/omim_mim2gene.txt, in file order; only
# the MIM number, HGNC symbol and Ensembl gene ID columns are loaded.
names = ["mim", "entry_type", "entrez", "hgnc", "ensg"]
usecols = ["mim", "hgnc", "ensg"]

# Tab-separated read with caller-supplied column names (header=None); lines
# starting with "#" are skipped.
mim = pd.read_csv(
    "../data/omim_mim2gene.txt",
    sep="\t",
    comment="#",
    header=None,
    names=names,
    usecols=usecols,
)

In [169]:
# Attach the hgnc and ensg columns from `mim` to every retained morbidmap row.
# The join key is named explicitly: without `on`, pandas joins on every column
# name the two frames share, so adding a common column to either frame later
# would silently change the join. how="left" keeps all `morbid` rows and
# leaves hgnc/ensg missing where the MIM number has no match in `mim`.
df = morbid.merge(mim, how="left", on="mim")