In [4]:
import omim
from omim import util
from omim.db import Manager, OMIM_DATA


In [8]:
# open the database file referenced by omim.DEFAULT_DB
manager = Manager(dbfile=omim.DEFAULT_DB)

# show columns: render the column overview first, then print it
columns_table = util.get_columns_table()
print(columns_table)

# show stats: the helper returns a pair, printed one element per line
# (in the recorded run: a date, then a MIM_TYPE / COUNT table)
generated, table = util.get_stats_table(manager)
print(generated, table, sep='\n')



[2022-06-02 16:51:55 Manager __exit__ DEBUG MainThread:33] database closed.


+------------------+-----------------------+--------------+
| Key              | Comment               | Type         |
+------------------+-----------------------+--------------+
| mim_number       | MIM Number            | VARCHAR(10)  |
| prefix           | The prefix symbol     | VARCHAR(1)   |
| title            | The title             | VARCHAR(50)  |
| references       | The references        | VARCHAR(300) |
| geneMap          | The geneMap data      | VARCHAR(300) |
| phenotypeMap     | The phenotypeMap data | VARCHAR(300) |
| mim_type         | The mim_type          | VARCHAR(20)  |
| entrez_gene_id   | The entrez_gene_id    | VARCHAR(20)  |
| ensembl_gene_id  | The ensembl_gene_id   | VARCHAR(20)  |
| hgnc_gene_symbol | The hgnc_gene_symbol  | VARCHAR(20)  |
| generated        | The generated time    | DATETIME     |
+------------------+-----------------------+--------------+
2021-04-20
+--------------------------+-------+
| MIM_TYPE                 | COUNT |
+--------------

In [9]:
# count the records returned by an unfiltered OMIM_DATA query
all_entries = manager.query(OMIM_DATA)
all_entries.count()



27142

In [11]:
# key-value query: column name 'prefix', value '*'
res = manager.query(OMIM_DATA, 'prefix', '*')

# fetch the first match and then the complete list of matches
item, items = res.first(), res.all()

# inspect one record (selected attributes, then its dict form)
# and report how many records matched in total
print(item.mim_number, item.title)
print(item.as_dict, len(items), sep='\n')

100640 ALDEHYDE DEHYDROGENASE 1 FAMILY, MEMBER A1; ALDH1A1
{'prefix': '*', 'references': '3943866, 6723659, 2591967, 2987944, 3013004, 224930, 26430123, 6127541, 2729894, 17529981', 'phenotypeMap': None, 'entrez_gene_id': '216', 'hgnc_gene_symbol': 'ALDH1A1', 'geneMap': None, 'title': 'ALDEHYDE DEHYDROGENASE 1 FAMILY, MEMBER A1; ALDH1A1', 'mim_number': '100640', 'mim_type': 'gene', 'ensembl_gene_id': 'ENSG00000165092', 'generated': datetime.datetime(2021, 4, 14, 0, 0)}
16458


In [None]:
# Additional query examples, using the same key-value call shape on other columns.
# NOTE: each call rebinds `res`, so only the result of the last query is kept.
res = manager.query(OMIM_DATA, 'mim_number', '600799')
res = manager.query(OMIM_DATA, 'hgnc_gene_symbol', 'BMPR2')
res = manager.query(OMIM_DATA, 'geneMap', '%Pulmonary hypertension%')  # fuzzy query; `%` presumably acts as a LIKE-style wildcard - TODO confirm against omim.db

In [12]:
import requests
from bs4 import BeautifulSoup
import time

In [15]:
# OMIM geneMap search for "ALZHEIMER", requested in TSV format
# (start=1, limit=100 as encoded in the query string)
url = 'https://www.omim.org/search/?index=geneMap&start=1&search=ALZHEIMER&limit=100&format=tsv'
s = requests.Session()
# send a browser-like User-Agent header with the request
headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:45.0) Gecko/20100101 Firefox/45.0'}

# requests waits forever unless a timeout is given; bound the wait (in seconds)
# so an unresponsive server raises requests.exceptions.Timeout instead of
# hanging the notebook kernel
r = s.get(url, headers=headers, timeout=30)
if r.status_code == 200:
    # save the raw response body for the parsing step
    with open('alzheimer_search_result.txt', 'w') as f:
        f.write(r.text)
else:
    # report the status code so the failure can be diagnosed
    print('error!!! HTTP status {}'.format(r.status_code))
    



In [19]:
print('combine the search result...')

# read the saved search result; the context manager closes the file handle
# (the bare open(...).readlines() it replaces left the handle open)
with open('alzheimer_search_result.txt', 'r') as result_file:
    search_result = result_file.readlines()

# we want to put the gene and mim_number into a dict
gene2mim_number = {}

for raw_line in search_result:
    # drop only the line terminator: a full strip() would also remove leading
    # or trailing tabs and shift the column positions when an edge field is empty
    row = raw_line.rstrip('\r\n')

    # skip blank lines and the 'Downloaded' / 'Cytogenetic' lines that the
    # export contains in addition to the data rows
    if not row.strip():
        continue
    if row.strip().startswith(('Downloaded', 'Cytogenetic')):
        continue

    fields = row.split('\t')
    # gene is read from column index 2 and mim_number from index 4; rows
    # without that many columns (including lines with no tab at all) are skipped
    # instead of raising IndexError
    if len(fields) < 5:
        continue

    # keep only the first symbol when several are listed, comma-separated
    gene = fields[2].split(',')[0].strip()
    mim_number = fields[4].strip()

    # the first mim_number seen for a gene wins; later duplicates are ignored
    gene2mim_number.setdefault(gene, mim_number)

for gene, mim_number in gene2mim_number.items():
    print(gene + ' : ' + mim_number)



combine the search result...
AD7CNTP : 607413
AD13 : 611152
AD14 : 611154
PSEN2 : 600759
AD15 : 611155
HFE : 613609
AD17 : 615080
NOS3 : 163729
AD10 : 609636
AD12 : 611073
AD11 : 609790
AD7 : 606187
PLAU : 191840
AD6 : 605526
CALHM1 : 612234
AD5 : 602096
PSEN1 : 104311
ADAM10 : 602192
MPO : 606989
ABCA7 : 605414
APOE : 107741
AD8 : 607116
APP : 104760
AD16 : 300756


In [21]:
# display the gene symbols collected as keys of gene2mim_number
gene2mim_number.keys()

dict_keys(['AD7CNTP', 'AD13', 'AD14', 'PSEN2', 'AD15', 'HFE', 'AD17', 'NOS3', 'AD10', 'AD12', 'AD11', 'AD7', 'PLAU', 'AD6', 'CALHM1', 'AD5', 'PSEN1', 'ADAM10', 'MPO', 'ABCA7', 'APOE', 'AD8', 'APP', 'AD16'])