In [2]:
import pandas as pd

In [3]:
# Load the MGI disease-ontology export; read_table parses tab-delimited
# files by default, which matches the .tsv input.
df_mouse_data = pd.read_table("MGI_DO.tsv")
df_mouse_data

Unnamed: 0,DO Disease ID,DO Disease Name,OMIM IDs,Common Organism Name,NCBI Taxon ID,Symbol,EntrezGene ID,Mouse MGI ID
0,DOID:0112248,17-beta hydroxysteroid dehydrogenase 3 deficiency,OMIM:264300,human,9606,HSD17B3,3293.0,
1,DOID:0112248,17-beta hydroxysteroid dehydrogenase 3 deficiency,OMIM:264300,"mouse, laboratory",10090,Hsd17b3,15487.0,MGI:107177
2,DOID:0111453,2-aminoadipic 2-oxoadipic aciduria,OMIM:204750,human,9606,DHTKD1,55526.0,
3,DOID:0111453,2-aminoadipic 2-oxoadipic aciduria,OMIM:204750,"mouse, laboratory",10090,Dhtkd1,209692.0,MGI:2445096
4,DOID:0050573,2-hydroxyglutaric aciduria,,human,9606,L2HGDH,79944.0,
...,...,...,...,...,...,...,...,...
18631,DOID:905,Zellweger syndrome,,human,9606,PHYH,5264.0,
18632,DOID:905,Zellweger syndrome,,"mouse, laboratory",10090,Pex1,71382.0,MGI:1918632
18633,DOID:905,Zellweger syndrome,,"mouse, laboratory",10090,Pex11b,18632.0,MGI:1338882
18634,DOID:0060478,Zika fever,,human,9606,STAT2,6773.0,


In [4]:
# Keep only the disease name and its OMIM identifier(s); every row is retained.
df_mouse_disease_omims = df_mouse_data.loc[:, ["DO Disease Name", "OMIM IDs"]]
df_mouse_disease_omims

Unnamed: 0,DO Disease Name,OMIM IDs
0,17-beta hydroxysteroid dehydrogenase 3 deficiency,OMIM:264300
1,17-beta hydroxysteroid dehydrogenase 3 deficiency,OMIM:264300
2,2-aminoadipic 2-oxoadipic aciduria,OMIM:204750
3,2-aminoadipic 2-oxoadipic aciduria,OMIM:204750
4,2-hydroxyglutaric aciduria,
...,...,...
18631,Zellweger syndrome,
18632,Zellweger syndrome,
18633,Zellweger syndrome,
18634,Zika fever,


In [7]:
# Discard rows with a missing value in either column, then collapse
# repeated (disease name, OMIM IDs) pairs down to their first occurrence.
df_mouse_disease_omims = (
    df_mouse_disease_omims
    .dropna()
    .drop_duplicates()
)
df_mouse_disease_omims

Unnamed: 0,DO Disease Name,OMIM IDs
0,17-beta hydroxysteroid dehydrogenase 3 deficiency,OMIM:264300
2,2-aminoadipic 2-oxoadipic aciduria,OMIM:204750
6,3MC syndrome 1,OMIM:257920
7,3MC syndrome 2,OMIM:265050
8,3MC syndrome 3,OMIM:248340
...,...,...
18622,Y-linked deafness 2,OMIM:400047
18623,Y-linked spermatogenic failure 2,OMIM:415000
18624,Yoon-Bellen neurodevelopmental syndrome,OMIM:619701
18625,Yunis-Varon syndrome,OMIM:216340


In [8]:
# Persist the de-duplicated (disease name, OMIM IDs) table.
# The target has a .tsv extension, so write it tab-separated; the default
# separator is a comma, which would produce CSV content under a .tsv name.
# index=False leaves the row index out of the file.
df_mouse_disease_omims.to_csv(
    "unique_mouse_disease_omims.tsv", sep="\t", index=False
)

In [12]:
# Build one row per (disease name, single OMIM id).
# An "OMIM IDs" cell may hold several identifiers joined by "|"; split each
# cell into a list and explode it so every identifier gets its own row,
# repeated alongside the disease name it belongs to. Cells with a single
# identifier pass through unchanged (a one-element list explodes to one row).
df_disease_names_omim = (
    df_mouse_disease_omims
    .rename(columns={"DO Disease Name": "DiseaseNames", "OMIM IDs": "OMIMIDs"})
    .assign(OMIMIDs=lambda frame: frame["OMIMIDs"].str.split("|"))
    .explode("OMIMIDs")
    # explode repeats the source index for each expanded row; renumber 0..n-1.
    .reset_index(drop=True)
)
df_disease_names_omim

Unnamed: 0,DiseaseNames,OMIMIDs
0,17-beta hydroxysteroid dehydrogenase 3 deficiency,OMIM:264300
1,2-aminoadipic 2-oxoadipic aciduria,OMIM:204750
2,3MC syndrome 1,OMIM:257920
3,3MC syndrome 2,OMIM:265050
4,3MC syndrome 3,OMIM:248340
...,...,...
4895,Y-linked deafness 2,OMIM:400047
4896,Y-linked spermatogenic failure 2,OMIM:415000
4897,Yoon-Bellen neurodevelopmental syndrome,OMIM:619701
4898,Yunis-Varon syndrome,OMIM:216340


In [13]:
# Splitting multi-id cells can yield the same (name, id) pair more than once;
# keep only the first occurrence of each pair.
df_disease_names_omim = df_disease_names_omim.drop_duplicates(
    subset=["DiseaseNames", "OMIMIDs"],
    keep="first",
)
df_disease_names_omim

Unnamed: 0,DiseaseNames,OMIMIDs
0,17-beta hydroxysteroid dehydrogenase 3 deficiency,OMIM:264300
1,2-aminoadipic 2-oxoadipic aciduria,OMIM:204750
2,3MC syndrome 1,OMIM:257920
3,3MC syndrome 2,OMIM:265050
4,3MC syndrome 3,OMIM:248340
...,...,...
4895,Y-linked deafness 2,OMIM:400047
4896,Y-linked spermatogenic failure 2,OMIM:415000
4897,Yoon-Bellen neurodevelopmental syndrome,OMIM:619701
4898,Yunis-Varon syndrome,OMIM:216340


In [51]:
import subprocess
import xmltodict
import json

# https://www.nlm.nih.gov/dataguide/classes/edirect-for-pubmed/samplecode3.html
# EDirect pipeline: search PubMed for the phrase in Title/Abstract, fetch the
# matching records as XML, then extract each article's title and abstract text.
bash_command = 'esearch -db pubmed -query "ACROMICRIC DYSPLASIA[Title/Abstract]" | efetch -format xml | xtract -pattern PubmedArticle -element ArticleTitle AbstractText'

# Alternative extraction stages tried previously:
# | xtract -pattern PubmedArticle -element TitleText AbstractText
# | xtract -pattern PubmedArticle -element Abstract

# Run the command through the shell (shell=True is required for the `|` pipes),
# capturing stdout/stderr as text.
# NOTE(review): for a shell pipeline the return code normally reflects only the
# last stage (xtract); failures in esearch/efetch may not surface here — confirm.
result = subprocess.run(bash_command, shell=True, capture_output=True, text=True)

# Report the extracted text on success, otherwise surface the error output
# instead of failing silently.
if result.returncode == 0:
    print(result.stdout)
else:
    # Print error message
    print("Error executing command:")
    print(result.stderr)

[Clinical phenotype and genetic analysis of six Chinese patients affected with Acromicric dysplasia due to variants of FBN1 gene].	To retrospectively analyze the clinical and genetic characteristics of six patients with Acromicric dysplasia due to variants of the FBN1 gene.	Six patients who had visited the Affiliated Hospital of Qingdao University between February 2018 and October 2020 were selected as the study subjects. Clinical data of the patients were collected. High-throughput sequencing was carried out. And candidate variants were verified by Sanger sequencing.	All of the six patients had presented with severe short stature (< 3s), brachydactyly, short and broad hands and feet. Other manifestations included joint stiffness, facial dysmorphism, delayed bone age, liver enlargement, coracoid femoral head, and lumbar lordosis. Genetic testing revealed that all had harbored heterozygous variants of the FBN1 gene. Patient 1 had harbored a c.5183C>T (p.A1728V) missense variant in exon 

'else:\n    # Print error message\n    print("Error executing command:")\n    print(result.stderr)'