# Import modules

In [1]:
import importlib
import subprocess

try:
    importlib.import_module('pygrpm')
except ImportError:
    subprocess.check_call(["pip", "install", "git+https://github.com/johndef64/GRPM_system.git"])
    
from pygrpm import *

# GET Datasets

In [3]:
### GET Datasets ###
if not os.path.exists('grpm_dataset/grpm_dataset.parquet'):
    get_and_extract('grpm_dataset', record_id='14052302')
    get_and_extract('nutrigenetic_dataset', record_id='14052302')

# LOAD Datasets

In [2]:
### LOAD Datasets ###
"""
1. GRPM Datset
2. Nutrigenetic Dataset
3. Nutrigenetic Dataset + GWAS
"""

pcg_grpm, rna_grpm, pseudo_grpm = grpm_importer()
grpm_nutrigen, grpm_nutrigen_int, grpm_nutrigen_int_gwas = nutrig_importer()


Importing time:  0:00:06.059999
pcg: 776.19 MB
rna: 58.18 MB
pseudo: 1.93 MB
nutrigen dataset: 87.13 MB
nutrigen dataset filtered: 53.62 MB
nutrigen gwas dataset: 20.56 MB


Unnamed: 0,gene,rsid,pmid,mesh,topic,interest_index
0,FTO,rs9972653,32393786,Body Mass Index,General Nutrition,1.00000
1,FTO,rs9972653,32393786,"Diabetes Mellitus, Type 2",General Nutrition,1.00000
2,FTO,rs9972653,32393786,Diet,General Nutrition,1.00000
3,FTO,rs9972653,33128006,Body Mass Index,General Nutrition,1.00000
4,FTO,rs9972653,33128006,Cardiovascular Diseases,General Nutrition,1.00000
...,...,...,...,...,...,...
1171244,FADS1,rs174545,23221573,Lipid Metabolism,Xenobiotics Metabolism,0.01255
1171245,FADS1,rs174545,30120404,Liver,Xenobiotics Metabolism,0.01255
1171246,FADS1,rs174545,30120404,Non-alcoholic Fatty Liver Disease,Xenobiotics Metabolism,0.01255
1171247,FADS1,rs174544,20565855,Lipid Metabolism,Xenobiotics Metabolism,0.01255


# SHOW Stats

In [4]:
%%time
# Full GRPM Dataset Build
pcg_grpm_stats = get_stats(pcg_grpm, group_by = 'gene')
display(grpm_nutrigen_int.head(10))
display(pcg_grpm_stats.head(20))

Computing Stats...
runtime:  0:01:18.116845


Unnamed: 0_level_0,type,rsid,pmid,mesh,qualifier
Unnamed: 0_level_1,unique,unique,unique,unique,unique
gene,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
A1BG,1,4,7,75,14
A1CF,1,7,23,137,18
A2M,1,21,57,365,32
A2ML1,1,32,13,117,16
A3GALT2,1,1,1,11,4
...,...,...,...,...,...
ZXDC,1,5,3,44,5
ZYG11B,1,1,1,19,2
ZYX,1,5,4,45,8
ZZEF1,1,9,12,111,13


CPU times: total: 1min 17s
Wall time: 1min 18s


In [7]:
%%time
# Nutrigenetic Dataset (10 Topics)
grpm_nutrigen_stats = get_stats(grpm_nutrigen, group_by = 'gene', gi_sort=True)
display(grpm_nutrigen.head(10))
display(grpm_nutrigen_stats.head(20))

Computing Stats...
runtime:  0:00:52.760797


Unnamed: 0,gene,rsid,pmid,mesh,topic,interest_index
0,FTO,rs9972653,32393786,Body Mass Index,General Nutrition,1.0
1,FTO,rs9972653,32393786,"Diabetes Mellitus, Type 2",General Nutrition,1.0
2,FTO,rs9972653,32393786,Diet,General Nutrition,1.0
3,FTO,rs9972653,33128006,Body Mass Index,General Nutrition,1.0
4,FTO,rs9972653,33128006,Cardiovascular Diseases,General Nutrition,1.0
5,FTO,rs9941349,22084931,Obesity,General Nutrition,1.0
6,FTO,rs9941349,20442772,Body Composition,General Nutrition,1.0
7,FTO,rs9941349,20442772,Obesity,General Nutrition,1.0
8,FTO,rs9941349,24879436,Obesity,General Nutrition,1.0
9,FTO,rs9941349,21552555,Body Mass Index,General Nutrition,1.0


Unnamed: 0_level_0,rsid,pmid,mesh,topic
Unnamed: 0_level_1,unique,unique,unique,unique
gene,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
FTO,309,1350,411,10
MTHFR,148,3598,669,10
VDR,190,1024,297,10
TCF7L2,142,1054,357,10
HFE,58,1434,364,10
PNPLA3,36,1109,370,10
PPARG,207,983,389,10
ADIPOQ,75,682,300,10
KCNJ11,115,658,257,10
GCKR,40,637,293,10


CPU times: total: 55 s
Wall time: 56.5 s


In [5]:
%%time
# Nutrigenetic Dataset + GWAS
grpm_nutrigen_int_gwas_stats = get_stats(grpm_nutrigen_int_gwas, group_by='GRPM_GENE', gi_sort=True)
display(grpm_nutrigen_int_gwas.head(10))
display(grpm_nutrigen_int_gwas_stats.head(20))

Computing Stats...
runtime:  0:00:04.428999


Unnamed: 0,GRPM_GENE,GRPM_RSID,GRPM_PMID,GRPM_MESH,GRPM_TOPIC,GRPM_GI,SEMANTIC_SIMILARITY,GWAS_DISEASE/TRAIT,GWAS_MAPPED_TRAIT,GWAS_MAPPED_GENE,GWAS_CONTEXT,GWAS_STRONGEST_SNP-RISK_ALLELE,GWAS_OR-BETA,GWAS_STUDY,GWAS_STUDY_ID
0,FTO,rs9941349,22084931,Obesity,General Nutrition,1.0,0.909086,Obesity (extreme),obesity,FTO,intron_variant,rs9941349-T,1.48,Common body mass index-associated variants con...,GCST000426
1,FTO,rs9941349,20442772,Obesity,General Nutrition,1.0,0.909086,Obesity (extreme),obesity,FTO,intron_variant,rs9941349-T,1.48,Common body mass index-associated variants con...,GCST000426
2,FTO,rs9941349,24879436,Obesity,General Nutrition,1.0,0.909086,Obesity (extreme),obesity,FTO,intron_variant,rs9941349-T,1.48,Common body mass index-associated variants con...,GCST000426
3,FTO,rs9941349,21552555,Obesity,General Nutrition,1.0,0.909086,Obesity (extreme),obesity,FTO,intron_variant,rs9941349-T,1.48,Common body mass index-associated variants con...,GCST000426
4,FTO,rs9941349,25014319,Obesity,General Nutrition,1.0,0.909086,Obesity (extreme),obesity,FTO,intron_variant,rs9941349-T,1.48,Common body mass index-associated variants con...,GCST000426
5,FTO,rs9941349,27449576,Obesity,General Nutrition,1.0,0.909086,Obesity (extreme),obesity,FTO,intron_variant,rs9941349-T,1.48,Common body mass index-associated variants con...,GCST000426
6,FTO,rs9941349,22531089,Obesity,General Nutrition,1.0,0.909086,Obesity (extreme),obesity,FTO,intron_variant,rs9941349-T,1.48,Common body mass index-associated variants con...,GCST000426
7,FTO,rs9941349,32193455,Obesity,General Nutrition,1.0,0.909086,Obesity (extreme),obesity,FTO,intron_variant,rs9941349-T,1.48,Common body mass index-associated variants con...,GCST000426
8,FTO,rs9941349,31772290,Obesity,General Nutrition,1.0,0.909086,Obesity (extreme),obesity,FTO,intron_variant,rs9941349-T,1.48,Common body mass index-associated variants con...,GCST000426
9,FTO,rs9941349,21438147,Obesity,General Nutrition,1.0,0.909086,Obesity (extreme),obesity,FTO,intron_variant,rs9941349-T,1.48,Common body mass index-associated variants con...,GCST000426


Unnamed: 0_level_0,GRPM_RSID,GRPM_PMID,GRPM_MESH,GRPM_TOPIC,GWAS_DISEASE/TRAIT,GWAS_MAPPED_TRAIT,GWAS_MAPPED_GENE,GWAS_CONTEXT,GWAS_STRONGEST_SNP-RISK_ALLELE,GWAS_STUDY,GWAS_STUDY_ID
Unnamed: 0_level_1,unique,unique,unique,unique,unique,unique,unique,unique,unique,unique,unique
GRPM_GENE,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
FTO,37,632,18,5,22,19,1,2,47,60,87
TCF7L2,7,272,10,5,10,11,1,1,10,23,29
PNPLA3,4,847,9,4,13,11,1,3,5,25,30
PPARG,3,107,4,5,4,4,1,3,4,6,6
ADIPOQ,3,60,1,3,2,2,1,2,4,5,5
KCNJ11,2,44,2,4,2,2,1,2,2,4,4
GCKR,4,333,33,6,251,137,1,2,12,73,360
GC,7,345,4,6,7,5,1,2,9,9,11
LEPR,8,16,3,4,3,3,1,2,9,5,5
SLC30A8,3,43,5,5,5,5,1,2,5,9,11


CPU times: total: 4.06 s
Wall time: 4.47 s


# QUERY GRPM Dataset 

## MeSH Query Example

In [3]:
# LOAD MeSH

grpm_mesh= import_grpm_mesh()
grpm_mesh.head()

GRPM MeSH count: 21705


Unnamed: 0,Preferred Label,Class ID,Synonyms,Definitions
0,Electronic Health Records,http://purl.bioontology.org/ontology/MESH/D057286,"Electronic Medical Record|Medical Record, Elec...",Media that facilitate transportability of pert...
1,Consent Forms,http://purl.bioontology.org/ontology/MESH/D032962,Informed Consent Documents|Informed Consent Fo...,Documents describing a medical treatment or re...
2,Genealogy and Heraldry,http://purl.bioontology.org/ontology/MESH/D005789,Geneology and Heraldry|Heraldry and Genealogy|...,"Descent of a person, family, or group from an ..."
3,Publications,http://purl.bioontology.org/ontology/MESH/D011642,Publication,Copies of a work or document distributed to th...
4,Pharmaceutical Services,http://purl.bioontology.org/ontology/MESH/D010593,"Pharmaceutical Service|Services, Pharmaceutic|...",Total pharmaceutical services provided by qual...


In [5]:
# Random Query Example
mesh_query =  grpm_mesh['Preferred Label'].drop_duplicates().sample(10).to_list()

# Filter and get unique results
result = query_dataset(pcg_grpm, mesh_query, 'mesh')
display(result)

Unnamed: 0,gene,type,rsid,pmid,mesh,qualifier,major
178386,KRT23,PCG,rs140407470,21874024,CD28 Antigens,physiology,False
222732,MMEL1,PCG,rs3890745,19898481,CD28 Antigens,genetics,True
361776,HLA-DRA,PCG,rs3135392,33058932,Cephalosporins,adverse effects,True
400814,TNF,PCG,rs1800629,25510954,CD28 Antigens,analysis,False
589282,PTPRC,PCG,rs10919563,19898481,CD28 Antigens,genetics,True
...,...,...,...,...,...,...,...
16415706,TNPO3,PCG,rs10488631,31030958,CD28 Antigens,antagonists & inhibitors,False
16416142,TNPO3,PCG,rs10488631,27092776,CD28 Antigens,genetics,True
16416202,TNPO3,PCG,rs10488631,34017081,CD28 Antigens,genetics,True
16488218,ALOX15,PCG,rs34210653,31301373,Turbinates,metabolism,True


## Build MeSH Query 
[CUDA recommended] - In Colab:: load Runtime with GPU 

In [5]:
# LOAD Language Model
MODEL = 'dmis-lab/biobert-v1.1'
model = load_language_model(MODEL)
file_path = 'ref-mesh/GrpmMeshSynEmbeddings_biobert-v1.1.pkl'

# Get MeSH embeddings
test_cuda()
grpm_mesh_embeddings = get_mesh_embeddings(grpm_mesh, model, file_path)

grpm_meshes = grpm_mesh_embeddings['meshes']
mesh_embeddings = grpm_mesh_embeddings['embeddings']

No sentence-transformers model found with name dmis-lab/biobert-v1.1. Creating a new one with mean pooling.


Torch version: 2.6.0+cu118
Is CUDA enabled? True
Importing pretrained embeddings...
Done


In [14]:
# User defined Topic Terms

user_query = "diet ketogenic, diet reducing, diet sodium-restricted, diet, dietary, dietetics, dyslipidemias, eating disorders, feeding and eating disorder, food hypersensitivity, foodborne diseases, gastrointestinal diseases, hypercholesterolemia, hyperglycemia, hyperlipidemias, hyperphagia, hypoglycemia, hypophagia, insulin resistance"  # comma separated list

topic_terms_list = user_query.split(',')
topic_terms = pd.Series(topic_terms_list)


# Extract MeSH Query
tab = create_corr_table(topic_terms, grpm_meshes, model, mesh_embeddings)

threshold = 0.84 # set similarity threshold
mesh_query = tab[tab.similarity >= threshold].list2.to_list()
print('\n\nMeSH Query:', mesh_query)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

100%|██████████| 19/19 [00:03<00:00,  5.59it/s]



MeSH Query: ['Diet, Carbohydrate-Restricted', 'Hyperlipidemias', 'Feeding and Eating Disorders of Childhood', 'Egg Hypersensitivity', 'Endoscopy, Gastrointestinal', 'Hypercholesterolemia', 'Hypertriglyceridemia', 'Hyperlipidemias', 'Hypoglycemic Agents']





MeSH Query: ['Diet', 'Diet', 'Dyslipidemias', 'Hypersensitivity', 'Gastrointestinal Diseases', 'Hypercholesterolemia', 'Hyperglycemia', 'Hyperlipidemias', 'Hyperphagia', 'Hypoglycemia']

## Execute MeSH Query

In [None]:
# Filter and get unique results
result = query_dataset(pcg_grpm, mesh_query, 'mesh')
display(result)

# QUERY Nutrigenetic Dataset

In [6]:
get_stats(grpm_nutrigen_int, 'topic')

Computing Stats...
runtime:  0:00:01.133999


Unnamed: 0_level_0,gene,rsid,pmid,mesh
Unnamed: 0_level_1,unique,unique,unique,unique
topic,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Cardiovascular Health and Lipid Metabolism,975,41931,66113,521
Diabetes Mellitus Type II and Metabolic Syndrome,603,22270,36198,297
Diet-induced Oxidative Stress,75,2559,10058,60
Eating Behavior and Taste Sensation,211,4252,7241,256
Food Allergies,451,6289,7762,64
Food Intolerances,392,5008,4726,125
General Nutrition,686,26456,44859,397
"Obesity, Weight Control and Compulsive Eating",317,10842,22123,230
Vitamin and Micronutrients Metabolism and Deficiency-Related Diseases,89,3525,6882,147
Xenobiotics Metabolism,173,7159,14171,151


## 1. Query by Nutritional Topic 

In [17]:
# Select Topic
topic = "Vitamin and Micronutrients Metabolism and Deficiency-Related Diseases"

# Filter and get unique results
topic_data = query_dataset(grpm_nutrigen_int, [topic], 'topic')
print(f'Displaying "{topic}" topic')
display(topic_data)

Displaying "Vitamin and Micronutrients Metabolism and Deficiency-Related Diseases" topic


Unnamed: 0,gene,rsid,pmid,mesh,topic,interest_index
967603,VDR,rs987849,34578986,Nutritional Status,Vitamin and Micronutrients Metabolism and Defi...,1.00000
967604,VDR,rs987849,34578986,"Receptors, Calcitriol",Vitamin and Micronutrients Metabolism and Defi...,1.00000
967605,VDR,rs987849,34578986,Vitamin D,Vitamin and Micronutrients Metabolism and Defi...,1.00000
967606,VDR,rs987849,34578986,Vitamin D Deficiency,Vitamin and Micronutrients Metabolism and Defi...,1.00000
967607,VDR,rs987849,34578986,Vitamin D-Binding Protein,Vitamin and Micronutrients Metabolism and Defi...,1.00000
...,...,...,...,...,...,...
1002198,ITPR1,rs121912425,21555639,"Inositol 1,4,5-Trisphosphate Receptors",Vitamin and Micronutrients Metabolism and Defi...,0.01255
1002199,ITPR1,rs121912425,22986007,"Inositol 1,4,5-Trisphosphate Receptors",Vitamin and Micronutrients Metabolism and Defi...,0.01255
1002200,ITPR1,rs121912425,18579805,"Inositol 1,4,5-Trisphosphate Receptors",Vitamin and Micronutrients Metabolism and Defi...,0.01255
1002201,ITPR1,rs121912425,27108798,"Inositol 1,4,5-Trisphosphate Receptors",Vitamin and Micronutrients Metabolism and Defi...,0.01255


In [None]:
# Get Topic Data Stats
stats = get_stats(topic_data, "gene", gi_sort=True)
stats

### Select Topic on Nutrigenetic-GWAS dataset

In [36]:
# Select Topic on Nutrigenetic-GWAS dataset
topic = "Vitamin and Micronutrients Metabolism and Deficiency-Related Diseases"

topic_data_gwas = query_dataset(grpm_nutrigen_int_gwas, [topic], 'GRPM_TOPIC')
display(topic_data_gwas.head())
# Get Topic Data Stats
stats = get_stats(topic_data_gwas, group_by = "GRPM_GENE")
stats

Unnamed: 0,GRPM_GENE,GRPM_RSID,GRPM_PMID,GRPM_MESH,GRPM_TOPIC,GRPM_GI,SEMANTIC_SIMILARITY,GWAS_DISEASE/TRAIT,GWAS_MAPPED_TRAIT,GWAS_MAPPED_GENE,GWAS_CONTEXT,GWAS_STRONGEST_SNP-RISK_ALLELE,GWAS_OR-BETA,GWAS_STUDY,GWAS_STUDY_ID
164708,MTHFR,rs1801133,26025547,Vitamin B 12,Vitamin and Micronutrients Metabolism and Defi...,0.64677,0.952817,Vitamin B9 levels,folic acid measurement,MTHFR,missense_variant,rs1801133-A,0.12,The STROMICS genome study: deep whole-genome s...,GCST90319528
164709,MTHFR,rs1801133,33290257,Vitamin B 12,Vitamin and Micronutrients Metabolism and Defi...,0.64677,0.952817,Vitamin B9 levels,folic acid measurement,MTHFR,missense_variant,rs1801133-A,0.12,The STROMICS genome study: deep whole-genome s...,GCST90319528
164710,MTHFR,rs1801133,33799553,Vitamin B 12,Vitamin and Micronutrients Metabolism and Defi...,0.64677,0.952817,Vitamin B9 levels,folic acid measurement,MTHFR,missense_variant,rs1801133-A,0.12,The STROMICS genome study: deep whole-genome s...,GCST90319528
164711,MTHFR,rs1801133,33347560,Vitamin B 12,Vitamin and Micronutrients Metabolism and Defi...,0.64677,0.952817,Vitamin B9 levels,folic acid measurement,MTHFR,missense_variant,rs1801133-A,0.12,The STROMICS genome study: deep whole-genome s...,GCST90319528
164712,MTHFR,rs1801133,17277043,Vitamin B 12,Vitamin and Micronutrients Metabolism and Defi...,0.64677,0.952817,Vitamin B9 levels,folic acid measurement,MTHFR,missense_variant,rs1801133-A,0.12,The STROMICS genome study: deep whole-genome s...,GCST90319528


Computing Stats...
runtime:  0:00:00.161999


Unnamed: 0_level_0,GRPM_RSID,GRPM_PMID,GRPM_MESH,GRPM_TOPIC,GWAS_DISEASE/TRAIT,GWAS_MAPPED_TRAIT,GWAS_MAPPED_GENE,GWAS_CONTEXT,GWAS_STRONGEST_SNP-RISK_ALLELE,GWAS_STUDY,GWAS_STUDY_ID
Unnamed: 0_level_1,unique,unique,unique,unique,unique,unique,unique,unique,unique,unique,unique
GRPM_GENE,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
AMDHD1,1,21,2,1,2,2,1,1,1,1,2
BTD,2,25,1,1,1,1,1,1,2,1,1
CUBN,1,8,1,1,1,1,1,1,1,1,1
CYP24A1,2,24,1,1,1,1,1,2,2,1,1
CYP2R1,1,5,2,1,2,2,1,1,1,2,2
CYP4F2,1,10,1,1,1,1,1,1,1,1,1
GC,7,345,3,1,6,4,1,2,9,8,10
GSTP1,1,484,1,1,1,1,1,1,1,1,1
MTHFR,1,319,1,1,1,1,1,1,1,1,1
NADSYN1,5,160,2,1,3,3,1,2,5,4,5


## 2. Advanced query (use case)

Exploring the genetic determinants of nutritional status involves understanding how genetic variations influence the intake and utilization of micronutrients, impacting nutrient transport, metabolism, and cellular uptake.

> *Micronutrients such as trace elements and vitamins are important as enzyme cofactors in the metabolism of all cells in the body and therefore key to determining nutritional status.*


Build a composite query:
- Nutritional Status, Mechanisms of Micronutrient Metabolism, and Micronutrient Measurement

In [11]:
from pygrpm import *

# Download and import MeSH Embeddings
grpm_mesh_embeddings = import_mesh_embeddings()

grpm_meshes = grpm_mesh_embeddings['meshes']
mesh_embeddings = grpm_mesh_embeddings['embeddings']

# Define queries using natural language
QUERIES =[
     # 1. **Nutritional Status**:
     ["Measurement of nutritional status", 0.85],
     ["Assess essential micronutrients", 0.84],
     ["Focus on vitamins like vitamin A, D, and B-vitamins.", 0.84],
     ["trace minerals such as iron, zinc, and iodine.", 0.84],

    # 2"**Mechanisms of Micronutrient Metabolism**:
     ["cellular processes for micronutrient absorption.", 0.87],
     ["transport and transformation of nutrients.", 0.84],
     ["nutrients storage mechanisms.", 0.84],
     ["cofactors in nutrient utilization.", 0.84],
     ["homeostasis of nutrients.", 0.84],
]


mesh_query = []
for i in range(len(QUERIES)):
    query =  QUERIES[i]
    print("\n",query)
    meshes = get_mesh_query(query[0], grpm_meshes, model, mesh_embeddings=mesh_embeddings, threshold=query[1])
    mesh_query.extend(meshes)

mesh_query = list(set(mesh_query))

Done

 ['Measurement of nutritional status', 0.85]
Related MeSH: ['Nutritional Support', 'Malnutrition', 'Nutritional Status', 'Lung Volume Measurements', 'Speech Production Measurement', 'Eye Movement Measurements', 'Nutritional Requirements', 'Educational Measurement']

 ['Assess essential micronutrients', 0.84]
Related MeSH: ['Dietary Proteins', 'Micronutrients', 'Malnutrition', 'Vitamin A Deficiency', 'Nutrition Disorders', 'Nutritional Status', 'Nutrients', 'Sports Nutritional Sciences', 'Nutritional Requirements']

 ['Focus on vitamins like vitamin A, D, and B-vitamins.', 0.84]
Related MeSH: ['Vitamin E', 'Cholagogues and Choleretics', 'Anti-Infective Agents, Urinary', 'Anti-Infective Agents, Local', 'Fatty Acids, Omega-3', 'Vitamin B 12', 'Vitamin B Complex', 'Vitamin A', 'Vitamin D', 'Diuretics, Potassium Sparing', 'Vaccines, Virus-Like Particle', 'Amino Acids, Essential', 'Vitamin K 3', 'Vitamin K 2', 'Vitamin B 6', 'Contraceptives, Oral, Hormonal', 'Receptors, OSM-LIF', 'Sali

Get Genes and Variants possibly related to the Query

In [1]:
# Filter and get unique results
query_result = query_dataset(grpm_nutrigen_int, mesh_query, 'mesh')
query_result

NameError: name 'query_dataset' is not defined

In [20]:
get_stats(query_result, "gene", gi_sort=True)

Computing Stats...
runtime:  0:00:02.702022


Unnamed: 0_level_0,rsid,pmid,mesh,topic
Unnamed: 0_level_1,unique,unique,unique,unique
gene,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
FTO,28,95,16,6
MTHFR,56,614,25,9
VDR,152,376,9,7
TCF7L2,9,56,12,6
HFE,11,100,11,7
...,...,...,...,...
UGT1A3,1,1,1,1
MPO,1,1,1,1
UGT1A1,1,1,1,1
AKR1D1,7,1,1,1


In [26]:
## Select specific genes

# Gene Query 
my_genes = "VDR, PNPLA3, PNPLA3"

# Filter and get unique results
gene_panel = query_dataset(query_result, my_genes.split(','), 'gene')
display(gene_panel)

Unnamed: 0,gene,rsid,pmid,mesh,topic,interest_index
12321,VDR,rs987849,34578986,Nutritional Status,General Nutrition,0.78441
12323,VDR,rs987849,34578986,Vitamin D,General Nutrition,0.78441
12324,VDR,rs987849,34578986,Vitamin D Deficiency,General Nutrition,0.78441
12327,VDR,rs987849,22046258,Vitamin D,General Nutrition,0.78441
12332,VDR,rs987849,20086113,Vitamin D,General Nutrition,0.78441
...,...,...,...,...,...,...
1043634,VDR,rs10875694,34578986,Nutritional Status,Food Intolerances,0.02603
1043635,VDR,rs10875693,34578986,Nutritional Status,Food Intolerances,0.02603
1043636,VDR,rs10783219,34578986,Nutritional Status,Food Intolerances,0.02603
1043637,VDR,rs10783218,34578986,Nutritional Status,Food Intolerances,0.02603
