# General Utilities

In [None]:
import os
import io
import zipfile
import requests

def simple_bool(message):
    """Ask a yes/no question on the console and return the answer as a bool.

    Args:
        message: Question text; " (y/n): " is appended before prompting.

    Returns:
        True when the reply is "y", "yes", "yea" or "sure" (case-insensitive,
        surrounding whitespace ignored); False for anything else, including
        an empty reply.
    """
    # Strip before comparing so a stray space ("y ") is not read as "no".
    reply = input(message + " (y/n): ").strip().lower()
    return reply in {"y", "yes", "yea", "sure"}

def get_file(url, file_name, dir=None):
    """Download ``url`` and save the response body as ``file_name`` in ``dir``.

    Args:
        url: Address fetched with ``requests.get``.
        file_name: Name of the file to create.
        dir: Target directory; defaults to the working directory at call
            time. (The name shadows the ``dir`` builtin but is kept so
            keyword callers keep working.)

    The file is written only when the server answers with HTTP 200. Any
    other status is reported on stdout and nothing is written.
    """
    # Resolve the default on every call: ``dir=os.getcwd()`` in the signature
    # is evaluated once, when the function is defined, and goes stale after
    # an ``os.chdir``.
    if dir is None:
        dir = os.getcwd()

    response = requests.get(url)
    if response.status_code != 200:
        # Report the failure instead of returning silently, so a later step
        # that expects the file does not fail without any hint why.
        print(f"Unable to download the file (HTTP {response.status_code}).")
        return

    file_path = os.path.join(dir, file_name)
    with open(file_path, 'wb') as file:
        file.write(response.content)

# Download and extract a ZIP file from Zenodo

In [None]:
def get_and_extract_zenodo(file, dir=None, ext='.zip'):
    """Download ``<file>.zip`` from Zenodo record 8205724 and unpack it.

    Args:
        file: Archive name on Zenodo, without the ``.zip`` suffix.
        dir: Directory the archive is extracted into; defaults to the working
            directory at call time.
        ext: Suffix used only to build the file name shown in the success
            message; the download URL always requests ``<file>.zip``.

    The outcome (success or failed download) is reported on stdout.
    """
    # Resolve the default on every call: ``dir=os.getcwd()`` in the signature
    # is evaluated once, when the function is defined, and goes stale after
    # an ``os.chdir``.
    if dir is None:
        dir = os.getcwd()

    url = 'https://zenodo.org/record/8205724/files/' + file + '.zip?download=1'
    zip_file_name = file + ext

    # Download the ZIP file
    response = requests.get(url)
    if response.status_code != 200:
        print("Failed to download the ZIP file.")
        return

    # The whole archive is held in memory and extracted straight from there;
    # no ZIP file is written to disk.
    with io.BytesIO(response.content) as zip_buffer:
        with zipfile.ZipFile(zip_buffer, 'r') as zip_ref:
            zip_ref.extractall(dir)
    print(f"ZIP file '{zip_file_name}' extracted to '{dir}' successfully.")

# Download single GitHub file from repository

In [None]:
import requests

def get_gitfile(url, flag=''):
    """Download a single file from a GitHub "blob" URL into the working directory.

    Args:
        url: GitHub file URL such as
            ``https://github.com/<user>/<repo>/blob/<branch>/<path>``.
        flag: Prefix prepended to the saved file name.

    The file is saved as ``flag`` + the last path segment of the URL. The
    outcome is reported on stdout; nothing is written unless the server
    answers with HTTP 200.
    """
    if not url:
        # An empty placeholder URL cannot be requested; report and skip it
        # rather than letting the request raise.
        print("Unable to download the file: no URL given.")
        return

    # Swap only the "/blob/" path segment. A bare replace('blob', 'raw')
    # would also rewrite repository or file names that contain "blob".
    url = url.replace('/blob/', '/raw/', 1)
    filename = flag + url.rsplit('/', 1)[-1]

    response = requests.get(url)
    if response.status_code == 200:
        with open(filename, 'wb') as file:
            file.write(response.content)
        print(f"File downloaded successfully. Saved as {filename}")
    else:
        print("Unable to download the file.")



# CSV Datasets

In [None]:
# Download the files
file1 = "https://github.com/SeniorMars/pokemon-csv/blob/master/pokemon.csv"
file2 = "https://github.com/zehnzwanzig/PokemonGo_CSV/blob/master/pokemon.csv"
file3 = ''  # placeholder for an additional dataset URL

# Each file is saved under its own prefix because both repositories name it
# 'pokemon.csv'. Empty placeholders are skipped: an empty URL cannot be
# requested.
for source_url, prefix in ((file1, 'base_'), (file2, 'go_'), (file3, 'new_')):
    if source_url:
        get_gitfile(source_url, prefix)

# Check files in folder (with extension)

In [None]:
import glob
import pandas as pd

def display_file(ext, folder_path=None, contains=''):
    """Print the names of the ``*.<ext>`` files found directly in ``folder_path``.

    Args:
        ext: File extension without the leading dot, e.g. ``'csv'``.
        folder_path: Directory to scan (sub-directories are not searched);
            defaults to the working directory at call time.
        contains: Only names matching this pattern are listed. It is passed
            to ``Series.str.contains`` and is therefore treated as a regular
            expression; the empty default keeps every file.
    """
    # Resolve the default on every call: ``os.getcwd()`` in the signature is
    # evaluated once, when the function is defined, and goes stale after an
    # ``os.chdir``.
    if folder_path is None:
        folder_path = os.getcwd()

    file_pattern = os.path.join(folder_path, "*." + ext)
    # Sorted so the listing is stable between runs (glob order is arbitrary).
    file_names = sorted(os.path.basename(path) for path in glob.glob(file_pattern))

    print('Available .' + ext + ' files:')
    # Explicit object dtype: pandas versions before 2.0 infer float64 for an
    # empty list, and the ``.str`` accessor then raises when nothing matches.
    names = pd.Series(file_names, dtype=object)
    print(names[names.str.contains(contains)])


display_file('csv')

# Display CSV

In [None]:
import pandas as pd
# Load the two downloaded datasets; the file names carry the 'base_' / 'go_'
# prefixes they were saved with.
poke_base = pd.read_csv('base_pokemon.csv')
# Read as latin-1 because decoding this file as UTF-8 fails on byte 0x91
# (see the UnicodeDecodeError quoted in the pychatgpt cell further down).
# NOTE(review): latin-1 turns 0x91 into a control character; if the file was
# written on Windows, cp1252 may be the real encoding — confirm.
poke_go = pd.read_csv('go_pokemon.csv', encoding='latin-1')
# NOTE(review): `display` is not defined in this file; presumably the
# IPython/Jupyter display helper — confirm before running as a plain script.
display(poke_go, poke_base)

# Get pychatgpt

In [None]:
# get & import pychatgpt (openai based module)
# Ask first: the module is only downloaded and used when the user confirms
# having an API key.
if simple_bool('Do you have an openai API-key?'):
    # Get pychatgpt at: https://github.com/johndef64/pychatgpt.git
    # NOTE(review): this fetches whatever is currently on the `main` branch
    # and the import below executes it right away; consider pinning a commit.
    get_file(url="https://raw.githubusercontent.com/johndef64/pychatgpt/main/pychatgpt.py", file_name='pychatgpt.py')

    # get_file only writes the file on an HTTP 200 response; if the download
    # failed and no earlier copy exists, this import raises.
    import pychatgpt as op
    # Example usage
    message = "Describe Nutrigenetics"
    response = op.send_message_gpt(message)

else:
    # No key: point the user to the key page and the web playground instead.
    print('get your api-key at https://platform.openai.com/account/api-keys\n'
          'or simply use web playground at https://platform.openai.com/playground?model=gpt-3.5-turbo-16k')

In [None]:
import pychatgpt as op

# Send GPT the CSV-loading snippet together with the UnicodeDecodeError it
# raised when 'go_pokemon.csv' was read as UTF-8. The whole message is one
# literal string; nothing inside it is executed here.
op.send_message_gpt('''


poke_base = pd.read_csv('base_pokemon.csv')
poke_go = pd.read_csv('go_pokemon.csv', encoding='utf-8')
display(poke_go, poke_base)


UnicodeDecodeError: 'utf-8' codec can't decode byte 0x91 in position 8: invalid start byte''')

# Prompt for keyword-mesh

In [None]:
# Define Biomedical topics
# Each variable is a list of topic groups; a group is a list of one to three
# related topic strings. The single-group variables hold exactly one group.
# NOTE(review): some strings contain typos ('realted', 'consuption',
# 'Melitus'). They are runtime text that ends up in the GPT prompts when the
# topic is selected, so they are left untouched here — confirm whether they
# should be corrected.
nutritional_topic = [['diseases and disorders realted to nutrition and diet ', 'diet, food consuption, eating behaviour and nutrition']]
infective_topic = [['infective agents, bacteria, virus and protozoan','infective diseases']]
reproductive_topic = [['reproductive system physiology','reproductive system pathology', 'Assisted reproductive technology']]
female_infertility_topic = [['female infertility, genetic imprinting and maternal effect']]
special_issue = [['Diagnosis and Therapies for Genetic Diseases']]

# Several groups, one per nutritional sub-area.
nutritional_topics = [
    ['Obesity, overweight and body weight control', 'compulsive eating behavior'],
    ['cardiovascular diseases','physiological processes realted to cardiovascular diseases','lipid metabolism in the context of cardiovascular diseases'],
    ['Diabetes Melitus Type II and metabolic syndrome'],
    ['Vitamin metabolism and Vitamins recommended intake levels','Micronutrients metabolism and Micronutrient recommended intake levels', 'disease related to vitamins and micronutrients deficiency'],
    ['eating behaviour and taste sensation'],
    ['food intolerances'],
    ['food allergies'],
    ['diet-induced oxidative stress'],
    ['metabolism of xenobiotics'],
]
# Selection read by the prompt-building cell below.
chosen_topic = special_issue
# As the last expression of the cell this presumably just shows the selection
# in the notebook output; the Series itself is not stored.
pd.Series(chosen_topic)

In [None]:
# GPT prompts

# parameters--------------------------------------
object1 = 'Pubmed MeSH terms'
object2 = 'Pubmed keywords'

# NOTE: `object` (here) and `format` (below) rebind Python builtins at module
# level; the names are kept because the prompt text is built from them.
object = object1
num_mesh = 100
topics = chosen_topic
topic_id = 0
#-----------------------------------------------
# A topic group holds one to three topic strings; missing slots stay None.
_topic_group = topics[topic_id]
_group_size = len(_topic_group)
topic_01 = _topic_group[0]
topic_02 = _topic_group[1] if _group_size > 1 else None
topic_03 = _topic_group[2] if _group_size > 2 else None

# Output-format instruction appended to every prompt ('csv' is selected).
_output_formats = {
    'list': ". Create a python list format like this:\n gpt_01 = [\"term1\",\n \"term2\",\n \"term3\",...]",
    'csv':  ". Create a CSV file like this:\n gpt_terms,\n \"term1\",\n \"term2\",\n \"term3\", ...",
}
format = _output_formats['csv']

# Part of the request that is identical for every topic of the group.
_prompt_head = "give me a comprehensive list of " + str(num_mesh) + " real " + object + " terms related to "
prompt_01 = _prompt_head + topic_01 + format + "\n"
prompt_02 = _prompt_head + topic_02 + format + "\n" if _group_size > 1 else None
prompt_03 = _prompt_head + topic_03 + format + "\n" if _group_size > 2 else None

prompts = [prompt_01, prompt_02, prompt_03]
# If you do not have an openai API key, paste these prompts at https://platform.openai.com/playground?model=gpt-3.5-turbo-16k

import pyperclip
# Put the prompts on the clipboard: first prompt_01 alone, then again with
# each further available prompt appended, so the clipboard ends up holding
# all prompts that exist for this group.
_clipboard_text = prompt_01
pyperclip.copy(_clipboard_text)
for _extra_prompt in prompts[1:]:
    if _extra_prompt is None:
        break
    _clipboard_text += _extra_prompt
    pyperclip.copy(_clipboard_text)

print('prompt_01:',prompt_01)

In [None]:
# get GPT-terms
import pychatgpt as op
# Rebind the module's `conversation_gpt` to an empty list before sending.
# NOTE(review): presumably this clears the chat history kept by pychatgpt —
# confirm against the module.
op.conversation_gpt =[]
# Only prompt_01 is sent; prompt_02 / prompt_03 are not used in this cell.
response = op.send_message_gpt(prompt_01, model='gpt-4', maxtoken=2000)
#response = op.send_message_gpt('clearchat')

# Remind the user of the manual follow-up step.
print('''\n\nGet the GPT terms from 'conversation_log.txt'
=> save them manually in csv format in "ref-mesh-archive/gpt_terms/yourterms.csv"''')

In [None]:
# Target file and CSV text for the GPT-generated terms of the special-issue
# topic: a 'gpt_terms' header followed by one quoted term per line.
# NOTE(review): every data row except the last ends with a trailing comma
# while the header has none, so rows have one more field than the header —
# confirm the reader of this file copes with that.
file_path = 'special_issue_2.csv'
content = '''gpt_terms
"Genetic Diseases Diagnosis",
"Genetic Testing",
"Molecular Diagnostics",
"Genetic Screenings",
"DNA Sequencing",
"Genome Mapping",
"Chromosomal Abnormalities",
"Prenatal Diagnosis",
"Newborn Screening",
"Personalized Medicine",
"Genetic Counseling",
"Carrier Testing",
"Genomic Medicine",
"Pharmacogenetics",
"Predictive Testing",
"Presymptomatic Testing",
"Biochemical Testing",
"Genetic Therapies",
"Gene Therapy",
"Gene Editing",
"CRISPR-Cas9",
"Stem Cell Therapy",
"RNA Therapy",
"Genetic Surgery",
"Molecular Therapy",
"Enzyme Replacement Therapy",
"Antisense Therapy",
"Gene Silencing",
"Genetic Vaccine",
"Pharmacological Chaperones"
'''
def write_to_file(file_path, content, encoding='utf-8'):
    """Write ``content`` to ``file_path``, replacing any existing file.

    Args:
        file_path: Path of the text file to create or overwrite.
        content: Text to write.
        encoding: Text encoding of the file. Fixed to UTF-8 by default so the
            result does not depend on the platform's locale encoding and can
            be read back by tools that default to UTF-8.
    """
    with open(file_path, 'w', encoding=encoding) as file:
        file.write(content)

# Save the terms to `file_path`; the file is opened in 'w' mode, so an
# existing file of that name is overwritten.
write_to_file(file_path, content)

In [None]:
# Ask GPT how to install a package automatically when it is not installed.
# `op` is the pychatgpt module imported in an earlier cell.
op.send_message_gpt('how to say, if a apackahger is not instaled, intall it?')

# Open a file in Notepad++ with subprocess

In [None]:
import subprocess

def open_in_notepadpp(file_path, notepadpp_path=r"C:\Program Files\Notepad++\notepad++.exe"):
    """Open ``file_path`` in Notepad++ without waiting for the editor to exit.

    Args:
        file_path: File to open.
        notepadpp_path: Location of the Notepad++ executable. Defaults to an
            install under "Program Files" on Windows; pass another path for
            a different install location.

    Raises:
        FileNotFoundError: If the executable cannot be found.
    """
    try:
        # Popen starts the editor and returns immediately.
        subprocess.Popen([notepadpp_path, file_path])
    except FileNotFoundError as err:
        # Same exception type as before, but say which path was tried.
        raise FileNotFoundError(
            f"Notepad++ executable not found at {notepadpp_path!r}; "
            "pass its location via 'notepadpp_path'."
        ) from err

# Usage
# NOTE: this rebinds the module-level `file_path` that an earlier cell set to
# the CSV output file.
file_path = r"conversation_log.txt"  # Replace with the actual file path
open_in_notepadpp(file_path)

# GET ZENODO NBIB Dataset (full)

In [None]:
import os
import io
import sys
import glob
import zipfile
import requests
from datetime import datetime

def simple_bool(message):
    """Ask a yes/no question on the console and return the answer as a bool.

    Args:
        message: Question text; " (y/n): " is appended before prompting.

    Returns:
        True when the reply is "y", "yes", "yea" or "sure" (case-insensitive,
        surrounding whitespace ignored); False for anything else, including
        an empty reply.
    """
    # Strip before comparing so a stray space ("y ") is not read as "no".
    reply = input(message + " (y/n): ").strip().lower()
    return reply in {"y", "yes", "yea", "sure"}

def get_and_extract(file, dir=None, ext='.zip'):
    """Download ``<file>.zip`` from Zenodo record 8205724 and unpack it.

    Args:
        file: Archive name on Zenodo, without the ``.zip`` suffix.
        dir: Directory the archive is extracted into; defaults to the working
            directory at call time.
        ext: Suffix used only to build the file name shown in the success
            message; the download URL always requests ``<file>.zip``.

    The outcome (success or failed download) is reported on stdout.
    """
    # Resolve the default on every call: ``dir=os.getcwd()`` in the signature
    # is evaluated once, when the function is defined, and goes stale after
    # an ``os.chdir``.
    if dir is None:
        dir = os.getcwd()

    url = 'https://zenodo.org/record/8205724/files/' + file + '.zip?download=1'
    zip_file_name = file + ext

    # Download the ZIP file
    response = requests.get(url)
    if response.status_code != 200:
        print("Failed to download the ZIP file.")
        return

    # The whole archive is held in memory and extracted straight from there;
    # no ZIP file is written to disk.
    with io.BytesIO(response.content) as zip_buffer:
        with zipfile.ZipFile(zip_buffer, 'r') as zip_ref:
            zip_ref.extractall(dir)
    print(f"ZIP file '{zip_file_name}' extracted to '{dir}' successfully.")

# Optional step: fetch and unpack the NBIB archive, timing the whole operation.
_download_requested = simple_bool('Download nbib-data from Zenodo?\n (careful! 5GB unpacked)')
if _download_requested:
    timea = datetime.now()
    get_and_extract('nbib_data')
    _download_elapsed = datetime.now() - timea
    print('Download and extraction time ', _download_elapsed)

#set source dataset:-----------------------
db_tag = 'pcg'
db_name = f'grpm_db_{db_tag}'
db_path = f'grpm_dataset/{db_name}'

# Import the complete NBIB table (first CSV column becomes the index) and
# time the load.
time1 = datetime.now()
dummy_nbib = pd.read_csv(f'{db_path}/complete_nbibtable.csv', index_col=0)
# Keep PubMed ids as strings rather than numbers.
dummy_nbib['pubmed_id'] = dummy_nbib['pubmed_id'].astype(str)
time2 = datetime.now()
print('time import nbib: ', time2 - time1)

# Size of the table as reported by memory_usage(), converted from bytes to MB.
_nbib_bytes = dummy_nbib.memory_usage().sum()
print(_nbib_bytes / 1024 / 1024, 'MB')

display(dummy_nbib)