In [1]:
import pandas as pd
from pathlib import Path
import bm25s
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
from scipy.stats import rankdata
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics.pairwise import cosine_similarity

resource module not available on Windows


  from .autonotebook import tqdm as notebook_tqdm


$env:SEMANTIC_SCHOLAR_API_KEY = '***'   # if needed
>>  python -c "import asyncio; from pathlib import Path; from semantic_download import download_results; asyncio.run(download_results(['persistent organic pollutants long-term health effects','POPs exposure chronic health outcomes toxicology epidemiology', 'environmental toxicants bioaccumulation human morbidity consequences'], Path('multi_out.json')))"

In [2]:
def compute_bm25s_scores(query: str, df: pd.DataFrame):
    corpus = (df["title"].astype(str) + ": " + df["abstract"].astype(str)).tolist()
    corpus_tokens = bm25s.tokenize(corpus, stopwords="english")

    retriever = bm25s.BM25(method="lucene")
    retriever.index(corpus_tokens)
    
    # Tokenize query
    query_tokens = bm25s.tokenize(query, stopwords="english")
    
    # Get scores for all documents
    # retrieve returns (docs, scores) but we only need scores
    _, scores = retriever.retrieve(query_tokens, k=len(corpus))
    
    # BM25s returns scores in shape (1, k) - flatten to 1D
    scores = scores.flatten()
    
    # Normalize scores to 0-1 range
    if scores.max() > 0:
        scores_normalized = scores / scores.max()
    else:
        scores_normalized = np.zeros(len(corpus))
    return rankdata(-scores_normalized, "dense")

########################################################

def tfidf_vectorizer(query: str, papers: list[str]):
    if not papers:
        return np.array([])
    corpus = [query] + papers
    vectorizer = TfidfVectorizer(stop_words='english')
    tfidf_matrix = vectorizer.fit_transform(corpus)
    
    similarities = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:]).flatten()

    # Normalize scores to 0-1 range
    scores_normalized = similarities / (similarities.max() or 1)
    return rankdata(-scores_normalized, "dense")


#####################################################

def compute_lsa_scores(query: str, df: pd.DataFrame, n_components: int = 100):
    """
    Computes LSA scores for a query against a dataframe of documents.

    NOTE: This is the 'inefficient' way, as it re-builds the entire
    LSA model for every single query. See the class-based
    example below for a much more efficient, 'correct' implementation.
    """
    
    # 1. Create the corpus
    corpus = (df["title"].astype(str) + ": " + df["abstract"].astype(str)).tolist()
    
    # --- LSA Indexing ---
    # 2. Create the TF-IDF matrix (This is the input to LSA)
    # We filter out very common and very rare words, which helps LSA
    vectorizer = TfidfVectorizer(stop_words='english', max_df=0.8, min_df=5)
    tfidf_matrix = vectorizer.fit_transform(corpus)
    
    # 3. Apply Truncated SVD (This *is* LSA)
    # This finds the 'topics' or 'concepts' in the text
    svd_model = TruncatedSVD(n_components=n_components, random_state=42)
    
    # 4. Create the LSA document matrix (documents transformed into 'topic space')
    lsa_matrix = svd_model.fit_transform(tfidf_matrix)
    
    # --- LSA Querying ---
    # 5. Transform the query into the same LSA 'topic space'
    # 5a. First, get the query's TF-IDF vector
    query_tfidf = vectorizer.transform([query])
    # 5b. Then, transform that vector using the SVD model
    query_lsa = svd_model.transform(query_tfidf)
    
    # 6. Calculate Similarity
    # We use cosine similarity to find documents that are 'close' to
    # the query in the LSA topic space.
    scores = cosine_similarity(query_lsa, lsa_matrix)
    
    # 7. Flatten and Rank
    scores = scores.flatten()
    return rankdata(-scores, "dense")


#################################################################

def compute_bm25_centroid_ranks(df: pd.DataFrame, centroid_k: int = 5):
    """
    Ranks all papers by similarity to the centroid of the
    top 'centroid_k' BM25-ranked papers.
    
    This function is designed to run *after* a 'bm25_rank'
    column already exists on the DataFrame.
    
    It correctly handles missing embeddings by assigning them
    the worst possible rank.
    """
    
    # --- Step 1: Clean the Data ---
    
    # 1a. Extract vectors, safely handling empty dicts
    df['specter_vector'] = df['embedding'].apply(lambda x: x.get('specter'))
    
    # 1b. Create clean df, but *keep the original index*
    # This index is crucial for mapping back to the full df.
    df_clean = df.dropna(subset=['specter_vector']).copy()
    
    # 1c. Create the clean 2D NumPy array
    try:
        embeddings_clean_all = np.stack(df_clean['specter_vector'].values)
    except ValueError as e:
        print(f"Error: Could not stack embeddings. Are they all the same length? {e}")
        # Return a useless rank for all rows if it fails
        return np.full(len(df), len(df), dtype=int)
        
    # --- Step 2: Create Pseudo-Query (Centroid) ---
    
    # 2a. Find top 'k' papers *from df_clean* based on bm25_rank
    # *** FIX: Use ascending=True because rank 1 is the best ***
    top_k_papers = df_clean.sort_values(by="bm25_rank", ascending=True).head(centroid_k)
    
    # 2b. Get the integer row positions (ilocs) of these papers
    # within the 'df_clean' dataframe.
    top_k_ilocs = df_clean.index.get_indexer(top_k_papers.index)

    # 2c. Get vectors using these ilocs from the clean array
    top_k_vectors = embeddings_clean_all[top_k_ilocs]
    
    # 2d. Create the centroid
    centroid_query_vector = np.mean(top_k_vectors, axis=0).reshape(1, -1)
    
    # --- Step 3: Re-rank ---
    
    # 3a. Calculate similarity against all 140 clean embeddings
    scores = cosine_similarity(centroid_query_vector, embeddings_clean_all)
    scores = scores.flatten() # Shape (140,)
    
    # --- Step 4: Map Ranks Back to Original DF (Fixes the ValueError) ---
    
    # 4a. Create a Series of scores, using df_clean's index
    # This aligns the 140 scores with their original 166-based indices
    ranked_scores_series = pd.Series(scores, index=df_clean.index)
    
    # 4b. Reindex to the *full* original df's index (166 rows)
    # This fills the 26 missing rows (with no embeddings) with NaN
    full_scores_series = ranked_scores_series.reindex(df.index)
    
    # 4c. Fill NaN with a very bad score (-1, since similarity is 0-1)
    # These papers couldn't be scored, so they are the least similar.
    full_scores_series = full_scores_series.fillna(-np.inf) 
    
    # 4d. Now, rank this *full* 166-length array
    # The -1 scores will correctly be given the worst (highest) rank.
    return rankdata(-full_scores_series.values, "dense")

#################################################################

def compute_rrf_relevance(df):
    return (
        1 / (df['bm25_rank'] + 60) +
        1 / (df['tfid_rank'] + 60) +
        1 / (df['lsa_rank'] + 60) +
        1 / (df['pseudo_specter_rank'] + 60)
    )


##################################################################

def compute_authority_scores(df: pd.DataFrame):
    """
    Computes authority scores based on influential citations.
    Returns dense ranks (1 = best).
    """
    # Use influentialCitationCount, fallback to citationCount * 0.3
    df['authority_raw'] = df.apply(
        lambda row: np.log10(
            max(row.get('influentialCitationCount', 0), 
                row.get('citationCount', 0) * 0.3) + 1
        ), 
        axis=1
    )
    
    # Rank: higher authority = lower rank number
    return rankdata(-df['authority_raw'].values, method='dense')


################################################################

def compute_recency_scores(df: pd.DataFrame, current_year: int = 2025, lambda_decay: float = 0.1):
    """
    Computes recency scores using exponential decay.
    Returns dense ranks (1 = newest/best).
    
    lambda_decay controls half-life:
    - 0.1 = ~10 year half-life (general science)
    - 0.3 = ~3 year half-life (fast-moving fields like ML)
    """
    df['recency_raw'] = df['year'].apply(
        lambda year: np.exp(-lambda_decay * (current_year - year)) if pd.notna(year) else 0
    )
    
    # Rank: higher recency score = lower rank number
    return rankdata(-df['recency_raw'].values, method='dense')

#######################################################################

def normalize_scores(df: pd.DataFrame, score_columns: list):
    """
    Applies min-max normalization to score columns.
    Handles edge case where max == min.
    """
    for col in score_columns:
        min_val = df[col].min()
        max_val = df[col].max()
        
        if max_val - min_val > 0:
            df[f'{col}_norm'] = (df[col] - min_val) / (max_val - min_val)
        else:
            # All scores are identical - set to 0.5 (neutral)
            df[f'{col}_norm'] = 0.5
    
    return df


In [3]:
def compute_final_ranking(df: pd.DataFrame, 
                          w_relevance: float = 0.60,
                          w_authority: float = 0.35,
                          w_recency: float = 0.05):
    """
    Computes final weighted score from all normalized signals.
    """
    
    # --- THIS IS THE FIX ---
    # 1. Compute the raw RRF SCORE (higher is better)
    df['rrf_relevance_SCORE'] = compute_rrf_relevance(df)
    
    # 2. Convert the SCORE to a RANK (lower is better)
    #    We rank the *negative* score, so the highest score gets rank 1.
    df['rrf_relevance_rank'] = rankdata(-df['rrf_relevance_SCORE'].values, method='dense')
    # --- END FIX ---

    # Compute other raw ranks (lower rank = better)
    df['authority_rank'] = compute_authority_scores(df)
    df['recency_rank'] = compute_recency_scores(df)
    
    # CRITICAL: Normalize ranks directly using min-max
    df = normalize_scores(df, ['rrf_relevance_rank', 'authority_rank', 'recency_rank'])
    
    # Invert normalized ranks so higher = better (this logic is correct)
    df['relevance_score'] = 1.0 - df['rrf_relevance_rank_norm']
    df['authority_score'] = 1.0 - df['authority_rank_norm']
    df['recency_score'] = 1.0 - df['recency_rank_norm']
    
    # Final weighted combination
    df['final_score'] = (
        w_relevance * df['relevance_score'] +
        w_authority * df['authority_score'] +
        w_recency * df['recency_score']
    )
    
    return df.sort_values('final_score', ascending=False)

In [4]:
query =  "long-term health outcomes persistent organic pollutants exposure"

In [5]:
# file_path = Path.cwd().parents[0].joinpath("searches").glob("*.json")
file_path = Path.cwd().parents[0].joinpath("multi_out.json")
# df = pd.concat([pd.read_json(i) for i in file_path]).reset_index(drop = True)
#     # .drop(columns = ["paperId", "openAccessPdf", "authors", "venue"])
df = pd.read_json(file_path)

df.sample(5)

Unnamed: 0,paperId,title,abstract,year,citationCount,influentialCitationCount,openAccessPdf,embedding,authors,venue,externalIds
103,9783cd995efc3397af25f93e6efa3e7b932b73b7,The epidemiology of poisoning and overdose in ...,Poisoning is a major public health concern. In...,2024,4,0,{'url': 'https://oss.signavitae.com/mre-signav...,"{'specter': [-6.292193412780762, -6.3891000747...","[{'authorId': '4835619', 'name': 'Bader A. Aly...",Signa Vitae,"{'DOI': '10.22514/sv.2024.035', 'CorpusId': 26..."
272,0fcd5a2c84dbcbeed6774f6c636284c6990824db,Opinion: Sustainable development must account ...,The United Nations (UN) launched the 2030 Agen...,2020,263,6,{'url': 'https://www.pnas.org/content/pnas/117...,"{'specter': [1.224358201026916, -2.33797121047...","[{'authorId': '5140577', 'name': 'M. Di Marco'...",Proceedings of the National Academy of Science...,"{'MAG': '3006334980', 'DOI': '10.1073/pnas.200..."
221,43e4b4a7081ace0be846fa95ed543a4c8ddfb7c6,Bioaccumulation and biological effects of ciga...,Marine debris is a global environmental issue....,2015,116,3,{'url': 'https://www.nature.com/articles/srep1...,"{'specter': [-1.39003300666809, -3.19305634498...","[{'authorId': '9022451', 'name': 'S. Wright'},...",Scientific Reports,"{'MAG': '2228573600', 'PubMedCentral': '456989..."
237,17d1db0af0504080f12e6afd94653f2b28240f88,Chronological Trends and Mercury Bioaccumulati...,Simple Summary Managing aquatic systems is bec...,2021,7,0,{'url': 'https://www.mdpi.com/2076-2615/11/8/2...,"{'specter': [-2.694979429244995, -4.9451875686...","[{'authorId': '143738405', 'name': 'F. Morgado...",Animals,"{'PubMedCentral': '8388643', 'DOI': '10.3390/a..."
21,e54f60ff5b009a342eff125f234ff75085e3065e,Research progress of persistent organic pollut...,\n \n The occurrence of persistent organic pol...,2024,15,0,{'url': 'https://iwaponline.com/wpt/article-pd...,"{'specter': [1.019235610961914, -2.49191117286...","[{'authorId': '2181026966', 'name': 'Aindrila ...",Water Practice &amp; Technology,"{'DOI': '10.2166/wpt.2024.031', 'CorpusId': 26..."


In [6]:
df = df.assign(tfid_rank = lambda df: tfidf_vectorizer(query, df.title.to_list()),
          bm25_rank = lambda df: compute_bm25s_scores(query, df),
          lsa_rank = lambda df: compute_lsa_scores(query, df),
          pseudo_specter_rank = lambda df: compute_bm25_centroid_ranks(df),
          rrf_score = lambda df: compute_rrf_relevance(df),
          authority_rank = lambda df: compute_authority_scores(df),
          recency_rank = lambda df: compute_recency_scores(df, 2025)
         )
df

                                                                                                                       

Unnamed: 0,paperId,title,abstract,year,citationCount,influentialCitationCount,openAccessPdf,embedding,authors,venue,...,tfid_rank,bm25_rank,lsa_rank,specter_vector,pseudo_specter_rank,rrf_score,authority_raw,authority_rank,recency_raw,recency_rank
0,84ee677a9ad3d62ce40fcdb7af05f2f0553687e4,A comprehensive review of the human body burde...,Guiyu has been one of the world’s largest dest...,2023,6,0,{'url': 'https://link.springer.com/content/pdf...,"{'specter': [-0.9831301569938661, -3.615358352...","[{'authorId': '153383503', 'name': 'Wenlong Hu...",Discover Environment,...,55,1,24,"[-0.9831301569938661, -3.615358352661133, 1.56...",4,0.052619,0.447158,106,0.818731,2
1,6668f9be10764376363e308f1ddb732bac5836b2,Persistent Organic Pollutants in Environment a...,Persistent organic pollutants (POPs) are often...,2023,0,0,{'url': 'https://savvysciencepublisher.com/ind...,"{'specter': [1.635173320770263, -3.62136530876...","[{'authorId': '12488370', 'name': 'F. Mir'}]",Journal of Environmental Science and Engineeri...,...,1,2,8,"[1.635173320770263, -3.621365308761596, 3.2358...",1,0.063622,0.000000,113,0.818731,2
2,ae1bfc377e3db5f7ef42e7af6963e40f6169fff8,Long-term Persistent Organic Pollutants Exposu...,Abstract Environmentally persistent organic po...,2018,44,2,{'url': 'https://academic.oup.com/biomedgeront...,"{'specter': [1.056381821632385, -4.94490575790...","[{'authorId': '120766307', 'name': 'Jinghua Yu...","The journals of gerontology. Series A, Biologi...",...,4,3,17,"[1.056381821632385, -4.944905757904053, 2.2588...",6,0.059637,1.152288,71,0.496585,7
3,262d55ba1fbea52d556eb108ecd6f4f80b83da73,For a healthier future: a virtuous cycle for r...,"In the modern society, people are exposed to v...",2017,16,0,{'url': 'https://jech.bmj.com/content/jech/71/...,"{'specter': [-0.44689041376113803, -6.43197727...","[{'authorId': '50853004', 'name': 'C. Mori'}, ...",Journal of Epidemiology and Community Health,...,28,4,13,"[-0.44689041376113803, -6.431977272033691, -0....",8,0.055393,0.763428,96,0.449329,8
4,54890cd8575c6d1ed7510a2f833aaa3249e83b43,Bias from conditioning on live birth in pregna...,Only 60-70% of fertilized eggs may result in a...,2015,208,7,{'url': 'https://escholarship.org/content/qt35...,{},"[{'authorId': '5825509', 'name': 'Z. Liew'}, {...",International Journal of Epidemiology,...,93,5,53,,219,0.034354,1.802089,23,0.367879,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
290,f511233f6fc50df63609dbb33f6b18531cf127a8,Molecular Mechanisms Associated with ROS-Depen...,"Currently, atherosclerosis, which affects the ...",2021,26,0,{'url': 'https://www.mdpi.com/2076-3921/10/5/7...,"{'specter': [1.989205718040466, -8.59575557708...","[{'authorId': '33968536', 'name': 'Greg Hutchi...",Antioxidants,...,186,260,266,"[1.989205718040466, -8.595755577087402, -0.260...",212,0.013934,0.944483,86,0.670320,4
291,52dd6f95e5a89993e6514860bd63c118c90b6d01,Exposure to violence during childhood is assoc...,,2013,477,17,{'url': 'https://www.nature.com/articles/mp201...,{},"[{'authorId': '48154457', 'name': 'I. Shalev'}...",Molecular Psychiatry,...,154,260,203,,219,0.015184,2.158664,10,0.301194,12
292,e149feec57faf780d6fc0d246c6344bc6a87cf26,"Human Excretion of Bisphenol A: Blood, Urine, ...",Background. Bisphenol A (BPA) is an ubiquitous...,2011,173,2,{'url': 'http://www.ec.gc.ca/lcpe-cepa/79A93C6...,"{'specter': [-0.22690190374851202, -4.84392166...","[{'authorId': '3702090', 'name': 'S. Genuis'},...",Journal of Environmental and Public Health,...,186,260,241,"[-0.22690190374851202, -4.843921661376953, 2.1...",89,0.017224,1.723456,27,0.246597,14
293,ee8035c029d6121ea8c6c6ddc2263d4306380d38,Ecophysiological determinants of the human ske...,This review assessed relationships between ext...,2018,2,0,{'url': 'https://journals.viamedica.pl/medical...,{},"[{'authorId': '47777253', 'name': 'P. Kamiński...",Medical Research Journal,...,186,260,125,,219,0.016180,0.204120,111,0.496585,7


In [14]:
compute_final_ranking(df).abstract.to_list()[2]
    # .openAccessPdf.to_list()

"Persistent organic pollutants (POPs) and related chemicals are fascinating because of their combination of physical-chemical properties and complex effects. Most are man-made, but some also have natural origins. They are persistent in the environment, but they can be broken down variously by biodegradation, atmospheric reactions, and abiotic transformations. They can exist in the gas or particle phases, or both, in the atmosphere and in the dissolved or particulate phases, or both, in water. These combinations mean that they may undergo long-range transport in the atmosphere or oceans, or they may stay close to sources. Hence, emissions from one country are frequently a source of contamination to another country. They are also usually lipophilic, so-combined with persistence-this means they can accumulate in organisms and biomagnify through food chains. We all have a baseline of POPs residues in our tissues, even the unborn fetus via placental transfer and the newly born baby via moth

In [21]:
compute_final_ranking(df).drop(columns = ["embedding", "authors", "openAccessPdf", "externalIds", "venue", "specter_vector"]).style.set_sticky()

Unnamed: 0,paperId,title,abstract,year,citationCount,influentialCitationCount,tfid_rank,bm25_rank,lsa_rank,pseudo_specter_rank,rrf_score,authority_raw,authority_rank,recency_raw,recency_rank,rrf_relevance_SCORE,rrf_relevance_rank,rrf_relevance_rank_norm,authority_rank_norm,recency_rank_norm,relevance_score,authority_score,recency_score,final_score
22,42352cc27c1598dc21f0fa7ecdf87f41f1979516,"Persistent Organic Pollutants in Food: Contamination Sources, Health Effects and Detection Methods","Persistent organic pollutants (POPs) present in foods have been a major concern for food safety due to their persistence and toxic effects. To ensure food safety and protect human health from POPs, it is critical to achieve a better understanding of POP pathways into food and develop strategies to reduce human exposure. POPs could present in food in the raw stages, transferred from the environment or artificially introduced during food preparation steps. Exposure to these pollutants may cause various health problems such as endocrine disruption, cardiovascular diseases, cancers, diabetes, birth defects, and dysfunctional immune and reproductive systems. This review describes potential sources of POP food contamination, analytical approaches to measure POP levels in food and efforts to control food contamination with POPs.",2019.0,302.0,6.0,26.0,23.0,30.0,219.0,0.038371,1.961895,13.0,0.548812,6.0,0.038371,33.0,0.108844,0.107143,0.208333,0.891156,0.892857,0.791667,0.886777
6,7f4be17e4f4436c1120eefcad95c933cac6af700,Respiratory health and indoor air pollutants based on quantitative exposure assessments,,2012.0,235.0,5.0,59.0,7.0,48.0,59.0,0.040991,1.854306,20.0,0.272532,13.0,0.040991,21.0,0.068027,0.169643,0.5,0.931973,0.830357,0.5,0.874809
60,5c3bc4a57370cdd9c089c56fc6e2a1201c001cca,Persistent Organic Pollutants (POPs) and Related Chemicals in the Global Environment: Some Personal Reflections.,"Persistent organic pollutants (POPs) and related chemicals are fascinating because of their combination of physical-chemical properties and complex effects. Most are man-made, but some also have natural origins. They are persistent in the environment, but they can be broken down variously by biodegradation, atmospheric reactions, and abiotic transformations. They can exist in the gas or particle phases, or both, in the atmosphere and in the dissolved or particulate phases, or both, in water. These combinations mean that they may undergo long-range transport in the atmosphere or oceans, or they may stay close to sources. Hence, emissions from one country are frequently a source of contamination to another country. They are also usually lipophilic, so-combined with persistence-this means they can accumulate in organisms and biomagnify through food chains. We all have a baseline of POPs residues in our tissues, even the unborn fetus via placental transfer and the newly born baby via mother's milk. POPs in biological systems occur in mixtures, so confirming effects caused by POPs on humans and other top predators is never straightforward. Depending on which papers you read, POPs may be relatively benign, or they could be responsible for key subchronic and chronic effects on reproductive potential, on immune response, as carcinogens, and on a range of behavioral and cognitive end points. They could be a factor behind diseases and conditions which have been increasingly reported and studied in modern societies. In short, they are endlessly fascinating to scientists and a nightmare to regulators and policy makers.",2021.0,163.0,1.0,60.0,61.0,28.0,13.0,0.04166,1.698101,29.0,0.67032,4.0,0.04166,17.0,0.054422,0.25,0.125,0.945578,0.75,0.875,0.873597
63,bf2aa4e96ecda59a71cae872d9fedbb41761be0d,"Demographic consequences of heavy metals and persistent organic pollutants in a vulnerable long-lived bird, the wandering albatross",,2014.0,142.0,5.0,37.0,63.0,7.0,77.0,0.040664,1.639486,36.0,0.332871,11.0,0.040664,24.0,0.078231,0.3125,0.416667,0.921769,0.6875,0.583333,0.822853
4,54890cd8575c6d1ed7510a2f833aaa3249e83b43,Bias from conditioning on live birth in pregnancy cohorts: an illustration based on neurodevelopment in children after prenatal exposure to organic pollutants.,"Only 60-70% of fertilized eggs may result in a live birth, and very early fetal loss mainly goes unnoticed. Outcomes that can only be ascertained in live-born children will be missing for those who do not survive till birth. In this article, we illustrate a common bias structure (leading to 'live-birth bias') that arises from studying the effects of prenatal exposure to environmental factors on long-term health outcomes among live births only in pregnancy cohorts. To illustrate this we used prenatal exposure to perfluoroalkyl substances (PFAS) and attention-deficit/hyperactivity disorder (ADHD) in school-aged children as an example. PFAS are persistent organic pollutants that may impact human fecundity and be toxic for neurodevelopment. We simulated several hypothetical scenarios based on characteristics from the Danish National Birth Cohort and found that a weak inverse association may appear even if PFAS do not cause ADHD but have a considerable effect on fetal survival. The magnitude of the negative bias was generally small, and adjusting for common causes of the outcome and fetal loss can reduce the bias. Our example highlights the need to identify the determinants of pregnancy loss and the importance of quantifying bias arising from conditioning on live birth in observational studies.",2015.0,208.0,7.0,93.0,5.0,53.0,219.0,0.034354,1.802089,23.0,0.367879,10.0,0.034354,52.0,0.173469,0.196429,0.375,0.826531,0.803571,0.625,0.808418
66,266f0151744e9adcac5922faf10634d477e0d390,"Persistent organic pollutants (POPs): a global issue, a global challenge",,2017.0,235.0,4.0,61.0,66.0,18.0,219.0,0.032606,1.854306,20.0,0.449329,8.0,0.032606,64.0,0.214286,0.169643,0.291667,0.785714,0.830357,0.708333,0.79747
14,f81a696a6de52f462c79912057fbbe2586d458cb,Most unwanted.,"Persistent organic pollutants (POPs) can travel thousands of miles, accumulate in the food chain, and persist in the environment, taking as long as centuries to degrade. POPs are known to play a role in birth defects, cancer, immune system dysfunction, and reproductive problems in wildlife. While the effects of POPs on human health are unclear, many researchers believe that long-term exposure contributes to increasing rates of birth defects, fertility problems, greater susceptibility to disease, diminished intelligence, and certain cancers. Twelve POPs have been identified by the United Nations Environment Programme as requiring urgent regulatory attention. They include the pesticides aldrin, chlordane, DDT, dieldrin, endrin, heptachlor, hexachlorobenzene, mirex, and toxaphene. Of the remaining three POPs, polychlorinated biphenyls are industrial products (used in electrical transformers), and dioxins and furans are unintentional by-products of industrial processes.",1999.0,120.0,4.0,186.0,15.0,62.0,7.0,0.04052,1.568202,39.0,0.074274,25.0,0.04052,25.0,0.081633,0.339286,1.0,0.918367,0.660714,0.0,0.78227
2,ae1bfc377e3db5f7ef42e7af6963e40f6169fff8,Long-term Persistent Organic Pollutants Exposure Induced Telomere Dysfunction and Senescence-Associated Secretary Phenotype,"Abstract Environmentally persistent organic pollutant (POP) is the general term for refractory organic compounds that show long-range atmospheric transport, environmental persistence, and bioaccumulation. It has been reported that the accumulation of POPs could lead to cellular DNA damage and adverse effects of on metabolic health. To better understand the mechanism of the health risks associated with POPs, we conducted an evidence-based cohort investigation (n = 5,955) at the Jinghai e-waste disposal center in China from 2009 to 2016, where people endure serious POP exposure. And high levels of aging-related diseases, including hypertension, diabetes, autoimmune diseases, and reproductive disorders were identified associated with the POP exposure. In the subsequent molecular level study, an increased telomere dysfunction including telomere multiple telomere signals, telomere signal-free ends, telomere shortening and activation of alternative lengthening of telomeres were observed, which might result from the hypomethylated DNA modification induced telomeric repeat-containing RNA overexpression. Moreover, dysfunctional telomere-leaded senescence-associated secretory phenotype was confirmed, as the proinflammatory cytokines and immunosenescence hallmarks including interleukin-6, P16INK4a, and P14ARF were stimulated. Thus, we proposed that the dysfunctional telomere and elevated systemic chronic inflammation contribute to the aging-associated diseases, which were highly developed among the POP exposure individuals.",2018.0,44.0,2.0,4.0,3.0,17.0,6.0,0.059637,1.152288,71.0,0.496585,7.0,0.059637,3.0,0.006803,0.625,0.25,0.993197,0.375,0.75,0.764668
135,9be10d03f4c940e80c6812da22ce7e02b031d5eb,Effects of Neonicotinoid Pesticide Exposure on Human Health: A Systematic Review,"Background: Numerous studies have identified detectable levels of neonicotinoids (neonics) in the environment, adverse effects of neonics in many species, including mammals, and pathways through which human exposure to neonics could occur, yet little is known about the human health effects of neonic exposure. Objective: In this systematic review, we sought to identify human population studies on the health effects of neonics. Methods: Studies published in English between 2005 and 2015 were searched using PubMed, Scopus, and Web of Science databases. No restrictions were placed on the type of health outcome assessed. Risk of bias was assessed using guidance developed by the National Toxicology Program’s Office of Health Assessment and Translation. Results: Eight studies investigating the human health effects of exposure to neonics were identified. Four examined acute exposure: Three neonic poisoning studies reported two fatalities (n = 1,280 cases) and an occupational exposure study of 19 forestry workers reported no adverse effects. Four general population studies reported associations between chronic neonic exposure and adverse developmental or neurological outcomes, including tetralogy of Fallot (AOR 2.4, 95% CI: 1.1, 5.4), anencephaly (AOR 2.9, 95% CI: 1.0, 8.2), autism spectrum disorder [AOR 1.3, 95% credible interval (CrI): 0.78, 2.2], and a symptom cluster including memory loss and finger tremor (OR 14, 95% CI: 3.5, 57). Reported odds ratios were based on exposed compared to unexposed groups. Conclusions: The studies conducted to date were limited in number with suggestive but methodologically weak findings related to chronic exposure. Given the wide-scale use of neonics, more studies are needed to fully understand their effects on human health. Citation: Cimino AM, Boyles AL, Thayer KA, Perry MJ. 2017. Effects of neonicotinoid pesticide exposure on human health: a systematic review. Environ Health Perspect 125:155–162; http://dx.doi.org/10.1289/EHP515",2016.0,359.0,4.0,84.0,134.0,41.0,119.0,0.027587,2.03623,11.0,0.40657,9.0,0.027587,93.0,0.312925,0.089286,0.333333,0.687075,0.910714,0.666667,0.764328
76,0ae1f5a9b109c1b488412a2a35d98be5d93f42a4,"A review of persistent organic pollutants: dioxins, furans, and their associated nitrogenated analogues",,2020.0,54.0,0.0,62.0,76.0,3.0,25.0,0.043187,1.235528,66.0,0.606531,5.0,0.043187,14.0,0.044218,0.580357,0.166667,0.955782,0.419643,0.833333,0.762011
