In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pickle
import gensim
import operator
from gensim.models import Word2Vec
import gensim.downloader as api
from gensim.models import KeyedVectors
from gensim.parsing.preprocessing import strip_punctuation
print((gensim.__version__))  # last saved run printed 4.2.0; `wv.key_to_index` used below is the gensim 4.x API, so do NOT pin gensim==3.8.3
from sklearn.linear_model import LogisticRegression
import functions
import re
from itertools import chain
from collections import Counter
import tqdm
from tqdm import tqdm

4.2.0


## Load sources - trained models and data

In [3]:
# Year used in the classifier artifact path (3.Classifiers_outputs/train_<year>/...).
train_year_of_citations = 2021

# Publication-year windows (inclusive) that select the articles used for
# training and for prediction/testing.
min_train_year_published, max_train_year_published = 2019, 2020
min_pred_year_published, max_pred_year_published = 2022, 2022

# Year range that appears in the file name of the word2vec model loaded below.
embeddings_from_year, embeddings_to_year = 2019, 2022

classifier = "lr"  # or "rf"

In [4]:
df_all = pd.read_csv("1.Preprocessing_outputs/df_sw_tok_low_punc_lemm_v7.csv").rename(columns = {'doi_x':'doi'})

In [None]:
df_all[df_all["abstract_cleaned"].str.contains("pada")]

In [4]:
# Keep only articles whose Year is 2021 or 2022 (both bounds inclusive).
df_all_from = df_all[df_all["Year"].between(2021, 2022)]

# Load models

In [6]:
# Load the pickled classifier. The file is opened in a `with` block so the
# handle is closed even if unpickling fails.
# NOTE: pickle.load can execute arbitrary code - only load artifacts you produced yourself.
lreg_path = '3.Classifiers_outputs/train_'+str(train_year_of_citations)+'/'+'lreg_w2v_avg_'+str(train_year_of_citations)+'.sav'
with open(lreg_path, 'rb') as lreg_file:
    lreg_w2v_avg = pickle.load(lreg_file)

# Load the word2vec model whose file name encodes the embedding year range.
model_w2v = gensim.models.Word2Vec.load("2.Train_embeddings_outpus/w2v_published_only_english_between_"+str(embeddings_from_year) + " and "+ str(embeddings_to_year)+".model")

# Importance of lreg w2v

In [7]:
# Whole vocabulary of the word2vec model - based on published articles 2019-2022
words = model_w2v.wv.key_to_index.keys()
# Map every vocabulary word to its embedding vector.
we_dict = dict((word, model_w2v.wv[word]) for word in words)
# Two-column frame: column 0 holds the word, column 1 its vector.
words_list_total = pd.DataFrame(we_dict.items())
print(len(words_list_total))

90918


In [8]:
score_of_word =  functions.score_of_word(model_w2v,lreg_w2v_avg).sort_values(by = ["score"], ascending=False)

### Add article counts and first year

In [9]:
def add_cnt_info(score_of_word, df_all, top_n = 40000):
    """Attach corpus frequency counts to the per-word scores.

    Parameters
    ----------
    score_of_word : pandas.DataFrame
        Needs the columns "word" and "score"; its row order is preserved.
    df_all : pandas.DataFrame
        Needs the column "abstract_cleaned" (whitespace-separated tokens).
        Side effect: a column "abstract_cleaned_tok" is added to this frame.
    top_n : int
        Number of leading rows of the result to return.

    Returns
    -------
    pandas.DataFrame
        Columns "word", "score", "cnt_in_all_articles" (total occurrences over
        all abstracts) and "cnt_of_articles" (number of abstracts containing
        the word), limited to the first ``top_n`` rows.

    Notes
    -----
    Reads the notebook-level ``words_list_total`` (column 0 = vocabulary words).
    """
    # Keep only words that are present in the notebook-level vocabulary table.
    final_results_of_select = score_of_word[score_of_word["word"].isin(list(words_list_total[0].values))]
    df_all["abstract_cleaned_tok"] = functions.tokenized_column(df_all["abstract_cleaned"])

    # Total occurrences per token. Tokens are streamed abstract by abstract
    # instead of joining the whole corpus into one giant string first; the
    # resulting counts are identical but peak memory is far lower.
    total_counts = Counter(chain.from_iterable(text.split() for text in df_all["abstract_cleaned"]))
    cnt_in_all_articles = pd.DataFrame(total_counts.most_common(), columns=["word", "cnt_in_all_articles"])

    # Number of abstracts containing each token (set() de-duplicates within an abstract).
    article_counts = Counter(chain.from_iterable(set(tokens) for tokens in df_all["abstract_cleaned_tok"]))
    cnt_articles = pd.DataFrame(list(article_counts.items()), columns=["word", "cnt_of_articles"])

    # Left merges keep every scored word and its original order.
    final_results_of_select = (
        final_results_of_select
        .merge(cnt_in_all_articles, on="word", how="left")
        .merge(cnt_articles, on="word", how="left")
    )

    final_results_of_select = final_results_of_select[["word","score","cnt_in_all_articles","cnt_of_articles"]]

    return final_results_of_select[:top_n]

In [10]:
top_df = add_cnt_info(score_of_word, df_all, top_n = 80000)

### Split into chunks because of the server's memory limits

In [11]:
# Write top_df to disk in four row-range chunks; each file name carries its
# start and stop row positions.
for start, stop in ((0, 22000), (22000, 40000), (40000, 60000), (60000, 80000)):
    top_df[start:stop].to_csv(f"top_df_{start}_{stop}.csv")

### Run separately (load one chunk at a time)

In [10]:
top_df = pd.read_csv("top_df_0_22000.csv")

In [6]:
top_df = pd.read_csv("top_df_22000_40000.csv")

In [6]:
top_df = pd.read_csv("top_df_40000_60000.csv")

In [6]:
top_df = pd.read_csv("top_df_60000_80000.csv")

### Add first year

In [7]:
def score_info_wo_target_w2(top_df, df_all, top_n = 40000):
    """Add, for every word in ``top_df``, the earliest ``Year`` of an abstract containing it.

    Parameters
    ----------
    top_df : pandas.DataFrame
        Needs a "word" column.
    df_all : pandas.DataFrame
        Needs the columns "abstract_cleaned" (whitespace-separated tokens) and "Year".
    top_n : int
        Maximum number of word columns (in vocabulary order) that are processed.

    Returns
    -------
    pandas.DataFrame
        ``top_df`` left-merged with two extra columns: "index" (the word) and
        ``0`` (the minimum Year among abstracts containing the word, as float;
        10000 is the placeholder used where no non-zero year is found).
        Words beyond the ``top_n`` cap get NaN in both extra columns.
    """
    import ctypes
    import gc
    from sklearn.feature_extraction.text import CountVectorizer

    # binary=True: each cell is 1 if the word occurs in the abstract, else 0.
    cvec = CountVectorizer(analyzer = "word", tokenizer=lambda txt: txt.split(),
                       ngram_range=(1,1),
                       binary= True,
                       min_df = 1
                      )
    matrix_bow_train = cvec.fit_transform(df_all['abstract_cleaned'])
    # get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2;
    # fall back to it only on old versions that lack the replacement.
    if hasattr(cvec, "get_feature_names_out"):
        tokens_bow_train = list(cvec.get_feature_names_out())
    else:
        tokens_bow_train = cvec.get_feature_names()
    matrix_bow_train_pd = pd.DataFrame.sparse.from_spmatrix(matrix_bow_train, columns = tokens_bow_train,index=df_all.Year)

    # Set membership is O(1); the previous code rebuilt a list for every column.
    wanted_words = set(top_df.word.values)
    # Cap at top_n word columns so that every kept column is actually processed
    # below (the old slice columns[1:top_n] skipped the last one because
    # position 0 is "Year", leaving it as a 0/1 flag with a "first year" of 1).
    word_columns = [col for col in matrix_bow_train_pd.columns if col in wanted_words][:int(top_n)]
    matrix_bow_train_pd = matrix_bow_train_pd[word_columns].reset_index()

    # Best-effort release of freed memory back to the OS. malloc_trim exists
    # only in glibc, so skip it silently on other platforms.
    gc.collect()
    try:
        ctypes.CDLL("libc.so.6").malloc_trim(0)
    except (OSError, AttributeError):
        pass

    # Replace each 0/1 presence flag by the abstract's Year (0 stays 0).
    for col in tqdm(word_columns):
        matrix_bow_train_pd[col] = matrix_bow_train_pd[col]*matrix_bow_train_pd['Year']

    # Zeros mean "word absent": turn them into a large sentinel so min() ignores them.
    matrix_bow_train_pd=matrix_bow_train_pd.mask(matrix_bow_train_pd==0)
    matrix_bow_train_pd=matrix_bow_train_pd.fillna(10000)
    first_year = matrix_bow_train_pd.min()

    # The unnamed Series becomes columns "index" and 0 after reset_index().
    return pd.merge(top_df,first_year.reset_index(), left_on=['word'], right_on= ['index'],  how = 'left')

In [8]:
score_of_word_with_info = score_info_wo_target_w2(top_df, df_all, top_n = len(top_df))

100%|██████████| 19998/19998 [00:53<00:00, 375.26it/s]


### Save separately (because of memory usage, this is done in chunks of about 20,000 words)

In [13]:
score_of_word_with_info.to_csv("word_score_info_first_22000.csv")

In [9]:
score_of_word_with_info.to_csv("word_score_info_first_22000_40000.csv")

In [9]:
score_of_word_with_info.to_csv("word_score_info_first_40000_60000.csv")

In [9]:
score_of_word_with_info.to_csv("word_score_info_first_60000_80000.csv")

### Load all chunks

In [10]:
# Columns kept from every chunk file; "0" is the first-year column as written to CSV.
info_columns = ["word","score","cnt_of_articles","cnt_in_all_articles","0"]

score_of_word_with_info1 = pd.read_csv("word_score_info_first_22000_40000.csv")[info_columns]
score_of_word_with_info2 = pd.read_csv("word_score_info_first_22000.csv")[info_columns]
score_of_word_with_info3 = pd.read_csv("word_score_info_first_40000_60000.csv")[info_columns]
score_of_word_with_info4 = pd.read_csv("word_score_info_first_60000_80000.csv")[info_columns]


# put all words together
# The chunks share the same columns, so stacking them is a plain concatenation.
# The previous chain of outer merges on every column did the same thing more
# expensively, merged chunk 2 twice, and its row order depended on the pandas version.
score_of_word_with_info = pd.concat(
    [score_of_word_with_info1, score_of_word_with_info2, score_of_word_with_info3, score_of_word_with_info4],
    ignore_index=True,
).drop_duplicates().reset_index(drop=True)
# NOTE: pd.cut creates 10 equal-width score bins (not equal-sized quantiles), labelled "1".."10".
score_of_word_with_info["quantile"]=pd.cut(score_of_word_with_info.score, bins=10, right=True,labels=["1","2","3","4","5","6","7","8","9","10"])
score_of_word_with_info = score_of_word_with_info.dropna()

## Add relevant articles - for each word, the three articles in which it occurs most often

- from all articles published 2021-2022 (`df_all_from`)

In [14]:
top_df = score_of_word_with_info

top_n = len(top_df)

from sklearn.feature_extraction.text import CountVectorizer
# binary=False: cells hold how many times the word occurs in the abstract.
cvec = CountVectorizer(analyzer = "word", tokenizer=lambda txt: txt.split(), 
                       ngram_range=(1,1),
                       binary= False,
                       min_df = 1
                      ) 
matrix_bow_train = cvec.fit_transform(df_all_from['abstract_cleaned'])
# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2;
# fall back to it only on old versions that lack the replacement.
if hasattr(cvec, "get_feature_names_out"):
    tokens_bow_train = list(cvec.get_feature_names_out())
else:
    tokens_bow_train = cvec.get_feature_names()
matrix_bow_train_pd = pd.DataFrame.sparse.from_spmatrix(matrix_bow_train, columns = tokens_bow_train,index=df_all_from.doi)
# Build the lookup once as a set; the previous code rebuilt a list of all
# words for every vocabulary column.
scored_words = set(top_df.word.values)
matrix_bow_train_pd = matrix_bow_train_pd[[col for col in matrix_bow_train_pd.columns if col in scored_words]]



In [21]:
matrix_bow_train_pd = matrix_bow_train_pd.reset_index()

In [22]:
# For every word column, record the three articles with the highest count of
# that word, together with those counts.
top_dois = []
top_cnt = []
word_list = []
# Column 0 is "doi" (from reset_index), so top_n word columns occupy
# positions 1..top_n; the old slice [1:top_n] stopped one column short.
for col in tqdm(matrix_bow_train_pd.columns[1:int(top_n) + 1]):
    top = matrix_bow_train_pd[[col]+["doi"]].sort_values(col,ascending=False)[:3]
    # Stored as strings so they can be written to CSV and concatenated later.
    top_dois.append(str(top["doi"].values))
    top_cnt.append(str(list(top[col].values)))
    word_list.append(col)

100%|██████████| 79732/79732 [25:59<00:00, 51.12it/s]


In [23]:
fin = pd.DataFrame(zip(top_dois,top_cnt,word_list),columns = ["doi","cnt_of_words","word"])

In [25]:
top_df.merge(fin,on="word",how="left").to_csv("word_score_info_first_all.csv")

In [6]:
# An earlier, commented-out version of this cell stitched together four
# "*_add_art.csv" chunk files, which no cell visible in this notebook writes.
# The combined table is now read back from the single file saved by the
# previous cell.

score_of_word_with_info = pd.read_csv("word_score_info_first_all.csv")

In [7]:
score_of_word_with_info.sort_values("score",ascending=False)

Unnamed: 0.1,Unnamed: 0,word,score,cnt_of_articles,cnt_in_all_articles,0,quantile,doi,cnt_of_words
18000,18000,wanfang,0.999967,380,380,2012.0,10,['10.1097/md.0000000000027861' '10.1016/j.ejog...,"[1, 1, 1]"
18001,18001,zeta,0.999936,225,265,2004.0,10,['10.3390/v13122440' '10.1101/2021.12.28.21268...,"[5, 4, 3]"
18002,18002,cnki,0.999919,453,457,2011.0,10,['10.1017/s0950268821002223' '10.1007/s12291-0...,"[1, 1, 1]"
18003,18003,b1526,0.999893,69,121,2021.0,10,['10.1101/2021.08.04.21261596' '10.1101/2021.0...,"[8, 6, 5]"
18004,18004,aqi,0.999859,154,342,2020.0,10,['10.1007/s11356-020-12164-2' '10.1186/s12302-...,"[6, 5, 5]"
...,...,...,...,...,...,...,...,...,...
79992,79992,protoparvovirus,0.019214,19,30,2016.0,1,['10.1007/s00705-020-04904-z' '10.3390/v130816...,"[4, 4, 2]"
79993,79993,cuenca,0.019214,12,20,2017.0,1,['10.3390/ijerph18094657' '10.1109/etcm53643.2...,"[4, 2, 2]"
79994,79994,ondo,0.019212,11,13,2021.0,1,['10.21203/rs.3.rs-966817/v1' '10.1108/jacpr-0...,"[3, 1, 1]"
79995,79995,gsk,0.019212,121,197,2003.0,1,['10.1093/ofid/ofab466.214' '10.1182/blood-202...,"[8, 4, 3]"


In [15]:
# Write the combined table back out in four row-range chunks so the next
# section can process one chunk at a time.
sc_chunk_bounds = {
    "sc_22000.csv": (0, 22000),
    "sc_22000_40000.csv": (22000, 40000),
    "sc_40000_60000.csv": (40000, 60000),
    "sc_60000_80000.csv": (60000, 80000),
}
for chunk_path, (start, stop) in sc_chunk_bounds.items():
    score_of_word_with_info[start:stop].to_csv(chunk_path)

## Add important articles - for each word, the three highest-scoring articles that contain it

In [8]:
score_df = pd.read_csv("final_articles_score_table_w2v.csv")
# Attach each article's score by DOI. Any "score" column left over from a
# previous run of this cell is dropped first; otherwise re-running would
# create score_x / score_y and break the later `df_all_from.score` access.
df_all_from = df_all_from.drop(columns=["score"], errors="ignore").merge(score_df[["doi","score"]],on="doi",how="left")

In [9]:
# NOTE: this section is run once per chunk; change the file name to process another chunk.
top_df = pd.read_csv("sc_60000_80000.csv")

top_n = len(top_df)

from sklearn.feature_extraction.text import CountVectorizer
# binary=True: cells are 1 if the word occurs in the abstract, else 0.
cvec = CountVectorizer(analyzer = "word", tokenizer=lambda txt: txt.split(), 
                       ngram_range=(1,1),
                       binary= True,
                       min_df = 1
                      ) 
matrix_bow_train = cvec.fit_transform(df_all_from['abstract_cleaned'])
# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2;
# fall back to it only on old versions that lack the replacement.
if hasattr(cvec, "get_feature_names_out"):
    tokens_bow_train = list(cvec.get_feature_names_out())
else:
    tokens_bow_train = cvec.get_feature_names()
matrix_bow_train_pd = pd.DataFrame.sparse.from_spmatrix(matrix_bow_train, columns = tokens_bow_train,index=df_all_from.score)
# Build the lookup once as a set; the previous code rebuilt a list of all
# words for every vocabulary column.
scored_words = set(top_df.word.values)
matrix_bow_train_pd = matrix_bow_train_pd[[col for col in matrix_bow_train_pd.columns if col in scored_words]]



In [10]:
matrix_bow_train_pd["doi"] = list(df_all_from.doi.values)

In [11]:
matrix_bow_train_pd

Unnamed: 0_level_0,-1,-11,-225,-230,-333,-35,-39,-47,-5,-68,...,≥72,≥75,≥85th,≥90,≥95,≥98,≥grade,●,⩾18,doi
score,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0.347239,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,10.3201/eid2701.201347
0.251360,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,10.1007/s10877-020-00638-0
0.230472,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,10.1007/s10151-020-02373-9
0.209152,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,10.1007/s10529-020-03060-3
0.306266,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,10.1007/s10943-020-01142-0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0.344008,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,10.1136/bmjno-2021-000214
0.326082,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,10.4103/ijo.ijo_1439_21
0.283125,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,10.3390/jpm11020116
0.198647,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,10.1007/s40520-022-02079-y


In [13]:
matrix_bow_train_pd = matrix_bow_train_pd.reset_index()

In [14]:
matrix_bow_train_pd

Unnamed: 0,score,-1,-11,-225,-230,-333,-35,-39,-47,-5,...,≥72,≥75,≥85th,≥90,≥95,≥98,≥grade,●,⩾18,doi
0,0.347239,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,10.3201/eid2701.201347
1,0.251360,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,10.1007/s10877-020-00638-0
2,0.230472,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,10.1007/s10151-020-02373-9
3,0.209152,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,10.1007/s10529-020-03060-3
4,0.306266,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,10.1007/s10943-020-01142-0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
284269,0.344008,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,10.1136/bmjno-2021-000214
284270,0.326082,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,10.4103/ijo.ijo_1439_21
284271,0.283125,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,10.3390/jpm11020116
284272,0.198647,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,10.1007/s40520-022-02079-y


In [15]:
# For every word column, record the three highest-scoring articles that
# contain the word (presence flag * article score), together with those values.
top_dois = []
top_score = []
word_list = []
# The score cast does not depend on the word, so do it once.
article_scores = matrix_bow_train_pd['score'].astype(float)
# Column 0 is "score" (from reset_index), so top_n word columns occupy
# positions 1..top_n; the old slice [1:top_n] stopped one column short.
for col in tqdm(matrix_bow_train_pd.columns[1:int(top_n) + 1]):
    if col != "doi":
        # Work on a small temporary frame instead of overwriting the matrix
        # column: re-running this cell no longer multiplies by the score a
        # second time, and the sparse matrix is left untouched.
        weighted = pd.DataFrame({
            col: matrix_bow_train_pd[col].astype(float)*article_scores,
            "doi": matrix_bow_train_pd["doi"],
        })
        top = weighted.sort_values(col,ascending=False)[:3]
        # Stored as strings so they can be written to CSV and concatenated later.
        top_dois.append(str(top["doi"].values))
        top_score.append(str(list(top[col].values)))
        word_list.append(col)

100%|██████████| 19905/19905 [10:05<00:00, 32.88it/s]


In [16]:
fin = pd.DataFrame(zip(top_dois,top_score,word_list),columns = ["dois_imp","score_of_words","word"])

In [17]:
fin

Unnamed: 0,dois_imp,score_of_words,word
0,['10.1016/j.envres.2021.111280' '10.1101/2021....,"[0.5672734813996978, 0.5307012380237226, 0.529...",-1
1,['10.1101/2022.03.22.22272745' '10.1101/2021.0...,"[0.5299966431405786, 0.520138608807366, 0.5014...",-11
2,['10.1080/00913847.2021.2022967' '10.1001/jama...,"[0.4065801327572071, 0.3999329295220716, 0.329...",-225
3,['10.21203/rs.3.rs-927188/v1' '10.3389/fmed.20...,"[0.4049136362783517, 0.3985196821935172, 0.329...",-230
4,['10.1101/2021.01.20.21250183' '10.1101/2021.0...,"[0.4194477704107779, 0.3690509715467971, 0.202...",-333
...,...,...,...
19899,['10.1101/2022.02.02.22270337' '10.3389/fmed.2...,"[0.6419698226879959, 0.5240546276864764, 0.467...",≥95
19900,['10.3389/fphar.2021.683296' '10.1136/bmjopen-...,"[0.3966985337844751, 0.3884521391983184, 0.370...",≥98
19901,['10.1186/s12885-021-09156-x' '10.2147/cmar.s2...,"[0.2385744549469066, 0.156596418918666, 0.1448...",≥grade
19902,['10.48047/rigeo.11.08.64' '10.48047/rigeo.11....,"[0.3113821569132466, 0.3028673870425701, 0.289...",●


In [35]:
top_df.merge(fin,on="word",how="left").to_csv("word_score_info_first_22000_40000.csv")

In [16]:
top_df.merge(fin,on="word",how="left").to_csv("word_score_info_first_22000.csv")

In [16]:
top_df.merge(fin,on="word",how="left").to_csv("word_score_info_first_40000_60000.csv")

In [18]:
top_df.merge(fin,on="word",how="left").to_csv("word_score_info_first_60000_80000.csv")

In [20]:
# Columns kept from every chunk file; everything is cast to str so the DOI and
# count/score columns can be concatenated as text in the following cells.
info_columns = ["word","score","cnt_of_articles","cnt_in_all_articles","0","doi","cnt_of_words","dois_imp","score_of_words"]

score_of_word_with_info1 = pd.read_csv("word_score_info_first_22000_40000.csv")[info_columns].astype(str)
score_of_word_with_info2 = pd.read_csv("word_score_info_first_22000.csv")[info_columns].astype(str)
score_of_word_with_info3 = pd.read_csv("word_score_info_first_40000_60000.csv")[info_columns].astype(str)
score_of_word_with_info4 = pd.read_csv("word_score_info_first_60000_80000.csv")[info_columns].astype(str)


# put all words together
# The chunks share the same columns, so stacking them is a plain concatenation.
# The previous chain of outer merges on every column did the same thing more
# expensively, merged chunk 2 twice, and its row order depended on the pandas version.
score_of_word_with_info = pd.concat(
    [score_of_word_with_info1, score_of_word_with_info2, score_of_word_with_info3, score_of_word_with_info4],
    ignore_index=True,
).drop_duplicates().reset_index(drop=True)
score_of_word_with_info.to_csv("score_of_word_with_info.csv")

In [21]:
score_of_word_with_info["dois_by_cnt_in_article"] = score_of_word_with_info["doi"]+score_of_word_with_info["cnt_of_words"]

In [22]:
score_of_word_with_info["dois_by_score_of_article"] = score_of_word_with_info["dois_imp"]+score_of_word_with_info["score_of_words"]

In [23]:
fin_cols = score_of_word_with_info[['word', 'score', 'cnt_of_articles', 'cnt_in_all_articles', '0', 'dois_by_cnt_in_article','dois_by_score_of_article']].rename({"0":"First_year"},axis=1)

In [24]:
fin_cols = fin_cols[fin_cols["cnt_of_articles"]!="nan"]

In [25]:
fin_cols.to_csv("score_of_word_with_info.csv")

In [28]:
fin_cols.sort_values("score",ascending=False)[:20]

Unnamed: 0,word,score,cnt_of_articles,cnt_in_all_articles,First_year,dois_by_cnt_in_article,dois_by_score_of_article
18000,wanfang,0.999966823984589,380,380,2012.0,['10.1097/md.0000000000027861' '10.1016/j.ejog...,['10.12114/j.issn.1007-9572.2021.00.548' '10.1...
18001,zeta,0.999936028486852,225,265,2004.0,['10.3390/v13122440' '10.1101/2021.12.28.21268...,['10.1093/infdis/jiab355' '10.1002/rmv.2270' '...
18002,cnki,0.9999192801553468,453,457,2011.0,['10.1017/s0950268821002223' '10.1007/s12291-0...,['10.12114/j.issn.1007-9572.2021.00.548' '10.1...
18003,b1526,0.9998926033645228,69,121,2021.0,['10.1101/2021.08.04.21261596' '10.1101/2021.0...,['10.1002/rmv.2270' '10.3390/v13122485' '10.11...
18004,aqi,0.99985852943917,154,342,2020.0,['10.1007/s11356-020-12164-2' '10.1186/s12302-...,['10.1007/s11356-021-18442-x' '10.1007/s11356-...
18005,cpdi,0.999837572466172,33,95,2020.0,['10.1080/20008198.2021.1980274' '10.1016/j.ja...,['10.3969/j.issn.1674-8115.2021.10.014'\n '10....
18006,re-positive,0.9998215608773692,64,199,2020.0,['10.3760/cma.j.cn112150-20211108-01034'\n '10...,['10.3760/cma.j.cn112338-20210506-00367'\n '10...
18007,pada,0.9998033866260874,91,192,2020.0,['10.25104/transla.v23i1.1737' '10.25311/kesko...,['10.24167/psidim.v20i2.3507' '10.35914/tomaeg...
18008,fcv-19s,0.9997973931248644,202,531,2020.0,['10.7717/peerj.11263' '10.4102/sajip.v47i0.18...,['10.1016/j.gerinurse.2021.09.012' '10.1007/s1...
18009,non-hubei,0.9997923153119636,28,60,2019.0,['10.1186/s12879-021-06502-z' '10.1007/s11187-...,['10.54605/fec20210304' '10.21037/apm-21-1975'...
