In [1]:
import pickle

In [2]:
import numpy as np

In [3]:
from collections import defaultdict, Counter
from operator import itemgetter

In [4]:
import spacy
from tqdm import tqdm

nlp = spacy.load('en_core_web_lg')

In [5]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.cluster import AgglomerativeClustering


In [6]:
doi2vec = pickle.load(open("cov/doi2vec.p", "rb"))
doi2str = pickle.load(open("cov/doi2str.p", "rb"))
doi2title = pickle.load(open("cov/doi2title.p", "rb"))
doi2year = pickle.load(open("cov/doi2year.p", "rb"))

In [7]:
len(doi2vec)

53629

In [8]:
X = np.zeros((len(doi2vec), 300))
doi2index = {}
for i, (doi, vec) in enumerate(doi2vec.items()):
    doi2index[doi] = i
    X[i, :] = vec

index2doi = {i: doi for doi, i in doi2index.items()}

In [9]:
titles = list(doi2title.values())

In [10]:
titles = [title for doi, title in doi2title.items() if doi2year[doi] >= 2017]

In [11]:
len(titles)

91974

In [12]:
docs = list(tqdm(nlp.pipe(titles)))

91974it [02:56, 520.42it/s]


In [13]:
nc_counts = defaultdict(int)

for doc in docs:
    for nc in doc.noun_chunks:
        if nc[0].pos_ == "DET":
            nc_counts[nc[1:].text.lower()] += 1
        else:
            nc_counts[nc.text.lower()] += 1

In [14]:
term_tuples = sorted(nc_counts.items(), key=itemgetter(1), reverse=True)

In [15]:
term_mean_sims = defaultdict(float)

for term, count in term_tuples:
    if len(term) <= 2:
        continue
    if count <= 5:
        break
    term_dois = set()
    term_vecs = []
    for doi, title in doi2title.items():
        if term.lower() in title.lower():
            if doi in doi2vec:
                term_dois.add(doi)
                term_vecs.append(doi2vec[doi])
    #term_X = np.zeros()term
    if len(term_dois) < 10000:
        if len(term_vecs) >= 1:
            sims = cosine_similarity(term_vecs)
            print(term, count, len(term_dois), np.mean(sims))
            term_mean_sims[term] = np.mean(sims)
    else:
        print(term, count, len(term_dois))
        

development 916 1151 0.4110555
review 886 1205 0.42017
role 755 1064 0.4351763
analysis 669 2168 0.40766364
application 645 1406 0.4591425
effect 619 2041 0.4115659
design 616 985 0.43563077
impact 614 863 0.4195619
effects 563 987 0.42590865
china 556 438 0.46752793
evidence 547 871 0.47702244
evaluation 546 669 0.43043017
patients 532 318 0.5102235
what 521 376 0.4370134
detection 493 1878 0.5069753
survey 476 584 0.4769149
case 461 870 0.423723
introduction 450 183 0.4474144
use 445 1858 0.41544524
study 435 1503 0.40536523
challenges 431 328 0.41810375
applications 416 858 0.48016524
children 410 423 0.54375654
comparison 374 387 0.41563287
influence 357 525 0.42131764
things 353 258 0.7216406
internet 304 316 0.6504806
performance 288 1020 0.43916017
treatment 287 313 0.4571783
wireless sensor networks 286 445 0.82846636
case study 274 281 0.43082467
research 272 918 0.45227897
identification 265 427 0.4611907
characterization 263 331 0.45069885
systematic review 252 246 0.5415329

imaging 55 568 0.5326
biology 55 216 0.5673833
france 55 32 0.4677567
european union 55 62 0.6558942
well-being 55 106 0.6238432
transition 55 224 0.4275805
difference 55 291 0.4349291
experiments 55 109 0.42310637
consequences 55 101 0.48234138
visualization 55 58 0.46070313
benefits 55 100 0.45011547
algorithms 55 214 0.5333564
engagement 55 224 0.51578313
associations 55 87 0.5335572
volume 55 67 0.44224164
r package 55 12 0.49676546
artificial intelligence 54 32 0.5611089
era 54 5691 0.40182117
city 54 688 0.41179922
studies 54 276 0.41556877
risk factors 54 37 0.49780568
networks 54 1615 0.5900238
meaning 54 121 0.50378793
ireland 54 46 0.6048902
emotions 54 92 0.6063367
crisis 53 150 0.5340981
drug discovery 53 20 0.6784291
obesity 53 70 0.65712464
tracking 53 408 0.6147816
behavior 53 582 0.45677495
fault diagnosis 53 104 0.88308156
exploration 53 124 0.4179728
examination 53 109 0.46876368
patient 52 421 0.5007687
preparation 52 59 0.46351314
anxiety 52 88 0.64065343
industry 5

interpretation 34 65 0.4232549
reading 34 305 0.6897059
philosophy 34 16 0.53333163
values 34 101 0.46816996
young people 34 26 0.6256503
depressive symptoms 34 48 0.79686904
web 34 189 0.52095914
vehicle 34 440 0.64703786
vitro 34 73 0.54773724
oxidative stress 34 31 0.7072973
money 34 37 0.51942784
intervention 34 321 0.51430833
narratives 34 115 0.6053005
mobile devices 34 31 0.6335059
pilot study 34 35 0.54882276
inclusion 34 50 0.46536463
foucault 34 5 0.7483867
call 33 534 0.4074582
protection 33 97 0.441209
saudi arabia 33 27 0.62591195
crystal structure 33 80 0.8414862
moving 33 124 0.45277375
structures 33 449 0.4540377
type 33 582 0.418082
noise 33 157 0.5126944
business 33 214 0.5802706
success 33 150 0.45464584
scope 33 117 0.5212237
sensor 33 4304 0.53939146
institutions 33 135 0.5434436
links 33 69 0.43693438
pathways 33 102 0.45373976
concepts 33 76 0.4405379
malaysia 33 32 0.5057135
disabilities 33 54 0.67641425
child 33 716 0.5340967
remote sensing 33 129 0.7160396
usi

position 24 863 0.45700604
fairness 24 51 0.5214334
reproduction 24 30 0.5686615
disability 24 32 0.5310094
games 24 133 0.53885084
accessibility 24 27 0.47990888
industrial internet 24 20 0.8573146
plants 24 77 0.49355733
discussion 24 51 0.5223478
motivations 24 45 0.5737002
cyber-physical systems 24 25 0.6529972
interventions 24 125 0.4877928
usability 24 26 0.6082178
sustainable development 24 31 0.64913005
behaviour 24 137 0.43164015
co2 24 59 0.53859794
legitimacy 24 62 0.64731026
pathophysiology 24 7 0.6738724
pedagogy 24 56 0.6731921
sentiment analysis 24 26 0.83889264
characterisation 24 28 0.49268878
examples 24 20 0.48725587
bariatric surgery 24 12 0.7182792
northern ireland 24 14 0.6518404
terrorism 24 37 0.61526275
emotion recognition 24 49 0.81880057
relations 24 664 0.44587597
content 24 259 0.44065663
capitalism 24 36 0.62536275
cancer cells 24 39 0.66751057
symptoms 24 88 0.70441025
new evidence 24 24 0.5565047
polarization 24 134 0.4550689
facebook 24 52 0.6551306
spr

physicians 19 7 0.6149917
threats 19 34 0.4647043
software 19 154 0.55063653
surfaces 19 130 0.49104533
real-time 19 392 0.5146958
belgium 19 19 0.593892
fertility 19 34 0.67950815
description 19 56 0.4584213
rethinking 19 79 0.5275942
price 19 161 0.6471583
peptides 19 52 0.6346844
economic development 19 27 0.6694386
markets 19 136 0.5745781
hyperspectral image classification 19 15 0.9081943
damage 19 209 0.57857877
estimating 19 151 0.45540655
graphs 19 63 0.50466883
trajectories 19 73 0.47772247
authenticity 19 25 0.52265906
targets 19 94 0.49080577
cultural heritage 19 21 0.50313663
income inequality 19 29 0.75532293
health monitoring 19 119 0.70038086
underwater sensor networks 19 28 0.95119023
statistics 19 46 0.3888487
gas sensors 19 91 0.79069823
fall 19 136 0.5025953
coverage 19 138 0.5323887
autophagy 19 46 0.8901207
graphene oxide 19 68 0.7658387
twenty-first century 19 16 0.46459314
voices 19 20 0.5429232
metabolomics 19 28 0.81702036
quantitative analysis 19 16 0.4523291


piezoceramic transducers 16 27 0.9780938
marriage 16 40 0.6445609
employees 16 77 0.7455379
ieee 16 28 0.84209955
electronic health records 16 10 0.65995485
entrepreneurship 16 67 0.70844007
heart rate 16 53 0.7918987
fiber bragg grating sensors 16 20 0.9327
mobile robot 16 44 0.77050036
heritage 16 73 0.50751334
professional development 16 62 0.79526865
fragmentation 16 41 0.58311564
jea 16 2 0.67124605
novel coronavirus disease 15 10 0.9456089
therapeutic potential 15 10 0.6162976
therapeutics 15 29 0.5714889
replication 15 102 0.52940094
pregnant women 15 6 0.6684058
clinicians 15 7 0.5344046
guidance 15 35 0.43749607
structural basis 15 21 0.7753475
police 15 97 0.7282939
protocol 15 246 0.66179633
man 15 4472 0.4141797
: evidence 15 399 0.5219803
lung cancer 15 19 0.59440094
high temperature 15 14 0.7161187
reference 15 234 0.4348974
palliative care 15 6 0.8347877
gnss 15 155 0.78280646
wearables 15 16 0.7612314
center 15 128 0.4156102
form 15 3436 0.40985233
ultrasound 15 70 0.54

homelessness 13 15 0.738261
subjectivity 13 14 0.66005206
cold war 13 5 0.712129
information retrieval 13 34 0.88274986
molecules 13 89 0.49743345
computation 13 186 0.45998186
income 13 144 0.5885572
sensor fusion 13 49 0.7481131
discourses 13 46 0.6620991
principle 13 91 0.41307968
dyslexia 13 15 0.8170976
memoriam 13 1 1.0
sarcopenia 13 4 0.87200344
body composition 13 13 0.8975041
typology 13 25 0.46082765
length 13 170 0.4483854
face recognition 13 26 0.8812216
errors 13 51 0.51467764
carbon dioxide 13 22 0.56741095
agenda 13 63 0.51259387
autobiography 13 1 1.0000001
research agenda 13 27 0.5151245
irrigation 13 75 0.7353493
deep eutectic solvents 13 15 0.98692966
geopolitics 13 11 0.718076
silence 13 21 0.51776344
potential applications 13 17 0.5210435
financial literacy 13 9 0.8831871
discipline 13 45 0.5587114
eds 13 103 0.40806058
economy 13 137 0.5710819
specificity 13 27 0.5339053
eeg signals 13 15 0.8527698
sexual harassment 13 6 0.719073
micrornas 13 5 0.7484695
-loop 13 

tests 11 74 0.41036442
antioxidant activity 11 1 0.9999999
theatre 11 12 0.5810352
nexus 11 30 0.5360704
options 11 38 0.470104
actions 11 403 0.41496295
mathematics education 11 6 0.775992
dark side 11 10 0.5101476
extended kalman filter 11 13 0.825276
oxford 11 4 0.5497396
genome 11 272 0.7341544
rest 11 563 0.42672396
uwb 11 53 0.7752345
damage identification 11 22 0.90718496
new paradigm 11 8 0.514488
intelligence 11 97 0.49202067
neuroinflammation 11 12 0.84549415
lamb waves 11 13 0.94444203
soils 11 23 0.6168603
plasticity 11 39 0.49593338
hydrogen 11 169 0.55268663
central and eastern europe 11 9 0.6420672
phylogeny 11 38 0.8908124
data science 11 15 0.62181914
lifespan 11 12 0.49999195
3d reconstruction 11 23 0.81668615
bench 11 69 0.43502173
liminality 11 11 0.7940114
friend 11 111 0.4449095
availability 11 28 0.44375804
deep neural network 11 44 0.68154126
smart contracts 11 4 0.57957935
materiality 11 7 0.78944314
rfid 11 66 0.755025
systematic approach 11 8 0.46221992
virtu

scholarship 10 18 0.58020985
ambiguity 10 49 0.50445193
pedestrians 10 13 0.8047252
nato 10 253 0.4788618
heegaard floer homology 10 15 0.99558365
long-term outcomes 10 4 0.60238063
gut 10 185 0.8763049
#metoo 10 1 0.9999999
coherence 10 64 0.5506081
vehicle detection 10 17 0.84826785
ionic liquids 10 10 0.66457045
translanguaging 10 14 0.95433706
hereditary angioedema 10 10 0.99517614
radiation 10 80 0.44947928
methodologies 10 29 0.44549388
osteoarthritis 10 3 0.65652204
american college 10 7 0.6007095
welfare state 10 47 0.8053653
simulations 10 69 0.4445514
high resolution 10 47 0.5289021
lab 10 1039 0.41246003
sar 10 382 0.50678694
registration 10 55 0.7773281
social innovation 10 15 0.7689593
comprehensive study 10 5 0.598208
benchmarking 10 31 0.46550688
follow-up 10 11 0.4651556
bridge 10 100 0.58980113
usefulness 10 5 0.50434995
self-assembly 10 23 0.6495526
capability 10 43 0.47198087
carbon dots 10 41 0.95201474
diagnostic accuracy 10 4 0.58351624
rolling bearings 10 8 0.940

intimate partner violence 9 9 0.69973654
minerals 9 7 0.50876284
speech 9 44 0.531451
realism 9 17 0.62130535
reactive oxygen species 9 7 0.7829768
essay 9 12 0.547559
new trends 9 4 0.5745634
norm 9 248 0.41075385
multiple sensors 9 7 0.71684545
crimes 9 5 0.71726096
independence 9 19 0.44951466
personhood 9 6 0.71399134
precision medicine 9 5 0.60543823
dispersion 9 44 0.48536196
secularism 9 2 0.79224753
alliances 9 10 0.6459874
fall detection 9 34 0.9240369
flipped classroom 9 11 0.8900106
colitis 9 15 0.8101004
ends 9 227 0.40694943
sustainable development goals 9 7 0.73965394
caregivers 9 18 0.6451234
optimal control 9 2 0.6813756
peacebuilding 9 11 0.83258736
performance comparison 9 7 0.61514515
profession 9 198 0.56067747
feeling 9 30 0.5789666
pd-l1 9 23 0.9116028
hygroscopicity 9 18 0.9054754
health-related quality 9 3 0.78622246
biosensing 9 74 0.72006416
neurotransmitters 9 6 0.9298615
assessments 9 37 0.4902305
case-control study 9 5 0.51798254
house 9 193 0.50162244
rest

matching 8 134 0.53162605
remarks 8 3 0.6363245
mechanical behavior 8 3 0.78151083
sequencing 8 105 0.65894574
social 8 1540 0.48897275
south asia 8 11 0.56423056
human brain 8 9 0.6855807
conduct 8 297 0.5283742
compassion 8 17 0.66729665
lead 8 310 0.4736089
gastric cancer 8 5 0.7904132
camp 8 156 0.46604612
psychiatry 8 4 0.6208807
pharmacology 8 10 0.58892196
uruguay 8 3 0.6016545
pharmaceuticals 8 5 0.58772326
autonomous underwater vehicles 8 7 0.85001594
deformation monitoring 8 16 0.80700874
banks 8 20 0.6596036
traceability 8 14 0.6497249
qualitative research 8 17 0.7453768
an overview 8 86 0.4195186
grit 8 54 0.44218245
kinematics 8 28 0.76405996
mitochondria 8 76 0.6681793
films 8 151 0.5483194
adhd 8 11 0.6711324
graph 8 1518 0.4254308
conversations 8 21 0.591599
grade 8 155 0.5677289
elementary school 8 23 0.71285784
mongolia 8 6 0.5558867
tweets 8 12 0.7400358
preservation 8 18 0.5042891
sugammadex 8 7 0.9435407
genetic algorithms 8 8 0.62548333
differential evolution 8 10

moderated mediation model 8 8 0.7894528
emotional exhaustion 8 6 0.9276798
liberalism 8 40 0.7489442
record 8 128 0.4525581
educational attainment 8 16 0.7436562
collective action 8 17 0.62019014
preservice teachers 8 31 0.795734
negotiations 8 27 0.54852855
curiosity 8 10 0.7853007
cash transfers 8 6 0.89145744
tourists 8 52 0.8023648
strategic narratives 8 6 0.8634442
bureaucracy 8 4 0.7887975
(cambridge 8 3 0.97701347
airborne pollen 8 25 0.98550564
model uncertainty 8 7 0.71337813
electronic medical records 8 5 0.6902441
cognitive load theory 8 14 0.93070036
notion 8 13 0.51600474
remittances 8 26 0.87925655
position statement 8 3 0.5472855
new dataset 8 12 0.8046023
inconel 8 2 0.99828136
wireless sensor 8 575 0.8092647
smart city applications 8 7 0.82494295
corona virus disease 7 3 0.99148613
lopinavir/ritonavir 7 2 0.9162772
receptor 7 180 0.5588467
acute myocardial infarction 7 5 0.73287016
dentistry 7 1 1.0
basic reproduction number 7 4 0.932159
leaders 7 161 0.5918892
blame 7

pixel 7 67 0.6647384
social mobility 7 5 0.73352265
intelligent transportation systems 7 6 0.83503985
tissue 7 154 0.5374237
duty 7 17 0.53584534
nations 7 121 0.43449345
wbans 7 11 0.9300995
dual role 7 7 0.53312576
functional materials 7 9 0.6570856
yeast 7 49 0.6854842
image classification 7 30 0.8119343
study protocol 7 12 0.6091031
so2 7 11 0.6916368
wood 7 32 0.46263924
situ measurements 7 7 0.7170577
visions 7 14 0.5167988
liquid metal 7 53 0.84925044
natural experiment 7 14 0.7230874
mobilities 7 32 0.789261
genetic basis 7 2 0.92142355
disclosure 7 36 0.5641873
direct detection 7 8 0.6549612
nitrate 7 27 0.71449494
acts 7 263 0.41307864
3d imaging 7 8 0.6893257
mainstream 7 15 0.48397192
flexible substrates 7 9 0.7922787
printed electronics 7 7 0.93255454
range 7 273 0.43799257
force myography 7 13 0.9622617
injustice 7 28 0.64615655
young women 7 6 0.68669116
hospitality 7 59 0.73593193
vibration control 7 1 1.0
darkness 7 4 0.54214096
joint optimization 7 8 0.78167486
endoth

molecular dynamics simulations 6 5 0.67111486
disease control 6 2 0.66420233
african countries 6 8 0.6940863
adenosine 6 6 0.64521426
inductive sensors 6 3 0.8019547
clearance 6 25 0.5241652
candida albicans 6 8 0.78463054
spacecraft 6 13 0.7048494
hypertonie 6 4 0.98806494
ensembles 6 17 0.50532717
fusing 6 31 0.584673
reactions 6 60 0.44254297
gulf 6 12 0.47946268
online monitoring 6 2 0.7415012
physiological signals 6 14 0.77204114
neglected tropical diseases 6 1 1.0
collision 6 43 0.49675262
explainable recommendation 6 1 0.99999994
laser scanner 6 17 0.86070955
theoretical study 6 8 0.60375947
modulators 6 13 0.5753791
union 6 86 0.59449667
indicator 6 64 0.41639945
different approaches 6 3 0.5963348
aircraft 6 58 0.55608386
deficits 6 12 0.48786873
metallic glasses 6 5 0.7893132
iii 6 60 0.46959832
photonic 6 184 0.6881999
u.s.a. 6 2 0.6853119
online learning 6 12 0.55021876
aluminum 6 21 0.58720577
optical fiber 6 120 0.76872844
composite materials 6 9 0.64127785
explosives 6 18

particle size 6 11 0.588868
abuse 6 32 0.5512423
athletes 6 19 0.5336296
lignin 6 14 0.46509552
experimental results 6 5 0.54468083
alterations 6 24 0.598044
matrix 6 137 0.46315208
tourism research 6 9 0.84137565
homeostasis 6 29 0.6330006
infrared images 6 4 0.78387547
fecal microbiota transplantation 6 7 0.93756914
recurrent clostridium difficile infection 6 2 0.98223305
antimicrobial peptides 6 3 0.90134865
ablation 6 34 0.5919091
venus 6 1 1.0000001
tumors 6 27 0.65854925
weight loss 6 10 0.57869387
sofosbuvir 6 8 0.9189286
court 6 51 0.5756031
phenology 6 17 0.79181415
facilitators 6 3 0.6399703
dark tourism 6 8 0.9406257
disaster management 6 5 0.7229431
pathogenic bacteria 6 9 0.66059357
central asia 6 5 0.6243708
total knee arthroplasty 6 3 0.61952996
october 6 1 1.0000001
defect detection 6 13 0.70908403
narrowband internet 6 1 0.9999998
-formula 6 1 0.9999998
outliers 6 6 0.65275455
minorities 6 19 0.628329
vaccination 6 12 0.592431
aqueous media 6 5 0.6294379
global pattern

voting 6 77 0.7022074
volunteers 6 13 0.46561956
outcome measures 6 4 0.6364652
empirical test 6 7 0.55492365
research directions 6 7 0.55200857
infant mortality 6 4 0.72175956
international migration 6 8 0.7291043
quantitation 6 5 0.7381752
operations 6 45 0.4504165
: a pilot study 6 15 0.5419677
human motion 6 25 0.70745873
temporality 6 8 0.6888431
predictive modeling 6 4 0.7752825
hungary 6 1 0.9999999
deception 6 11 0.5876595
teacher knowledge 6 7 0.85495657
classifications 6 6 0.6476969
student perspectives 6 4 0.7195413
korean adults 6 1 0.9999999
transcriptome analysis 6 9 0.78869843
housing prices 6 7 0.8131923
optimism 6 15 0.5300996
hidden markov model 6 13 0.66243196
foreign direct investment 6 12 0.8872602
prediction models 6 8 0.642459
teams 6 24 0.67367053
public participation 6 3 0.75068367
politeness 6 3 0.907655
wall 6 111 0.46556556
northern italy 6 7 0.5966954
leisure research 6 10 0.8045834
forest 6 121 0.55113685
gentrification 6 22 0.902421
work-family conflict 6

In [16]:
term_dois

{'10.4000/champpenal.9440'}

In [17]:
len([(term, nc_counts[term], msim) for term, msim in term_mean_sims.items()]), \
len([(term, nc_counts[term], msim) for term, msim in term_mean_sims.items() if msim >= 0.45]), \
len([(term, nc_counts[term], msim) for term, msim in term_mean_sims.items() if msim >= 0.5]), \
len([(term, nc_counts[term], msim) for term, msim in term_mean_sims.items() if msim >= 0.6]), \
len([(term, nc_counts[term], msim) for term, msim in term_mean_sims.items() if msim >= 0.5 and nc_counts[term] >= 10])

(4452, 3895, 3244, 2231, 1586)

In [18]:
term_plot_tuples = []

for term, msim in term_mean_sims.items():
    if msim >= 0.45:
        term_dois = set()
        term_vecs = []
        term_lower = term.lower()
        for doi, title in doi2title.items():
            title_lower = title.lower()
            substr_position = title_lower.find(term_lower)
            if substr_position >= 0:
                if substr_position >= 1:
                    char_before_ord = ord(title_lower[substr_position-1])
                    if char_before_ord >= 65 and char_before_ord <= 90:
                        continue
                    elif char_before_ord >= 97 and char_before_ord <= 122:
                        continue
                if doi in doi2vec:
                    term_dois.add(doi)
                    term_vecs.append(doi2vec[doi])
        
        print(term, msim, nc_counts[term], len(term_dois))
        
        if len(term_vecs) < 3:
            continue
        
        ac2 = AgglomerativeClustering(n_clusters=None, affinity="cosine", linkage="average", distance_threshold=0.3)
        kwLabels = ac2.fit_predict(term_vecs)
        kwLabels_unique = sorted(np.unique(kwLabels))        
        
        print(len(kwLabels_unique))
        print([c for _, c in Counter(kwLabels).most_common() if c >= 5 and float(c) / len(term_dois) >= 0.05])
        print([c for _, c in Counter(kwLabels).most_common() if float(c) / len(term_dois) < 0.05 or c < 5])
        
        print()
        
        for l, c in Counter(kwLabels).most_common():
            if c < 5:
                break
            if float(c) / len(term_dois) >= 0.05:
                mean_vec = np.zeros((1,300))
                for i, vec in enumerate(term_vecs):
                    if kwLabels[i] == l:
                        mean_vec += vec
                mean_vec = mean_vec / c
                
                #print((term, c, mean_vec))
                term_plot_tuples.append((term, c, mean_vec))
            
        

application 0.4591425 645 1405
126
[125, 89, 81, 72]
[69, 67, 54, 52, 51, 49, 42, 36, 32, 26, 22, 20, 19, 19, 17, 17, 15, 14, 13, 13, 13, 12, 12, 12, 12, 12, 11, 11, 11, 10, 9, 8, 8, 8, 8, 8, 7, 7, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

china 0.46752793 556 432
46
[86, 55, 41, 33, 24, 24, 22]
[13, 12, 11, 9, 8, 8, 7, 7, 6, 6, 6, 5, 4, 4, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

evidence 0.47702244 547 869
99
[113, 81, 75, 74, 45]
[34, 26, 22, 22, 20, 19, 19, 16, 16, 16, 13, 12, 12, 11, 9, 9, 8, 7, 7, 7, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

patients 0.5102235 

76
[107, 88, 82, 81, 58]
[37, 34, 27, 25, 22, 20, 13, 12, 12, 12, 11, 11, 10, 9, 8, 8, 7, 6, 6, 6, 5, 5, 5, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

students 0.60521555 155 542
40
[163, 105, 98, 43]
[19, 13, 12, 11, 10, 9, 5, 5, 4, 4, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

recent advances 0.49203092 154 99
28
[17, 11, 6, 6, 6, 6, 5]
[4, 4, 4, 3, 3, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

practice 0.4875579 154 548
70
[96, 54, 37, 28]
[25, 21, 16, 15, 14, 13, 13, 12, 11, 11, 11, 9, 9, 9, 9, 7, 7, 7, 6, 6, 6, 6, 5, 5, 5, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

correction 0.5157631 153 105
31
[21, 15, 10, 10, 7]
[4, 4, 4, 3, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

prediction 0.5010711 151 331
62
[40, 33, 25, 25, 22, 22]
[14, 9, 9

coronavirus 0.8554134 85 452
2
[451]
[1]

ethics 0.5524251 85 101
26
[29, 14, 8, 7, 6, 6]
[5, 4, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

emergence 0.45736533 84 80
34
[17, 5]
[4, 4, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

privacy 0.67107 84 164
17
[101, 21]
[7, 6, 5, 5, 4, 3, 2, 2, 2, 1, 1, 1, 1, 1, 1]

determinants 0.49213603 83 158
34
[18, 17, 15, 14, 12, 8]
[7, 6, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1]

violence 0.5804268 83 140
21
[37, 23, 19, 16, 13, 7]
[3, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1]

higher education 0.6028279 82 118
15
[42, 13, 13, 6, 6, 6, 6, 6]
[5, 4, 4, 3, 2, 1, 1]

experience 0.5177883 81 455
66
[83, 44, 42, 26, 24]
[22, 15, 13, 12, 11, 9, 8, 8, 6, 6, 6, 6, 5, 5, 5, 5, 5, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

indonesia 0.5807142 81 56
13
[24, 7, 6, 6]
[3, 2, 2, 1,

mice 0.536321 62 98
20
[27, 14, 9, 7, 7, 6, 5, 5]
[4, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1]

older adults 0.62031746 61 57
13
[24, 6, 6]
[4, 4, 3, 2, 2, 2, 1, 1, 1, 1]

performance analysis 0.60234904 61 65
14
[25, 19]
[4, 4, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1]

memory 0.4509957 61 199
27
[41, 33, 18, 14, 14, 12]
[9, 7, 7, 5, 5, 4, 4, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1]

policy 0.51692516 60 465
56
[67, 57, 46, 39, 38, 33, 27]
[21, 20, 10, 7, 7, 5, 5, 5, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

humans 0.53116655 59 56
21
[23, 5]
[4, 3, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

relationships 0.4782109 59 172
48
[18, 18, 18, 15]
[8, 6, 6, 6, 5, 5, 5, 5, 4, 4, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

family 0.49915734 59 329
47
[72, 28, 23, 20, 20, 18, 17]
[15, 14, 11, 10, 9, 9, 5, 5, 4, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 

commentary 0.52273405 47 20
8
[10]
[3, 2, 1, 1, 1, 1, 1]

literature review 0.4658018 47 76
35
[8, 6, 6, 5]
[4, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

russia 0.59858817 47 72
18
[32, 9, 7]
[3, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1]

body 0.46518898 47 285
36
[76, 46, 26, 18]
[13, 13, 8, 8, 7, 7, 6, 6, 6, 6, 4, 4, 3, 3, 3, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

peace 0.70687926 47 82
8
[65]
[4, 4, 3, 2, 2, 1, 1]

acknowledgement 1.0 47 1
barriers 0.45320877 46 51
24
[7]
[4, 4, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1]

human rights 0.66583174 46 40
8
[23, 9]
[3, 1, 1, 1, 1, 1]

making 0.47542027 46 277
63
[47, 19, 17]
[13, 10, 9, 9, 8, 8, 7, 6, 6, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

new york 0.45697856 46 22
16
[]
[2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

discrimination 0.467

gut microbiota 0.9068461 39 77
1
[77]
[]

drugs 0.519995 38 54
21
[10, 8, 6, 6]
[2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

recent developments 0.46875224 38 23
13
[]
[4, 4, 4, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1]

collaboration 0.52901524 38 111
25
[24, 17, 11, 10, 7, 6]
[5, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

surface 0.4803465 38 647
56
[157, 67, 45, 42]
[32, 31, 28, 25, 25, 17, 17, 13, 12, 10, 10, 10, 9, 8, 7, 6, 5, 5, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

additive manufacturing 0.8522743 38 56
4
[53]
[1, 1, 1]

stroke 0.6143442 38 53
14
[15, 12, 11]
[3, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1]

music 0.5033503 38 72
19
[15, 11, 7, 6, 5, 5]
[4, 3, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1]

southeast asia 0.54454345 38 19
7
[9]
[4, 2, 1, 1, 1, 1]

emotion 0.6079385 38 402
20
[161, 90, 33, 30]
[17, 16, 14, 11, 7, 7, 4, 2, 2, 2, 1, 1, 1, 1, 1, 1]

mexico 0.54292446 38 38
11
[11, 6]
[4, 4, 3, 3, 2, 2, 1, 1, 1]

crime 0.65

vehicle 0.64703786 34 440
24
[246, 73, 61]
[15, 14, 4, 4, 4, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

vitro 0.54773724 34 73
22
[16, 9, 5, 5]
[4, 4, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1]

oxidative stress 0.7072973 34 31
7
[20, 5]
[2, 1, 1, 1, 1]

money 0.51942784 34 37
17
[5]
[4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1]

intervention 0.51430833 34 319
53
[115, 25, 17]
[14, 13, 11, 10, 10, 9, 6, 6, 6, 5, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

narratives 0.6053005 34 112
20
[32, 27, 10, 9, 7]
[4, 4, 3, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1]

mobile devices 0.6335059 34 31
9
[12]
[4, 4, 4, 3, 1, 1, 1, 1]

pilot study 0.54882276 34 35
14
[13, 7]
[3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

inclusion 0.46536463 34 50
31
[6]
[4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

foucault 0.7483867 34 5
3
[]
[2, 2, 1]

saudi arabia 0.62591195 33 27
10
[16]
[2, 2,

intersection 0.5099201 30 95
22
[26, 16, 12, 12]
[4, 4, 3, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

trade 0.5590759 30 178
33
[85, 11, 9]
[7, 6, 6, 5, 5, 4, 4, 4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

arts 0.45517313 30 48
15
[12, 10, 9]
[2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1]

movement 0.4547915 30 210
39
[40, 33, 30, 14, 13, 11]
[9, 7, 4, 4, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

tables 0.53690094 30 11
5
[7]
[1, 1, 1, 1]

beliefs 0.5389187 30 91
20
[25, 12, 10, 9, 8, 5]
[4, 3, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1]

spirituality 0.76253456 30 27
3
[22]
[4, 1]

early detection 0.52272266 29 19
10
[]
[4, 4, 3, 2, 1, 1, 1, 1, 1, 1]

point 0.4722861 29 321
61
[107, 37, 35]
[11, 9, 6, 6, 5, 5, 5, 5, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

fear 0.50988626 29 51
16
[9, 7, 6]
[4, 4, 4, 4, 3, 2, 2, 1, 1, 1,

machine 0.54428524 26 440
68
[85, 42, 30, 25, 24]
[17, 17, 17, 16, 15, 13, 11, 8, 8, 7, 6, 6, 5, 5, 5, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

new method 0.4689457 26 21
12
[5]
[3, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1]

vehicles 0.6528227 26 173
14
[70, 42, 34, 9]
[5, 3, 3, 1, 1, 1, 1, 1, 1, 1]

finance 0.5954863 26 42
11
[14, 11]
[4, 3, 2, 2, 2, 1, 1, 1, 1]

convergence 0.45801124 26 53
24
[14]
[3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

possibilities 0.5124974 26 33
18
[10]
[3, 3, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

lives 0.51291364 26 48
23
[10]
[4, 4, 3, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

real-time monitoring 0.53936607 26 20
10
[5]
[3, 3, 2, 2, 1, 1, 1, 1, 1]

earth 0.48380592 26 95
22
[21, 15, 14, 7, 6, 6]
[4, 3, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1]

machine learning techniques 0.6324063 26 12
6
[5]
[2, 2, 1, 1, 1]

figu

united kingdom 0.5306522 23 18
11
[]
[4, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1]

degradation 0.50234765 23 45
14
[8, 7, 7, 5]
[4, 3, 3, 2, 1, 1, 1, 1, 1, 1]

awareness 0.47850323 23 71
22
[20, 6, 6, 5]
[4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

asthma 0.69967824 23 35
4
[26, 6]
[2, 1]

observations 0.527842 23 86
20
[33, 12, 8, 5, 5]
[4, 3, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

dialogue 0.5324578 23 31
14
[8, 5]
[3, 3, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1]

comparative perspective 0.5918876 23 18
10
[]
[4, 3, 2, 2, 2, 1, 1, 1, 1, 1]

repair 0.48082185 23 56
13
[14, 10, 5, 5, 5]
[4, 3, 3, 2, 2, 1, 1, 1]

smartphone 0.6270003 23 222
22
[122, 26, 14]
[9, 9, 9, 5, 4, 3, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1]

physics 0.46470028 23 65
24
[10, 9, 7, 5]
[4, 4, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

finland 0.47575286 23 21
14
[]
[3, 3, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1]

semantics 0.7378998 23 50
9
[42]
[1, 1, 1, 1, 1, 1, 1, 1]

job 0.6143926 23 138
21
[86, 11]
[6, 6, 4, 3, 3, 

wild 0.46354952 21 103
26
[19, 12, 11, 10, 9, 9]
[5, 3, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

devices 0.5207503 21 341
39
[59, 51, 44, 35, 28, 19]
[13, 12, 9, 9, 6, 6, 5, 5, 4, 3, 3, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

high sensitivity 0.63258046 21 43
10
[20, 10, 5]
[2, 1, 1, 1, 1, 1, 1]

type 1 diabetes 0.59794235 21 16
5
[7]
[4, 2, 2, 1]

alzheimer's disease 0.78624886 21 23
3
[21]
[1, 1]

chronic obstructive pulmonary disease 0.65367043 21 4
3
[]
[2, 1, 1]

proliferation 0.5324013 21 30
14
[14]
[2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

future perspectives 0.49966955 21 14
12
[]
[2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

autism spectrum disorder 0.65728176 21 25
7
[15]
[3, 2, 2, 1, 1, 1]

qualitative study 0.558597 21 25
13
[6]
[4, 3, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1]

cloud computing 0.7849357 21 48
3
[38, 9]
[1]

vehicular ad hoc networks 0.90689397 21 9
1
[9]
[]

genre 0.6356722 21 17
7
[7]
[3, 3, 1, 1, 1, 1]

trees 0.49888158 21 49
18
[16]
[

price 0.6471583 19 158
16
[90, 16, 12]
[7, 5, 5, 4, 4, 3, 2, 2, 2, 2, 2, 1, 1]

peptides 0.6346844 19 43
9
[17, 11, 5]
[4, 2, 1, 1, 1, 1]

economic development 0.6694386 19 26
6
[17]
[3, 3, 1, 1, 1]

markets 0.5745781 19 134
20
[45, 21, 12, 10, 7]
[6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 1, 1, 1, 1, 1]

hyperspectral image classification 0.9081943 19 15
1
[15]
[]

damage 0.57857877 19 206
32
[119]
[10, 9, 7, 7, 6, 6, 5, 4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

estimating 0.45540655 19 151
39
[24, 12, 11]
[7, 7, 7, 6, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

graphs 0.50466883 19 48
11
[18, 7, 5, 5]
[4, 2, 2, 2, 1, 1, 1]

trajectories 0.47772247 19 73
24
[19, 10, 9]
[4, 4, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

authenticity 0.52265906 19 24
11
[11]
[2, 2, 2, 1, 1, 1, 1, 1, 1, 1]

targets 0.49080577 19 93
28
[14, 9, 8, 7, 7, 7, 6, 5]
[4, 3, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

1
[11]
[]

east asia 0.5369339 17 30
7
[10, 7, 6]
[3, 2, 1, 1]

iot applications 0.77659863 17 23
4
[18]
[3, 1, 1]

game 0.4875859 17 269
44
[92, 34, 21]
[10, 9, 9, 7, 7, 6, 5, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

britain 0.61918014 17 17
7
[8]
[2, 2, 2, 1, 1, 1]

urbanization 0.6290927 17 22
6
[11, 5]
[2, 2, 1, 1]

reason 0.510271 17 126
24
[39, 11, 8]
[6, 6, 6, 5, 5, 5, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1]

radiotherapy 0.75090456 17 11
4
[8]
[1, 1, 1]

oil 0.49929148 17 104
30
[14, 10, 10, 9, 7, 6, 6]
[5, 4, 4, 3, 3, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

sar images 0.8496523 17 24
2
[23]
[1]

assembly 0.47664282 17 120
30
[23, 17, 8, 7, 7]
[5, 5, 5, 5, 4, 4, 4, 4, 3, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

antibiotics 0.71219206 17 19
4
[15]
[2, 1, 1]

alzheimer’s disease 0.764745 17 40
7
[32]
[3, 1, 1, 1, 1, 1]

bridges 0.6854467 17 31
6
[25]
[2, 1, 1, 1, 1]

gut microbiome 0.9130247

diversification 0.5016634 16 45
13
[15, 8, 6]
[3, 3, 2, 2, 1, 1, 1, 1, 1, 1]

friends 0.5687921 16 50
12
[15, 7, 6, 5]
[4, 4, 2, 2, 2, 1, 1, 1]

robot 0.61101013 16 504
35
[155, 115, 105, 29]
[19, 12, 12, 10, 7, 6, 3, 3, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

jordan 0.51357734 16 11
7
[]
[3, 3, 1, 1, 1, 1, 1]

part ii 0.4879861 16 8
8
[]
[1, 1, 1, 1, 1, 1, 1, 1]

parent 0.5691925 16 301
25
[121, 57, 27, 23, 16]
[12, 10, 5, 4, 3, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1]

reproducibility 0.46552867 16 17
13
[]
[4, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

blockchain technology 0.73596936 16 8
2
[7]
[1]

fatigue 0.5561738 16 42
8
[14, 10, 6]
[4, 3, 2, 2, 1]

involvement 0.5287216 16 81
27
[26, 9, 6]
[4, 4, 4, 3, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

mri 0.69297814 16 18
5
[8, 7]
[1, 1, 1]

atmosphere 0.57478595 16 36
12
[9, 9]
[3, 3, 2, 2, 2, 2, 1, 1, 1, 1]

social support 0.73770636 16 25
5
[18]
[3, 2, 1, 1]

photosynthesis 0.7149792 1

machine learning approach 0.6163303 15 19
7
[6]
[4, 3, 2, 2, 1, 1]

geographies 0.7738033 15 50
4
[46]
[2, 1, 1]

simultaneous detection 0.62934345 15 10
5
[5]
[2, 1, 1, 1]

skills 0.5602992 15 129
28
[39, 26, 10, 8]
[4, 4, 4, 3, 3, 3, 3, 3, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

kalman filter 0.7969719 15 105
7
[90]
[3, 3, 3, 2, 2, 2]

accelerometers 0.70801085 15 29
5
[12, 11]
[4, 1, 1]

industrial wireless sensor networks 0.83893687 15 9
1
[9]
[]

zika virus 0.8921446 15 38
1
[38]
[]

csr 0.7331497 15 53
4
[46, 5]
[1, 1]

volatile organic compounds 0.6607958 15 23
7
[10]
[4, 3, 2, 2, 1, 1]

head 0.4541247 15 107
33
[18, 15, 12, 9, 8, 7]
[5, 4, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

randomized controlled trials 0.7476018 15 9
3
[7]
[1, 1]

religiosity 0.68569016 15 20
6
[10]
[4, 2, 2, 1, 1]

neural network 0.65123355 15 420
30
[203, 32, 31, 25]
[19, 18, 12, 12, 11, 10, 9, 6, 4, 3, 3, 3, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1]

sri lank

names 0.46406195 13 19
9
[9]
[2, 2, 1, 1, 1, 1, 1, 1]

qatar 0.6321691 13 8
5
[]
[4, 1, 1, 1, 1]

critical care 0.69557846 13 10
3
[6]
[3, 1]

current knowledge 0.6053636 13 8
5
[]
[2, 2, 2, 1, 1]

reporting 0.48545244 13 58
22
[20, 5, 5]
[4, 3, 3, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

t cells 0.58388186 13 29
3
[21, 5]
[3]

microglia 0.89719176 13 18
1
[18]
[]

computed tomography 0.539703 13 18
7
[7]
[3, 2, 2, 2, 1, 1]

betalains 0.99611 13 10
1
[10]
[]

compression 0.49432078 13 38
19
[7, 6]
[3, 3, 3, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

conceptual framework 0.49815902 13 22
15
[]
[4, 3, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

multi-objective optimization 0.69866425 13 9
5
[5]
[1, 1, 1, 1]

deformation 0.5728579 13 72
13
[32, 12, 6, 6]
[4, 3, 2, 2, 1, 1, 1, 1, 1]

draft genome sequence 0.9253423 13 12
1
[12]
[]

lifetime 0.5296614 13 36
14
[17, 5]
[2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

czech republic 0.5923475 13 4
3
[]
[2, 1, 1]

steel 0.7469655 13 55
8
[47]
[2, 1, 

liver 0.48081377 12 77
11
[44, 18]
[4, 2, 2, 2, 1, 1, 1, 1, 1]

fpga 0.613042 12 32
10
[8, 7]
[4, 4, 3, 2, 1, 1, 1, 1]

day 0.48379537 12 60
28
[7, 6, 6, 5]
[3, 3, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

concrete structures 0.83391595 12 17
2
[16]
[1]

sharing 0.4671333 12 112
27
[19, 18, 16, 8, 8, 7]
[4, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

natural disasters 0.60534984 12 10
5
[]
[3, 3, 2, 1, 1]

poor 0.45320866 12 61
19
[10, 9, 7, 6]
[4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1]

honor 0.63111657 12 12
7
[6]
[1, 1, 1, 1, 1, 1]

object 0.5279366 12 356
44
[133, 49, 25]
[16, 14, 10, 9, 8, 7, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

corrections 0.6481824 12 13
6
[8]
[1, 1, 1, 1, 1]

einleitung 0.99999994 12 1
austerity 0.6475593 12 20
5
[9]
[4, 3, 2, 2]

psychology 0.57018286 12 47
17
[14, 9]
[4, 4, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

probability 0.4859416 12 50
20
[11

subscription 1.0 12 0
ifc 0.63482004 12 0
rationality 0.5565091 12 23
10
[7]
[3, 2, 2, 2, 2, 2, 1, 1, 1]

substance use 0.7388702 12 53
7
[43]
[4, 2, 1, 1, 1, 1]

ebook 0.6162085 12 2
concerns 0.49698517 11 25
16
[]
[3, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1]

january 0.55409217 11 15
6
[9]
[2, 1, 1, 1, 1]

genetic diversity 0.7879885 11 11
2
[9]
[2]

mainland china 0.49271703 11 7
6
[]
[2, 1, 1, 1, 1, 1]

intensive care unit 0.5201244 11 14
8
[]
[4, 3, 2, 1, 1, 1, 1, 1]

divergence 0.45532405 11 36
18
[9]
[4, 3, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

evolutionary history 0.8688786 11 9
1
[9]
[]

saliva 0.61498517 11 36
6
[16, 10]
[3, 3, 2, 2]

initiation 0.46062192 11 21
13
[]
[4, 4, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1]

complete genome sequence 0.9362481 11 9
1
[9]
[]

risk management 0.608998 11 15
5
[7]
[4, 2, 1, 1]

explanation 0.49005324 11 63
21
[23]
[3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1]

air pollution 0.6210235 11 30
8
[10, 8]
[4, 3, 2, 1, 1, 1]



microbial communities 0.9012287 11 27
1
[27]
[]

humidity 0.7208436 11 108
9
[85, 13]
[3, 2, 1, 1, 1, 1, 1]

organizational performance 0.71465653 11 17
4
[10, 5]
[1, 1]

cracks 0.6953717 11 17
4
[8, 7]
[1, 1]

wastewater 0.64285743 11 9
5
[5]
[1, 1, 1, 1]

rationale 0.46371347 11 15
11
[]
[3, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1]

low-cost sensors 0.7682412 11 8
2
[6]
[2]

social anxiety 0.82325685 11 17
2
[16]
[1]

elasticity 0.49501428 11 16
11
[]
[4, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1]

performance enhancement 0.6272681 11 7
5
[]
[3, 1, 1, 1, 1]

profiles 0.45072162 11 76
29
[15, 12, 8, 5]
[3, 3, 3, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

extreme learning machine 0.7531632 11 23
3
[19]
[3, 1]

learners 0.7114769 11 69
7
[43, 17]
[4, 2, 1, 1, 1]

macrophages 0.77830935 11 25
4
[22]
[1, 1, 1]

suitability 0.555077 11 15
11
[]
[4, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1]

inflation 0.7102953 11 14
4
[11]
[1, 1, 1]

chemometrics 0.6638975 11 7
3
[]
[4, 2, 1]

gnss receivers 0.8625468

customer satisfaction 0.79796934 10 11
1
[11]
[]

medical education 0.7337557 10 10
3
[6]
[3, 1]

generalization 0.46005633 10 8
7
[]
[2, 1, 1, 1, 1, 1, 1]

splitting 0.5226613 10 32
11
[7, 6, 5, 5]
[3, 1, 1, 1, 1, 1, 1]

sensitivity analysis 0.49267375 10 15
8
[]
[4, 3, 2, 2, 1, 1, 1, 1]

random forests 0.7113329 10 9
3
[6]
[2, 1]

everyday 0.6382601 10 71
12
[33, 16, 6, 5]
[3, 2, 1, 1, 1, 1, 1, 1]

prioritization 0.588829 10 14
4
[5]
[4, 3, 2]

national identity 0.698193 10 10
4
[5]
[3, 1, 1]

audiences 0.6236279 10 12
7
[]
[3, 3, 2, 1, 1, 1, 1]

data collection 0.6167632 10 49
11
[30, 6, 5]
[1, 1, 1, 1, 1, 1, 1, 1]

neuromorphic computing 0.89057285 10 14
1
[14]
[]

videos 0.5582244 10 28
10
[13]
[4, 2, 2, 2, 1, 1, 1, 1, 1]

infrared spectroscopy 0.643581 10 21
7
[11]
[3, 2, 2, 1, 1, 1]

european integration 0.80773497 10 12
2
[11]
[1]

arrival estimation 0.93736947 10 21
1
[21]
[]

sharing economy 0.6583362 10 6
4
[]
[3, 1, 1, 1]

derivation 0.48753873 10 13
10
[]
[2, 2, 2, 1, 1, 1

economic crisis 0.603876 10 12
6
[]
[4, 3, 2, 1, 1, 1]

natural language processing 0.6782766 10 18
6
[13]
[1, 1, 1, 1, 1]

academy 0.63736147 10 24
8
[11, 5]
[3, 1, 1, 1, 1, 1]

self-control 0.6939826 10 24
6
[18]
[2, 1, 1, 1, 1]

criminalization 0.759754 10 7
2
[]
[4, 3]

concern 0.48130998 10 51
27
[]
[4, 4, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

mobile applications 0.66953725 10 6
3
[]
[3, 2, 1]

contemporary china 0.78424305 10 4
2
[]
[3, 1]

legitimation 0.7268644 10 20
4
[14]
[4, 1, 1]

food democracy 0.91261 10 11
1
[11]
[]

hyperspectral images 0.8527348 10 12
1
[12]
[]

electrospray ionization mass spectrometry 0.83656543 10 16
3
[14]
[1, 1]

differential diagnosis 0.6418035 9 3
3
[]
[1, 1, 1]

sars-cov 0.8748378 9 45
2
[44]
[1]

chloroquine 0.64825433 9 4
3
[]
[2, 1, 1]

immune responses 0.71077263 9 14
4
[6]
[3, 3, 2]

infections 0.6411794 9 60
10
[35, 13]
[3, 2, 2, 1, 1, 1, 1, 1]

china coronavirus 0.99316406 9 2
doctors 0.6269555 9 4
4

reactive oxygen species 0.7829768 9 7
2
[5]
[2]

essay 0.547559 9 12
8
[]
[3, 2, 2, 1, 1, 1, 1, 1]

new trends 0.5745634 9 4
3
[]
[2, 1, 1]

multiple sensors 0.71684545 9 7
3
[]
[4, 2, 1]

crimes 0.71726096 9 5
2
[]
[4, 1]

personhood 0.71399134 9 6
3
[]
[4, 1, 1]

precision medicine 0.60543823 9 5
5
[]
[1, 1, 1, 1, 1]

dispersion 0.48536196 9 44
18
[11, 5, 5]
[4, 3, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

secularism 0.79224753 9 2
alliances 0.6459874 9 10
3
[5]
[4, 1]

fall detection 0.9240369 9 34
1
[34]
[]

flipped classroom 0.8900106 9 11
1
[11]
[]

colitis 0.8101004 9 13
4
[10]
[1, 1, 1]

sustainable development goals 0.73965394 9 7
3
[5]
[1, 1]

caregivers 0.6451234 9 18
7
[9]
[3, 2, 1, 1, 1, 1]

optimal control 0.6813756 9 2
peacebuilding 0.83258736 9 11
2
[10]
[1]

performance comparison 0.61514515 9 7
6
[]
[2, 1, 1, 1, 1, 1]

profession 0.56067747 9 195
32
[89, 18, 16]
[6, 6, 5, 5, 4, 4, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

feeling 0.5789666 9 

educators 0.6509244 9 27
8
[15]
[4, 2, 2, 1, 1, 1, 1]

kindergarten 0.79506004 9 37
3
[35]
[1, 1]

conflict management 0.5867403 9 30
5
[17, 10]
[1, 1, 1]

firm 0.59506917 9 250
21
[105, 43, 34, 21]
[10, 7, 6, 4, 3, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1]

biopolitics 0.86327004 9 8
1
[8]
[]

buribunks 0.98543537 9 2
managers 0.6186244 9 36
13
[8, 6, 5, 5]
[3, 2, 1, 1, 1, 1, 1, 1, 1]

structural change 0.6285612 9 15
5
[11]
[1, 1, 1, 1]

thanks 0.7760453 9 3
2
[]
[2, 1]

organizational commitment 0.862001 9 11
1
[11]
[]

: an exploratory study 0.4925914 9 0
emotional labor 0.7804301 9 13
3
[9]
[3, 1]

field experiment 0.57526124 9 28
10
[12, 5]
[3, 2, 1, 1, 1, 1, 1, 1]

testimony 0.68499774 9 8
4
[]
[4, 2, 1, 1]

chicago press 1.0000001 9 1
parental involvement 0.7959096 9 15
3
[12]
[2, 1]

critical race theory 0.879948 9 17
1
[17]
[]

ritual 0.63436675 9 20
7
[7, 6]
[2, 2, 1, 1, 1]

world politics 0.7703782 9 9
2
[6]
[3]

supply chains 0.8328393 9 33
2
[32]
[1]

cloth 0.5444236 9 16
7
[5,

sugammadex 0.9435407 8 7
1
[7]
[]

genetic algorithms 0.62548333 8 8
5
[]
[4, 1, 1, 1, 1]

differential evolution 0.7570462 8 10
3
[8]
[1, 1]

memories 0.49603125 8 20
11
[]
[3, 3, 3, 3, 2, 1, 1, 1, 1, 1, 1]

mozambique 0.7177095 8 5
2
[]
[4, 1]

life satisfaction 0.726049 8 19
4
[15]
[2, 1, 1]

auctions 0.831363 8 22
2
[21]
[1]

unity 0.4517391 8 16
10
[]
[4, 2, 2, 2, 1, 1, 1, 1, 1, 1]

instagram 0.69959325 8 8
2
[5]
[3]

dissent 0.70453197 8 9
3
[5]
[3, 1]

wearable device 0.7343407 8 35
5
[28]
[4, 1, 1, 1]

interoperability 0.7538244 8 10
4
[7]
[1, 1, 1]

urban planning 0.7473042 8 4
3
[]
[2, 1, 1]

collaborative filtering 0.95698756 8 21
1
[21]
[]

multi-robot systems 0.8493699 8 5
1
[5]
[]

implementing 0.47213888 8 35
18
[6, 6]
[4, 3, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

developing world 0.66361403 8 5
5
[]
[1, 1, 1, 1, 1]

emerging technologies 0.51807874 8 4
4
[]
[1, 1, 1, 1]

key role 0.51024675 8 7
6
[]
[2, 1, 1, 1, 1, 1]

immune evasion 0.72068876 8 5
3
[]
[3, 1, 1]

c

temperature sensing 0.74598795 8 25
6
[18]
[3, 1, 1, 1, 1]

high performance 0.5190237 8 22
13
[]
[4, 3, 3, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1]

layer 0.47403234 8 266
43
[45, 35, 29, 25, 18]
[13, 13, 12, 10, 9, 5, 4, 4, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

systematic review and meta-analysis 0.69430774 8 62
9
[48]
[4, 3, 2, 1, 1, 1, 1, 1]

open problems 0.8339225 8 2
later life 0.7259732 8 15
4
[8, 5]
[1, 1]

probe 0.4834296 8 113
28
[29, 13, 10, 7, 7]
[4, 4, 4, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1]

microscopy 0.5606279 8 101
15
[35, 22, 13, 7]
[4, 4, 3, 2, 2, 2, 2, 2, 1, 1, 1]

chitosan 0.6732596 8 21
6
[8, 7]
[2, 2, 1, 1]

future research directions 0.62336904 8 3
3
[]
[1, 1, 1]

capabilities 0.5219074 8 54
19
[23]
[4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1]

prices 0.7038343 8 66
7
[44, 13]
[3, 2, 2, 1, 1]

cyprus 0.54692805 8 8
6
[]
[2, 2, 1, 1, 1, 1]

dawn 0.52015483 8 3
3
[]
[1, 1, 1]

territory 0.

37
[18, 15, 7, 6]
[4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

potential use 1.0000001 7 1
family planning 0.83744776 7 20
3
[18]
[1, 1]

surface electromyography 0.9219268 7 15
1
[15]
[]

cancers 0.73068994 7 17
3
[13]
[3, 1]

multi-target tracking 0.9508967 7 17
1
[17]
[]

civil structures 0.8509789 7 7
1
[7]
[]

measurement system 0.5763554 7 48
10
[16, 13]
[4, 4, 3, 2, 2, 2, 1, 1]

recent applications 0.5722395 7 4
4
[]
[1, 1, 1, 1]

grammar 0.64507437 7 19
6
[11]
[2, 2, 2, 1, 1]

reasoning 0.543209 7 90
16
[41, 10, 8, 5, 5]
[3, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1]

body mass index 0.7284176 7 6
2
[5]
[1]

route 0.45635614 7 62
26
[11, 10, 6]
[4, 3, 3, 3, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

hierarchy 0.4577381 7 22
15
[]
[3, 3, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

assisted 0.47012922 7 173
42
[27, 19, 18, 17, 13]
[6, 5, 5, 5, 5, 4, 4, 3, 3, 3, 3, 3, 3, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 

16
[25, 8]
[4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1]

temperature dependence 0.76096416 7 2
kidney disease 0.6395093 7 19
5
[8]
[4, 3, 3, 1]

causation 0.613235 7 8
5
[]
[4, 1, 1, 1, 1]

stable isotopes 0.9903268 7 4
1
[]
[4]

conceptions 0.5479733 7 26
9
[9, 5]
[3, 3, 2, 1, 1, 1, 1]

ships 0.46209997 7 8
6
[]
[2, 2, 1, 1, 1, 1]

nitazoxanide 0.91098404 7 3
1
[]
[3]

voltammetric electronic tongue 0.9623508 7 7
1
[7]
[]

benchmark 0.45163894 7 52
27
[7]
[4, 4, 3, 3, 3, 3, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

biochemistry 0.7709931 7 8
2
[7]
[1]

drought 0.7690818 7 17
4
[12]
[3, 1, 1]

image segmentation 0.7631372 7 10
3
[8]
[1, 1]

team 0.59506124 7 90
15
[51, 11, 6, 5]
[2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1]

cell death 0.7589106 7 18
5
[14]
[1, 1, 1, 1]

ischemic stroke 0.8221363 7 4
2
[]
[2, 2]

misconceptions 0.58738476 7 14
5
[8]
[2, 2, 1, 1]

salinity 0.619525 7 22
7
[11]
[4, 3, 1, 1, 1, 1]

ochratoxin a 0.95036036 7 7
1
[7]
[]

pd-l1 expression 0.9656473 7 6


vehicular ad-hoc networks 0.89535797 7 5
1
[5]
[]

meritocracy 1.0 7 1
heterogeneous wireless sensor networks 0.92024195 7 10
1
[10]
[]

universe 0.5516691 7 7
4
[]
[3, 2, 1, 1]

jerusalem 0.5706428 7 6
4
[]
[3, 1, 1, 1]

silica 0.57365793 7 34
9
[12, 8, 5]
[2, 2, 2, 1, 1, 1]

cervical cancer 0.66769266 7 3
3
[]
[1, 1, 1]

environmental samples 0.6265341 7 5
3
[]
[3, 1, 1]

operational performance 0.8890911 7 7
1
[7]
[]

best practices 0.5644516 7 5
3
[]
[3, 1, 1]

plea 0.498131 7 10
7
[]
[3, 2, 1, 1, 1, 1, 1]

gas sensor 0.7831888 7 176
4
[150, 22]
[3, 1]

puerto rico 0.65812194 7 4
3
[]
[2, 1, 1]

preliminary results 0.54012066 7 7
6
[]
[2, 1, 1, 1, 1, 1]

microfluidic devices 0.8095321 7 19
3
[17]
[1, 1]

machine tools 0.7642459 7 2
economic activity 0.72737145 7 6
3
[]
[3, 2, 1]

diagnostic value 1.0 7 1
interference 0.5328375 7 94
25
[33, 9, 8, 7, 5]
[3, 3, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

structural insights 0.82705796 7 8
2
[7]
[1]

: the effects 0.47449145

17
[44, 6, 6]
[4, 4, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1]

different approaches 0.5963348 6 3
3
[]
[1, 1, 1]

aircraft 0.55608386 6 58
10
[17, 11, 10, 9]
[4, 2, 2, 1, 1, 1]

deficits 0.48786873 6 12
7
[]
[4, 2, 2, 1, 1, 1, 1]

metallic glasses 0.7893132 6 5
2
[]
[4, 1]

iii 0.46959832 6 59
24
[10, 5]
[4, 4, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

photonic 0.6881999 6 168
10
[80, 58, 13]
[4, 4, 3, 2, 2, 1, 1]

u.s.a. 0.6853119 6 2
online learning 0.55021876 6 12
7
[5]
[2, 1, 1, 1, 1, 1]

aluminum 0.58720577 6 21
9
[6]
[3, 3, 2, 2, 2, 1, 1, 1]

optical fiber 0.76872844 6 120
8
[110]
[4, 1, 1, 1, 1, 1, 1]

composite materials 0.64127785 6 8
3
[]
[4, 3, 1]

explosives 0.6576326 6 18
4
[7, 5, 5]
[1]

infant 0.5279968 6 76
16
[22, 21, 12]
[4, 3, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1]

ownership 0.5517086 6 47
19
[11, 10]
[4, 3, 3, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

optical fibers 0.7853371 6 22
4
[18]
[2, 1, 1]

despair 0.814428 6 2
deviance 0.6716446 6 15
4
[7, 6]
[1, 

cyber security 0.62874264 6 3
2
[]
[2, 1]

song 0.48093727 6 7
6
[]
[2, 1, 1, 1, 1, 1]

critical evaluation 0.47909814 6 5
5
[]
[1, 1, 1, 1, 1]

tokyo 0.58028555 6 6
4
[]
[2, 2, 1, 1]

natural disaster 0.58936566 6 19
9
[8]
[3, 2, 1, 1, 1, 1, 1, 1]

neutrinoless 0.993438 6 22
1
[22]
[]

biosynthesis 0.64085937 6 18
7
[6]
[3, 3, 2, 2, 1, 1]

skin cancer 0.58922875 6 3
3
[]
[1, 1, 1]

distinction 0.51260006 6 12
9
[]
[3, 2, 1, 1, 1, 1, 1, 1, 1]

infancy 0.58235604 6 4
4
[]
[1, 1, 1, 1]

social exclusion 0.66635317 6 10
5
[5]
[2, 1, 1, 1]

individualism 0.5698701 6 8
5
[]
[3, 2, 1, 1, 1]

sanctions 0.6762361 6 19
5
[13]
[2, 2, 1, 1]

lte networks 1.0 6 1
video surveillance 0.7580401 6 11
2
[8]
[3]

international evidence 0.7913296 6 5
2
[]
[3, 2]

child health 0.8045261 6 6
2
[5]
[1]

changing world 1.0000002 6 1
hybrid method 0.71505916 6 4
3
[]
[2, 1, 1]

workshop 0.46297553 6 17
12
[]
[4, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1]

health information 0.63143855 6 22
7
[11]
[3, 3, 2, 1, 1, 1]

pr

bioactive glasses 0.93828547 6 8
1
[8]
[]

breast cancer patients 0.66084397 6 4
2
[]
[3, 1]

negation 0.7316646 6 9
4
[5]
[2, 1, 1]

malware detection 0.8492938 6 9
1
[9]
[]

museum 0.5047417 6 36
12
[12, 5]
[3, 3, 2, 2, 2, 2, 2, 1, 1, 1]

national survey 0.5427483 6 9
7
[]
[2, 2, 1, 1, 1, 1, 1]

location-based social networks 0.9276193 6 6
1
[6]
[]

affinity 0.5161942 6 38
10
[11, 8, 5, 5]
[3, 2, 1, 1, 1, 1]

asia-pacific 0.552157 6 3
3
[]
[1, 1, 1]

ebola virus disease 0.8445225 6 11
1
[11]
[]

devil 0.56744474 6 9
6
[]
[2, 2, 2, 1, 1, 1]

hiv/aids 0.56169075 6 6
5
[]
[2, 1, 1, 1, 1]

iot architecture 0.80493903 6 7
2
[6]
[1]

cost-effectiveness analysis 0.7253876 6 2
-shelf 0.51817274 6 0
life sciences 0.59236825 6 12
3
[8]
[3, 1]

knowledge gaps 0.72432315 6 2
season 0.5592431 6 92
18
[30, 12, 11, 7, 7, 6, 5]
[2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1]

wireless communications 0.91259 6 2
stress monitoring 0.7283567 6 12
3
[9]
[2, 1]

laparoscopic sleeve gastrectomy 0.9946785 6 8
1
[8]
[]



moderator 0.60882574 6 18
10
[5]
[4, 2, 1, 1, 1, 1, 1, 1, 1]

earthquakes 0.5267838 6 4
4
[]
[1, 1, 1, 1]

photoplethysmography 0.863842 6 17
1
[17]
[]

human development 0.5446537 6 8
5
[]
[3, 2, 1, 1, 1]

electoral competition 0.8155103 6 11
1
[11]
[]

transistors 0.74044645 6 77
3
[51, 21, 5]
[]

protected areas 0.6584936 6 7
3
[]
[4, 2, 1]

feature fusion 0.739952 6 16
4
[12]
[2, 1, 1]

weights 0.4895938 6 8
6
[]
[2, 2, 1, 1, 1, 1]

abstraction 0.48594585 6 20
14
[]
[3, 3, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

atom 0.48339465 6 131
20
[42, 30, 20, 9]
[4, 3, 3, 3, 3, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1]

new model 0.4542923 6 8
8
[]
[1, 1, 1, 1, 1, 1, 1, 1]

collection 0.4963592 6 97
26
[35, 9, 8, 7, 5, 5]
[4, 3, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

qos 0.8215894 6 21
2
[20]
[1]

rituals 0.69943047 6 8
5
[]
[3, 2, 1, 1, 1]

political ecology 0.8547311 6 13
1
[13]
[]

glycaemic control 0.9309914 6 2
display 0.48525342 6 46
17
[7, 6]
[4, 4, 3, 3, 3, 3, 2, 2, 2, 2, 1, 1, 1

In [19]:
term_lower = "coronavirus"
for doi, title in doi2title.items():
    title_lower = title.lower()
    substr_position = title_lower.find(term_lower)
    if substr_position >= 0:
        if substr_position >= 1:
            char_before_ord = ord(title_lower[substr_position-1])
            if char_before_ord >= 65 and char_before_ord <= 90:
                continue
            elif char_before_ord >= 97 and char_before_ord <= 122:
                continue
        if doi in doi2vec:
            print(title)

Reverse Logistics Network Design for Effective Management of Medical Waste in Epidemic Outbreaks: Insights from the Coronavirus Disease 2019 (COVID-19) Outbreak in Wuhan (China)
Bat origin of a new human coronavirus: there and back again
A mathematical model for simulating the phase-based transmissibility of a novel coronavirus
Distinct Roles for Sialoside and Protein Receptors in Coronavirus Infection
Overlapping and discrete aspects of the pathology and pathogenesis of the emerging human pathogenic coronaviruses SARS‐CoV, MERS‐CoV, and 2019‐nCoV
Identification of Coronavirus Isolated from a Patient in Korea with COVID-19
2019 novel coronavirus of pneumonia in Wuhan, China: emerging attack and management strategies
The novel coronavirus outbreak in Wuhan, China
Community Transmission of Severe Acute Respiratory Syndrome Coronavirus 2, Shenzhen, China, 2020
Review of the Clinical Characteristics of Coronavirus Disease 2019 (COVID-19)
Detection of 2019 novel coronavirus (2019-nCoV) by r

In [20]:
[(t, c, v) for t,c,v in term_plot_tuples if t=="china"]

[('china', 86, array([[-3.35388124e-01, -2.39224308e-01,  2.52788498e-01,
           1.53926418e-01, -3.14634905e-02,  8.95832851e-02,
           3.24194992e-01,  6.34812429e-02, -2.19611450e-01,
          -1.47154979e-01, -5.36232075e-01,  3.80858893e-01,
          -1.41044899e-01,  2.25582091e-01, -2.57635378e-01,
          -3.39703126e-02,  1.87238248e-01, -1.00324365e-01,
           2.07575262e-03, -4.06006049e-01, -2.56591171e-01,
           5.85056674e-01, -3.87011137e-01, -5.12704904e-01,
          -8.55248418e-02,  2.04200451e-01,  8.35558908e-02,
           1.62306862e-01, -7.79484244e-02,  2.66298823e-01,
           1.20225209e-01, -7.87101129e-02,  2.25268907e-01,
          -2.11385063e-02, -2.04555181e-01, -4.80148316e-02,
           1.03356087e-01,  2.20980169e-01, -1.28599550e-03,
           8.63843100e-02,  1.46886696e-01, -3.99130792e-01,
          -6.03075282e-02,  1.56953942e-01, -2.69948410e-01,
           1.03901934e-02, -4.05032530e-01,  2.93945177e-02,
          -

In [21]:
term_plot_tuples = sorted(term_plot_tuples, key=lambda t: nc_counts[t[0]], reverse=True)

In [22]:
len(term_plot_tuples) # 7290 vorher

5815

In [23]:
pickle.dump(term_plot_tuples, open("cov/kw_count_vec_tuples.p", "wb"))

In [24]:
term_plot_tuples = pickle.load(open("cov/kw_count_vec_tuples.p", "rb"))

In [25]:
kw_list = [kw for kw, _, _ in term_plot_tuples]

In [26]:
kw_vecs = [v.reshape(300) for _, _, v in term_plot_tuples]

In [27]:
kw_sims = cosine_similarity(kw_vecs)

In [28]:
min_dist = 0.4

kw_md_list = []

for i, kw in enumerate(kw_list):
    #if term_mean_sims[kw] < 0.6:
    #    continue
    #if nc_counts[kw] < 20:
    #    continue
    vec = kw_vecs[i]
    found = False
    for j, kw2, vec2 in kw_md_list:
        if 1. - kw_sims[i,j] < min_dist: # or kw[:-1] in kw2:
            found = True
            break
    if not found:
        kw_md_list.append((i, kw, vec))

In [29]:
len(kw_md_list), [(kw, nc_counts[kw]) for _, kw, _ in kw_md_list]

(38,
 [('application', 645),
  ('application', 645),
  ('china', 556),
  ('china', 556),
  ('china', 556),
  ('patients', 532),
  ('patients', 532),
  ('children', 410),
  ('synthesis', 213),
  ('relationship', 204),
  ('evolution', 199),
  ('correction', 153),
  ('impacts', 78),
  ('rise', 63),
  ('breast cancer', 62),
  ('mice', 62),
  ('family', 59),
  ('feasibility', 45),
  ('surgery', 39),
  ('asia', 36),
  ('noise', 33),
  ('autism', 32),
  ('earth', 26),
  ('blood', 25),
  ('fairness', 24),
  ('degradation', 23),
  ('observations', 23),
  ('plane', 22),
  ('targets', 19),
  ('retrieval', 18),
  ('debate', 15),
  ('mars', 14),
  ('similarity', 13),
  ('stiffness', 10),
  ('injury', 9),
  ('singularity', 6),
  ('wall', 6),
  ('motion compensation', 6)])

In [30]:
search_term = "china"

for i, kw in enumerate(kw_list):
    if search_term in kw:
        print(i, kw)

4 china
5 china
6 china
7 china
8 china
9 china
10 china
5788 urban china


In [31]:
i = 2545

for j in np.argsort(-kw_sims[i, :]):
    if kw_sims[i, j] >= 0.8:
        print("{:.3f}".format(kw_sims[i, j]), kw_list[j], j)
    else:
        break

1.000 rules 2545
0.986 accumulation 3222
0.982 inflation 3852
0.982 keynes 5279
0.981 economy 3381
0.976 monetary policy 3054
0.972 policy 619
0.965 finance 1816
0.956 demand 808
0.944 union 5343
0.942 volatility 3988
0.941 debt 2534
0.938 capital 1624
0.938 business 1380
0.937 regimes 5677
0.936 expectations 2573
0.932 reform 2881
0.931 crises 3202
0.930 critique 1254
0.929 country 2410
0.927 crisis 727
0.925 rise 587
0.924 brazil 840
0.922 markets 2489
0.921 account 3437
0.918 growth 321
0.918 credit 2793
0.918 competition 997
0.916 government 1895
0.915 market 2155
0.914 u.s. 2778
0.912 austerity 3506
0.912 new evidence 2010
0.911 periphery 4736
0.909 income 3350
0.908 policies 2405
0.908 entry 3296
0.907 structural change 4490
0.907 structural transformation 5269
0.905 emerging markets 4271
0.904 decline 1889
0.904 price 2483
0.902 economic growth 1047
0.901 determinants 368
0.898 investment 2564
0.897 countries 3153
0.896 price 2482
0.895 empirical evidence 1177
0.894 economic dev

In [32]:
kw_pub_sims = cosine_similarity(X, kw_vecs[i].reshape(1, -1)).flatten() #.reshape(-1)

print(np.sum(kw_pub_sims >= 0.9), np.sum(kw_pub_sims >= 0.8), np.sum(kw_pub_sims >= 0.7), np.sum(kw_pub_sims >= 0.6), np.sum(kw_pub_sims >= 0.5))

for k, j in enumerate(np.argsort(-kw_pub_sims)):
    if k > 50:
        break
    doi = index2doi[j]
    print(k, "{:.3f}".format(kw_pub_sims[j]))
    print(doi2str[doi])
    #print(doi2title[doi])
    print()
    

139 642 2000 4380 10171
0 0.982
<CrossRefWork Piergallini (2018): Nonlinear policy behavior, multiple equilibria and debt-deflation attractors. http://dx.doi.org/10.1007/s00191-018-0562-8 >

1 0.978
<CrossRefWork Tatliyer (2017): Inflation targeting and the need for a new central banking framework. http://dx.doi.org/10.1080/01603477.2017.1368026 >

2 0.977
<CrossRefWork Kitano, Takaku (2017): Capital controls as a credit policy tool in a small open economy. http://dx.doi.org/10.1515/bejm-2016-0231 >

3 0.976
<CrossRefWork Martins, Pires-Alves, Modenesi, Leite (2017): The transmission mechanism of monetary policy: Microeconomic aspects of macroeconomic issues. http://dx.doi.org/10.1080/01603477.2017.1319249 >

4 0.973
<CrossRefWork Azcona (2017): Exchange rate policy and the role of non-traded goods prices in real exchange rate fluctuations. http://dx.doi.org/10.1515/bejm-2015-0185 >

5 0.972
<CrossRefWork Sau (2018): Coping with deflation and the liquidity trap in the eurozone: A post 

In [33]:
kw_pub_sims.shape

(53629,)

In [34]:
ac_kw = AgglomerativeClustering(n_clusters=None, affinity="cosine", linkage="average", distance_threshold=0.1)
kwClusterLabels = ac_kw.fit_predict(kw_vecs)

In [35]:
np.unique(kwClusterLabels).max()

525

In [36]:
for cl in sorted(np.unique(kwClusterLabels)):
    print("Cluster", cl)
    for i, label in enumerate(kwClusterLabels):
        if cl == label:
            print(kw_list[i])
    print()
    

Cluster 0
machine learning
anomaly detection
intrusion detection
intrusion detection systems
malware
malware detection

Cluster 1
treatment
relationship
validation
students
association
efficacy
outcomes
depression
differences
adults
resilience
experiences
benefits
associations
behavior
anxiety
roles
individuals
question
predictors
emotion
parents
contributions
depressive symptoms
child
young adults
attention
mediating role
mothers
longitudinal study
gender differences
moderating role
symptoms
person
hope
college
acceptance
predicting
mother
friends
parent
social support
suicide
behaviors
difficulties
day
disorder
college students
parenting
social anxiety
fathers
predictor
emotion regulation
mediators
self-esteem
compassion
student
mediator
adult
style
disclosure
emerging adults
attachment
help
loneliness
moderator
perfectionism

Cluster 2
italy
turkey
spain
poland
bacteria
parameters
argentina
concentration
atmosphere
abundance
concentrations
airborne pollen
season
aerobiology

Cluster

video
infrastructure
shadow
sdn
delivery
target
(iot
wake
threats
software
real-time
health monitoring
coverage
automation
iot devices
wireless body area networks
deployment
topology
crowdsourcing
smart home
delay
storage
iot applications
game
users
based
vehicular networks
technique
services
manufacturing
spectrum
distributed
environments
computing
wsn
authentication
wireless networks
ieee
protocol
digital
research challenges
analytics
mobile edge computing
tasks
energy consumption
agreement
platform
industrial wireless sensor networks
head
low-
channel
connectivity
urban environment
wireless rechargeable sensor networks
fire
target localization
hardware
compression
lifetime
computation
smart grid
iot environments
sharing
probability
routing
crowd
sensor data
cooperative spectrum sensing
integrity
scheduling
synchronization
resource allocation
vanets
attacks
attack
areas
intelligence
computer
smart grids
provision
lorawan
emergency
fog
hybrid
data collection
spectrum sensing
microserv

complementarity
companies
alliances
innovations
firm
cluster
variety
enterprise
pharmaceutical industry
patents

Cluster 109
china
correction
estimation
data
water
surface
measurements
variability
index
observations
trees
landscape
estimating
predicting
retrieval
series
productivity
california
wheat
resolution
winter wheat
rice
irrigation
yield
time series
maize
vegetation
nutrition
fluorescence
chlorophyll
winter
intercomparison
satellite data
wind
nitrogen
island
landscapes
energy balance
water use
satellite images
coefficient
vegetation indices
season
landsat
forest
deficit irrigation

Cluster 110
people
differences
emotions
expression
conversation
empathy
emotion recognition
voice
eye
touch
gaze
interview
posture
expressions

Cluster 111
religion
islam
rules
political science
constitution
century
island

Cluster 112
children
research
you
learning
education
they
japan
language
identity
teaching
korea
south korea
england
community
engagement
meaning
practices
making
school
youth
univ

communication
relationships
family
narratives
families
adoption
stories
narrative
deployment
parent
family communication
military

Cluster 175
mortality
acute respiratory distress syndrome
guidelines
critically ill patients
injury
lung

Cluster 176
chip
concrete
sensor applications
rfid
wireless sensors

Cluster 177
service
business
services
economy
value creation
circular economy
business model innovation
open innovation

Cluster 178
hiv
homology
signatures

Cluster 179
learning
classification
deep learning
machine learning
recognition
convolutional neural networks
memory
networks
convolutional neural network
neural networks
training
attention
deep convolutional neural networks
brain
network
cyber-physical systems
damage detection
deep learning approach
computer vision
hyperspectral image classification
deep convolutional neural network
neural network
face recognition
samples
fpga
belief
attack
deep neural network
transfer learning
inference
ocean
deep learning techniques
cnn
deep bel

reform
skills
background
grade
student
educational attainment
cash transfers

Cluster 249
determination
quantification
visualization
hyperspectral imaging
rice
viability
infrared spectroscopy
multispectral imaging
near-infrared spectroscopy
raman
varieties
variety
multivariate analysis

Cluster 250
relationship
you
adolescents
violence
prevalence
relationships
family
well-being
risk factors
sex
adolescence
youth
america
childhood
longitudinal study
adulthood
alcohol
influences
substance
norms
trajectories
contexts
self-efficacy
girls
substance use
profiles
peer
peers
drug
emerging adulthood
mentoring
help
abuse
socialization
competence

Cluster 251
survey
mapping
techniques
comprehensive survey
plane
technique
virtualization
code

Cluster 252
cells
polymers
cell
polymer
charge
flexible substrates
photovoltaics

Cluster 253
protest
collective action

Cluster 254
law
freedom
human rights
university
right
rights
organizations
organization
international organizations
agreement
assistance
a

Cluster 480
pigs

Cluster 481
type 1 diabetes
pancreas

Cluster 482
times
location
routing
scheduling
allocation
wind

Cluster 483
asthma

Cluster 484
high resolution

Cluster 485
mars

Cluster 486
malaria

Cluster 487
mri

Cluster 488
emotions
emotion
fear
emotion recognition
video
users
skin
user
physiological signals

Cluster 489
values
morality

Cluster 490
bilingualism

Cluster 491
agency

Cluster 492
communications

Cluster 493
incarceration

Cluster 494
motives

Cluster 495
mixed reality

Cluster 496
microplastics

Cluster 497
beliefs

Cluster 498
association
associations
obesity
body
findings
childhood
adulthood
birth
body composition
weight
cohort study
body mass index
cohort
adiposity
offspring
infant
marker
maternal obesity
overweight

Cluster 499
plane
planetary health

Cluster 500
predicting
random forests
random forest
forest

Cluster 501
radar
wall

Cluster 502
india
indonesia
south korea
turkey
spain
hong kong
singapore
malaysia
citizens
revolution
authoritarianism
gree

In [37]:
from sklearn.neighbors import KNeighborsClassifier

In [38]:
neigh = KNeighborsClassifier(n_neighbors=5, metric="cosine")
neigh.fit(kw_vecs, kwClusterLabels)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='cosine',
                     metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                     weights='uniform')

In [39]:
doi = "10.1007/s00170-017-1169-1"
vec = doi2vec[doi]
#neigh.predict_proba(vec.reshape(1,-1))

KeyError: '10.1007/s00170-017-1169-1'

In [None]:
pub_labels = neigh.predict(X)

In [None]:
#cluster_pub_counts = defaultdict(int)
#for l in pub_labels

cluster_pub_counts = Counter(pub_labels)

In [None]:
from scipy.stats import linregress


In [None]:
cl2kws = {}
cl_centroids = np.zeros((len(np.unique(kwClusterLabels)), 300))

for cl in sorted(np.unique(kwClusterLabels)):
    print("Cluster", cl)
    print("Size:", cluster_pub_counts[cl])
    
    cluster_kws = []
    cluster_vecs = []
    for i, label in enumerate(kwClusterLabels):
        if cl == label:
            #print(kw_list[i])
            cluster_vecs.append(kw_vecs[i])
            already_contained = False
            for kw in cluster_kws:
                if kw_list[i][:-1] in kw:
                    already_contained = True
                    break
            if not already_contained:
                cluster_kws.append(kw_list[i])
    cl2kws[cl] = cluster_kws
    #for vec in 
    cl_centroids[cl, :] = np.average(cluster_vecs, axis=0)
    #cl_centroids[cl, :] = np.mean(np.asarray(cluster_kws))
    print(cluster_kws[:])
    print()

In [None]:
from sklearn.manifold import TSNE


X_embedded = TSNE(n_components=2, metric="cosine").fit_transform(cl_centroids)

C_embedded = TSNE(n_components=3, metric="cosine").fit_transform(cl_centroids)
cmin = np.min(C_embedded)
cmax = np.max(C_embedded)
colors = (C_embedded - cmin) / (cmax - cmin) * 256
colors = np.floor(colors)

In [None]:
from sklearn.manifold import TSNE

n_centroids = len(cl_centroids)
n_kw_md = len(kw_md_list)

X_plot = np.zeros((n_centroids + n_kw_md, 300))

X_plot[:n_centroids, :] = cl_centroids
for i, (_, kw, vec) in enumerate(kw_md_list):
    X_plot[n_centroids + i, :] = vec

X_embedded = TSNE(n_components=2, metric="cosine").fit_transform(X_plot)

C_embedded = TSNE(n_components=3, metric="cosine").fit_transform(cl_centroids)
cmin = np.min(C_embedded)
cmax = np.max(C_embedded)
colors = (C_embedded - cmin) / (cmax - cmin) * 256
colors = np.floor(colors)

In [None]:
X_embedded[40]

In [None]:
import pandas as pd
from bokeh.colors import RGB

df = pd.DataFrame(index = sorted(np.unique(kwClusterLabels)), columns = [])

for cl in sorted(np.unique(kwClusterLabels)):    
    df.at[cl, "clid"] = cl
    df.at[cl, "x"] = X_embedded[cl,0]
    df.at[cl, "y"] = X_embedded[cl,1]
    df.at[cl, "size"] = cluster_pub_counts[cl]
    
    df.at[cl, "circle_size"] = np.sqrt(cluster_pub_counts[cl]) * 2
    keywords = cl2kws[cl]
    df.at[cl, "keywords"] = ", ".join(keywords)
    df.at[cl, "keywords_top5"] = ", ".join(keywords[:5])
    df.at[cl, "keywords_top10"] = ", ".join(keywords[5:10])
    df.at[cl, "keywords_top15"] = ", ".join(keywords[10:15])
    df.at[cl, "keywords_top20"] = ", ".join(keywords[15:20])
    
    df.at[cl, "rgb"] = RGB(colors[cl,0], colors[cl,1], colors[cl,2])
    
    cl_years = []
    for doi, i in doi2index.items():
        if pub_labels[i] == cl:
            cl_years.append(doi2year[doi])
    pubs_per_year = Counter(cl_years)
    
    x = [2016, 2017, 2018, 2019, 2020]
    y = [pubs_per_year[y] for y in x]
    x = [-2, -1, 0, 1, 2]
    y[-1] = pubs_per_year[2019] * 52. / 12.

    slope, intercept, r_value, p_value, std_err = linregress(x, y)
    df.at[cl, "growth_rate"] = slope/np.average(y)*100
    df.at[cl, "growth_r2"] = r_value**2

In [None]:
df_labels = pd.DataFrame()

for i, (_, kw, vec) in enumerate(kw_md_list):
    df_labels.at[i, "kw"] = kw
    df_labels.at[i, "x"] = X_embedded[n_centroids + i, 0]
    df_labels.at[i, "y"] = X_embedded[n_centroids + i, 1]
    

In [None]:
df[df["growth_rate"]>10]

In [None]:
from bokeh.plotting import figure, output_file, show, save, output_notebook
from bokeh.models import HoverTool, Range1d, LabelSet, LassoSelectTool, CustomJS, TapTool, OpenURL
from bokeh.models.sources import ColumnDataSource
from bokeh.io import reset_output

from bokeh.palettes import Spectral6, RdBu, PuOr, Magma256, Inferno, Cividis256, RdBu11, RdYlBu11
from bokeh.transform import linear_cmap

reset_output()


#class hover(HoverTool):
#    hover.tooltips = [
#    ("index", "$index"),


hover = HoverTool()
hover.tooltips = [
    ("Cluster", "@clid"),
    #("Keywords", "@keywords"),
    ("Keywords Top-05", "@keywords_top5"),
    ("Keywords Top-10", "@keywords_top10"),
    ("Keywords Top-15", "@keywords_top15"),
    (":eywords Top-20", "@keywords_top20"),
    ("Size", "@size"),
    ("Growth Rate", "@growth_rate"),
    ("Growth R²", "@growth_r2"),
]


mapper = linear_cmap(field_name='growth_rate', palette=RdYlBu11 ,low=-20. ,high=20.)

s1 = ColumnDataSource(data=df)

label_source = ColumnDataSource(df_labels)


callback = CustomJS(code="""
// the event that triggered the callback is cb_obj:
// The event type determines the relevant attributes
console.log('Tap event occured at x-position: ' + cb_obj.x)
""")


def lasso_cb(x):
    print(x)

#lasso = LassoSelectTool(callback=lasso_cb)
lasso = LassoSelectTool()


p = figure(plot_width=1600, plot_height=1000, active_scroll = "wheel_zoom")
#p = figure(plot_width=1200, plot_height=1000, output_backend="webgl", active_scroll = "wheel_zoom")
#p.circle(x='x', y='y', size=8, color="rgb", alpha="alpha", source=s1)
p.circle(x='x', y='y', size="circle_size", color="rgb", alpha=0.75, source=s1, line_color=mapper, line_width=4)

#p.scatter(x='x', y='y', source=label_source, size=8, color="black", marker="diamond")
    
#labels = LabelSet(x='x', y='y', x_offset=-20, y_offset=5, text='kw', level='glyph', 
#              source=label_source, render_mode='canvas')
#p.add_layout(labels)

p.add_tools(hover)
p.add_tools(lasso)

p.js_on_event('tap', callback)

#p.toolbar.active_scroll = "wheel_zoom"

#df["xval"].max()

left, right, bottom, top = -120, 120, -90, 90
left, right, bottom, top = -80, 80, -60, 60
left, right, bottom, top = -120, 120, -90, 90
p.x_range=Range1d(left, right)
p.y_range=Range1d(bottom, top)


#layout = row(p, widgetbox(data_table))
#layout = column(p, widgetbox(data_table))

show(p)
save(p)

In [None]:
df.to_excel("cluster_kws.xlsx")

In [None]:
df[df["size"] >= 50].sort_values("growth_rate", ascending=False)[["size", "keywords", "growth_rate", "growth_r2", "x", "y"]].head(50)

In [None]:
[pub_labels == 40]

In [None]:
cl = 788

print(cl2kws[cl])
cl_years = []
for doi, i in doi2index.items():
    if pub_labels[i] == cl:
        cl_years.append(doi2year[doi])
pubs_per_year = Counter(cl_years)

x = [2015, 2016, 2017, 2018, 2019]
y = [pubs_per_year[y] for y in x]
x = [-2, -1, 0, 1, 2]
y[-1] = pubs_per_year[2019] * 52. / 35.


print(sorted(pubs_per_year.items()), int(y[-1]))

slope, intercept, r_value, p_value, std_err = linregress(x, y)
print("Growth Rate:", "{:.2f}".format(slope/np.average(y)*100), "% (R²: {:.2f})".format(r_value**2))
print()

for doi, i in doi2index.items():
    if pub_labels[i] == cl:
        print(doi2str[doi])

In [None]:
search_term = "thermal"

for i, kw in enumerate(kw_list):
    if search_term in kw:
        print(i, kw)

In [None]:
kw_i = 1074  #69  

for j in np.argsort(-kw_sims[kw_i, :]):
    if kw_sims[kw_i, j] >= 0.8:
        print("{:.3f}".format(kw_sims[kw_i, j]), kw_list[j], j)
    else:
        break

In [None]:
np.sum(kw_pub_sims[kw_i] > 0.1)

In [None]:
kw_pub_sims = cosine_similarity(X, kw_vecs[kw_i].reshape(1, -1)).flatten() #.reshape(-1)
np.sum(kw_pub_sims >= 1.1)

In [None]:
kw_pub_sims = cosine_similarity(X, kw_vecs[69].reshape(1, -1)).flatten()*0.5 \
                + cosine_similarity(X, kw_vecs[2449].reshape(1, -1)).flatten()*0.5
np.sum(kw_pub_sims >= 0.65)

In [None]:
kw_sims2 = (kw_sims[69, :] + kw_sims[2449, :])*0.5

In [None]:
#2545

kw_pub_sims = (cosine_similarity(X, kw_vecs[69].reshape(1, -1)).flatten() \
                + cosine_similarity(X, kw_vecs[2449].reshape(1, -1)).flatten() \
                + cosine_similarity(X, kw_vecs[2545].reshape(1, -1)).flatten())/3.

kw_sims2 = (kw_sims[69, :] + kw_sims[2449, :] + kw_sims[2545, :]) / 3.

In [None]:
kw_pub_sims.shape

In [None]:
dois_filtered = []
pubvecs_filtered = []

min_sim = 0.5

for i, sim in enumerate(kw_pub_sims):
    if sim >= min_sim:
        doi = index2doi[i]
        dois_filtered.append(doi)
        pubvecs_filtered.append(doi2vec[doi])

kw_filtered = []

#for i, sim in enumerate(kw_sims[kw_i, :]):
for i, sim in enumerate(kw_sims2):
    if sim >= min_sim:
        kw_filtered.append((i, kw_list[i], kw_vecs[i]))


In [None]:
len(dois_filtered), len(kw_filtered)

In [None]:
from joblib import Memory
location = './cachedir'
memory = Memory(location, verbose=1)

In [None]:
ac_filtered = AgglomerativeClustering(n_clusters=None, affinity="cosine", linkage="average", distance_threshold=0.2, memory=memory)
clLabels_filtered = ac_filtered.fit_predict(pubvecs_filtered)
#clLabels_filtered = ac_filtered.fit_predict(X_filtered)
no_clusters_filtered = len(np.unique(clLabels_filtered))
no_clusters_filtered

In [None]:
neigh_filtered = KNeighborsClassifier(n_neighbors=3, metric="cosine")
neigh_filtered.fit(pubvecs_filtered, clLabels_filtered)

In [None]:
X_filtered = np.zeros((no_clusters_filtered + len(kw_filtered), 300))

for i in sorted(np.unique(clLabels_filtered)):
    X_filtered[i, :] = np.average([vec for vec, check in zip(pubvecs_filtered, clLabels_filtered==i) if check], axis=0)

for i, (j, kw, vec) in enumerate(kw_filtered):
    X_filtered[no_clusters_filtered+i, :] = vec

In [None]:
clKWs_filtered = neigh_filtered.predict(X_filtered[no_clusters_filtered:, :])

In [None]:
clkw_counts_filtered = np.zeros((len(kw_filtered), no_clusters_filtered), dtype=int)

for i, doi in enumerate(dois_filtered):
    title_lower = doi2title[doi].lower()
    for j, (_, kw, _) in enumerate(kw_filtered):
        substr_position = title_lower.find(kw)
        if substr_position >= 0:
            if substr_position >= 1:
                char_before_ord = ord(title_lower[substr_position-1])
                #char_after_ord = ord(title_lower[substr_position+len()])
                if char_before_ord >= 65 and char_before_ord <= 90:
                    continue
                elif char_before_ord >= 97 and char_before_ord <= 122:
                    continue
                clkw_counts_filtered[j, clLabels_filtered[i]] += 1

In [None]:
clkw_counts_filtered = clkw_counts_filtered.astype(int)

In [None]:
X_emb_filtered = TSNE(n_components=2, metric="cosine").fit_transform(X_filtered)

C_emb_filtered = TSNE(n_components=3, metric="cosine").fit_transform(X_filtered)
cmin_filtered = np.min(C_emb_filtered)
cmax_filtered = np.max(C_emb_filtered)
colors_filtered = (C_emb_filtered - cmin_filtered) / (cmax_filtered - cmin_filtered) * 256
colors_filtered = np.floor(colors_filtered)

In [None]:
colors_filtered

In [None]:
df_filtered = pd.DataFrame(index = sorted(np.unique(clLabels_filtered)), columns = [])

for cl in sorted(np.unique(clLabels_filtered[:])):    
    df_filtered.at[cl, "clid"] = cl
    df_filtered.at[cl, "x"] = X_emb_filtered[cl,0]
    df_filtered.at[cl, "y"] = X_emb_filtered[cl,1]
    df_filtered.at[cl, "size"] = np.sum(clLabels_filtered == cl)
    
    df_filtered.at[cl, "circle_size"] = np.sqrt(df_filtered.at[cl, "size"]) * 5
    
    df_filtered.at[cl, "rgb"] = RGB(colors_filtered[cl,0], colors_filtered[cl,1], colors_filtered[cl,2])
    
    #keywords = [kw for (_, kw, _), clkw in zip(kw_filtered, clKWs_filtered) if cl == clkw]
    
    keywords = []
    for i in np.argsort(-clkw_counts_filtered[:, cl]):
        if clkw_counts_filtered[i, cl] >= 0.5:
            if len([kw for kw in keywords if kw_filtered[i][1][:-1] in kw]) == 0:
                if cosine_similarity(X_filtered[cl, :].reshape(1, -1), X_filtered[no_clusters_filtered+i, :].reshape(1, -1)) >= 0.75:
                    keywords.append("{} ({})".format(kw_filtered[i][1], clkw_counts_filtered[i, cl]))
        else:
            break
    
    df_filtered.at[cl, "keywords"] = ", ".join(keywords)
    df_filtered.at[cl, "keywords_top5"] = ", ".join(keywords[:5])
    df_filtered.at[cl, "keywords_top10"] = ", ".join(keywords[5:10])
    df_filtered.at[cl, "keywords_top15"] = ", ".join(keywords[10:15])
    df_filtered.at[cl, "keywords_top20"] = ", ".join(keywords[15:20])
    
    cl_years = []
    for i, doi in enumerate(dois_filtered):
        if clLabels_filtered[i] == cl:
            cl_years.append(doi2year[doi])
    pubs_per_year = Counter(cl_years)
    
    x = [2015, 2016, 2017, 2018, 2019]
    y = [pubs_per_year[y] for y in x]
    x = [-2, -1, 0, 1, 2]
    y[-1] = pubs_per_year[2019] * 52. / 35.

    slope, intercept, r_value, p_value, std_err = linregress(x, y)
    df_filtered.at[cl, "growth_rate"] = slope/np.average(y)*100
    df_filtered.at[cl, "growth_r2"] = r_value**2

In [None]:
df_filtered[df_filtered["size"] >= 50].sort_values("growth_rate", ascending=False).head(50)[["size", "keywords", "growth_rate", "growth_r2", "x", "y"]]

In [None]:
df_labels_filtered = pd.DataFrame()

for i, (j, kw, vec) in enumerate(kw_filtered):    
    df_labels_filtered.at[i, "kw"] = kw
    df_labels_filtered.at[i, "x"] = X_emb_filtered[no_clusters_filtered+i, 0]
    df_labels_filtered.at[i, "y"] = X_emb_filtered[no_clusters_filtered+i, 1]
    df_labels_filtered.at[i, "max_count"] = clkw_counts_filtered[i, clKWs_filtered[i]]
    df_labels_filtered.at[i, "nc_count"] = nc_counts[kw]

In [None]:
df_labels_filtered = df_labels_filtered[df_labels_filtered["nc_count"] >= 100].sort_values("max_count", ascending=False)
df_labels_filtered = df_labels_filtered[df_labels_filtered["max_count"] >= 100]

In [None]:
df_labels_filtered = pd.DataFrame()

kw_filtered_in_plot = []

for i, (j, kw, vec) in enumerate(kw_filtered):  
    if term_mean_sims[kw] < 0.5:
        continue
    found = False
    for _, _, vec2 in kw_filtered_in_plot:
        if cosine_similarity(vec.reshape(1, -1), vec2.reshape(1, -1)) >= 0.7:
            found = True
            break
    if not found:
        kw_filtered_in_plot.append((j, kw, vec))
        df_labels_filtered.at[i, "kw"] = kw
        df_labels_filtered.at[i, "x"] = X_emb_filtered[no_clusters_filtered+i, 0]
        df_labels_filtered.at[i, "y"] = X_emb_filtered[no_clusters_filtered+i, 1]
        df_labels_filtered.at[i, "max_count"] = clkw_counts_filtered[i, clKWs_filtered[i]]
        df_labels_filtered.at[i, "nc_count"] = nc_counts[kw]

In [None]:
reset_output()


hover2 = HoverTool()
hover2.tooltips = [
    ("Cluster", "@clid"),
    #("Keywords", "@keywords"),
    ("Keywords Top-05", "@keywords_top5"),
    ("Keywords Top-10", "@keywords_top10"),
    ("Keywords Top-15", "@keywords_top15"),
    ("Keywords Top-20", "@keywords_top20"),
    ("Size", "@size"),
    ("Growth Rate", "@growth_rate"),
    ("Growth R²", "@growth_r2"),
]


#mapper = linear_cmap(field_name='growth_rate', palette=RdYlBu11 ,low=-20. ,high=20.)

s1_filtered = ColumnDataSource(data=df_filtered)

label_source_filtered = ColumnDataSource(df_labels_filtered)




p2 = figure(plot_width=1600, plot_height=1000, active_scroll = "wheel_zoom")
#p = figure(plot_width=1200, plot_height=1000, output_backend="webgl", active_scroll = "wheel_zoom")
#p.circle(x='x', y='y', size=8, color="rgb", alpha="alpha", source=s1)
p2.circle(x='x', y='y', size="circle_size", color="rgb", alpha=0.75, source=s1_filtered, line_alpha=0, line_width=0.)

p2.scatter(x='x', y='y', source=label_source_filtered, size=8, color="black", marker="diamond")
    
labels2= LabelSet(x='x', y='y', x_offset=-20, y_offset=5, text='kw', level='glyph', 
              source=label_source_filtered, render_mode='canvas')
p2.add_layout(labels2)

p2.add_tools(hover2)

#p.toolbar.active_scroll = "wheel_zoom"

#df["xval"].max()

left, right, bottom, top = -120, 120, -90, 90
left, right, bottom, top = -80, 80, -60, 60
left, right, bottom, top = -120, 120, -90, 90
p2.x_range=Range1d(left, right)
p2.y_range=Range1d(bottom, top)


#layout = row(p, widgetbox(data_table))
#layout = column(p, widgetbox(data_table))

show(p2)
#save(layout)

In [None]:
cl = 53

for i, lbl in enumerate(clLabels_filtered):
    if lbl == cl:
        doi = dois_filtered[i]
        print(doi2str[doi])