In [27]:
import csv
import pandas as pd
import numpy as np

import os.path
from gensim import corpora
from gensim.models import LsiModel
from nltk.tokenize import RegexpTokenizer
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from gensim.models.coherencemodel import CoherenceModel
import matplotlib.pyplot as plt

In [28]:
file_name2 = "scopus.csv"
df_scopus = pd.read_csv(file_name2)
df_scopus.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 836 entries, 0 to 835
Data columns (total 54 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   Authors                        836 non-null    object 
 1   Author(s) ID                   835 non-null    object 
 2   Title                          836 non-null    object 
 3   Year                           836 non-null    int64  
 4   Source title                   836 non-null    object 
 5   Volume                         823 non-null    object 
 6   Issue                          759 non-null    object 
 7   Art. No.                       137 non-null    object 
 8   Page start                     713 non-null    object 
 9   Page end                       713 non-null    object 
 10  Page count                     1 non-null      float64
 11  Cited by                       667 non-null    float64
 12  DOI                            734 non-null    obj

In [29]:
relevant_columns = {"index": "DOI", "LSA": "Title  Abstract  Author Keywords  Index Keywords".split("  ")}

In [129]:
df_docs = df_scopus[["DOI"]+relevant_columns["LSA"]]
df_docs = df_docs.set_index("DOI")
df_docs["Abstract"] = df_docs["Abstract"].replace({"[No abstract available]": np.nan})
df_docs = df_docs.fillna("")
df_docs

Unnamed: 0_level_0,Title,Abstract,Author Keywords,Index Keywords
DOI,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
10.1108/IJSHE-12-2020-0484,"Students’ learning sustainability – implicit, ...",Purpose: This study aims to understand better ...,Higher education institutions (HEIs); Sustaina...,
10.1057/s41599-022-01129-0,Going beyond the AHA! moment: insight discover...,"In this paper, we develop and apply the concep...",,
10.1186/s12909-022-03308-8,An online Delphi study to investigate the comp...,Background: Several competency frameworks are ...,Allied health disciplines; CanMEDS; Continuous...,article; audiology; Belgium; clinical article;...
10.1186/s12909-022-03283-0,How do medical students learn conceptual knowl...,Background: Acquiring medical knowledge is a k...,Learning difficulties; Learning techniques; Me...,adult; article; controlled study; curriculum; ...
10.1186/s12909-022-03259-0,How medical students co-regulate their learnin...,Background: Self-regulated learning is a key c...,Clinical clerkships; Co-regulated learning; Me...,clinical education; human; learning; medical e...
...,...,...,...,...
10.1080/0305569930190108,Professional Development and Competence-based ...,The rapid expansion of competence-based educat...,,
,Key competencies for receiving or making your ...,,,
10.1080/0309877920160311,“Quality Management’ or “The Educative Workpla...,Competence-based education has been the topic ...,,
10.1016/0261-5177(92)90051-8,The implications of competence based education...,,,


In [130]:
df_docs["doc"] = df_docs.apply(lambda x: ". ".join(x.values), axis=1)
df_docs

Unnamed: 0_level_0,Title,Abstract,Author Keywords,Index Keywords,doc
DOI,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
10.1108/IJSHE-12-2020-0484,"Students’ learning sustainability – implicit, ...",Purpose: This study aims to understand better ...,Higher education institutions (HEIs); Sustaina...,,"Students’ learning sustainability – implicit, ..."
10.1057/s41599-022-01129-0,Going beyond the AHA! moment: insight discover...,"In this paper, we develop and apply the concep...",,,Going beyond the AHA! moment: insight discover...
10.1186/s12909-022-03308-8,An online Delphi study to investigate the comp...,Background: Several competency frameworks are ...,Allied health disciplines; CanMEDS; Continuous...,article; audiology; Belgium; clinical article;...,An online Delphi study to investigate the comp...
10.1186/s12909-022-03283-0,How do medical students learn conceptual knowl...,Background: Acquiring medical knowledge is a k...,Learning difficulties; Learning techniques; Me...,adult; article; controlled study; curriculum; ...,How do medical students learn conceptual knowl...
10.1186/s12909-022-03259-0,How medical students co-regulate their learnin...,Background: Self-regulated learning is a key c...,Clinical clerkships; Co-regulated learning; Me...,clinical education; human; learning; medical e...,How medical students co-regulate their learnin...
...,...,...,...,...,...
10.1080/0305569930190108,Professional Development and Competence-based ...,The rapid expansion of competence-based educat...,,,Professional Development and Competence-based ...
,Key competencies for receiving or making your ...,,,,Key competencies for receiving or making your ...
10.1080/0309877920160311,“Quality Management’ or “The Educative Workpla...,Competence-based education has been the topic ...,,,“Quality Management’ or “The Educative Workpla...
10.1016/0261-5177(92)90051-8,The implications of competence based education...,,,,The implications of competence based education...


In [32]:
from gensim.parsing.preprocessing import remove_stopwords, strip_punctuation, preprocess_string, strip_short, stem_text

In [131]:
def preprocess(text):
    CUSTOM_FILTERS = [lambda x: x.lower(), 
                                remove_stopwords, 
                                strip_punctuation, 
                                strip_short, 
                                stem_text]
    text = preprocess_string(text, CUSTOM_FILTERS)
    return text

In [132]:
df_docs['doc_clean'] = df_docs['doc'].apply(lambda x: preprocess(x))

In [133]:
df_docs

Unnamed: 0_level_0,Title,Abstract,Author Keywords,Index Keywords,doc,doc_clean
DOI,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
10.1108/IJSHE-12-2020-0484,"Students’ learning sustainability – implicit, ...",Purpose: This study aims to understand better ...,Higher education institutions (HEIs); Sustaina...,,"Students’ learning sustainability – implicit, ...","[students’, learn, sustain, implicit, explicit..."
10.1057/s41599-022-01129-0,Going beyond the AHA! moment: insight discover...,"In this paper, we develop and apply the concep...",,,Going beyond the AHA! moment: insight discover...,"[go, aha, moment, insight, discoveri, transdis..."
10.1186/s12909-022-03308-8,An online Delphi study to investigate the comp...,Background: Several competency frameworks are ...,Allied health disciplines; CanMEDS; Continuous...,article; audiology; Belgium; clinical article;...,An online Delphi study to investigate the comp...,"[onlin, delphi, studi, investig, complet, canm..."
10.1186/s12909-022-03283-0,How do medical students learn conceptual knowl...,Background: Acquiring medical knowledge is a k...,Learning difficulties; Learning techniques; Me...,adult; article; controlled study; curriculum; ...,How do medical students learn conceptual knowl...,"[medic, student, learn, conceptu, knowledg, hi..."
10.1186/s12909-022-03259-0,How medical students co-regulate their learnin...,Background: Self-regulated learning is a key c...,Clinical clerkships; Co-regulated learning; Me...,clinical education; human; learning; medical e...,How medical students co-regulate their learnin...,"[medic, student, regul, learn, clinic, clerksh..."
...,...,...,...,...,...,...
10.1080/0305569930190108,Professional Development and Competence-based ...,The rapid expansion of competence-based educat...,,,Professional Development and Competence-based ...,"[profession, develop, compet, base, educ, rapi..."
,Key competencies for receiving or making your ...,,,,Key competencies for receiving or making your ...,"[kei, compet, receiv, make, histori, issu, ari..."
10.1080/0309877920160311,“Quality Management’ or “The Educative Workpla...,Competence-based education has been the topic ...,,,“Quality Management’ or “The Educative Workpla...,"[“qualiti, management’, “the, educ, workplace’..."
10.1016/0261-5177(92)90051-8,The implications of competence based education...,,,,The implications of competence based education...,"[implic, compet, base, educ, train, programm, ..."


In [134]:
import json
  
f = open('citation_net.json', encoding='utf-8-sig')
data = json.load(f)
network = data["network"]

In [145]:
network_items = pd.DataFrame(network["items"])
network_items["DOI"] = network_items["url"].apply(lambda x: x.replace("https://doi.org/", "") if str(x) != "nan" else np.nan)
network_items = network_items.set_index("DOI")

In [146]:
network_items

Unnamed: 0_level_0,id,label,description,url,x,y,cluster,weights,scores
DOI,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
10.1108/ijshe-12-2020-0484,1,alm k. (2022),<table><tr><td>Authors:</td><td>alm k.; beery ...,https://doi.org/10.1108/ijshe-12-2020-0484,0.9303,0.0346,1,"{'Links': 4.0, 'Citations': 1.0, 'Norm. citati...","{'Pub. year': 2022.0, 'Citations': 1.0, 'Norm...."
10.1057/s41599-022-01047-1,7,škrinjarić b. (2022),<table><tr><td>Authors:</td><td>škrinjarić b.<...,https://doi.org/10.1057/s41599-022-01047-1,0.5354,0.1785,7,"{'Links': 5.0, 'Citations': 0.0, 'Norm. citati...","{'Pub. year': 2022.0, 'Citations': 0.0, 'Norm...."
10.14201/teri.25394,10,leal m.s. (2022),<table><tr><td>Authors:</td><td>leal m.s.; góm...,https://doi.org/10.14201/teri.25394,-0.4185,0.3001,3,"{'Links': 1.0, 'Citations': 0.0, 'Norm. citati...","{'Pub. year': 2022.0, 'Citations': 0.0, 'Norm...."
10.3390/su14094916,11,venn r. (2022),<table><tr><td>Authors:</td><td>venn r.; perez...,https://doi.org/10.3390/su14094916,0.6689,-0.1122,1,"{'Links': 6.0, 'Citations': 0.0, 'Norm. citati...","{'Pub. year': 2022.0, 'Citations': 0.0, 'Norm...."
10.1108/et-11-2020-0341,12,van der baan n. (2022),<table><tr><td>Authors:</td><td>van der baan n...,https://doi.org/10.1108/et-11-2020-0341,-0.2235,-0.1336,4,"{'Links': 2.0, 'Citations': 0.0, 'Norm. citati...","{'Pub. year': 2022.0, 'Citations': 0.0, 'Norm...."
...,...,...,...,...,...,...,...,...,...
10.1080/0305569980240309,810,hyland t. (1998),<table><tr><td>Authors:</td><td>hyland t.</td>...,https://doi.org/10.1080/0305569980240309,1.3659,-0.2596,8,"{'Links': 2.0, 'Citations': 10.0, 'Norm. citat...","{'Pub. year': 1998.0, 'Citations': 10.0, 'Norm..."
10.1111/1467-9752.00070,814,hyland t. (1997),<table><tr><td>Authors:</td><td>hyland t.</td>...,https://doi.org/10.1111/1467-9752.00070,1.2536,-0.2308,8,"{'Links': 2.0, 'Citations': 55.0, 'Norm. citat...","{'Pub. year': 1997.0, 'Citations': 55.0, 'Norm..."
10.1111/j.1467-9752.1996.tb00406.x,818,bridges d. (1996),<table><tr><td>Authors:</td><td>bridges d.</td...,https://doi.org/10.1111/j.1467-9752.1996.tb004...,0.3718,0.0827,7,"{'Links': 1.0, 'Citations': 18.0, 'Norm. citat...","{'Pub. year': 1996.0, 'Citations': 18.0, 'Norm..."
10.1080/0305569940200208,829,hyland t. (1994b),<table><tr><td>Authors:</td><td>hyland t.</td>...,https://doi.org/10.1080/0305569940200208,1.4045,-0.2693,8,"{'Links': 1.0, 'Citations': 6.0, 'Norm. citati...","{'Pub. year': 1994.0, 'Citations': 6.0, 'Norm...."


In [161]:
sub_df_docs = df_docs.copy().reset_index()
sub_df_docs = sub_df_docs.merge(network_items.reset_index(), on="DOI", how="inner").dropna()

In [164]:
sub_df_docs["cluster"].value_counts()

1    46
2    32
3    23
4    12
5     9
7     7
6     6
8     3
Name: cluster, dtype: int64

In [66]:
from gensim import corpora
from gensim import models

In [189]:
df_docs_cluster = sub_df_docs[sub_df_docs["cluster"] == 2]
corpus = df_docs_cluster['doc_clean']
dictionary = corpora.Dictionary(corpus)

bow = [dictionary.doc2bow(text) for text in corpus]

tfidf = models.TfidfModel(bow)
corpus_tfidf = tfidf[bow]

In [40]:
from gensim.models import LsiModel
from gensim.models.coherencemodel import CoherenceModel

In [190]:
%%time
for i in range(2, 16):
    lsi = LsiModel(corpus_tfidf, num_topics=i, id2word=dictionary)
    coherence_model = CoherenceModel(model=lsi, texts=df_docs_cluster['doc_clean'], dictionary=dictionary, coherence='c_v')
    coherence_score = coherence_model.get_coherence()
    print('Coherence score with {} clusters: {}'.format(i, coherence_score))

Coherence score with 2 clusters: 0.3319961121253763
Coherence score with 3 clusters: 0.44923320220030205
Coherence score with 4 clusters: 0.5438725856291666
Coherence score with 5 clusters: 0.5137276696813281
Coherence score with 6 clusters: 0.4243115149140215
Coherence score with 7 clusters: 0.4102539882875257
Coherence score with 8 clusters: 0.4048022032647709
Coherence score with 9 clusters: 0.4052681262048962
Coherence score with 10 clusters: 0.41549891632588115
Coherence score with 11 clusters: 0.4558606325447467
Coherence score with 12 clusters: 0.4540632908284445
Coherence score with 13 clusters: 0.4348391116833374
Coherence score with 14 clusters: 0.43661209089236935
Coherence score with 15 clusters: 0.4396292446741696
Wall time: 45.7 s


In [191]:
num_topics = 5
lsi = LsiModel(corpus_tfidf, num_topics=num_topics, id2word=dictionary)

In [192]:
corpus_lsi = lsi[bow]
scores = [[] for _ in range(num_topics)]
for doc in corpus_lsi:
    for k in range(num_topics):
        scores[k].append(round(doc[k][1],2))

df_topic = df_docs_cluster.copy()
for k in range(num_topics):
    df_topic[f'score_topic_{k}'] = scores[k]

df_topic['Topic']= df_topic[[f'score_topic_{k}' for k in range(num_topics)]].apply(lambda x: x.argmax(), axis=1)
df_topic

Unnamed: 0,DOI,Title,Abstract,Author Keywords,Index Keywords,doc,doc_clean,id,label,description,...,y,cluster,weights,scores,score_topic_0,score_topic_1,score_topic_2,score_topic_3,score_topic_4,Topic
3,10.12973/eu-jer.11.2.965,"It is the Shared Aims, Trust and Compassion th...",Teachers´ life-long learning and occupational ...,Co-passion; Dialogue; Professional identity; S...,,"It is the Shared Aims, Trust and Compassion th...","[share, aim, trust, compass, allow, peopl, pro...",16,nissilä s.-p. (2022),<table><tr><td>Authors:</td><td>nissilä s.-p.;...,...,-0.3984,2,"{'Links': 1.0, 'Citations': 0.0, 'Norm. citati...","{'Pub. year': 2022.0, 'Citations': 0.0, 'Norm....",-4.77,2.5,-1.86,0.43,-2.24,1
314,10.1186/s40461-020-00108-6,How do teachers collaborate in Hungarian VET s...,Background: This study investigates teacher co...,Collegiality; Teacher collaboration; Teacher p...,,How do teachers collaborate in Hungarian VET s...,"[teacher, collabor, hungarian, vet, school, qu...",47,bükki e. (2021),<table><tr><td>Authors:</td><td>bükki e.; fehé...,...,-0.406,2,"{'Links': 1.0, 'Citations': 1.0, 'Norm. citati...","{'Pub. year': 2021.0, 'Citations': 1.0, 'Norm....",-15.46,5.81,3.35,-4.73,-7.11,1
341,10.1007/s42087-019-00080-y,How Do Students Describe Their Study Processes...,The competence-based approach has entered Finn...,Competence; Competence-based education; Narrat...,,How Do Students Describe Their Study Processes...,"[student, studi, process, compet, base, vocat,...",194,kepanen p. (2020),<table><tr><td>Authors:</td><td>kepanen p.; mä...,...,-0.3204,2,"{'Links': 4.0, 'Citations': 0.0, 'Norm. citati...","{'Pub. year': 2020.0, 'Citations': 0.0, 'Norm....",-4.4,1.59,-0.27,1.4,-0.12,1
343,10.1080/02619768.2019.1681965,Powerful learning environments in secondary vo...,Stakeholders in vocational education have diff...,co-design; powerful learning environments; sha...,,Powerful learning environments in secondary vo...,"[power, learn, environ, secondari, vocat, educ...",210,placklé i. (2020),<table><tr><td>Authors:</td><td>placklé i.; kö...,...,-0.3013,2,"{'Links': 1.0, 'Citations': 4.0, 'Norm. citati...","{'Pub. year': 2020.0, 'Citations': 4.0, 'Norm....",-5.83,1.89,-3.27,1.01,3.45,4
345,10.1080/13636820.2019.1644364,Does implementation of competence-based educat...,Competence-based education (CBE) is an innovat...,competence-based education (CBE); student sati...,,Does implementation of competence-based educat...,"[implement, compet, base, educ, mediat, impact...",236,van griethuijsen r.a.l.f. (2020),<table><tr><td>Authors:</td><td>van griethuijs...,...,-0.1211,2,"{'Links': 10.0, 'Citations': 5.0, 'Norm. citat...","{'Pub. year': 2020.0, 'Citations': 5.0, 'Norm....",-11.58,-9.37,-0.48,-0.17,-0.11,4
346,10.1080/13636820.2019.1635634,Evaluating competence-based vocational educati...,This paper investigates the realisation of com...,agricultural schools; Competence-based educati...,,Evaluating competence-based vocational educati...,"[evalu, compet, base, vocat, educ, indonesia, ...",260,misbah z. (2020),<table><tr><td>Authors:</td><td>misbah z.; gul...,...,-0.2204,2,"{'Links': 5.0, 'Citations': 5.0, 'Norm. citati...","{'Pub. year': 2020.0, 'Citations': 5.0, 'Norm....",-6.25,0.44,2.1,-0.9,2.89,4
348,10.1007/s10984-018-9276-y,Competence and knowledge development in compet...,Theory and research in the field of competence...,Agriculture vocational education; Competence d...,,Competence and knowledge development in compet...,"[compet, knowledg, develop, compet, base, voca...",286,misbah z. (2019),<table><tr><td>Authors:</td><td>misbah z.; gul...,...,-0.282,2,"{'Links': 10.0, 'Citations': 4.0, 'Norm. citat...","{'Pub. year': 2019.0, 'Citations': 4.0, 'Norm....",-6.11,1.89,1.1,-0.21,4.31,4
349,10.1080/13573322.2017.1397507,Helping students build competences in physical...,"In the French education system, the current cu...",competence; experience; Learning; physical edu...,conceptual framework; empiricism; human; human...,Helping students build competences in physical...,"[help, student, build, compet, physic, educ, t...",295,escalié g. (2019),<table><tr><td>Authors:</td><td>escalié g.; re...,...,-0.0504,2,"{'Links': 1.0, 'Citations': 11.0, 'Norm. citat...","{'Pub. year': 2019.0, 'Citations': 11.0, 'Norm...",-7.45,3.76,1.09,2.8,0.83,1
369,10.1007/s10984-016-9209-6,Design principles for hybrid learning configur...,"In today’s knowledge society, there is a deman...",Authentic learning; Educational design researc...,,Design principles for hybrid learning configur...,"[design, principl, hybrid, learn, configur, in...",451,cremers p.h.m. (2016),<table><tr><td>Authors:</td><td>cremers p.h.m....,...,-0.048,2,"{'Links': 1.0, 'Citations': 20.0, 'Norm. citat...","{'Pub. year': 2016.0, 'Citations': 20.0, 'Norm...",-6.19,3.39,0.18,1.05,5.46,4
371,10.1016/j.tate.2016.02.006,Team learning and its association with the imp...,Competence-based education (CBE) is the leadin...,Competence-based education; Teacher teams; Tea...,,Team learning and its association with the imp...,"[team, learn, associ, implement, compet, base,...",466,wijnia l. (2016),<table><tr><td>Authors:</td><td>wijnia l.; kun...,...,-0.13,2,"{'Links': 10.0, 'Citations': 23.0, 'Norm. cita...","{'Pub. year': 2016.0, 'Citations': 23.0, 'Norm...",-7.85,-10.54,-1.06,1.41,-0.45,3


In [193]:
df_topic["Topic"].value_counts()

1    12
4    10
3     7
2     3
Name: Topic, dtype: int64

In [194]:
lsi.print_topics(num_topics)

[(0,
  '-0.342*"team" + -0.216*"cbe" + -0.133*"pitfal" + -0.131*"vet" + -0.126*"polici" + -0.121*"teacher" + -0.120*"profession" + -0.118*"effect" + -0.102*"implement" + -0.099*"teach"'),
 (1,
  '-0.677*"team" + -0.244*"cbe" + -0.163*"effect" + -0.141*"satisfact" + 0.101*"polici" + 0.101*"profession" + -0.096*"associ" + 0.087*"career" + -0.086*"skill" + -0.085*"size"'),
 (2,
  '-0.436*"career" + -0.321*"convers" + -0.225*"mentor" + -0.217*"parti" + -0.211*"dialogu" + 0.190*"polici" + -0.150*"commun" + -0.128*"intervent" + -0.128*"employ" + -0.128*"shift"'),
 (3,
  '-0.452*"pitfal" + -0.269*"vet" + -0.169*"cbe" + -0.169*"2004" + 0.160*"digit" + 0.138*"team" + 0.136*"model" + 0.134*"action" + -0.123*"connect" + 0.121*"individu"'),
 (4,
  '-0.221*"polici" + -0.217*"hrm" + -0.185*"profession" + 0.162*"cbve" + 0.159*"programm" + 0.155*"design" + 0.146*"principl" + 0.139*"knowledg" + 0.135*"digit" + -0.135*"manag"')]

In [196]:
for ix in df_topic[df_topic["Topic"] == 1].index:
    row = df_topic.loc[ix]
    print(row["Title"],"\n")

It is the Shared Aims, Trust and Compassion that Allow People to Prosper: Teacher Educators´ Lifelong Learning in Competence-based Education 

How do teachers collaborate in Hungarian VET schools? A quantitative study of forms, perceptions of impact and related individual and organisational factors 

How Do Students Describe Their Study Processes in the Competence-Based Vocational Special Education Teacher Training? 

Helping students build competences in physical education: theoretical proposals and illustrations 

The relationship between perceived competence and earned credits in competence-based higher education 

Teaching in innovative vocational education in the Netherlands 

Can training teachers stimulate career learning conversations? Analysis of vocational training conversations in Dutch secondary vocational education 

Authentic and self-directed learning in vocational education: Challenges to vocational educators 

Aspects of competence-based education as footholds to impro