In [1]:
import pickle
import pandas as pd
pd.set_option('display.max_rows', 100)
from techminer2.science_mapping.co_occurrence import communities
from techminer2.refine.thesaurus.descriptors import apply_thesaurus
from techminer2.refine.thesaurus.descriptors import list_cleanup
from techminer2.refine.thesaurus.descriptors import check_integrity

# Refresh the descriptors thesaurus of the project in "./" before any analysis.
# The three steps run in this fixed order: clean-up, integrity check, and
# finally application of the thesaurus.
for thesaurus_step in (list_cleanup, check_integrity, apply_thesaurus):
    thesaurus_step(root_dir="./")


# Terms selected for each publication year; filled in by the per-year loop.
custom_items_by_year = {}

# Lower bound handed to `occ_range` for each publication year, i.e. the
# threshold a descriptor must reach in that year to enter the yearly network.
occ_by_year = {
    2014: 14, 2015: 10, 2016: 6, 2017: 8, 2018: 5,
    2019: 7, 2020: 9, 2021: 8, 2022: 6, 2023: 6,
}


# Document types kept when filtering the database for every yearly network.
document_types = (
    'Article',
    'Book chapter',
    'Conference paper',
    'Review',
    'Book',
    'Conference review',
    'Short survey',
    'Letter',
)

# One pass per year that has a threshold in `occ_by_year` (2014 through 2023):
# cluster that year's descriptors, record the clustered terms, and show the
# top of the cluster table.
for year, min_occ in occ_by_year.items():

    print(f"------- {year} -------")

    clusters = communities(
        #
        # COLUMN PARAMS:
        field="descriptors",
        top_n=None,
        occ_range=(min_occ, None),
        gc_range=(None, None),
        custom_items=None,
        #
        # NETWORK PARAMS:
        algorithm_or_dict="louvain",
        association_index="association",
        #
        # DATABASE PARAMS:
        root_dir="./",
        database="main",
        year_filter=(year, year),  # restrict the database to this single year
        cited_by_filter=(None, None),
        document_type=list(document_types),  # fresh list on every call
    )

    # Flatten the table column by column, skip the empty padding cells, and
    # drop the last space-separated token of each cell (the trailing counters,
    # e.g. "BIG_DATA 021:0556" -> "BIG_DATA") so only the term itself remains.
    selected_items = [
        ' '.join(term.split(' ')[:-1])
        for col in clusters.columns
        for term in clusters[col].to_list()
        if term != ''
    ]
    custom_items_by_year[year] = selected_items

    # `display` is the notebook-provided rich output helper.
    display(clusters.head(10))

# Union of the terms selected in any year, without duplicates.
custom_items = {
    item
    for yearly_items in custom_items_by_year.values()
    for item in yearly_items
}

# Persist the set so it can be reloaded later with `pickle.load`.
with open('reports/custom_items.pkl', 'wb') as handle:
    pickle.dump(custom_items, handle)

Note: to be able to use all crisp methods, you need to install some additional packages:  {'graph_tool', 'infomap', 'wurlitzer', 'bayanpy'}
Note: to be able to use all crisp methods, you need to install some additional packages:  {'ASLPAw', 'pyclustering'}
Note: to be able to use all crisp methods, you need to install some additional packages:  {'infomap', 'wurlitzer'}
--INFO-- Checking `descriptors.the.txt` integrity.
--INFO-- Applying `descriptors.the.txt` thesaurus to author/index keywords and abstract/title words
------- 2014 -------


Unnamed: 0,CL_0,CL_1,CL_2
0,COMPETITIVE_INTELLIGENCE 134:1436,INFORMATION_SYSTEMS 058:0532,DATA_MINING 043:0691
1,INFORMATION_ANALYSIS 110:1088,DECISION_SUPPORT_SYSTEM 055:0742,COMPETITIVENESS 021:0034
2,DECISION_MAKING 069:0712,BUSINESS_INTELLIGENCE_SYSTEM 055:0444,COMPETITIVE_ADVANTAGES 018:0041
3,DATA_WAREHOUSE 048:0423,ARTIFICIAL_INTELLIGENCE 021:0310,DECISION_MAKING_PROCESSES 016:0060
4,BIG_DATA 021:0556,BUSINESS_ANALYTICS 020:0528,ON_LINE_ANALYTICAL_PROCESSING 015:0463
5,DATA_ANALYTICS 021:0418,INFORMATION_USE 020:0176,INFORMATION_TECHNOLOGIES 014:0089
6,BUSINESS_INTELLIGENCE_TOOLS 021:0253,,
7,OPERATIONS_RESEARCH 017:0240,,
8,BUSINESS_INTELLIGENCE_SOLUTIONS 017:0147,,
9,SEMANTICS 015:0177,,


------- 2015 -------


Unnamed: 0,CL_0,CL_1,CL_2
0,INFORMATION_ANALYSIS 168:1774,DECISION_MAKING 071:0786,DECISION_SUPPORT_SYSTEM 037:0567
1,COMPETITIVE_INTELLIGENCE 129:1409,BUSINESS_INTELLIGENCE_SYSTEM 065:0567,DATA_MINING 031:0476
2,BIG_DATA 035:0855,INFORMATION_SYSTEMS 051:0622,DECISION_SUPPORT 019:0166
3,DATA_ANALYTICS 030:0436,DATA_WAREHOUSE 033:0269,ARTIFICIAL_INTELLIGENCE 016:0333
4,ANALYTICS 017:0266,DECISION_MAKERS 019:0268,BUSINESS_INTELLIGENCE_APPLICATIONS 016:0296
5,DATA_HANDLING 017:0163,BUSINESS_INTELLIGENCE_SOLUTIONS 019:0114,OPERATIONS_RESEARCH 016:0189
6,BUSINESS_ANALYTICS 017:0151,BUSINESS_INTELLIGENCE_TOOLS 016:0113,BUSINESS_PROCESSES 014:0158
7,SOCIAL_NETWORKS 014:0070,COMPETITIVENESS 015:0146,KNOWLEDGE_MANAGEMENT 013:0086
8,CLOUD_COMPUTING 013:0158,DECISION_MAKING_PROCESSES 015:0081,INFORMATION_TECHNOLOGIES 012:0072
9,DATA_VISUALISATION 013:0100,DECISIONS 014:0120,


------- 2016 -------


Unnamed: 0,CL_0,CL_1,CL_2,CL_3
0,INFORMATION_ANALYSIS 170:1645,BIG_DATA 040:0743,INFORMATION_SYSTEMS 060:0520,DATA_WAREHOUSE 048:0412
1,COMPETITIVE_INTELLIGENCE 127:1212,DATA_MINING 037:0248,BUSINESS_INTELLIGENCE_SYSTEM 053:0802,DATA_SOURCES 015:0062
2,DECISION_MAKING 071:0696,DATA_ANALYTICS 028:0245,DECISION_SUPPORT_SYSTEM 033:0407,DATA_WAREHOUSING 013:0104
3,OPERATIONS_RESEARCH 028:0222,BUSINESS_INTELLIGENCE_TOOLS 019:0199,INFORMATION_USE 014:0489,ON_LINE_ANALYTICAL_PROCESSING 013:0066
4,ANALYTICS 020:0109,DATA_VISUALISATION 014:0183,DECISION_SUPPORT 014:0296,HEALTH_CARE 011:0101
5,BUSINESS_INTELLIGENCE_SOLUTIONS 018:0095,ARTIFICIAL_INTELLIGENCE 013:0095,SUCCESS_FACTORS 011:0333,INSIGHTS 010:0218
6,KNOWLEDGE_MANAGEMENT 017:0136,SOCIAL_NETWORKS 012:0145,COMPETITIVENESS 008:0056,EDUCATION 010:0119
7,DECISION_MAKING_PROCESSES 015:0127,BUSINESS_INTELLIGENCE_APPLICATIONS 011:0116,COMPETITIVE_ADVANTAGES 007:0121,EXTRACT_TRANSFORM_AND_LOAD 009:0177
8,BUSINESS_PROCESSES 014:0111,BIG_DATA_ANALYTICS 009:0429,ORGANIZATIONAL_PERFORMANCE 007:0075,BUSINESS_INTELLIGENCE_PROJECTS 009:0142
9,PERFORMANCE_INDICATORS 013:0124,DECISION_MAKERS 009:0088,BUSINESS_INTELLIGENCE_IMPLEMENTATIONS 007:0067,HIGHER_EDUCATION 007:0049


------- 2017 -------


Unnamed: 0,CL_0,CL_1,CL_2
0,DECISION_MAKING 075:0874,BIG_DATA 059:1428,INFORMATION_ANALYSIS 161:2063
1,BUSINESS_INTELLIGENCE_SYSTEM 060:0789,DATA_MINING 046:0744,COMPETITIVE_INTELLIGENCE 124:1627
2,INFORMATION_SYSTEMS 053:0715,DATA_ANALYTICS 032:0953,OPERATIONS_RESEARCH 022:0187
3,DATA_WAREHOUSE 049:0386,ANALYTICS 023:0491,COMPETITIVE_ADVANTAGES 017:0192
4,INFORMATION_USE 028:0156,DATA_HANDLING 017:0194,COMPETITIVENESS 013:0111
5,DECISION_SUPPORT_SYSTEM 027:0223,BUSINESS_ANALYTICS 015:0401,META_DATA 013:0093
6,ARTIFICIAL_INTELLIGENCE 023:0213,SALES 013:0222,KNOWLEDGE_MANAGEMENT 011:0179
7,DECISION_MAKING_PROCESSES 019:0153,BIG_DATA_ANALYTICS 013:0181,DATA_VISUALISATION 011:0106
8,INFORMATION_TECHNOLOGIES 018:0371,PREDICTIVE_ANALYTICS 011:0269,PROJECT_MANAGERS 011:0101
9,BUSINESS_INTELLIGENCE_SOLUTIONS 017:0105,CLOUD_COMPUTING 010:0088,EDUCATION 011:0048


------- 2018 -------


Unnamed: 0,CL_0,CL_1,CL_2,CL_3
0,BIG_DATA 053:1045,INFORMATION_ANALYSIS 145:1358,INFORMATION_SYSTEMS 059:0591,DATA_WAREHOUSE 044:0456
1,DATA_MINING 047:0471,COMPETITIVE_INTELLIGENCE 119:1049,BUSINESS_INTELLIGENCE_SYSTEM 053:0430,BUSINESS_INTELLIGENCE_TOOLS 018:0102
2,DATA_ANALYTICS 041:0608,DECISION_MAKING 084:0913,INFORMATION_USE 045:0356,ON_LINE_ANALYTICAL_PROCESSING 013:0032
3,ARTIFICIAL_INTELLIGENCE 025:0248,BUSINESS_INTELLIGENCE_SOLUTIONS 020:0121,DECISION_SUPPORT_SYSTEM 031:0385,DATA_SOURCES 012:0111
4,BUSINESS_INTELLIGENCE_APPLICATIONS 019:0129,KNOWLEDGE_MANAGEMENT 019:0249,ANALYTICS 028:0498,BUSINESS_DECISIONS 009:0200
5,BUSINESS_ANALYTICS 018:0249,DECISION_MAKING_PROCESSES 017:0185,COMPETITIVENESS 017:0249,META_DATA 009:0047
6,VISUALIZATIONS 015:0072,HEALTH_CARE 014:0122,COMPETITIVE_ADVANTAGES 011:0133,BUSINESS_INTELLIGENCE_TECHNOLOGY 008:0083
7,BIG_DATA_ANALYTICS 014:0391,PERFORMANCE_INDICATORS 013:0108,DECISION_SUPPORT 011:0076,EXTRACT_TRANSFORM_AND_LOAD 008:0026
8,DATA_VISUALISATION 014:0062,DECISION_MAKERS 012:0236,SUCCESS_FACTORS 010:0078,COMPANY_PERFORMANCE 007:0150
9,MACHINE_LEARNING 012:0121,BUSINESS_PROCESSES 012:0220,BUSINESS_INTELLIGENCE_PROJECTS 010:0071,EDUCATION_INSTITUTIONS 007:0069


------- 2019 -------


Unnamed: 0,CL_0,CL_1,CL_2,CL_3
0,INFORMATION_ANALYSIS 123:1262,DECISION_MAKING 085:0586,DATA_WAREHOUSE 049:0223,DATA_ANALYTICS 037:0166
1,COMPETITIVE_INTELLIGENCE 091:0777,INFORMATION_SYSTEMS 042:0176,DATA_MINING 034:0226,ANALYTICS 032:0611
2,BIG_DATA 054:0530,BUSINESS_INTELLIGENCE_SYSTEM 039:0422,DATA_HANDLING 022:0226,DATA_VISUALISATION 016:0103
3,BUSINESS_INTELLIGENCE_TOOLS 026:0097,INFORMATION_USE 034:0102,DATA_SOURCES 014:0084,BIG_DATA_ANALYTICS 014:0200
4,BUSINESS_INTELLIGENCE_SOLUTIONS 017:0057,DECISION_SUPPORT_SYSTEM 028:0281,ON_LINE_ANALYTICAL_PROCESSING 012:0088,VISUALIZATIONS 011:0059
5,KNOWLEDGE_MANAGEMENT 016:0111,COMPETITIVENESS 024:0254,SOCIAL_NETWORKS 011:0131,SEMANTICS 008:0036
6,BUSINESS_ANALYTICS 015:0158,ARTIFICIAL_INTELLIGENCE 024:0111,DATAMART 011:0016,BUSINESS_INTELLIGENCE_TECHNIQUES 007:0098
7,INFORMATION_TECHNOLOGIES 015:0063,COMPETITIVE_ADVANTAGES 017:0069,NATURAL_LANGUAGE_PROCESSING_SYSTEMS 009:0033,DASHBOARDS 007:0036
8,PRODUCTS 014:0232,DECISION_MAKERS 013:0226,DATA_PROCESSING 009:0024,META_DATA 007:0027
9,BUSINESS_INTELLIGENCE_APPLICATIONS 014:0080,DIGITAL_STORAGE 013:0018,MARKETS 007:0066,


------- 2020 -------


Unnamed: 0,CL_0,CL_1,CL_2,CL_3
0,BIG_DATA 052:267,INFORMATION_ANALYSIS 101:580,INFORMATION_SYSTEMS 042:164,DATA_WAREHOUSE 040:117
1,DATA_ANALYTICS 040:237,DECISION_MAKING 089:408,INFORMATION_USE 029:064,BUSINESS_INTELLIGENCE_TOOLS 030:108
2,DATA_MINING 033:152,BUSINESS_INTELLIGENCE_SYSTEM 056:326,ANALYTICS 028:242,EXTRACT_TRANSFORM_AND_LOAD 016:053
3,DECISION_SUPPORT_SYSTEM 031:136,BUSINESS_INTELLIGENCE_SOLUTIONS 017:135,DATA_VISUALISATION 024:075,ON_LINE_ANALYTICAL_PROCESSING 014:085
4,COMPETITIVE_INTELLIGENCE 028:229,DATA_SOURCES 017:069,DASHBOARDS 013:035,BUSINESS_PROCESSES 011:132
5,ARTIFICIAL_INTELLIGENCE 025:158,DECISION_MAKING_PROCESSES 014:041,ORGANIZATIONAL_PERFORMANCE 012:138,BUSINESS_INTELLIGENCE_PLATFORMS 010:035
6,MACHINE_LEARNING 020:158,SUCCESS_FACTORS 013:050,COMPETITIVE_ADVANTAGES 011:063,DATAMART 010:014
7,INTERNET_OF_THINGS 014:104,DECISION_MAKERS 012:016,BUSINESS_ANALYTICS 011:050,
8,INFORMATION_TECHNOLOGIES 013:048,DATA_QUALITY 010:102,,
9,SOCIAL_MEDIA 012:112,INTELLIGENT_SYSTEMS 010:032,,


------- 2021 -------


Unnamed: 0,CL_0,CL_1,CL_2,CL_3
0,BIG_DATA 40:286,INFORMATION_ANALYSIS 96:369,DATA_WAREHOUSE 27:049,BUSINESS_INTELLIGENCE_TOOLS 31:051
1,DATA_ANALYTICS 38:290,DECISION_MAKING 64:314,DATA_MINING 25:063,DATA_VISUALISATION 18:081
2,ARTIFICIAL_INTELLIGENCE 28:163,BUSINESS_INTELLIGENCE_SYSTEM 43:266,BUSINESS_PROCESSES 17:036,COVID_19 12:105
3,MACHINE_LEARNING 22:141,INFORMATION_SYSTEMS 41:074,COMPETITIVE_INTELLIGENCE 14:151,BUSINESS_DATA 12:020
4,ANALYTICS 20:162,INFORMATION_USE 31:051,COMPETITIVENESS 14:038,VISUALIZATIONS 10:029
5,BUSINESS_ANALYTICS 18:027,DECISION_SUPPORT_SYSTEM 16:058,DATA_QUALITY 10:024,DASHBOARDS 09:010
6,BUSINESS_INTELLIGENCE_APPLICATIONS 13:096,DECISIONS 14:023,ENTERPRISE_RESOURCE_PLANNING 09:109,
7,SALES 13:077,COMPETITIVE_ADVANTAGES 13:051,MARKETS 09:018,
8,BIG_DATA_ANALYTICS 11:130,HEALTH_CARE 13:036,INDUSTRY_40 08:186,
9,SOCIAL_NETWORKS 11:038,DATA_HANDLING 12:100,DATA_COLLECTION 08:092,


------- 2022 -------


Unnamed: 0,CL_0,CL_1,CL_2,CL_3,CL_4
0,INFORMATION_ANALYSIS 107:268,DECISION_SUPPORT_SYSTEM 028:038,DATA_ANALYTICS 051:158,KNOWLEDGE_MANAGEMENT 016:050,DATA_VISUALISATION 022:075
1,DECISION_MAKING 072:214,MACHINE_LEARNING 024:044,BIG_DATA 032:043,COMPETITIVENESS 016:040,VISUALIZATIONS 015:062
2,BUSINESS_INTELLIGENCE_SYSTEM 037:058,INFORMATION_SYSTEMS 020:034,ARTIFICIAL_INTELLIGENCE 028:059,COMPETITIVE_ADVANTAGES 012:040,DASHBOARDS 014:011
3,DATA_WAREHOUSE 029:034,SALES 016:030,ANALYTICS 023:063,COMPETITIVE_INTELLIGENCE 011:019,DIGITAL_TRANSFORMATIONS 012:033
4,BUSINESS_INTELLIGENCE_TOOLS 024:038,INFORMATION_TECHNOLOGIES 015:065,DATA_MINING 022:090,INNOVATIVENESS 010:057,BUSINESS_INTELLIGENCE_DASHBOARDS 010:019
5,BUSINESS_INTELLIGENCE_SOLUTIONS 017:016,BUSINESS_PROCESSES 015:030,BUSINESS_ANALYTICS 022:025,FINANCE 009:082,PERFORMANCE_INDICATORS 010:017
6,DATA_HANDLING 015:030,INFORMATION_USE 013:010,DATA_SOURCES 014:064,STRUCTURAL_EQUATIONS 009:036,HEALTH_CARE 008:019
7,INTELLIGENT_TOOLS 012:018,BUSINESS_INTELLIGENCE_APPLICATIONS 012:013,SOCIAL_NETWORKS 013:065,PRODUCTS 008:011,HOSPITALS 007:013
8,HUMAN_RESOURCE_MANAGERS 011:009,BUSINESS_INTELLIGENCE_ARCHITECTURE 011:023,SOCIAL_MEDIA 012:061,STATISTICAL_POPULATION 007:044,BUSINESS_INTELLIGENCE_SOFTWARE 006:014
9,SUCCESS_FACTORS 010:012,DECISIONS 011:013,COVID_19 012:020,ORGANIZATIONAL_PERFORMANCE 007:027,BENCHMARKS 006:003


------- 2023 -------


Unnamed: 0,CL_0,CL_1,CL_2,CL_3,CL_4
0,INFORMATION_ANALYSIS 95:70,MACHINE_LEARNING 31:17,INSIGHTS 19:08,DATA_ANALYTICS 59:83,DATA_VISUALISATION 29:06
1,DECISION_MAKING 92:94,DATA_MINING 24:10,INFORMATION_TECHNOLOGIES 18:08,BIG_DATA 42:95,VISUALIZATIONS 14:03
2,BUSINESS_INTELLIGENCE_SYSTEM 55:52,SALES 15:06,INFORMATION_SYSTEMS 17:24,ARTIFICIAL_INTELLIGENCE 42:60,META_DATA 13:01
3,BUSINESS_INTELLIGENCE_TOOLS 36:28,COVID_19 12:12,STRUCTURAL_EQUATIONS 14:67,BUSINESS_ANALYTICS 27:23,ACTIONABLE_INSIGHTS 10:01
4,DATA_WAREHOUSE 29:65,DEEP_LEARNING 12:12,DATA_QUALITY 13:76,BUSINESS_PROCESSES 13:30,BUSINESS_INTELLIGENCE_PLATFORMS 10:01
5,DECISION_SUPPORT_SYSTEM 24:60,HEALTH_CARE 11:31,BUSINESS_INTELLIGENCE_IMPLEMENTATIONS 13:26,ANALYTICS 13:10,PROCESSES 08:01
6,COMPETITIVE_INTELLIGENCE 22:18,SOCIAL_NETWORKS 11:21,KNOWLEDGE_MANAGEMENT 13:09,BIG_DATA_ANALYTICS 12:18,PREDICTIVE_ANALYTICS 08:00
7,COMPETITIVENESS 17:05,SOCIAL_MEDIA 11:08,INFORMATION_QUALITY 11:61,DIGITAL_TRANSFORMATIONS 09:09,COMPUTER_SCIENCES 07:02
8,BUSINESS_INTELLIGENCE_SOLUTIONS 15:09,LEARNING_ALGORITHMS 10:09,SUCCESS_FACTORS 09:31,BLOCK_CHAIN 08:06,
9,COMPETITIVE_ADVANTAGES 14:00,BUSINESS_DATA 10:05,COMPANY_PERFORMANCE 09:23,INDUSTRY 07:13,
