In [1]:
from techminer2.science_mapping.co_occurrence import communities_summary
from techminer2.refine.thesaurus.descriptors import apply_thesaurus


def generate_json_data(occ_by_year=None, max_terms=10):
    """Collect, for every year, the terms of each descriptor community.

    For each year in ``occ_by_year`` the function calls
    ``communities_summary`` on the ``descriptors`` field, restricted to that
    single year and to items whose occurrence count reaches the year's
    threshold, using the ``"louvain"`` algorithm. The ``"Terms"`` column of
    the returned frame (a ``"; "``-separated string per community) is split
    and prettified.

    Args:
        occ_by_year: Mapping ``year -> minimum occurrences`` used as the lower
            bound of ``occ_range``. Its keys also define which years are
            processed, in iteration order. Defaults to the hand-tuned
            thresholds for 2014-2023.
        max_terms: Maximum number of terms kept per community (the first
            ``max_terms`` entries of the ``"Terms"`` string).

    Returns:
        dict: ``{"<year>": {"<row position>": [term, ...]}}`` where the inner
        key is the zero-based row position of the community in the summary
        frame and each term has underscores replaced by spaces and is
        title-cased.
    """
    if occ_by_year is None:
        # Built per call so the default mapping is never shared or mutated.
        occ_by_year = {
            2014: 14,
            2015: 10,
            2016: 6,
            2017: 8,
            2018: 5,
            2019: 7,
            2020: 9,
            2021: 8,
            2022: 6,
            2023: 6,
        }

    document_types = [
        'Article',
        'Book chapter',
        'Conference paper',
        'Review',
        'Book',
        'Conference review',
        'Short survey',
        'Letter',
    ]

    data = {}

    # Iterating the mapping keeps the processed years and their thresholds in
    # sync; a separate hard-coded range could drift from the dict keys.
    for year, min_occ in occ_by_year.items():
        #
        print(year, end="  ")

        summary = communities_summary(
            #
            # PARAMS:
            field="descriptors",
            #
            # SUMMARY PARAMS:
            conserve_counters=False,
            #
            # FILTER PARAMS:
            top_n=None,
            occ_range=(min_occ, None),
            gc_range=(None, None),
            custom_items=None,
            #
            # NETWORK PARAMS:
            algorithm_or_dict="louvain",
            association_index="association",
            #
            # DATABASE PARAMS:
            root_dir="./",
            database="main",
            year_filter=(year, year),
            cited_by_filter=(None, None),
            document_type=document_types,
        )

        # Walk the column by position: the original ``m["Terms"][cluster]``
        # was a label lookup and only worked with a default 0..n-1 index.
        data[str(year)] = {
            str(position): [
                term.strip().replace("_", " ").title()
                for term in raw_terms.split("; ")[:max_terms]
            ]
            for position, raw_terms in enumerate(summary["Terms"])
        }

    print()
    return data


#
# Apply the thesaurus, build the per-year cluster data, and list how many
# clusters each year produced (used to size the y/color lists of the plot).
apply_thesaurus(root_dir="./")
json_data = generate_json_data()

for year_label, clusters in json_data.items():
    print(f"{year_label} {len(clusters)}")

Note: to be able to use all crisp methods, you need to install some additional packages:  {'infomap', 'wurlitzer', 'bayanpy', 'graph_tool'}
Note: to be able to use all crisp methods, you need to install some additional packages:  {'pyclustering', 'ASLPAw'}
Note: to be able to use all crisp methods, you need to install some additional packages:  {'infomap', 'wurlitzer'}
--INFO-- Applying `descriptors.the.txt` thesaurus to author/index keywords and abstract/title words
2014  2015  2016  2017  2018  2019  2020  2021  2022  2023  
2014 3
2015 3
2016 4
2017 3
2018 4
2019 4
2020 4
2021 4
2022 5
2023 5


In [127]:
import sankey

# 2014 3
# 2015 3
# 2016 4
# 2017 3
# 2018 4
# 2019 4
# 2020 4
# 2021 4
# 2022 5
# 2023 5


def flip_cluster(json_data, year, c1, c2):
    """Swap the contents of clusters ``c1`` and ``c2`` within ``year``.

    The swap is done in place on ``json_data[year]``; the same ``json_data``
    object is returned so the call can be used in an assignment.
    """
    clusters = json_data[year]
    # Right-hand side reads c1 then c2, matching the original lookup order.
    clusters[c2], clusters[c1] = clusters[c1], clusters[c2]
    return json_data


# json_data = flip_cluster(json_data, '2017', '0', '1')


# Topic palette for the Sankey nodes: one constant per research topic, named
# <TOPIC>_<COLOR> and holding an 'RGB(r,g,b)' string that is passed to the
# `color` argument of sankey.make_sankey_plot below.
# NOTE(review): several names do not match the value they hold (flagged
# inline). Renaming or recolouring affects the plot, so confirm the intended
# palette before changing anything.
BIG_DATA_BLUE = 'RGB(31,119,180)'
DATA_WAREHOUSE_GREEN = 'RGB(44,160,44)'
INFORMATION_ANALYSIS_RED = 'RGB(214,39,40)'
INFORMATION_SYSTEMS_ORANGE = 'RGB(255,102,0)'
DECISION_MAKING_PURPLE = 'RGB(148,103,189)'
DATA_VISUALIZATION_BROWN = 'RGB(140,86,75)'
DATA_ANALYTICS_YELLOW = 'RGB(227,119,194)'  # NOTE(review): this value looks pink, not yellow
DECISION_SUPPORT_SYSTEMS_GRAY = 'RGB(127,127,127)'
DATA_MINING_PINK = 'RGB(188,189,34)'  # NOTE(review): same value as MACHINE_LEARNING_OLIVE; the two topics get the same colour
COMPETITIVE_INTELLIGENCE__CYAN = 'RGB(23,190,207)'  # NOTE(review): double underscore in the name, probably a typo
MACHINE_LEARNING_OLIVE = 'RGB(188,189,34)'
INSIGHTS_SKY = 'RGB(0, 191, 255)'
KNOWLEDGE_MANAGEMENT_MAGENTA = 'RGB(255, 0, 255)'
BI_TOOLS_FOREST = 'RGB(25, 25, 112)'  # NOTE(review): this value is a dark blue rather than a forest green

# Hand-tuned layout of the Sankey nodes: for every year, one
# (vertical position, topic colour) pair per cluster, in cluster order.
# The number of pairs per year must match the cluster counts printed by the
# data-generation cell (3, 3, 4, 3, 4, 4, 4, 4, 5, 5).
node_layout = {
    "2014": [
        (0.15, COMPETITIVE_INTELLIGENCE__CYAN),
        (0.45, INFORMATION_SYSTEMS_ORANGE),
        (0.75, DATA_MINING_PINK),
    ],
    "2015": [
        (0.15, INFORMATION_ANALYSIS_RED),
        (0.45, DECISION_MAKING_PURPLE),
        (0.75, DECISION_SUPPORT_SYSTEMS_GRAY),
    ],
    "2016": [
        (0.15, INFORMATION_ANALYSIS_RED),
        (0.45, BIG_DATA_BLUE),
        (0.75, INFORMATION_SYSTEMS_ORANGE),
        (0.95, DATA_WAREHOUSE_GREEN),
    ],
    "2017": [
        (0.13, DECISION_MAKING_PURPLE),
        (0.40, BIG_DATA_BLUE),
        (0.70, INFORMATION_ANALYSIS_RED),
    ],
    "2018": [
        (0.11, BIG_DATA_BLUE),
        (0.35, INFORMATION_ANALYSIS_RED),
        (0.60, INFORMATION_SYSTEMS_ORANGE),
        (0.80, DATA_WAREHOUSE_GREEN),
    ],
    "2019": [
        (0.10, INFORMATION_ANALYSIS_RED),
        (0.45, DECISION_MAKING_PURPLE),
        (0.75, DATA_WAREHOUSE_GREEN),
        (0.95, DATA_ANALYTICS_YELLOW),
    ],
    "2020": [
        (0.12, BIG_DATA_BLUE),
        (0.40, INFORMATION_ANALYSIS_RED),
        (0.60, INFORMATION_SYSTEMS_ORANGE),
        (0.80, DATA_WAREHOUSE_GREEN),
    ],
    "2021": [
        (0.11, BIG_DATA_BLUE),
        (0.40, INFORMATION_ANALYSIS_RED),
        (0.63, DATA_WAREHOUSE_GREEN),
        (0.80, BI_TOOLS_FOREST),
    ],
    "2022": [
        (0.07, INFORMATION_ANALYSIS_RED),
        (0.295, DECISION_SUPPORT_SYSTEMS_GRAY),
        (0.55, DATA_ANALYTICS_YELLOW),
        (0.76, KNOWLEDGE_MANAGEMENT_MAGENTA),
        (0.90, DATA_VISUALIZATION_BROWN),
    ],
    "2023": [
        (0.155, INFORMATION_ANALYSIS_RED),
        (0.41, MACHINE_LEARNING_OLIVE),
        (0.63, INSIGHTS_SKY),
        (0.83, DATA_ANALYTICS_YELLOW),
        (0.99, DATA_VISUALIZATION_BROWN),
    ],
}

# Flatten the per-year table into the two parallel lists the plot expects.
node_y = [position for nodes in node_layout.values() for position, _ in nodes]
node_color = [colour for nodes in node_layout.values() for _, colour in nodes]

figure = sankey.make_sankey_plot(json_data, y=node_y, color=node_color)
figure = figure.update_layout(width=1600, height=1100, font={'size': 6})
figure.show()

In [None]:
from techminer2.science_mapping.co_occurrence import report

def generate_report(
    reports_dir="reports/co_occurrence",
    output_dir="clusters_by_year_description",
):
    """Run the co-occurrence report per time window and archive it by year.

    For each year from 2016 to 2023 the function calls ``report`` on the
    ``descriptors`` field (Louvain, minimum of 3 occurrences) and then moves
    everything found in ``reports_dir`` into ``<output_dir>/<year>/``.
    The 2016 entry covers the pooled 2014-2016 window; every later year is
    reported on its own.

    Args:
        reports_dir: Directory whose contents are archived after each run.
            Presumably where ``report`` writes its files; verify against the
            library.
        output_dir: Parent directory that receives one sub-directory per
            year. Sub-directories are created when missing.

    Returns:
        None. The function is used for its file-system side effects.
    """
    # Local imports keep this cell self-contained.
    import glob
    import os
    import shutil

    for year in range(2016, 2024):
        print(year)

        # 2014-2016 are reported as a single pooled window.
        year_filter = (2014, 2016) if year == 2016 else (year, year)

        report(
            field='descriptors',
            #
            # COLUMN PARAMS:
            top_n=None,
            occ_range=(3, None),
            gc_range=(None, None),
            custom_items=None,
            #
            # NETWORK PARAMS:
            algorithm_or_dict="louvain",
            association_index="association",
            #
            # DATABASE PARAMS:
            root_dir="./",
            database="main",
            year_filter=year_filter,
            cited_by_filter=(None, None),
        )

        # Pure-Python replacement for the former `!mv <reports_dir>/* <dst>/`
        # shell magic. It also creates the destination, which `mv` required
        # to exist already. `glob` skips dot-files, like the shell `*` did.
        target_dir = os.path.join(output_dir, str(year))
        os.makedirs(target_dir, exist_ok=True)
        for entry in sorted(glob.glob(os.path.join(reports_dir, "*"))):
            shutil.move(entry, os.path.join(target_dir, os.path.basename(entry)))

# Produce and archive the reports for every window handled by generate_report.
generate_report()