In [1]:
from techminer2.science_mapping.co_occurrence import communities_summary
from techminer2.refine.thesaurus.descriptors import apply_thesaurus


def generate_json_data():
    """Collect the leading terms of every co-occurrence community, per year.

    Runs ``communities_summary`` on the ``descriptors`` field once for each
    year from 2016 to 2023 (``algorithm_or_dict="louvain"``,
    ``association_index="association"``, ``occ_range=(3, None)``).  The entry
    labelled 2016 uses the year filter ``(2014, 2016)``; every other entry is
    restricted to its own year.

    Returns:
        dict: ``{year: {cluster: [term, ...]}}``.  Both keys are strings, and
        clusters are numbered by their row position in the summary.  Each
        list holds at most the first ten entries of the ``Terms`` column,
        split on ``"; "``, stripped, with underscores turned into spaces and
        title-cased.
    """
    data = {}

    for year in range(2016, 2024):
        # Progress indicator: all years are printed on a single line.
        print(year, end="  ")

        # The first period pools 2014-2016 instead of a single year.
        year_filter = (2014, 2016) if year == 2016 else (year, year)

        summary = communities_summary(
            #
            # PARAMS:
            field="descriptors",
            #
            # SUMMARY PARAMS:
            conserve_counters=False,
            #
            # FILTER PARAMS:
            top_n=None,
            occ_range=(3, None),
            gc_range=(None, None),
            custom_items=None,
            #
            # NETWORK PARAMS:
            algorithm_or_dict="louvain",
            association_index="association",
            #
            # DATABASE PARAMS:
            root_dir="./",
            database="main",
            year_filter=year_filter,
            cited_by_filter=(None, None),
        )

        # Walk the rows by position, so the result does not depend on the
        # summary frame carrying a default 0..n-1 index (a label lookup such
        # as ``summary["Terms"][i]`` would).
        clusters = {}
        for position, raw_terms in enumerate(summary["Terms"].str.split("; ")):
            clusters[str(position)] = [
                term.strip().replace("_", " ").title() for term in raw_terms[:10]
            ]
        data[str(year)] = clusters

    print()
    return data


#
# Generate the data for the clusters per year.
apply_thesaurus(root_dir="./")
json_data = generate_json_data()

# One line per year: the year label followed by its number of clusters.
for year_label, year_clusters in json_data.items():
    print(f"{year_label} {len(year_clusters)}")

Note: to be able to use all crisp methods, you need to install some additional packages:  {'bayanpy', 'wurlitzer', 'infomap', 'graph_tool'}
Note: to be able to use all crisp methods, you need to install some additional packages:  {'pyclustering', 'ASLPAw'}
Note: to be able to use all crisp methods, you need to install some additional packages:  {'wurlitzer', 'infomap'}
--INFO-- Applying `descriptors.the.txt` thesaurus to author/index keywords and abstract/title words
2016  2017  2018  2019  2020  2021  2022  2023  
2016 2
2017 2
2018 2
2019 3
2020 4
2021 4
2022 4
2023 4


In [48]:
import sankey

# 2016 2
# 2017 2
# 2018 2
# 2019 3
# 2020 4
# 2021 4
# 2022 4
# 2023 4


def flip_cluster(json_data, year, c1, c2):
    """Swap the contents of clusters ``c1`` and ``c2`` within ``year``.

    The swap is done in place on ``json_data[year]``; the same ``json_data``
    object is returned so the call can be used in an assignment.
    """
    clusters_of_year = json_data[year]
    # Right-hand side is read first (c1, then c2), then both slots are set.
    clusters_of_year[c2], clusters_of_year[c1] = (
        clusters_of_year[c1],
        clusters_of_year[c2],
    )
    return json_data


# json_data = flip_cluster(json_data, '2017', '0', '1')


# Colour palette for the Sankey nodes: each constant is an RGB string that is
# placed in the `color` list handed to `sankey.make_sankey_plot`.
# NOTE(review): the names suggest one colour per research theme -- confirm.
# NOTE(review): "DECISON" and "TECHONOLOGY" are misspelled; the names are left
# untouched here because other cells may reference them.
AI_DRIVEN_HRM_GREEN = 'RGB(44,160,44)'
BIG_DATA_ANALYTICS_BLUE = 'RGB(31,119,180)'
ADOPTION_AND_DECISON_MAKING_RED = 'RGB(214,39,40)'
TECHONOLOGY_ETHICS_ORANGE = 'RGB(255,102,0)'

# `y` values handed to the plot, one per cluster, grouped by year in
# chronological order.  The group sizes follow the per-year cluster counts
# (2, 2, 2, 3, 4, 4, 4, 4).
node_y_by_year = [
    [0.02, 0.10],  # 2016
    [0.07, 0.27],  # 2017
    [0.11, 0.30],  # 2018
    [0.17, 0.55, 0.75],  # 2019
    [0.12, 0.40, 0.60, 0.80],  # 2020
    [0.10, 0.30, 0.52, 0.80],  # 2021
    [0.15, 0.48, 0.76, 0.96],  # 2022
    [0.05, 0.27, 0.58, 0.87],  # 2023
]

# One colour per cluster, laid out in the same group sizes as the `y` values.
# NOTE(review): assumed to follow the same node order as `y` -- confirm
# against `sankey.make_sankey_plot`.
node_color_by_year = [
    [BIG_DATA_ANALYTICS_BLUE, AI_DRIVEN_HRM_GREEN],
    [ADOPTION_AND_DECISON_MAKING_RED, TECHONOLOGY_ETHICS_ORANGE],
    [BIG_DATA_ANALYTICS_BLUE, AI_DRIVEN_HRM_GREEN],
    [AI_DRIVEN_HRM_GREEN, BIG_DATA_ANALYTICS_BLUE, AI_DRIVEN_HRM_GREEN],
    [
        ADOPTION_AND_DECISON_MAKING_RED,
        BIG_DATA_ANALYTICS_BLUE,
        AI_DRIVEN_HRM_GREEN,
        AI_DRIVEN_HRM_GREEN,
    ],
    [
        AI_DRIVEN_HRM_GREEN,
        TECHONOLOGY_ETHICS_ORANGE,
        ADOPTION_AND_DECISON_MAKING_RED,
        BIG_DATA_ANALYTICS_BLUE,
    ],
    [
        AI_DRIVEN_HRM_GREEN,
        ADOPTION_AND_DECISON_MAKING_RED,
        ADOPTION_AND_DECISON_MAKING_RED,
        AI_DRIVEN_HRM_GREEN,
    ],
    [
        BIG_DATA_ANALYTICS_BLUE,
        ADOPTION_AND_DECISON_MAKING_RED,
        AI_DRIVEN_HRM_GREEN,
        ADOPTION_AND_DECISON_MAKING_RED,
    ],
]

# Flatten the per-year groups into the flat lists the plotting call receives.
node_y = [value for group in node_y_by_year for value in group]
node_color = [shade for group in node_color_by_year for shade in group]

sankey_figure = sankey.make_sankey_plot(json_data, y=node_y, color=node_color)
sankey_figure = sankey_figure.update_layout(width=1500, height=900, font={'size': 6})
sankey_figure.show()

In [None]:
from techminer2.science_mapping.co_occurrence import report

def generate_report():
    """Produce the co-occurrence report of every year and file it by year.

    For each year from 2016 to 2023, ``report`` is called on the
    ``descriptors`` field with the same filter and network settings, and
    everything found in ``reports/co_occurrence/`` afterwards is moved to
    ``clusters_by_year_description/<year>/``.  The entry labelled 2016 uses
    the year filter ``(2014, 2016)``.

    The move is done with the standard library instead of a ``!mv`` shell
    escape, so the function does not rely on IPython syntax or a POSIX shell,
    and the destination folder is created when it does not exist yet.
    """
    import glob
    import os
    import shutil

    # NOTE(review): `report` is assumed to write into this folder, since this
    # is where the original shell command collected its files -- confirm.
    source_dir = os.path.join("reports", "co_occurrence")

    for year in range(2016, 2024):
        print(year)

        # The first period pools 2014-2016 instead of a single year.
        year_filter = (2014, 2016) if year == 2016 else (year, year)

        report(
            field='descriptors',
            #
            # COLUMN PARAMS:
            top_n=None,
            occ_range=(3, None),
            gc_range=(None, None),
            custom_items=None,
            #
            # NETWORK PARAMS:
            algorithm_or_dict="louvain",
            association_index="association",
            #
            # DATABASE PARAMS:
            root_dir="./",
            database="main",
            year_filter=year_filter,
            cited_by_filter=(None, None),
        )

        target_dir = os.path.join("clusters_by_year_description", str(year))
        os.makedirs(target_dir, exist_ok=True)

        # `glob` skips dot-files, like the shell's `*` did.  Naming the full
        # destination path lets a re-run replace files from an earlier run.
        for entry in glob.glob(os.path.join(source_dir, "*")):
            shutil.move(entry, os.path.join(target_dir, os.path.basename(entry)))

# Build the per-year reports and file them under
# clusters_by_year_description/<year>/.
generate_report()